From 6d75124cfe45fe86549fcff10f9e3bfbb071c07b Mon Sep 17 00:00:00 2001
From: Tiger Kaovilai
Date: Tue, 11 Nov 2025 13:20:19 -0500
Subject: [PATCH 1/6] Add support for testing OADP with custom Velero PRs in Makefile and documentation

Signed-off-by: Tiger Kaovilai
---
 Makefile                                  |  88 +++++++
 docs/developer/testing_with_velero_prs.md | 284 ++++++++++++++++++++++
 2 files changed, 372 insertions(+)
 create mode 100644 docs/developer/testing_with_velero_prs.md

diff --git a/Makefile b/Makefile
index 2190e3914a..afaae3483c 100644
--- a/Makefile
+++ b/Makefile
@@ -543,6 +543,94 @@ deploy-olm: undeploy-olm ## Build current branch operator image, bundle image, p
 	chmod -R 777 $(DEPLOY_TMP) && rm -rf $(DEPLOY_TMP)
 	$(OPERATOR_SDK) run bundle --security-context-config restricted $(THIS_BUNDLE_IMAGE) --namespace $(OADP_TEST_NAMESPACE)
 
+##@ Velero PR Testing
+
+# Configuration for Velero PR builds (every variable below can be overridden on the command line)
+VELERO_REPO_PATH ?= $(HOME)/git/velero
+# GHCR owner: git config github.user, else the owner parsed from origin's URL, else "unknown" (emptiness is tested explicitly because sed exits 0 even without a match)
+GHCR_USER ?= $(shell u="$$(git config github.user 2>/dev/null)"; [ -n "$$u" ] || u="$$(git config remote.origin.url 2>/dev/null | sed -n 's/.*github\.com[:/]\([^/]*\)\/.*/\1/p')"; echo "$${u:-unknown}")
+VELERO_IMAGE_TAG ?= pr$(VELERO_PR_NUMBER)
+VELERO_IMAGE ?= ghcr.io/$(GHCR_USER)/velero:$(VELERO_IMAGE_TAG)
+
+.PHONY: build-velero-pr
+build-velero-pr: ## Build Velero image from a PR. Usage: make build-velero-pr VELERO_PR_NUMBER=9407
+ifndef VELERO_PR_NUMBER
+	$(error VELERO_PR_NUMBER is required. Usage: make build-velero-pr VELERO_PR_NUMBER=9407)
+endif
+	@echo "Building Velero from PR #$(VELERO_PR_NUMBER)..."
+	@echo "Velero repo path: $(VELERO_REPO_PATH)"
+	@echo "Target image: $(VELERO_IMAGE)"
+	@if [ ! -d "$(VELERO_REPO_PATH)" ]; then \
+		echo "Error: Velero repository not found at $(VELERO_REPO_PATH)"; \
+		echo "Please clone it first: git clone -o openshift https://github.com/openshift/velero $(VELERO_REPO_PATH)"; \
+		exit 1; \
+	fi
+	@echo "Checking out oadp-dev branch..."
+	cd "$(VELERO_REPO_PATH)" && git checkout oadp-dev
+	@echo "Pulling latest oadp-dev..."
+	cd "$(VELERO_REPO_PATH)" && git pull openshift oadp-dev
+	@echo "Fetching PR #$(VELERO_PR_NUMBER) from upstream..."
+	cd "$(VELERO_REPO_PATH)" && git fetch upstream pull/$(VELERO_PR_NUMBER)/head:pr-$(VELERO_PR_NUMBER)
+	@echo "Cherry-picking PR commits..."
+	@echo "Note: If cherry-pick has conflicts, you may need to resolve them manually in $(VELERO_REPO_PATH)"
+	cd "$(VELERO_REPO_PATH)" && \
+	(git cherry-pick upstream/main..pr-$(VELERO_PR_NUMBER) || \
+	(if git status | grep -q "nothing to commit"; then \
+		echo "Cherry-pick resulted in empty commit (changes already present), skipping..."; \
+		git cherry-pick --skip; \
+	else \
+		echo "Cherry-pick failed with conflicts. Please resolve conflicts in $(VELERO_REPO_PATH)"; \
+		exit 1; \
+	fi))
+	@echo "Building Velero image using Dockerfile.ubi..."
+	cd "$(VELERO_REPO_PATH)" && $(CONTAINER_TOOL) build -f Dockerfile.ubi -t "$(VELERO_IMAGE)" .
+	@echo "Build complete: $(VELERO_IMAGE)"
+
+.PHONY: push-velero-pr
+push-velero-pr: ## Push Velero PR image to GHCR. Usage: make push-velero-pr VELERO_PR_NUMBER=9407
+ifndef VELERO_PR_NUMBER
+	$(error VELERO_PR_NUMBER is required. Usage: make push-velero-pr VELERO_PR_NUMBER=9407)
+endif
+	@echo "Pushing $(VELERO_IMAGE) to GitHub Container Registry..."
+	@echo "Note: Make sure you're authenticated to ghcr.io (docker login ghcr.io)"
+	$(CONTAINER_TOOL) push "$(VELERO_IMAGE)"
+	@echo "Push complete: $(VELERO_IMAGE)"
+
+.PHONY: deploy-olm-velero-pr
+deploy-olm-velero-pr: THIS_OPERATOR_IMAGE?=ttl.sh/oadp-operator-velero-pr$(VELERO_PR_NUMBER)-$(GIT_REV):$(TTL_DURATION)
+deploy-olm-velero-pr: THIS_BUNDLE_IMAGE?=ttl.sh/oadp-operator-velero-pr$(VELERO_PR_NUMBER)-bundle-$(GIT_REV):$(TTL_DURATION)
+deploy-olm-velero-pr: DEPLOY_TMP:=$(shell mktemp -d)/
+deploy-olm-velero-pr: build-velero-pr push-velero-pr undeploy-olm ## Build Velero from PR, build OADP operator with custom Velero image, and deploy via OLM. Usage: make deploy-olm-velero-pr VELERO_PR_NUMBER=9407
+ifndef VELERO_PR_NUMBER
+	$(error VELERO_PR_NUMBER is required. Usage: make deploy-olm-velero-pr VELERO_PR_NUMBER=9407)
+endif
+	@$(MAKE) versions
+	@echo "DEPLOY_TMP: $(DEPLOY_TMP)"
+	@echo "Using custom Velero image: $(VELERO_IMAGE)"
+	# Build from a patched copy in DEPLOY_TMP; always remove the copy, but keep the build's exit status so a failed build stops the deploy
+	cp -r . $(DEPLOY_TMP) && cd $(DEPLOY_TMP) && \
+		$(SED) -i 's|value: quay.io/konveyor/velero:latest|value: $(VELERO_IMAGE)|g' config/manager/manager.yaml && \
+		IMG=$(THIS_OPERATOR_IMAGE) BUNDLE_IMG=$(THIS_BUNDLE_IMAGE) \
+		$(MAKE) docker-build docker-push bundle bundle-build bundle-push; rc=$$?; \
+		chmod -R 777 $(DEPLOY_TMP) && rm -rf $(DEPLOY_TMP); exit $$rc
+	$(OPERATOR_SDK) run bundle --security-context-config restricted $(THIS_BUNDLE_IMAGE) --namespace $(OADP_TEST_NAMESPACE)
+	@echo ""
+	@echo "=========================================================================="
+	@echo "OADP operator deployed with custom Velero image from PR #$(VELERO_PR_NUMBER)"
+	@echo "Velero image: $(VELERO_IMAGE)"
+	@echo "=========================================================================="
+
+.PHONY: undeploy-olm-velero-pr
+undeploy-olm-velero-pr: undeploy-olm ## Cleanup OADP deployment and reset Velero repo to oadp-dev branch (discards local changes there). Usage: make undeploy-olm-velero-pr VELERO_PR_NUMBER=9407
+	@echo "Cleaning up Velero repository..."
+	@if [ -d "$(VELERO_REPO_PATH)" ]; then \
+		echo "Resetting $(VELERO_REPO_PATH) to oadp-dev branch..."; \
+		(cd "$(VELERO_REPO_PATH)" && git checkout oadp-dev && git reset --hard openshift/oadp-dev); \
+		echo "Deleting PR branch if it exists..."; \
+		[ -z "$(VELERO_PR_NUMBER)" ] || (cd "$(VELERO_REPO_PATH)" && git branch -D pr-$(VELERO_PR_NUMBER) 2>/dev/null) || true; \
+	fi
+	@echo "Cleanup complete"
+
 .PHONY: undeploy-olm
 undeploy-olm: login-required operator-sdk ## Uninstall current branch operator via OLM
 	$(OC_CLI) whoami # Check if logged in
diff --git a/docs/developer/testing_with_velero_prs.md b/docs/developer/testing_with_velero_prs.md
new file mode 100644
index 0000000000..9c2cbe2887
--- /dev/null
+++ b/docs/developer/testing_with_velero_prs.md
@@ -0,0 +1,284 @@
+# Testing OADP with Custom Velero PRs
+
+This guide explains how to test OADP operator with a custom Velero build from an upstream PR. This is useful when you need to verify OADP compatibility with upcoming Velero changes or test bug fixes before they're merged.
+
+## Overview
+
+The `make deploy-olm-velero-pr` target automates the process of:
+1. Fetching a PR from vmware-tanzu/velero
+2. Cherry-picking it onto openshift/velero's oadp-dev branch
+3. Building a custom Velero image
+4. Deploying OADP operator with that custom Velero image
+
+## Prerequisites
+
+### 1. Velero Repository
+
+Clone the OpenShift Velero fork and configure remotes:
+
+```bash
+# Clone the repository, naming the remote "openshift" (the make targets pull from that remote)
+git clone -o openshift https://github.com/openshift/velero ~/git/velero
+cd ~/git/velero
+
+# Add upstream remote
+git remote add upstream https://github.com/vmware-tanzu/velero
+git fetch --all
+```
+
+If you want to use a different location, set `VELERO_REPO_PATH`:
+```bash
+export VELERO_REPO_PATH=/path/to/your/velero
+```
+
+### 2. 
Container Registry Authentication + +Authenticate to GitHub Container Registry: + +```bash +# Create a GitHub Personal Access Token with 'write:packages' scope +# Then login: +docker login ghcr.io -u YOUR_GITHUB_USERNAME +# Or with podman: +podman login ghcr.io -u YOUR_GITHUB_USERNAME +``` + +### 3. OpenShift Cluster Access + +Ensure you're logged into an OpenShift cluster: + +```bash +oc login +``` + +## Usage + +### Quick Start - Full Workflow + +Deploy OADP with a custom Velero build from PR #9407: + +```bash +make deploy-olm-velero-pr VELERO_PR_NUMBER=9407 +``` + +This single command will: +1. Build the Velero image from the PR +2. Push it to `ghcr.io/<GHCR_USER>/velero:pr9407` +3. Build OADP operator with the custom image reference +4. Deploy via OLM to your cluster + +### Custom Parameters + +Override default settings: + +```bash +make deploy-olm-velero-pr \ + VELERO_PR_NUMBER=9407 \ + GHCR_USER=myusername \ + VELERO_REPO_PATH=/custom/path/to/velero \ + VELERO_IMAGE_TAG=custom-tag +``` + +**Available parameters:** +- `VELERO_PR_NUMBER` (required): PR number from vmware-tanzu/velero +- `VELERO_REPO_PATH` (optional): Path to velero repository (default: `~/git/velero`) +- `GHCR_USER` (optional): GitHub username for GHCR (default: `git config github.user`, falling back to the owner in the `origin` remote URL) +- `VELERO_IMAGE_TAG` (optional): Custom image tag (default: `pr<VELERO_PR_NUMBER>`) +- `VELERO_IMAGE` (optional): Full image override + +### Step-by-Step Workflow + +If you need more control over the process: + +#### 1. Build Velero Image + +```bash +make build-velero-pr VELERO_PR_NUMBER=9407 +``` + +This will: +- Checkout `oadp-dev` branch +- Fetch PR from upstream +- Cherry-pick commits +- Build using `Dockerfile.ubi` + +#### 2. Push to Registry + +```bash +make push-velero-pr VELERO_PR_NUMBER=9407 +``` + +#### 3. 
Deploy OADP (skip Velero build/push) + +If you've already built and pushed the image separately: + +```bash +make deploy-olm RELATED_IMAGE_VELERO=ghcr.io/myuser/velero:pr9407 +``` + +### Cleanup + +Remove the deployment and reset the Velero repository: + +```bash +make undeploy-olm-velero-pr VELERO_PR_NUMBER=9407 +``` + +This will: +- Undeploy OADP operator via OLM +- Reset Velero repository to clean `oadp-dev` branch +- Delete the PR branch + +## Troubleshooting + +### Cherry-pick Conflicts + +If the PR has conflicts with `oadp-dev`: + +```bash +cd ~/git/velero +# Resolve conflicts manually +git status +# Edit conflicting files +git add <resolved-files> +git cherry-pick --continue +``` + +Then continue with the build: + +```bash +make build-velero-pr VELERO_PR_NUMBER=9407 +``` + +### Build Failures + +Check the Velero Dockerfile.ubi requirements: +```bash +cd ~/git/velero +cat Dockerfile.ubi +``` + +Ensure all build dependencies are available. + +### Image Push Issues + +Verify GHCR authentication: +```bash +docker login ghcr.io +# Test with a simple push +docker pull alpine:latest +docker tag alpine:latest ghcr.io/YOUR_GITHUB_USERNAME/test:latest +docker push ghcr.io/YOUR_GITHUB_USERNAME/test:latest +``` + +### Verify Deployment + +Check that the custom Velero image is being used: + +```bash +oc get deployment velero -n openshift-adp -o jsonpath='{.spec.template.spec.containers[0].image}' +``` + +Expected output: `ghcr.io/<GHCR_USER>/velero:pr9407` + +## Example Workflow + +Complete example testing Velero PR #9407: + +```bash +# 1. Ensure prerequisites +oc login https://api.my-cluster.com:6443 +docker login ghcr.io + +# 2. Deploy OADP with custom Velero +make deploy-olm-velero-pr VELERO_PR_NUMBER=9407 + +# 3. Verify deployment +oc get pods -n openshift-adp +oc get deployment velero -n openshift-adp -o yaml | grep image: + +# 4. Test your scenario +# ... run your tests ... + +# 5. 
Cleanup +make undeploy-olm-velero-pr VELERO_PR_NUMBER=9407 +``` + +## How It Works + +### Image Reference Replacement + +The target modifies `config/manager/manager.yaml` to replace: +```yaml +- name: RELATED_IMAGE_VELERO + value: quay.io/konveyor/velero:latest +``` + +With: +```yaml +- name: RELATED_IMAGE_VELERO + value: ghcr.io/<GHCR_USER>/velero:pr<VELERO_PR_NUMBER> +``` + +This environment variable is read by the OADP operator to determine which Velero image to deploy. + +### Temporary Build Directory + +Similar to `deploy-olm`, the target uses a temporary directory for the build to avoid modifying your working tree. All changes to `config/manager/manager.yaml` are isolated to the build. + +### Bundle Image Naming + +The bundle image includes the PR number for easy identification: +``` +ttl.sh/oadp-operator-velero-pr9407-bundle-<GIT_REV>:1h +``` + +## Advanced Usage + +### Testing Multiple PRs + +To test multiple Velero PRs in sequence: + +```bash +for pr in 9407 9408 9409; do + echo "Testing PR #$pr" + make deploy-olm-velero-pr VELERO_PR_NUMBER=$pr + # Run your tests + ./run-tests.sh + make undeploy-olm-velero-pr VELERO_PR_NUMBER=$pr +done +``` + +### Using Local Velero Changes + +If you have local changes in the Velero repository: + +```bash +cd ~/git/velero +# Make your changes +git add . 
+git commit -m "Local changes" + +# Build the image directly; `make build-velero-pr` always fetches and cherry-picks an upstream PR +docker build -f Dockerfile.ubi -t ghcr.io/<GHCR_USER>/velero:local . +cd ~/oadp-operator +make push-velero-pr VELERO_PR_NUMBER=local VELERO_IMAGE=ghcr.io/<GHCR_USER>/velero:local +``` + +### Persistent Image Tag + +Use a custom tag that doesn't include the PR number: + +```bash +make deploy-olm-velero-pr \ + VELERO_PR_NUMBER=9407 \ + VELERO_IMAGE_TAG=my-test-build +``` + +Image will be: `ghcr.io/<GHCR_USER>/velero:my-test-build` + +## Related Documentation + +- [Install from Source](install_from_source.md) - General development deployment guide +- [Testing Guide](testing/TESTING.md) - E2E testing documentation +- [OLM Hacking](olm_hacking.md) - Working with OLM bundles From 94a86a68af4f05cd45ee2cd2bdb9bb3dab7bc1d5 Mon Sep 17 00:00:00 2001 From: Tiger Kaovilai Date: Tue, 11 Nov 2025 20:52:49 -0500 Subject: [PATCH 2/6] Implement DownloadRequest cleanup in tearDownDPAResources and add DeleteDownloadRequests function Signed-off-by: Tiger Kaovilai --- tests/e2e/backup_restore_suite_test.go | 7 ++++- tests/e2e/lib/apps.go | 42 ++++++++++++++++++++++++++ 2 files changed, 48 insertions(+), 1 deletion(-) diff --git a/tests/e2e/backup_restore_suite_test.go b/tests/e2e/backup_restore_suite_test.go index 7383c91229..f6e317803f 100644 --- a/tests/e2e/backup_restore_suite_test.go +++ b/tests/e2e/backup_restore_suite_test.go @@ -331,7 +331,12 @@ func tearDownDPAResources(brCase BackupRestoreCase) { gomega.Expect(err).ToNot(gomega.HaveOccurred()) } - err := dpaCR.Delete() + // Clean up DownloadRequests to prevent accumulation of stale resources + log.Printf("Deleting DownloadRequests in namespace %s", namespace) + err := lib.DeleteDownloadRequests(dpaCR.Client, namespace) + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + + err = dpaCR.Delete() gomega.Expect(err).ToNot(gomega.HaveOccurred()) } diff --git a/tests/e2e/lib/apps.go b/tests/e2e/lib/apps.go index ebabfe67c7..7f6a771e8c 100755 --- a/tests/e2e/lib/apps.go +++ b/tests/e2e/lib/apps.go @@ -22,6 +22,7 @@ import ( 
openshiftconfigv1 "github.com/openshift/api/config/v1" security "github.com/openshift/api/security/v1" templatev1 "github.com/openshift/api/template/v1" + velero "github.com/vmware-tanzu/velero/pkg/apis/velero/v1" "github.com/vmware-tanzu/velero/pkg/label" appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" @@ -167,6 +168,40 @@ func UninstallApplication(ocClient client.Client, file string) error { return nil } +// DeleteDownloadRequests deletes all DownloadRequest resources in a namespace. +// This is useful for cleaning up stale DownloadRequests that may have been created +// by previous test runs but never processed by the Velero server. +func DeleteDownloadRequests(ocClient client.Client, namespace string) error { + log.Printf("Deleting all DownloadRequests in namespace %s", namespace) + downloadRequestList := &velero.DownloadRequestList{} + err := ocClient.List(context.Background(), downloadRequestList, client.InNamespace(namespace)) + if err != nil { + log.Printf("Error listing DownloadRequests in namespace %s: %v", namespace, err) + return err + } + + if len(downloadRequestList.Items) == 0 { + log.Printf("No DownloadRequests found in namespace %s", namespace) + return nil + } + + log.Printf("Found %d DownloadRequests to delete in namespace %s", len(downloadRequestList.Items), namespace) + for _, dr := range downloadRequestList.Items { + err = ocClient.Delete(context.Background(), &dr) + if apierrors.IsNotFound(err) { + log.Printf("DownloadRequest %s already deleted, skipping", dr.Name) + continue + } else if err != nil { + log.Printf("Error deleting DownloadRequest %s: %v", dr.Name, err) + return err + } + log.Printf("Successfully deleted DownloadRequest %s", dr.Name) + } + + log.Printf("Finished deleting DownloadRequests in namespace %s", namespace) + return nil +} + func HasDCsInNamespace(ocClient client.Client, namespace string) (bool, error) { dcList := &ocpappsv1.DeploymentConfigList{} err := ocClient.List(context.Background(), dcList, 
client.InNamespace(namespace)) @@ -388,6 +423,13 @@ func RunMustGather(artifact_dir string, clusterClient client.Client) error { return err } + // Remove existing must-gather directory if it exists to avoid "Directory not empty" error + mustGatherDestDir := filepath.Join(artifact_dir, "must-gather") + err = os.RemoveAll(mustGatherDestDir) + if err != nil { + return err + } + _, err = exec.Command("mv", filepath.Dir(executablePath)+"/must-gather", artifact_dir).Output() if err != nil { return err From 48a33184dca516a1aa51d898fefe8e4d00797e9c Mon Sep 17 00:00:00 2001 From: Tiger Kaovilai Date: Tue, 11 Nov 2025 21:27:07 -0500 Subject: [PATCH 3/6] Clean up stale DownloadRequests before must-gather to prevent validation failures Signed-off-by: Tiger Kaovilai --- tests/e2e/backup_restore_suite_test.go | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tests/e2e/backup_restore_suite_test.go b/tests/e2e/backup_restore_suite_test.go index f6e317803f..c6dd99b6ed 100644 --- a/tests/e2e/backup_restore_suite_test.go +++ b/tests/e2e/backup_restore_suite_test.go @@ -373,6 +373,13 @@ var _ = ginkgo.Describe("Backup and restore tests", ginkgo.Ordered, func() { // using kopia to collect more info (DaemonSet) waitOADPReadiness(lib.KOPIA) + // Clean up any stale DownloadRequests before running must-gather + // This prevents must-gather validation from failing due to unprocessed DownloadRequests + // from previous test runs that timed out or failed to be processed by Velero server + log.Printf("Cleaning up stale DownloadRequests before must-gather") + err := lib.DeleteDownloadRequests(dpaCR.Client, namespace) + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + //DPT Test and MustGather should be paired together log.Printf("skipMustGather: %v", skipMustGather) if !skipMustGather { From 6b5e778a5f84f650d135a24f8c138baa042aaad9 Mon Sep 17 00:00:00 2001 From: Tiger Kaovilai Date: Tue, 11 Nov 2025 22:10:03 -0500 Subject: [PATCH 4/6] Improve logging and error handling for stale 
DownloadRequests cleanup before must-gather Signed-off-by: Tiger Kaovilai --- tests/e2e/backup_restore_suite_test.go | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/tests/e2e/backup_restore_suite_test.go b/tests/e2e/backup_restore_suite_test.go index c6dd99b6ed..6d21d644b9 100644 --- a/tests/e2e/backup_restore_suite_test.go +++ b/tests/e2e/backup_restore_suite_test.go @@ -376,8 +376,11 @@ var _ = ginkgo.Describe("Backup and restore tests", ginkgo.Ordered, func() { // Clean up any stale DownloadRequests before running must-gather // This prevents must-gather validation from failing due to unprocessed DownloadRequests // from previous test runs that timed out or failed to be processed by Velero server - log.Printf("Cleaning up stale DownloadRequests before must-gather") - err := lib.DeleteDownloadRequests(dpaCR.Client, namespace) + log.Printf("Cleaning up stale DownloadRequests in namespace %s before must-gather", dpaCR.Namespace) + err := lib.DeleteDownloadRequests(dpaCR.Client, dpaCR.Namespace) + if err != nil { + log.Printf("ERROR: Failed to delete DownloadRequests: %v", err) + } gomega.Expect(err).ToNot(gomega.HaveOccurred()) //DPT Test and MustGather should be paired together @@ -398,7 +401,7 @@ var _ = ginkgo.Describe("Backup and restore tests", ginkgo.Ordered, func() { log.Printf("Skipping MustGather and DataProtectionTest") } - err := dpaCR.Delete() + err = dpaCR.Delete() gomega.Expect(err).ToNot(gomega.HaveOccurred()) }) From d3df5a48b2309eb35bdf2cd4e13f5d07c208204d Mon Sep 17 00:00:00 2001 From: Tiger Kaovilai Date: Tue, 11 Nov 2025 22:10:11 -0500 Subject: [PATCH 5/6] Fetch DownloadRequests before must-gather execution to ensure accurate reporting of existing requests Signed-off-by: Tiger Kaovilai --- must-gather/pkg/cli.go | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/must-gather/pkg/cli.go b/must-gather/pkg/cli.go index 2216fb9588..404d145ed6 100644 --- a/must-gather/pkg/cli.go +++ 
b/must-gather/pkg/cli.go @@ -320,16 +320,20 @@ For more information, check OADP must-gather documentation: https://docs.redhat. templates.ReplaceCloudStoragesSection(outputPath, cloudStorageList) templates.ReplaceBackupStorageLocationsSection(outputPath, backupStorageLocationList) templates.ReplaceVolumeSnapshotLocationsSection(outputPath, volumeSnapshotLocationList) - // this creates DownloadRequests CRs - templates.ReplaceBackupsSection(outputPath, backupList, clusterClient, deleteBackupRequestList, podVolumeBackupList, RequestTimeout, SkipTLS) - templates.ReplaceRestoresSection(outputPath, restoreList, clusterClient, podVolumeRestoreList, RequestTimeout, SkipTLS) + // Fetch DownloadRequests BEFORE collecting logs to avoid reporting DownloadRequests created by must-gather itself + // The ReplaceBackupsSection and ReplaceRestoresSection calls below create temporary DownloadRequests that may timeout + // We only want to report on DownloadRequests that existed before must-gather ran downloadRequestList := &velerov1.DownloadRequestList{} err = gather.AllResources(clusterClient, downloadRequestList) if err != nil { fmt.Println(err) } + // this creates DownloadRequests CRs + templates.ReplaceBackupsSection(outputPath, backupList, clusterClient, deleteBackupRequestList, podVolumeBackupList, RequestTimeout, SkipTLS) + templates.ReplaceRestoresSection(outputPath, restoreList, clusterClient, podVolumeRestoreList, RequestTimeout, SkipTLS) + templates.ReplaceSchedulesSection(outputPath, scheduleList) templates.ReplaceBackupRepositoriesSection(outputPath, backupRepositoryList) templates.ReplaceDataUploadsSection(outputPath, dataUploadList) From 4c3f5c67b93aed6457d8abd1ef33e2482db590bf Mon Sep 17 00:00:00 2001 From: Tiger Kaovilai Date: Wed, 12 Nov 2025 00:21:18 -0500 Subject: [PATCH 6/6] Refactor startup probes in MySQL configurations to use tcpSocket for improved reliability and reduce failure thresholds Signed-off-by: Tiger Kaovilai --- 
.../mysql-persistent/mysql-persistent-csi.yaml | 17 ++++------------- .../mysql-persistent-twovol-csi.yaml | 17 ++++------------- .../mysql-persistent/mysql-persistent.yaml | 17 ++++------------- 3 files changed, 12 insertions(+), 39 deletions(-) diff --git a/tests/e2e/sample-applications/mysql-persistent/mysql-persistent-csi.yaml b/tests/e2e/sample-applications/mysql-persistent/mysql-persistent-csi.yaml index 08cbf922f1..619c19404c 100644 --- a/tests/e2e/sample-applications/mysql-persistent/mysql-persistent-csi.yaml +++ b/tests/e2e/sample-applications/mysql-persistent/mysql-persistent-csi.yaml @@ -111,22 +111,13 @@ items: periodSeconds: 10 timeoutSeconds: 5 startupProbe: - exec: - command: - - /usr/bin/timeout - - 1s - - /usr/bin/mysql - - $(MYSQL_DATABASE) - - -h - - 127.0.0.1 - - -u$(MYSQL_USER) - - -p$(MYSQL_PASSWORD) - - -e EXIT + tcpSocket: + port: mysql initialDelaySeconds: 5 - periodSeconds: 30 + periodSeconds: 10 timeoutSeconds: 2 successThreshold: 1 - failureThreshold: 40 # 40x30sec before restart pod + failureThreshold: 60 # 60x10sec = 10 minutes before restart pod - image: docker.io/curlimages/curl:8.5.0 name: curl-tool command: ["/bin/sleep", "infinity"] diff --git a/tests/e2e/sample-applications/mysql-persistent/mysql-persistent-twovol-csi.yaml b/tests/e2e/sample-applications/mysql-persistent/mysql-persistent-twovol-csi.yaml index e7b758e271..b1df3f6a61 100644 --- a/tests/e2e/sample-applications/mysql-persistent/mysql-persistent-twovol-csi.yaml +++ b/tests/e2e/sample-applications/mysql-persistent/mysql-persistent-twovol-csi.yaml @@ -103,22 +103,13 @@ items: periodSeconds: 10 timeoutSeconds: 5 startupProbe: - exec: - command: - - /usr/bin/timeout - - 1s - - /usr/bin/mysql - - $(MYSQL_DATABASE) - - -h - - 127.0.0.1 - - -u$(MYSQL_USER) - - -p$(MYSQL_PASSWORD) - - -e EXIT + tcpSocket: + port: mysql initialDelaySeconds: 5 - periodSeconds: 30 + periodSeconds: 10 timeoutSeconds: 2 successThreshold: 1 - failureThreshold: 40 # 40x30sec before restart pod + 
failureThreshold: 60 # 60x10sec = 10 minutes before restart pod - image: docker.io/curlimages/curl:8.5.0 name: curl-tool command: ["/bin/sleep", "infinity"] diff --git a/tests/e2e/sample-applications/mysql-persistent/mysql-persistent.yaml b/tests/e2e/sample-applications/mysql-persistent/mysql-persistent.yaml index d658c9e082..a72f02a8a3 100644 --- a/tests/e2e/sample-applications/mysql-persistent/mysql-persistent.yaml +++ b/tests/e2e/sample-applications/mysql-persistent/mysql-persistent.yaml @@ -124,22 +124,13 @@ items: periodSeconds: 10 timeoutSeconds: 5 startupProbe: - exec: - command: - - /usr/bin/timeout - - 1s - - /usr/bin/mysql - - $(MYSQL_DATABASE) - - -h - - 127.0.0.1 - - -u$(MYSQL_USER) - - -p$(MYSQL_PASSWORD) - - -e EXIT + tcpSocket: + port: mysql initialDelaySeconds: 5 - periodSeconds: 30 + periodSeconds: 10 timeoutSeconds: 2 successThreshold: 1 - failureThreshold: 40 # 40x30sec before restart pod + failureThreshold: 60 # 60x10sec = 10 minutes before restart pod - image: docker.io/curlimages/curl:8.5.0 name: curl-tool command: ["/bin/sleep", "infinity"]