diff --git a/.github/workflows/PR-close.yaml b/.github/workflows/PR-close.yaml
new file mode 100644
index 000000000..b364fdbcf
--- /dev/null
+++ b/.github/workflows/PR-close.yaml
@@ -0,0 +1,126 @@
+# Tears down the per-PR development cluster, and any clusters its infra server
+# still manages, when a pull request is closed.
+name: PR Workflow
+
+on:
+  pull_request:
+    types:
+      - closed
+
+defaults:
+  run:
+    working-directory: go/src/github.com/stackrox/infra
+
+# Runs that share this group are serialized by GitHub Actions.
+concurrency: pr-${{ github.ref }}
+
+env:
+  CLUSTER_NAME: infra-pr-${{ github.event.pull_request.number }}
+  GH_TOKEN: ${{ secrets.RHACS_BOT_GITHUB_TOKEN }}
+
+jobs:
+  destroy:
+    runs-on: ubuntu-latest
+    env:
+      KUBECONFIG: /tmp/kubeconfig
+      INFRA_TOKEN: ${{ secrets.INFRA_TOKEN }}
+      INFRACTL: bin/infractl -k -e localhost:8443
+      USE_GKE_GCLOUD_AUTH_PLUGIN: "True"
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+          ref: ${{ github.event.pull_request.head.sha }}
+          path: go/src/github.com/stackrox/infra
+
+      - name: Authenticate to GCloud
+        uses: google-github-actions/auth@v2
+        with:
+          credentials_json: ${{ secrets.INFRA_CI_AUTOMATION_GCP_SA }}
+
+      - name: Set up Cloud SDK
+        uses: "google-github-actions/setup-gcloud@v2"
+        with:
+          install_components: "gke-gcloud-auth-plugin"
+
+      - name: Download production infractl
+        uses: stackrox/actions/infra/install-infractl@v1
+
+      # Poll once a minute (up to 60 tries) until the dev cluster reports READY,
+      # then fetch its artifacts into /tmp/ and wait for the infra-server rollout.
+      - name: Wait for cluster
+        run: |
+          set -xo pipefail
+          for I in {1..60}; do
+            ~/.local/bin/infractl get "$CLUSTER_NAME" | tee >( cat >&2 ) | grep READY && break || echo $?
+            sleep 60
+          done
+          ~/.local/bin/infractl artifacts "$CLUSTER_NAME" -d /tmp/
+          kubectl get nodes -o wide || true
+          for I in {1..5}; do
+            kubectl -n infra rollout status deploy/infra-server-deployment --timeout=300s && break || echo $?
+            sleep 60
+          done
+
+      # Port-forward the dev infra server to localhost:8443 and pull the infractl
+      # binary it serves into bin/infractl.
+      - name: Download branch infractl
+        run: |
+          kubectl -n infra port-forward svc/infra-server-service 8443:8443 > /dev/null 2>&1 &
+          sleep 10
+
+          kubectl -n infra logs -l app=infra-server --tail=-1
+
+          make pull-infractl-from-dev-server
+          $INFRACTL version
+
+          kill %1
+
+      - name: Destroy created clusters still running
+        env:
+          INFRA_TOKEN: ${{ secrets.INFRA_TOKEN_DEV }}
+        run: |
+          set -x
+          kubectl -n infra port-forward svc/infra-server-service 8443:8443 > /dev/null 2>&1 &
+          sleep 10
+
+          echo 'For 30 minutes, list and delete child clusters that are not failed.'
+          for I in {1..30}; do
+            $INFRACTL list --all --quiet
+            count=0
+            # Continue if running or destroying
+            for cluster in $($INFRACTL list --all --quiet --status='READY,CREATING,DESTROYING'); do
+              echo "$(( ++count )): ${cluster}"
+            done
+            if [[ $count -gt 0 ]]; then
+              echo 'Active clusters found. Deleting ...' | tee -a "$GITHUB_STEP_SUMMARY"
+            else
+              echo 'No active clusters found.'
+              break
+            fi
+            # shellcheck disable=SC2086
+            for cluster in $($INFRACTL list --all --quiet --status='READY,CREATING'); do
+              $INFRACTL get "${cluster}" \
+                && $INFRACTL delete "${cluster}"
+            done \
+              | tee -a "$GITHUB_STEP_SUMMARY"
+            echo "(sleep 1 minute then check again. try $I/30)"
+            sleep 60
+          done
+
+          kill %1
+
+      # Runs even when an earlier step failed so the dev cluster is not left behind,
+      # matching the same step in PR.yaml.
+      - name: Destroy PR dev cluster
+        if: always()
+        run: |
+          ~/.local/bin/infractl delete "$CLUSTER_NAME"
+          echo "Deleted PR dev cluster ${CLUSTER_NAME}" | tee -a "$GITHUB_STEP_SUMMARY"
+
+      - name: Comment on PR
+        run: |
+          gh pr comment "${{ github.event.number }}" \
+            --body "Development cluster deleted."
diff --git a/.github/workflows/PR.yaml b/.github/workflows/PR.yaml index dc9670ad9..21ff23a1c 100644 --- a/.github/workflows/PR.yaml +++ b/.github/workflows/PR.yaml @@ -44,9 +44,6 @@ jobs: - build-and-push - create-dev-cluster runs-on: ubuntu-latest - container: - image: quay.io/stackrox-io/apollo-ci:stackrox-test-0.4.4 - steps: - name: Checkout uses: actions/checkout@v4 @@ -66,10 +63,8 @@ jobs: - build-and-push - create-dev-cluster runs-on: ubuntu-latest - container: - image: quay.io/stackrox-io/apollo-ci:stackrox-test-0.4.4 env: - KUBECONFIG: /github/home/artifacts/kubeconfig + KUBECONFIG: /tmp/kubeconfig INFRA_TOKEN: ${{ secrets.INFRA_TOKEN }} INFRACTL: bin/infractl -k -e localhost:8443 USE_GKE_GCLOUD_AUTH_PLUGIN: "True" @@ -101,7 +96,7 @@ jobs: - name: Download artifacts run: | - /github/home/.local/bin/infractl artifacts "$CLUSTER_NAME" -d /github/home/artifacts >> "$GITHUB_STEP_SUMMARY" + ~/.local/bin/infractl artifacts "$CLUSTER_NAME" -d /tmp/ >> "$GITHUB_STEP_SUMMARY" kubectl get nodes -o wide || true - name: Deploy infra to dev cluster @@ -143,6 +138,11 @@ jobs: kill %1 + - name: Comment on PR + run: | + gh pr comment "${{ github.event.number }}" \ + --body "Deployment to development cluster completed." 
+ - name: Install Argo CLI run: | ARGO_VERSION=$(grep "github.com/argoproj/argo-workflows/v3" go.mod | awk '{ print $2 }') @@ -157,10 +157,89 @@ jobs: run: | make argo-workflow-lint + - name: create example clusters to delete + env: + INFRA_TOKEN: ${{ secrets.INFRA_TOKEN_DEV }} + run: | + set +e + set -x + kubectl -n infra port-forward svc/infra-server-service 8443:8443 > /dev/null 2>&1 & + sleep 10 + + $INFRACTL create gke-default tstlonglifer --lifespan 8h + $INFRACTL create gke-default tstshortlifer --lifespan 30s + $INFRACTL create gke-default tstdestroyed --lifespan 2h + + $INFRACTL delete tstdestroyed --json + + $INFRACTL list --all --quiet + + kill %1 + - name: Run Go e2e tests env: INFRA_TOKEN: ${{ secrets.INFRA_TOKEN_DEV }} run: | kubectl -n infra port-forward svc/infra-server-service 8443:8443 > /dev/null 2>&1 & sleep 5 + GOPATH=$(go env GOPATH) + export GOPATH make go-e2e-tests + + - name: Wait for cluster + if: always() + run: | + set -xo pipefail + for I in {1..60}; do + ~/.local/bin/infractl get "$CLUSTER_NAME" | tee >( cat >&2 ) | grep READY && break || echo $? + sleep 60 + done + ~/.local/bin/infractl artifacts "$CLUSTER_NAME" -d /tmp/ + kubectl get nodes -o wide || true + for I in {1..5}; do + kubectl -n infra rollout status deploy/infra-server-deployment --timeout=300s && break || echo $? + sleep 60 + done + + - name: Destroy created clusters still running + if: always() + env: + INFRA_TOKEN: ${{ secrets.INFRA_TOKEN_DEV }} + run: | + set -x + kubectl -n infra port-forward svc/infra-server-service 8443:8443 > /dev/null 2>&1 & + sleep 10 + + echo 'For 30 minutes, list and delete child clusters that are not failed.' + for I in {1..30}; do + $INFRACTL list --all + count=0 + # Continue if running or destroying + for cluster in $($INFRACTL list --all --quiet --status='READY,CREATING,DESTROYING'); do + echo "$(( ++count )): ${cluster}" + done + if [[ $count -gt 0 ]]; then + echo 'Active clusters found. Deleting ...' 
| tee -a "$GITHUB_STEP_SUMMARY" + else + echo 'No active clusters found.' + break + fi + # shellcheck disable=SC2086 + for cluster in $($INFRACTL list --all --quiet --status='READY,CREATING'); do + $INFRACTL get "${cluster}" \ + && $INFRACTL delete "${cluster}" + done \ + | tee -a "$GITHUB_STEP_SUMMARY" + echo "(sleep 1 minute then check again. try $I/30)" + sleep 60 + done + + kill %1 + + - name: Destroy PR dev cluster + if: always() + run: | + ~/.local/bin/infractl delete "$CLUSTER_NAME" + echo "Deleted PR dev cluster ${CLUSTER_NAME}" | tee -a "$GITHUB_STEP_SUMMARY" + gh pr comment "${{ github.event.number }}" \ + --body "Development cluster deleted." diff --git a/Makefile b/Makefile index 0ccafb20b..f48d7c51a 100644 --- a/Makefile +++ b/Makefile @@ -188,7 +188,7 @@ pull-infractl-from-dev-server: @mkdir -p bin @rm -f bin/infractl set -o pipefail; \ - curl --retry 3 --insecure --silent --show-error --fail --location https://localhost:8443/v1/cli/$(shell go env GOOS)/$(shell go env GOARCH)/upgrade \ + curl --retry 3 --retry-all-errors --retry-delay 5 --insecure --silent --show-error --fail --location https://localhost:8443/v1/cli/$(shell go env GOOS)/$(shell go env GOARCH)/upgrade \ | jq -r ".result.fileChunk" \ | base64 -d \ > bin/infractl diff --git a/scripts/add-PR-comment-for-deploy-to-dev.sh b/scripts/add-PR-comment-for-deploy-to-dev.sh index 9a3ebd926..da452f40c 100755 --- a/scripts/add-PR-comment-for-deploy-to-dev.sh +++ b/scripts/add-PR-comment-for-deploy-to-dev.sh @@ -21,14 +21,14 @@ add_PR_comment_for_deploy_to_dev() { local tmpfile tmpfile=$(mktemp) - cat > "$tmpfile" <<- EOT -A single node development cluster ({{.Env.DEV_CLUSTER_NAME}}) was allocated in production infra for this PR. + cat > "$tmpfile" <