diff --git a/.gitignore b/.gitignore index 959638d0a0e55a09b3426994f77362d8511f059d..d0e17d54b564d127b96dd758f56319455ee05591 100644 --- a/.gitignore +++ b/.gitignore @@ -16,4 +16,5 @@ node_modules/ **/.yarn/install-state.gz **/.pnp.js -.idea \ No newline at end of file +.idea +terraform*.log diff --git a/.gitlab/ci/deploy.yml b/.gitlab/ci/deploy.yml index 47dc9dc6b4e7a6dbd8a8e7173952588d69d66d56..2a9f80f00a66d746dcf6b466721bc27eb51cb829 100644 --- a/.gitlab/ci/deploy.yml +++ b/.gitlab/ci/deploy.yml @@ -60,6 +60,13 @@ deploy:review:stop: needs: - "apps:build" # all the /dist folders, so that terraform can archive stuff - "deploy:review:infra" # the terraform hcl sources + artifacts: + paths: + - "terraform/environments/crash.log" # optional, only available in case of a crash/panic + - "terraform/environments/terraform-*.log" # separate log for every step/command + name: "${CI_JOB_NAME}_${CI_JOB_ID}" + when: on_failure + expire_in: 1 week script: - terraform/environments/scripts/destroy-env.sh $CI_ENVIRONMENT_NAME when: manual diff --git a/terraform/environments/scripts/destroy-env.sh b/terraform/environments/scripts/destroy-env.sh index e2aaab7fab7e9eb3a06852256bbcd46afafe1fa0..ddb158ff58ba1fdac1b0ccb26d26d7469e790890 100755 --- a/terraform/environments/scripts/destroy-env.sh +++ b/terraform/environments/scripts/destroy-env.sh @@ -1,4 +1,4 @@ -#!/usr/bin/env sh +#!/usr/bin/env bash # exit when any command fails set -ex @@ -6,11 +6,36 @@ set -ex # enable debug output in terraform export TF_LOG=DEBUG -cd terraform/environments +# retry logic for destroy: sometimes, a full workspace destroy does not work. This can be due to e.g.: +# * implicit dependencies between terraform resources not declared with depends_on, +# * unclean shutdown of resources, e.g. service does not close db connections, db still sees clients connected, +# * GCP stuff not allowing our resources to be deleted. +# Most of the time, retrying a destroy fixes these causes. 
+#
+# retry CMD [ARGS...]: run the command up to 3 times, sleeping 60 seconds between
+# tries; return 0 on the first success, exit with its status after the last failure.
+retry() {
+  local try status # keep loop state out of the caller's variables
+  for try in 1 2 3; do
+    status=0
+    "$@" || status=$? # "||" captures the status without tripping errexit
+    if [ "$status" -eq 0 ]; then
+      return 0
+    fi
+    if [ "$try" -lt 3 ]; then
+      echo "command '$*' failed in try $try, retrying after 60 seconds" >&2
+      sleep 60 # let things settle a bit
+    fi
+  done
+  echo "command '$*' failed in try $try, giving up" >&2
+  exit "$status"
+}
+
+cd "$(dirname "$0")"/..
 
 TF_LOG_PATH=terraform-init.log terraform init
 TF_LOG_PATH=terraform-version.log terraform version
 TF_LOG_PATH=terraform-workspace.log terraform workspace select -or-create=true "$1"
-TF_LOG_PATH=terraform-destroy.log terraform destroy -auto-approve -var="backend_image_tag=dummy" -var "frontend_image_tag=dumm" -var "federator_image_tag=dummy"
+TF_LOG_PATH=terraform-destroy.log retry terraform destroy -auto-approve -var="backend_image_tag=dummy" -var "frontend_image_tag=dummy" -var "federator_image_tag=dummy"
 TF_LOG_PATH=terraform-ws-default.log terraform workspace select default
 TF_LOG_PATH=terraform-ws-delete.log terraform workspace delete "$1"