diff --git a/.gitignore b/.gitignore
index 133ae09859ba003be1a2a45e01c67bb65307f54a..3e3d850ca7100199bb3f35f92b827f8958d67fda 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,3 @@
 .envrc.local
 coverage
+terraform*.log
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 7d54ef19af7ea93fa93f1c485de9b34cb0b10501..03f7cab2eb862a044bda634b6d05033226beb535 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -120,6 +120,13 @@ review_destroy:
     name: review/$CI_COMMIT_REF_SLUG
     action: stop
   dependencies: [] # explicitly disable artifact usage
+  artifacts:
+    paths:
+      - "terraform/environments/crash.log" # optional, only available in case of a crash/panic
+      - "terraform/environments/terraform-*.log" # separate log for every step/command
+    name: "${CI_JOB_NAME}_${CI_JOB_ID}"
+    when: on_failure
+    expire_in: 1 week
   script:
     # branch may have been deleted, so we clone and checkout main
     - git clone $CI_REPOSITORY_URL main-clone
diff --git a/terraform/environments/scripts/destroy-env.sh b/terraform/environments/scripts/destroy-env.sh
index 8feb7be2d8fb66fdc05c7485e05a4091b8f97396..d8137694745e9a6bcded13142bf6a5e666b55e6c 100755
--- a/terraform/environments/scripts/destroy-env.sh
+++ b/terraform/environments/scripts/destroy-env.sh
@@ -1,4 +1,4 @@
-#!/usr/bin/env sh
+#!/usr/bin/env bash
 
 # exit when any command fails
 set -ex
@@ -6,11 +6,46 @@ set -ex
 # enable debug output in terraform
 export TF_LOG=DEBUG
 
-cd terraform/environments
+# fail early with a usage hint instead of running terraform without a workspace name
+if [ -z "${1:-}" ]; then
+  echo "usage: $0 <workspace>" >&2
+  exit 1
+fi
+
+# retry logic for destroy: sometimes, a full workspace destroy does not work. This can be due to e.g.:
+# * implicit dependencies between terraform resources not declared with depends_on,
+# * unclean shutdown of resources, e.g. service does not close db connections, db still sees clients connected,
+# * GCP stuff not allowing our resources to be deleted.
+# Most of the time, retrying a destroy fixes these causes.
+#
+# usage: retry <command> [args...]
+# Runs the command up to 3 times, sleeping 60 seconds between tries. Returns 0 as soon as a try
+# succeeds; exits the whole script with the command's exit code when the last try fails.
+retry() {
+  local max_tries=3 delay=60 try retval
+  for ((try = 1; try <= max_tries; try++)); do
+    retval=0
+    # a failing command on the left of `||` does not trigger `set -e`, so no errexit toggling is needed
+    "$@" || retval=$?
+    if [ "$retval" -eq 0 ]; then
+      return 0 # success
+    fi
+    if [ "$try" -lt "$max_tries" ]; then
+      echo "command '$*' failed in try $try, retrying after $delay seconds"
+      sleep "$delay" # let things settle a bit
+    else
+      echo "command '$*' failed in try $try, giving up"
+      exit "$retval"
+    fi
+  done
+}
+
+# change to the parent of this script's directory, independent of the caller's working directory
+cd "$(dirname "$0")"/..
 
 TF_LOG_PATH=terraform-init.log terraform init
 TF_LOG_PATH=terraform-version.log terraform version
-TF_LOG_PATH=terraform-workspace.log terraform workspace new "$1" || terraform workspace select "$1"
-TF_LOG_PATH=terraform-destroy.log terraform destroy -auto-approve -var="image_tag=dummy"
+TF_LOG_PATH=terraform-workspace.log terraform workspace select -or-create=true "$1"
+TF_LOG_PATH=terraform-destroy.log retry terraform destroy -auto-approve -var="image_tag=dummy"
 TF_LOG_PATH=terraform-ws-default.log terraform workspace select default
 TF_LOG_PATH=terraform-ws-delete.log terraform workspace delete "$1"