diff --git a/.envrc-push-gateway.local.template b/.envrc-push-gateway.local.template index 33ce292960747a3bc52dd21a4b8e014fa62bcb3e..cb4fcb355f3702159f5e7cea47aafe743f46dd64 100644 --- a/.envrc-push-gateway.local.template +++ b/.envrc-push-gateway.local.template @@ -6,4 +6,4 @@ export APNS_KEY_ID= export APNS_TEAM_ID= export APNS_TOPIC= -export FCM_KEYFILE= \ No newline at end of file +export FCM_KEYFILE=/data/fcm_keyfile.json \ No newline at end of file diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 6176d17d326419af4b67edd210e6004d7d80dc9c..64841e93a797c2b420a8ed47ccffa242e386d04a 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -28,15 +28,15 @@ build_docker: PUSH_GATEWAY_ARTIFACT_IMAGE: 'europe-north1-docker.pkg.dev/holi-shared/docker/holi-chat-push-gateway' script: - echo '===> building chat server image' - - docker pull $ARTIFACT_IMAGE || true # Allows us to use --cache-from, we need to tag with latest in the next command for this to work - - docker build --cache-from $ARTIFACT_IMAGE -t $ARTIFACT_IMAGE:latest -t $ARTIFACT_IMAGE:$CI_COMMIT_SHA -t $ARTIFACT_IMAGE:$CI_COMMIT_REF_SLUG . - - docker push $ARTIFACT_IMAGE:$CI_COMMIT_SHA # this is the tag that is used for deployment - - docker push $ARTIFACT_IMAGE:$CI_COMMIT_REF_SLUG # just for easily knowing which is the last image for a branch + - docker pull "$ARTIFACT_IMAGE" || true # Allows us to use --cache-from, we need to tag with latest in the next command for this to work + - docker build --cache-from "$ARTIFACT_IMAGE" -t "$ARTIFACT_IMAGE":latest -t "$ARTIFACT_IMAGE":"$CI_COMMIT_SHA" -t "$ARTIFACT_IMAGE":"$CI_COMMIT_REF_SLUG" . + - docker push "$ARTIFACT_IMAGE":"$CI_COMMIT_SHA" # this is the tag that is used for deployment + - docker push "$ARTIFACT_IMAGE":"$CI_COMMIT_REF_SLUG" # just for easily knowing which is the last image for a branch - echo '===> building push gateway image' - - docker pull $PUSH_GATEWAY_ARTIFACT_IMAGE || true # Allows us to use --cache-from, we need to tag with latest in the next command for this to work - - docker build --cache-from $PUSH_GATEWAY_ARTIFACT_IMAGE -t $PUSH_GATEWAY_ARTIFACT_IMAGE:latest -t $PUSH_GATEWAY_ARTIFACT_IMAGE:$CI_COMMIT_SHA -t $PUSH_GATEWAY_ARTIFACT_IMAGE:$CI_COMMIT_REF_SLUG -f push-gateway.Dockerfile . - - docker push $PUSH_GATEWAY_ARTIFACT_IMAGE:$CI_COMMIT_SHA # this is the tag that is used for deployment - - docker push $PUSH_GATEWAY_ARTIFACT_IMAGE:$CI_COMMIT_REF_SLUG # just for easily knowing which is the last image for a branch + - docker pull "$PUSH_GATEWAY_ARTIFACT_IMAGE" || true # Allows us to use --cache-from, we need to tag with latest in the next command for this to work + - docker build --cache-from "$PUSH_GATEWAY_ARTIFACT_IMAGE" -t "$PUSH_GATEWAY_ARTIFACT_IMAGE":latest -t "$PUSH_GATEWAY_ARTIFACT_IMAGE":"$CI_COMMIT_SHA" -t "$PUSH_GATEWAY_ARTIFACT_IMAGE":"$CI_COMMIT_REF_SLUG" -f push-gateway.Dockerfile . + - docker push "$PUSH_GATEWAY_ARTIFACT_IMAGE":"$CI_COMMIT_SHA" # this is the tag that is used for deployment + - docker push "$PUSH_GATEWAY_ARTIFACT_IMAGE":"$CI_COMMIT_REF_SLUG" # just for easily knowing which is the last image for a branch .deploy: stage: "deploy" @@ -55,8 +55,8 @@ build_docker: #when: on_failure # can't do that for api base url, but can't define multiple artifacts expire_in: 1 week script: - - terraform/environments/scripts/create-or-update-env.sh $ENVIRONMENT_ID $CI_COMMIT_SHA - - echo "$(terraform/scripts/get-output.sh api_domain)" > $API_DOMAIN_PATH + - terraform/environments/scripts/create-or-update-env.sh "$ENVIRONMENT_ID" "$CI_COMMIT_SHA" + - echo "$(terraform/scripts/get-output.sh api_domain)" > "$API_DOMAIN_PATH" resource_group: $ENVIRONMENT_ID # never execute terraform in parallel on the same environment interruptible: false @@ -64,8 +64,8 @@ build_docker: stage: "deploy" image: 'europe-north1-docker.pkg.dev/holi-shared/docker-hub-remote/archlinux:base' script: - - API_BASE_URL=`cat $API_DOMAIN_PATH` - - echo "e2e tests against $CI_ENVIRONMENT_SLUG environment go here and against $API_BASE_URL" + - API_BASE_URL=`cat "$API_DOMAIN_PATH"` + - echo "e2e tests against '$CI_ENVIRONMENT_SLUG' environment go here and against '$API_BASE_URL'" - terraform/scripts/wait-for-ssl.sh "https://${API_BASE_URL}" staging_deploy: diff --git a/Dockerfile b/Dockerfile index a838274fca3f1a610328849047fcf707d52d39f6..9b90989addf43be99cd8433e680be56fb4255a4c 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM europe-north1-docker.pkg.dev/holi-shared/docker-hub-remote/matrixdotorg/synapse +FROM europe-north1-docker.pkg.dev/holi-shared/docker-hub-remote/matrixdotorg/synapse:v1.118.0 RUN apt-get update && apt-get upgrade -y && apt-get install -y gettext-base diff --git a/apns-test/package-lock.json b/apns-test/package-lock.json index fc59a82d97e0266ad12e81553fb55b51f7a42c9a..bce3503478d0e085396a12572c64f060f37260e4 100644 --- a/apns-test/package-lock.json +++ b/apns-test/package-lock.json @@ -36,14 +36,21 @@ } }, "node_modules/jsonwebtoken": { - "version": "9.0.1", - "resolved": "https://registry.npmjs.org/jsonwebtoken/-/jsonwebtoken-9.0.1.tgz", - "integrity": "sha512-K8wx7eJ5TPvEjuiVSkv167EVboBDv9PZdDoF7BgeQnBLVvZWW9clr2PsQHVJDTKaEIH5JBIwHujGcHp7GgI2eg==", + "version": "9.0.2", + "resolved": "https://registry.npmjs.org/jsonwebtoken/-/jsonwebtoken-9.0.2.tgz", + "integrity": "sha512-PRp66vJ865SSqOlgqS8hujT5U4AOgMfhrwYIuIhfKaoSCZcirrmASQr8CX7cUg+RMih+hgznrjp99o+W4pJLHQ==", + "license": "MIT", "dependencies": { "jws": "^3.2.2", - "lodash": "^4.17.21", + "lodash.includes": "^4.3.0", + "lodash.isboolean": "^3.0.3", + "lodash.isinteger": "^4.0.4", + "lodash.isnumber": "^3.0.3", + "lodash.isplainobject": "^4.0.6", + "lodash.isstring": "^4.0.1", + "lodash.once": "^4.0.0", "ms": "^2.1.1", - "semver": "^7.3.8" + "semver": "^7.5.4" }, "engines": { "node": ">=12", @@ -69,10 +76,47 @@ "safe-buffer": "^5.0.1" } }, - "node_modules/lodash": { - "version": "4.17.21", - "resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.21.tgz", - "integrity": "sha512-v2kDEe57lecTulaDIuNTPy3Ry4gLGJ6Z1O3vE1krgXZNrsQ+LFTGHVxVjcXPs17LhbZVGedAJv8XZ1tvj5FvSg==" + "node_modules/lodash.includes": { + "version": "4.3.0", + "resolved": "https://registry.npmjs.org/lodash.includes/-/lodash.includes-4.3.0.tgz", + "integrity": "sha512-W3Bx6mdkRTGtlJISOvVD/lbqjTlPPUDTMnlXZFnVwi9NKJ6tiAk6LVdlhZMm17VZisqhKcgzpO5Wz91PCt5b0w==", + "license": "MIT" + }, + "node_modules/lodash.isboolean": { + "version": "3.0.3", + "resolved": "https://registry.npmjs.org/lodash.isboolean/-/lodash.isboolean-3.0.3.tgz", + "integrity": "sha512-Bz5mupy2SVbPHURB98VAcw+aHh4vRV5IPNhILUCsOzRmsTmSQ17jIuqopAentWoehktxGd9e/hbIXq980/1QJg==", + "license": "MIT" + }, + "node_modules/lodash.isinteger": { + "version": "4.0.4", + "resolved": "https://registry.npmjs.org/lodash.isinteger/-/lodash.isinteger-4.0.4.tgz", + "integrity": "sha512-DBwtEWN2caHQ9/imiNeEA5ys1JoRtRfY3d7V9wkqtbycnAmTvRRmbHKDV4a0EYc678/dia0jrte4tjYwVBaZUA==", + "license": "MIT" + }, + "node_modules/lodash.isnumber": { + "version": "3.0.3", + "resolved": "https://registry.npmjs.org/lodash.isnumber/-/lodash.isnumber-3.0.3.tgz", + "integrity": "sha512-QYqzpfwO3/CWf3XP+Z+tkQsfaLL/EnUlXWVkIk5FUPc4sBdTehEqZONuyRt2P67PXAk+NXmTBcc97zw9t1FQrw==", + "license": "MIT" + }, + "node_modules/lodash.isplainobject": { + "version": "4.0.6", + "resolved": "https://registry.npmjs.org/lodash.isplainobject/-/lodash.isplainobject-4.0.6.tgz", + "integrity": "sha512-oSXzaWypCMHkPC3NvBEaPHf0KsA5mvPrOPgQWDsbg8n7orZ290M0BmC/jgRZ4vcJ6DTAhjrsSYgdsW/F+MFOBA==", + "license": "MIT" + }, + "node_modules/lodash.isstring": { + "version": "4.0.1", + "resolved": "https://registry.npmjs.org/lodash.isstring/-/lodash.isstring-4.0.1.tgz", + "integrity": "sha512-0wJxfxH1wgO3GrbuP+dTTk7op+6L41QCXbGINEmD+ny/G/eCqGzxyCsh7159S+mgDDcoarnBw6PC1PS5+wUGgw==", + "license": "MIT" + }, + "node_modules/lodash.once": { + "version": "4.1.1", + "resolved": "https://registry.npmjs.org/lodash.once/-/lodash.once-4.1.1.tgz", + "integrity": "sha512-Sb487aTOCr9drQVL8pIxOzVhafOjZN9UU54hiN8PU3uAiSV7lx1yYNpbNmex2PK6dSJoNTSJUUswT651yww3Mg==", + "license": "MIT" }, "node_modules/lru-cache": { "version": "6.0.0", diff --git a/docker-compose.yaml b/docker-compose.yaml index 90e7fbf4724a6dabb9571d7e083fd5a2b365f8e8..395e91b2d59390151e741e2e7ccde8aedd7273fe 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -36,7 +36,7 @@ services: driver: "local" chat-db: - image: postgres + image: postgres:17 container_name: chat-db restart: always volumes: diff --git a/push-gateway.Dockerfile b/push-gateway.Dockerfile index 9a4f0c910b36607f6f02c68e58a01f4404316a63..a396aabce7d98cbf77e9c73aba80cfa5bfcf91c2 100644 --- a/push-gateway.Dockerfile +++ b/push-gateway.Dockerfile @@ -1,4 +1,4 @@ -FROM europe-north1-docker.pkg.dev/holi-shared/docker-hub-remote/matrixdotorg/sygnal +FROM europe-north1-docker.pkg.dev/holi-shared/docker-hub-remote/matrixdotorg/sygnal:v0.15.1 RUN apt-get update && apt-get upgrade -y && apt-get install -y gettext-base diff --git a/renovate.json b/renovate.json new file mode 100644 index 0000000000000000000000000000000000000000..20f1885839af86ba18a844f96f8c2c9c24cb4afe --- /dev/null +++ b/renovate.json @@ -0,0 +1,31 @@ +{ + "$schema": "https://docs.renovatebot.com/renovate-schema.json", + "extends": [ + "config:recommended", + ":configMigration", + ":label('renovatebot')", + ":enableVulnerabilityAlertsWithLabel('renovatebot/security')", + ":semanticCommits", + ":automergePatch", + ":automergeTesters", + ":automergeLinters" + ], + "packageRules": [ + { + "matchSourceUrls": [ + "https://github.com/hashicorp/terraform" + ], + "groupName": "terraform" + }, + { + "matchManagers": [ + "dockerfile", + "docker-compose" + ], + "groupName": "docker-tags" + } + ], + "reviewers": [ + "alexander.timmermann" + ] +} diff --git a/terraform/environments/deployment_push_gateway.tf b/terraform/environments/deployment_push_gateway.tf index 411f6ee083ec5a4d0984900ff0317a6abb886143..38833f7d2a382d4ba9fb5aa81b7f9d8fa6b6b725 100644 --- a/terraform/environments/deployment_push_gateway.tf +++ b/terraform/environments/deployment_push_gateway.tf @@ -105,7 +105,7 @@ resource "google_cloud_run_service" "chat_push_gateway" { } } } - container_concurrency = 0 # 0 means thread safe, no restriction on max concurrency + container_concurrency = 80 # 0 means thread safe, no restriction on max concurrency } # template annotations: https://cloud.google.com/run/docs/reference/rpc/google.cloud.run.v1#revisiontemplate metadata { diff --git a/terraform/environments/deployment_server.tf b/terraform/environments/deployment_server.tf index c4959dbc307b62e66092ab8e3568552d0ed5bf30..4f79c3330008e3884d6a1637ec7eb2c2c37ab279 100644 --- a/terraform/environments/deployment_server.tf +++ b/terraform/environments/deployment_server.tf @@ -127,7 +127,7 @@ resource "google_cloud_run_service" "chat_server" { } } } - container_concurrency = 0 # 0 means thread safe, no restriction on max concurrency + container_concurrency = 80 # 0 means thread safe, no restriction on max concurrency } # template annotations: https://cloud.google.com/run/docs/reference/rpc/google.cloud.run.v1#revisiontemplate metadata { @@ -139,7 +139,7 @@ resource "google_cloud_run_service" "chat_server" { # possible values: all-traffic/private-ranges-only(default) https://cloud.google.com/sdk/gcloud/reference/run/services/update#--vpc-egress # this needs to be set to all-traffic in order to route a cloud run url correctly, since it does resolve to a non-private ip address. "run.googleapis.com/vpc-access-egress" = "all-traffic" - "run.googleapis.com/startup-cpu-boost" = "true" + "run.googleapis.com/startup-cpu-boost" = "true" } # labels set on the revision level labels = { diff --git a/terraform/environments/init.tf b/terraform/environments/init.tf index c6220f09f5585fe604ed95b30d1f4d0ff099e163..7fe737b4e9b827b39fdb567e08a6ae173fcb8d5f 100644 --- a/terraform/environments/init.tf +++ b/terraform/environments/init.tf @@ -23,9 +23,11 @@ data "terraform_remote_state" "holi_chat_server_common_state" { # provider google including beta features provider "google" { - region = local.default_region + project = "holi-shared" # string, because referencing google_project.holi_shared would be a cycle + region = local.default_region } provider "google-beta" { - region = local.default_region + project = "holi-shared" # string, because referencing google_project.holi_shared would be a cycle + region = local.default_region } diff --git a/terraform/environments/monitoring.tf b/terraform/environments/monitoring.tf new file mode 100644 index 0000000000000000000000000000000000000000..71c318c28ce293a5a65ab9c086ac58d0fa1f6388 --- /dev/null +++ b/terraform/environments/monitoring.tf @@ -0,0 +1,79 @@ +resource "google_monitoring_alert_policy" "chat_server_cpu_utilization" { + count = local.environment == "production" ? 1 : 0 + display_name = "COMO: Production chat server CPU utilization exceeded" + notification_channels = [data.terraform_remote_state.holi_infra_state.outputs.monitoring_notification_channel_rocket_chat_production_id] + severity = "WARNING" + alert_strategy { + auto_close = "86400s" # 1 day + } + combiner = "OR" + conditions { + display_name = "Chat Server Container CPU Utilization (${local.environment})" + condition_threshold { + comparison = "COMPARISON_GT" + duration = "0s" + filter = templatefile("./monitoring_chat_server_cpu_utilization.tftpl", { + environment_pattern = local.environment + }) + threshold_value = 0.8 + + aggregations { + alignment_period = "60s" + per_series_aligner = "ALIGN_PERCENTILE_99" + cross_series_reducer = "REDUCE_MAX" + group_by_fields = [ + "resource.label.service_name", + ] + } + trigger { + count = 1 + percent = 0 + } + + } + } + documentation { + content = "Chat server CPU utilization exceeded" + mime_type = "text/markdown" + } +} + +resource "google_monitoring_alert_policy" "chat_server_memory_utilization" { + count = local.environment == "production" ? 1 : 0 + display_name = "COMO: ${local.environment} chat server memory utilization exceeded" + notification_channels = [data.terraform_remote_state.holi_infra_state.outputs.monitoring_notification_channel_rocket_chat_production_id] + severity = "WARNING" + alert_strategy { + auto_close = "86400s" # 1 day + } + combiner = "OR" + conditions { + display_name = "Chat Server Container memory Utilization (${local.environment})" + condition_threshold { + comparison = "COMPARISON_GT" + duration = "0s" + filter = templatefile("./monitoring_chat_server_memory_utilization.tftpl", { + environment_pattern = local.environment + }) + threshold_value = 0.8 + + aggregations { + alignment_period = "60s" + per_series_aligner = "ALIGN_PERCENTILE_99" + cross_series_reducer = "REDUCE_MAX" + group_by_fields = [ + "resource.label.service_name", + ] + } + trigger { + count = 1 + percent = 0 + } + + } + } + documentation { + content = "Chat server memory utilization exceeded" + mime_type = "text/markdown" + } +} diff --git a/terraform/environments/monitoring_chat_server_cpu_utilization.tftpl b/terraform/environments/monitoring_chat_server_cpu_utilization.tftpl new file mode 100644 index 0000000000000000000000000000000000000000..8058f7f668646c912692dc3c4114aef3d5e68630 --- /dev/null +++ b/terraform/environments/monitoring_chat_server_cpu_utilization.tftpl @@ -0,0 +1,3 @@ +resource.type = "cloud_run_revision" AND +resource.labels.service_name = monitoring.regex.full_match("chat-host-${environment_pattern}-.*") +AND metric.type = "run.googleapis.com/container/cpu/utilizations" diff --git a/terraform/environments/monitoring_chat_server_memory_utilization.tftpl b/terraform/environments/monitoring_chat_server_memory_utilization.tftpl new file mode 100644 index 0000000000000000000000000000000000000000..1a6406bc9c6076bce3d068fa40dc2e4d635b3b1b --- /dev/null +++ b/terraform/environments/monitoring_chat_server_memory_utilization.tftpl @@ -0,0 +1,3 @@ +resource.type = "cloud_run_revision" AND +resource.labels.service_name = monitoring.regex.full_match("chat-host-${environment_pattern}-.*") +AND metric.type = "run.googleapis.com/container/memory/utilizations"