fix(ci): push latest tag and reduce monitoring costs

- Add latest tag push alongside SHA tag for manual rollback/debugging
- Reduce `_Default` log bucket retention from 3 days to 1 day
- Comment out the bot_error_events log-based metrics and their alert policies to save ~$0.47/month
- Minor whitespace fix in cd.yml
This commit is contained in:
2026-03-16 05:17:47 +04:00
parent 224cdc2d45
commit 3d15754936
3 changed files with 77 additions and 70 deletions

View File

@@ -187,7 +187,7 @@ jobs:
git fetch origin "${{ github.event.workflow_run.head_branch }}" git fetch origin "${{ github.event.workflow_run.head_branch }}"
latest_sha=$(git rev-parse "origin/${{ github.event.workflow_run.head_branch }}") latest_sha=$(git rev-parse "origin/${{ github.event.workflow_run.head_branch }}")
deploy_sha="${{ steps.images.outputs.deploy_sha }}" deploy_sha="${{ steps.images.outputs.deploy_sha }}"
if [[ "$latest_sha" != "$deploy_sha" ]]; then if [[ "$latest_sha" != "$deploy_sha" ]]; then
echo "::notice::Newer commit ($latest_sha) found on branch. Skipping deployment of $deploy_sha to avoid race conditions." echo "::notice::Newer commit ($latest_sha) found on branch. Skipping deployment of $deploy_sha to avoid race conditions."
echo "skip=true" >> "$GITHUB_OUTPUT" echo "skip=true" >> "$GITHUB_OUTPUT"

View File

@@ -160,6 +160,7 @@ jobs:
repo="${GCP_REGION}-docker.pkg.dev/${{ vars.GCP_PROJECT_ID }}/${ARTIFACT_REPOSITORY}" repo="${GCP_REGION}-docker.pkg.dev/${{ vars.GCP_PROJECT_ID }}/${ARTIFACT_REPOSITORY}"
echo "name=${repo}/${{ matrix.service }}:${GITHUB_SHA}" >> "$GITHUB_OUTPUT" echo "name=${repo}/${{ matrix.service }}:${GITHUB_SHA}" >> "$GITHUB_OUTPUT"
echo "cache_ref=${repo}/${{ matrix.service }}:cache" >> "$GITHUB_OUTPUT" echo "cache_ref=${repo}/${{ matrix.service }}:cache" >> "$GITHUB_OUTPUT"
echo "latest=${repo}/${{ matrix.service }}:latest" >> "$GITHUB_OUTPUT"
- name: Build and push - name: Build and push
uses: docker/build-push-action@v6 uses: docker/build-push-action@v6
@@ -167,7 +168,9 @@ jobs:
context: . context: .
file: apps/${{ matrix.service }}/Dockerfile file: apps/${{ matrix.service }}/Dockerfile
push: true push: true
tags: ${{ steps.image.outputs.name }} tags: |
${{ steps.image.outputs.name }}
${{ steps.image.outputs.latest }}
platforms: linux/amd64 platforms: linux/amd64
provenance: false provenance: false
cache-from: type=registry,ref=${{ steps.image.outputs.cache_ref }} cache-from: type=registry,ref=${{ steps.image.outputs.cache_ref }}

View File

@@ -51,7 +51,7 @@ resource "google_logging_project_bucket_config" "default" {
project = var.project_id project = var.project_id
location = "global" location = "global"
bucket_id = "_Default" bucket_id = "_Default"
retention_days = 3 retention_days = 1
} }
resource "google_monitoring_notification_channel" "email" { resource "google_monitoring_notification_channel" "email" {
@@ -68,26 +68,28 @@ resource "google_monitoring_notification_channel" "email" {
depends_on = [google_project_service.enabled] depends_on = [google_project_service.enabled]
} }
resource "google_logging_metric" "bot_error_events" { # DEV-187: Commented out to save ~$0.47/month on Cloud Monitoring costs
for_each = local.bot_error_metrics # TODO: Re-enable if alerting on specific bot error events becomes necessary
# resource "google_logging_metric" "bot_error_events" {
project = var.project_id # for_each = local.bot_error_metrics
name = each.value.metric_name #
description = "Counts `${each.value.event}` log events for ${module.bot_api_service.name}." # project = var.project_id
filter = <<-EOT # name = each.value.metric_name
resource.type="cloud_run_revision" # description = "Counts `${each.value.event}` log events for ${module.bot_api_service.name}."
resource.labels.service_name="${module.bot_api_service.name}" # filter = <<-EOT
jsonPayload.event="${each.value.event}" # resource.type="cloud_run_revision"
EOT # resource.labels.service_name="${module.bot_api_service.name}"
# jsonPayload.event="${each.value.event}"
metric_descriptor { # EOT
metric_kind = "DELTA" #
value_type = "INT64" # metric_descriptor {
unit = "1" # metric_kind = "DELTA"
} # value_type = "INT64"
# unit = "1"
depends_on = [google_project_service.enabled] # }
} #
# depends_on = [google_project_service.enabled]
# }
resource "google_monitoring_alert_policy" "bot_api_5xx" { resource "google_monitoring_alert_policy" "bot_api_5xx" {
project = var.project_id project = var.project_id
@@ -138,50 +140,52 @@ metric.labels.response_code_class="5xx"
depends_on = [google_project_service.enabled] depends_on = [google_project_service.enabled]
} }
resource "google_monitoring_alert_policy" "bot_error_events" { # DEV-187: Commented out to save ~$0.47/month on Cloud Monitoring costs
for_each = local.bot_error_metrics # TODO: Re-enable if alerting on specific bot error events becomes necessary
# resource "google_monitoring_alert_policy" "bot_error_events" {
project = var.project_id # for_each = local.bot_error_metrics
display_name = each.value.display_name #
combiner = "OR" # project = var.project_id
# display_name = each.value.display_name
notification_channels = [ # combiner = "OR"
for channel in google_monitoring_notification_channel.email : channel.name #
] # notification_channels = [
# for channel in google_monitoring_notification_channel.email : channel.name
documentation { # ]
content = "Structured bot failure event `${each.value.event}` was logged by `${module.bot_api_service.name}` in `${var.environment}`." #
mime_type = "text/markdown" # documentation {
} # content = "Structured bot failure event `${each.value.event}` was logged by `${module.bot_api_service.name}` in `${var.environment}`."
# mime_type = "text/markdown"
conditions { # }
display_name = each.value.display_name #
# conditions {
condition_threshold { # display_name = each.value.display_name
filter = <<-EOT #
resource.type="cloud_run_revision" # condition_threshold {
resource.labels.service_name="${module.bot_api_service.name}" # filter = <<-EOT
metric.type="logging.googleapis.com/user/${google_logging_metric.bot_error_events[each.key].name}" # resource.type="cloud_run_revision"
EOT # resource.labels.service_name="${module.bot_api_service.name}"
# metric.type="logging.googleapis.com/user/${google_logging_metric.bot_error_events[each.key].name}"
comparison = "COMPARISON_GT" # EOT
threshold_value = 0 #
duration = "0s" # comparison = "COMPARISON_GT"
# threshold_value = 0
aggregations { # duration = "0s"
alignment_period = "300s" #
per_series_aligner = "ALIGN_RATE" # aggregations {
} # alignment_period = "300s"
# per_series_aligner = "ALIGN_RATE"
trigger { # }
count = 1 #
} # trigger {
} # count = 1
} # }
# }
alert_strategy { # }
auto_close = "1800s" #
} # alert_strategy {
# auto_close = "1800s"
depends_on = [google_logging_metric.bot_error_events] # }
} #
# depends_on = [google_logging_metric.bot_error_events]
# }