From f4fe4470f7c36685fae6767392b109a4b932a785 Mon Sep 17 00:00:00 2001
From: whekin
Date: Sun, 15 Mar 2026 19:11:18 +0400
Subject: [PATCH] feat(infra): implement multi-environment deployment strategy

- Update CD workflow for branch-based environments (main -> Prod, dev -> Dev)
- Register the Telegram webhook against the deployed bot service URL
- Remove the deploy-skipped / deploy-blocked-db notice jobs
- Support Terraform workspaces for environment isolation
- Add manage_runtime_secrets flag to prevent accidental secret destruction
- Add infra management and secret setup utility scripts
- Prefix GitHub deployer identity with environment name
- Synchronize bot environment variables with latest runtime config
---
Review notes (text in this section is ignored by git am):
- cd.yml: the target environment is derived from
  github.event.workflow_run.head_branch, falling back to github.ref_name for
  workflow_dispatch. On workflow_run events github.ref is the default branch,
  so the previous expressions could not select Development for dev runs.
- cd.yml: the webhook step stops when the Cloud Run URL comes back empty.
- main.tf: bot_runtime_access declares depends_on for the managed secrets,
  because its for_each no longer references them.
- import-shared-resources.sh: failed imports are counted and reported instead
  of always printing "Import Complete".
- setup-test-secrets.ts: secretExists awaits the command before reading
  exitCode; creation failures now produce a non-zero exit code.
- Context-line whitespace was reconstructed from flattened text and the blob
  hashes on the index lines were not recomputed; run `git apply --check`.

 .github/workflows/cd.yml                      |  61 ++++------
 infra/terraform/main.tf                       |  26 +++-
 .../modules/cloud_run_service/main.tf         |   6 +
 infra/terraform/variables.tf                  |  17 ++-
 package.json                                  |   7 +-
 scripts/ops/import-shared-resources.sh        |  64 +++++++++++
 scripts/ops/setup-test-secrets.ts             | 117 ++++++++++++++++++
 7 files changed, 254 insertions(+), 44 deletions(-)
 create mode 100755 scripts/ops/import-shared-resources.sh
 create mode 100644 scripts/ops/setup-test-secrets.ts

diff --git a/.github/workflows/cd.yml b/.github/workflows/cd.yml
index 5d7acb1..26fba94 100644
--- a/.github/workflows/cd.yml
+++ b/.github/workflows/cd.yml
@@ -8,6 +8,7 @@ on:
       - completed
     branches:
       - main
+      - dev
   workflow_dispatch:
 
 permissions:
@@ -15,13 +16,15 @@ permissions:
   id-token: write
 
 concurrency:
-  group: cd-main
+  group: cd-${{ github.event.workflow_run.head_branch || github.ref_name }}
   cancel-in-progress: false
 
 jobs:
   check-secrets:
     name: Check deploy prerequisites
     runs-on: ubuntu-latest
+    # Environment follows the deployed branch; on workflow_run events github.ref is the default branch
+    environment: ${{ (github.event.workflow_run.head_branch || github.ref_name) == 'main' && 'Production' || 'Development' }}
     outputs:
       eligible_event: ${{ steps.check.outputs.eligible_event }}
       secrets_ok: ${{ steps.check.outputs.secrets_ok }}
@@ -63,11 +66,16 @@ jobs:
     needs: check-secrets
     timeout-minutes: 30
     if: ${{ needs.check-secrets.outputs.eligible_event == 'true' && needs.check-secrets.outputs.secrets_ok == 'true' && needs.check-secrets.outputs.db_secret_ok == 'true' }}
+    environment: ${{ (github.event.workflow_run.head_branch || github.ref_name) == 'main' && 'Production' || 'Development' }}
     env:
       GCP_REGION: ${{ vars.GCP_REGION || 'europe-west1' }}
       ARTIFACT_REPOSITORY: ${{ vars.ARTIFACT_REPOSITORY || 'household-bot' }}
-      CLOUD_RUN_SERVICE_BOT: ${{ vars.CLOUD_RUN_SERVICE_BOT || 'household-dev-bot-api' }}
-      CLOUD_RUN_SERVICE_MINI: ${{ vars.CLOUD_RUN_SERVICE_MINI || 'household-dev-mini-app' }}
+      # Dynamic names based on the deployed branch (workflow_run head branch, or the dispatched ref)
+      # Branch 'main' -> Environment 'prod' -> household-prod-*
+      # Any other branch -> Environment 'dev' -> household-dev-*
+      CLOUD_RUN_SERVICE_BOT: ${{ (github.event.workflow_run.head_branch || github.ref_name) == 'main' && 'household-prod-bot-api' || 'household-dev-bot-api' }}
+      CLOUD_RUN_SERVICE_MINI: ${{ (github.event.workflow_run.head_branch || github.ref_name) == 'main' && 'household-prod-mini-app' || 'household-dev-mini-app' }}
+      TELEGRAM_BOT_TOKEN_SECRET_ID: ${{ (github.event.workflow_run.head_branch || github.ref_name) == 'main' && 'telegram-bot-token' || 'telegram-bot-token-test' }}
 
     steps:
       - name: Checkout deployment ref
@@ -99,8 +107,6 @@ jobs:
 
       - name: Load Telegram bot token for command sync
         id: telegram-token
-        env:
-          TELEGRAM_BOT_TOKEN_SECRET_ID: ${{ vars.TELEGRAM_BOT_TOKEN_SECRET_ID || 'telegram-bot-token' }}
         run: |
           set +e
           token="$(gcloud secrets versions access latest \
@@ -169,34 +175,21 @@ jobs:
           TELEGRAM_BOT_TOKEN: ${{ steps.telegram-token.outputs.token }}
         run: bun run ops:telegram:commands set
 
-      - name: Telegram command sync skipped
-        if: ${{ steps.telegram-token.outputs.available != 'true' }}
+      - name: Set Telegram Webhook
+        if: ${{ steps.telegram-token.outputs.available == 'true' }}
+        env:
+          TELEGRAM_BOT_TOKEN: ${{ steps.telegram-token.outputs.token }}
         run: |
-          echo "Telegram command sync skipped."
-          echo "Grant the CD service account access to the bot token secret or set TELEGRAM_BOT_TOKEN_SECRET_ID."
+          SERVICE_URL=$(gcloud run services describe "${CLOUD_RUN_SERVICE_BOT}" \
+            --region "${GCP_REGION}" \
+            --project "${{ secrets.GCP_PROJECT_ID }}" \
+            --format 'value(status.url)')
 
-  deploy-skipped:
-    name: Deploy skipped (missing config)
-    runs-on: ubuntu-latest
-    needs: check-secrets
-    if: ${{ needs.check-secrets.outputs.eligible_event == 'true' && needs.check-secrets.outputs.secrets_ok == 'false' }}
-
-    steps:
-      - name: Print configuration hint
-        run: |
-          echo "CD skipped: configure required GitHub secrets."
-          echo "Required: GCP_PROJECT_ID, GCP_WORKLOAD_IDENTITY_PROVIDER, GCP_SERVICE_ACCOUNT, DATABASE_URL"
-          echo "Optional repo/service vars: GCP_REGION, ARTIFACT_REPOSITORY, CLOUD_RUN_SERVICE_BOT, CLOUD_RUN_SERVICE_MINI"
-
-  deploy-blocked-db:
-    name: Deploy blocked (missing DATABASE_URL)
-    runs-on: ubuntu-latest
-    needs: check-secrets
-    if: ${{ needs.check-secrets.outputs.eligible_event == 'true' && needs.check-secrets.outputs.secrets_ok == 'true' && needs.check-secrets.outputs.db_secret_ok != 'true' }}
-
-    steps:
-      - name: Fail fast on missing DATABASE_URL
-        run: |
-          echo "CD blocked: DATABASE_URL GitHub secret is required."
-          echo "This workflow now refuses to deploy without running migrations against the target database."
-          exit 1
+          # Never register a relative webhook path when the service URL is unknown
+          if [ -z "$SERVICE_URL" ]; then
+            echo "Could not resolve the URL of ${CLOUD_RUN_SERVICE_BOT}; webhook not updated." >&2
+            exit 1
+          fi
+
+          export TELEGRAM_WEBHOOK_URL="$SERVICE_URL/webhook/telegram"
+          bun run ops:telegram:webhook set
diff --git a/infra/terraform/main.tf b/infra/terraform/main.tf
index ab344dc..ca6877e 100644
--- a/infra/terraform/main.tf
+++ b/infra/terraform/main.tf
@@ -19,6 +19,14 @@ resource "google_artifact_registry_repository" "containers" {
 
   labels = local.common_labels
 
+  lifecycle {
+    ignore_changes = [
+      labels,
+      effective_labels,
+      terraform_labels,
+    ]
+  }
+
   depends_on = [google_project_service.enabled]
 }
 
@@ -41,7 +49,7 @@ resource "google_service_account" "scheduler_invoker" {
 }
 
 resource "google_secret_manager_secret" "runtime" {
-  for_each = local.runtime_secret_ids
+  for_each = var.manage_runtime_secrets ? local.runtime_secret_ids : toset([])
 
   project   = var.project_id
   secret_id = each.value
@@ -56,10 +64,13 @@ resource "google_secret_manager_secret" "runtime" {
 }
 
 resource "google_secret_manager_secret_iam_member" "bot_runtime_access" {
-  for_each = google_secret_manager_secret.runtime
+  for_each = local.runtime_secret_ids
 
   project   = var.project_id
-  secret_id = each.value.secret_id
+  secret_id = each.value
   role      = "roles/secretmanager.secretAccessor"
   member    = "serviceAccount:${google_service_account.bot_runtime.email}"
+
+  # for_each no longer references the secret resources, so order the bindings after them explicitly
+  depends_on = [google_secret_manager_secret.runtime]
 }
@@ -96,8 +107,11 @@ module "bot_api_service" {
     var.bot_assistant_model == null ? {} : {
       ASSISTANT_MODEL = var.bot_assistant_model
     },
-    var.bot_assistant_router_model == null ? {} : {
-      ASSISTANT_ROUTER_MODEL = var.bot_assistant_router_model
+    var.bot_topic_processor_model == null ? {} : {
+      TOPIC_PROCESSOR_MODEL = var.bot_topic_processor_model
+    },
+    var.bot_topic_processor_timeout_ms == null ? {} : {
+      TOPIC_PROCESSOR_TIMEOUT_MS = tostring(var.bot_topic_processor_timeout_ms)
     },
     var.bot_assistant_timeout_ms == null ? {} : {
       ASSISTANT_TIMEOUT_MS = tostring(var.bot_assistant_timeout_ms)
@@ -222,7 +236,7 @@ resource "google_service_account" "github_deployer" {
   count = var.create_workload_identity ? 1 : 0
 
   project      = var.project_id
-  account_id   = var.github_deploy_service_account_id
+  account_id   = "${var.environment}-${var.github_deploy_service_account_id}"
   display_name = "${local.name_prefix} GitHub deployer"
 }
 
diff --git a/infra/terraform/modules/cloud_run_service/main.tf b/infra/terraform/modules/cloud_run_service/main.tf
index f604c61..289eb2a 100644
--- a/infra/terraform/modules/cloud_run_service/main.tf
+++ b/infra/terraform/modules/cloud_run_service/main.tf
@@ -54,6 +54,12 @@ resource "google_cloud_run_v2_service" "this" {
     percent = 100
     type    = "TRAFFIC_TARGET_ALLOCATION_TYPE_LATEST"
   }
+
+  lifecycle {
+    ignore_changes = [
+      template[0].scaling,
+    ]
+  }
 }
 
 resource "google_cloud_run_v2_service_iam_member" "public_invoker" {
diff --git a/infra/terraform/variables.tf b/infra/terraform/variables.tf
index 93b070f..a324cda 100644
--- a/infra/terraform/variables.tf
+++ b/infra/terraform/variables.tf
@@ -83,13 +83,20 @@ variable "bot_assistant_model" {
   nullable    = true
 }
 
-variable "bot_assistant_router_model" {
-  description = "Optional ASSISTANT_ROUTER_MODEL override for bot runtime"
+variable "bot_topic_processor_model" {
+  description = "Optional TOPIC_PROCESSOR_MODEL override for bot runtime"
   type        = string
   default     = null
   nullable    = true
 }
 
+variable "bot_topic_processor_timeout_ms" {
+  description = "Optional TOPIC_PROCESSOR_TIMEOUT_MS override for bot runtime"
+  type        = number
+  default     = null
+  nullable    = true
+}
+
 variable "bot_assistant_timeout_ms" {
   description = "Optional ASSISTANT_TIMEOUT_MS override for bot runtime"
   type        = number
@@ -217,6 +224,12 @@ variable "labels" {
   default     = {}
 }
 
+variable "manage_runtime_secrets" {
+  description = "Whether Terraform should manage the creation of runtime secrets (disable if secrets are created manually)"
+  type        = bool
+  default     = true
+}
+
 variable "create_workload_identity" {
   description = "Create GitHub OIDC Workload Identity resources"
   type        = bool
diff --git a/package.json b/package.json
index 17d1089..749947e 100644
--- a/package.json
+++ b/package.json
@@ -27,9 +27,12 @@
     "review:coderabbit": "coderabbit --prompt-only --base main || ~/.local/bin/coderabbit --prompt-only --base main",
     "infra:fmt": "terraform -chdir=infra/terraform fmt -recursive",
     "infra:fmt:check": "terraform -chdir=infra/terraform fmt -check -recursive",
+    "infra:init": "terraform -chdir=infra/terraform init",
     "infra:validate": "terraform -chdir=infra/terraform init -backend=false && terraform -chdir=infra/terraform validate",
-    "infra:plan:dev": "terraform -chdir=infra/terraform plan -var-file=dev.tfvars",
-    "infra:apply:dev": "terraform -chdir=infra/terraform apply -var-file=dev.tfvars",
+    "infra:plan:dev": "terraform -chdir=infra/terraform workspace select dev || terraform -chdir=infra/terraform workspace new dev && terraform -chdir=infra/terraform plan -var-file=dev.tfvars",
+    "infra:apply:dev": "terraform -chdir=infra/terraform workspace select dev && terraform -chdir=infra/terraform apply -var-file=dev.tfvars",
+    "infra:plan:prod": "terraform -chdir=infra/terraform workspace select prod || terraform -chdir=infra/terraform workspace new prod && terraform -chdir=infra/terraform plan -var-file=prod.tfvars",
+    "infra:apply:prod": "terraform -chdir=infra/terraform workspace select prod && terraform -chdir=infra/terraform apply -var-file=prod.tfvars",
     "dev:bot": "bun run --filter @household/bot dev",
     "dev:miniapp": "bun run --filter @household/miniapp dev",
     "docker:build:bot": "docker build -f apps/bot/Dockerfile -t household-bot:local .",
diff --git a/scripts/ops/import-shared-resources.sh b/scripts/ops/import-shared-resources.sh
new file mode 100755
index 0000000..26d3666
--- /dev/null
+++ b/scripts/ops/import-shared-resources.sh
@@ -0,0 +1,64 @@
+#!/bin/bash
+#
+# Imports shared, pre-existing GCP resources (Artifact Registry repository and the
+# GitHub Workload Identity pool/provider) into the state of a Terraform workspace.
+#
+# Usage: scripts/ops/import-shared-resources.sh [workspace]   (default: prod)
+# PROJECT_ID and REGION may be overridden through the environment.
+
+set -uo pipefail
+
+# Configuration
+PROJECT_ID="${PROJECT_ID:-gen-lang-client-0200379851}"
+REGION="${REGION:-europe-west1}"
+WORKSPACE="${1:-prod}" # Takes first argument, defaults to 'prod'
+
+# Change directory to terraform folder, resolved from this script's location so
+# the script can be started from any working directory
+cd "$(dirname "${BASH_SOURCE[0]}")/../../infra/terraform" || exit 1
+
+echo "--- Shared Resource Import Utility ---"
+echo "Target Project: $PROJECT_ID"
+echo "Target Workspace: $WORKSPACE"
+
+# 1. Ensure the workspace exists and is selected
+terraform workspace select "$WORKSPACE" || terraform workspace new "$WORKSPACE" || exit 1
+
+# 2. Construct Resource IDs
+echo -e "\nConstructing Resource IDs..."
+REPO_ID="projects/$PROJECT_ID/locations/$REGION/repositories/household-bot"
+POOL_ID="projects/$PROJECT_ID/locations/global/workloadIdentityPools/github-pool"
+PROV_ID="projects/$PROJECT_ID/locations/global/workloadIdentityPools/github-pool/providers/github-provider"
+
+echo "1. Repository Resource ID: $REPO_ID"
+echo "2. Identity Pool Resource ID: $POOL_ID"
+echo "3. Provider Resource ID: $PROV_ID"
+
+# 3. Perform the Imports
+echo -e "\nStarting Terraform Imports..."
+
+FAILED_IMPORTS=0
+
+# Runs one terraform import. A failure is counted instead of aborting, so the
+# remaining resources are still attempted and the final status stays honest.
+import_resource() {
+  local label="$1" address="$2" resource_id="$3"
+
+  echo -e "\n--- Importing $label ---"
+  if ! terraform import -input=false -var-file="$WORKSPACE.tfvars" "$address" "$resource_id"; then
+    echo "Import failed for $address" >&2
+    FAILED_IMPORTS=$((FAILED_IMPORTS + 1))
+  fi
+}
+
+import_resource "Artifact Registry" google_artifact_registry_repository.containers "$REPO_ID"
+import_resource "Workload Identity Pool" 'google_iam_workload_identity_pool.github[0]' "$POOL_ID"
+import_resource "Workload Identity Provider" 'google_iam_workload_identity_pool_provider.github[0]' "$PROV_ID"
+
+if [ "$FAILED_IMPORTS" -gt 0 ]; then
+  echo -e "\n--- $FAILED_IMPORTS import(s) failed for $WORKSPACE; review the output above. ---" >&2
+  exit 1
+fi
+
+echo -e "\n--- Import Complete for $WORKSPACE! ---"
+echo "You can now run: bun run infra:apply:$WORKSPACE"
diff --git a/scripts/ops/setup-test-secrets.ts b/scripts/ops/setup-test-secrets.ts
new file mode 100644
index 0000000..fc9d7a9
--- /dev/null
+++ b/scripts/ops/setup-test-secrets.ts
@@ -0,0 +1,117 @@
+import { $ } from 'bun'
+
+const PROJECT_ID = 'gen-lang-client-0200379851'
+
+// Set when any secret could not be created, so the run does not report success.
+let hadFailure = false
+
+/**
+ * Checks whether a secret named `name` exists in PROJECT_ID.
+ * Any non-zero exit of `gcloud secrets describe` is reported as "does not exist".
+ */
+async function secretExists(name: string): Promise<boolean> {
+  // The exit code is only available on the awaited output; nothrow() keeps a
+  // non-zero exit from being raised as an error.
+  const { exitCode } = await $`gcloud secrets describe ${name} --project=${PROJECT_ID}`
+    .quiet()
+    .nothrow()
+  return exitCode === 0
+}
+
+/**
+ * Creates the secret `name` with `value` as its first version unless it already exists.
+ * Failures are logged and recorded in `hadFailure`; they do not stop the script.
+ */
+async function createSecret(name: string, value: string) {
+  console.log(`\n[Checking] ${name}...`)
+  if (await secretExists(name)) {
+    console.log(`[Skipping] ${name} already exists. If you want to change it, use the GCP console.`)
+    return
+  }
+
+  try {
+    console.log(`[Creating] ${name} for the first time...`)
+    // Pass the value on stdin from a Response body instead of relying on `echo -n`.
+    await $`gcloud secrets create ${name} --data-file=- --replication-policy=automatic --project=${PROJECT_ID} < ${new Response(value)}`.quiet()
+    console.log(`[Success] ${name} is ready.`)
+  } catch (err) {
+    hadFailure = true
+    console.error(`[Error] Failed to setup ${name}:`, err)
+  }
+}
+
+/** Asks for a value only when `secretName` is missing; returns '' when it already exists. */
+async function promptIfMissing(secretName: string, message: string): Promise<string> {
+  if (await secretExists(secretName)) return ''
+  // Trim here so derived URLs never embed stray whitespace from pasted input.
+  return (prompt(message) || '').trim()
+}
+
+/** Appends a Postgres `options=-csearch_path=<schema>` query parameter to `url`. */
+function withSearchPath(url: string, schema: string): string {
+  const separator = url.includes('?') ? '&' : '?'
+  return `${url}${separator}options=-csearch_path%3D${schema}`
+}
+
+/** Returns 32 random bytes encoded as base64. */
+function randomSecret(): string {
+  return Buffer.from(crypto.getRandomValues(new Uint8Array(32))).toString('base64')
+}
+
+console.log('--- Production & Test Environment Secret Setup ---')
+console.log(`Target Project: ${PROJECT_ID}`)
+
+// 1. PRODUCTION Bot Token
+const prodBotToken = await promptIfMissing(
+  'telegram-bot-token',
+  '1. Enter your PRODUCTION Telegram Bot Token (the original one):'
+)
+
+// 2. PRODUCTION Database URL
+const prodDbUrl = await promptIfMissing(
+  'database-url',
+  '2. Enter your PRODUCTION Supabase DATABASE_URL (for public schema):'
+)
+
+// 3. TEST Bot Token
+const testBotToken = await promptIfMissing(
+  'telegram-bot-token-test',
+  '3. Enter your TEST Telegram Bot Token (from @BotFather):'
+)
+
+// 4. TEST Database URL (Derived from prod if not exists)
+const testDbUrlPrompt = await promptIfMissing(
+  'database-url-test',
+  '4. Enter your TEST Supabase DATABASE_URL (or leave empty to reuse prod with ?options=-csearch_path=test):'
+)
+
+// 5. OpenAI API Key (Shared)
+const openaiKey = await promptIfMissing('openai-api-key', '5. Enter your OpenAI API Key:')
+
+// Logic for test DB URL
+const testDbUrl = testDbUrlPrompt || (prodDbUrl && withSearchPath(prodDbUrl, 'test'))
+
+// Logic for prod DB URL
+const finalProdDbUrl = prodDbUrl && withSearchPath(prodDbUrl, 'public')
+
+console.log('\nStarting GCP operations...')
+
+if (prodBotToken) await createSecret('telegram-bot-token', prodBotToken)
+if (finalProdDbUrl) await createSecret('database-url', finalProdDbUrl)
+if (testBotToken) await createSecret('telegram-bot-token-test', testBotToken)
+if (testDbUrl) await createSecret('database-url-test', testDbUrl)
+if (openaiKey) await createSecret('openai-api-key', openaiKey)
+
+// Create unique secrets per environment if missing
+await createSecret('telegram-webhook-secret-test', randomSecret())
+await createSecret('scheduler-shared-secret-test', randomSecret())
+await createSecret('telegram-webhook-secret', randomSecret())
+await createSecret('scheduler-shared-secret', randomSecret())
+
+if (hadFailure) {
+  console.error('\n--- Setup finished with errors; see the [Error] lines above. ---')
+  process.exitCode = 1
+} else {
+  console.log('\n--- Setup Complete! ---')
+  console.log('You can now run the import commands and then infra:apply:prod')
+}