Merge pull request #10 from whekin/codex/whe-22-topic-ingestion

feat(WHE-22): ingest purchases from configured Telegram topic
This commit is contained in:
Stas
2026-03-05 03:33:54 +03:00
committed by GitHub
16 changed files with 1838 additions and 20 deletions

View File

@@ -14,6 +14,11 @@ TELEGRAM_BOT_TOKEN=your-telegram-bot-token
TELEGRAM_WEBHOOK_SECRET=your-webhook-secret
TELEGRAM_BOT_USERNAME=your_bot_username
TELEGRAM_WEBHOOK_PATH=/webhook/telegram
TELEGRAM_HOUSEHOLD_CHAT_ID=-1001234567890
TELEGRAM_PURCHASE_TOPIC_ID=777
# Household
HOUSEHOLD_ID=11111111-1111-4111-8111-111111111111
# Parsing / AI
OPENAI_API_KEY=your-openai-api-key

View File

@@ -10,6 +10,8 @@
"lint": "oxlint \"src\""
},
"dependencies": {
"@household/db": "workspace:*",
"drizzle-orm": "^0.44.7",
"grammy": "1.41.1"
}
}

View File

@@ -3,6 +3,11 @@ export interface BotRuntimeConfig {
telegramBotToken: string
telegramWebhookSecret: string
telegramWebhookPath: string
databaseUrl?: string
householdId?: string
telegramHouseholdChatId?: string
telegramPurchaseTopicId?: number
purchaseTopicIngestionEnabled: boolean
}
function parsePort(raw: string | undefined): number {
@@ -26,11 +31,56 @@ function requireValue(value: string | undefined, key: string): string {
return value
}
/**
 * Parses the optional `TELEGRAM_PURCHASE_TOPIC_ID` setting.
 *
 * Surrounding whitespace is ignored, and a missing or blank value means the
 * topic is not configured. Only plain decimal digits are accepted, so inputs
 * that `Number()` would otherwise coerce (`0x10`, `1e3`, `+5`) are rejected.
 *
 * @param raw - Raw environment value, if any.
 * @returns The topic id, or `undefined` when the value is missing or blank.
 * @throws Error when a value is present but is not a positive, safe decimal integer.
 */
function parseOptionalTopicId(raw: string | undefined): number | undefined {
  const trimmed = raw?.trim()
  if (!trimmed) {
    return undefined
  }
  // Restrict to decimal digits first: Number() alone accepts hex, exponent and signed forms.
  const parsed = /^\d+$/.test(trimmed) ? Number(trimmed) : Number.NaN
  if (!Number.isSafeInteger(parsed) || parsed <= 0) {
    throw new Error(`Invalid TELEGRAM_PURCHASE_TOPIC_ID value: ${raw}`)
  }
  return parsed
}
/**
 * Normalizes an optional string setting.
 *
 * @param value - Raw value, possibly missing.
 * @returns The value with surrounding whitespace removed, or `undefined` when
 *   it is missing or contains nothing but whitespace.
 */
function parseOptionalValue(value: string | undefined): string | undefined {
  if (value === undefined) {
    return undefined
  }
  const normalized = value.trim()
  if (normalized === '') {
    return undefined
  }
  return normalized
}
/**
 * Builds the bot runtime configuration from environment variables.
 *
 * `DATABASE_URL`, `HOUSEHOLD_ID`, `TELEGRAM_HOUSEHOLD_CHAT_ID` and
 * `TELEGRAM_PURCHASE_TOPIC_ID` are optional. Purchase topic ingestion is
 * flagged as enabled only when all four resolve to a value. Optional fields
 * are assigned on the result only when defined, so absent settings are omitted
 * rather than stored as `undefined`.
 *
 * @param env - Environment to read from; defaults to `process.env`.
 * @returns The resolved runtime configuration.
 * @throws Error when `TELEGRAM_PURCHASE_TOPIC_ID` is set but invalid.
 *   Presumably also when the bot token or webhook secret is missing
 *   (`requireValue` is only partly visible here; confirm).
 */
export function getBotRuntimeConfig(env: NodeJS.ProcessEnv = process.env): BotRuntimeConfig {
return {
const databaseUrl = parseOptionalValue(env.DATABASE_URL)
const householdId = parseOptionalValue(env.HOUSEHOLD_ID)
const telegramHouseholdChatId = parseOptionalValue(env.TELEGRAM_HOUSEHOLD_CHAT_ID)
const telegramPurchaseTopicId = parseOptionalTopicId(env.TELEGRAM_PURCHASE_TOPIC_ID)
// Ingestion needs every one of these settings; a single missing value disables it.
const purchaseTopicIngestionEnabled =
databaseUrl !== undefined &&
householdId !== undefined &&
telegramHouseholdChatId !== undefined &&
telegramPurchaseTopicId !== undefined
const runtime: BotRuntimeConfig = {
port: parsePort(env.PORT),
telegramBotToken: requireValue(env.TELEGRAM_BOT_TOKEN, 'TELEGRAM_BOT_TOKEN'),
telegramWebhookSecret: requireValue(env.TELEGRAM_WEBHOOK_SECRET, 'TELEGRAM_WEBHOOK_SECRET'),
telegramWebhookPath: env.TELEGRAM_WEBHOOK_PATH ?? '/webhook/telegram'
telegramWebhookPath: env.TELEGRAM_WEBHOOK_PATH ?? '/webhook/telegram',
purchaseTopicIngestionEnabled
}
if (databaseUrl !== undefined) {
runtime.databaseUrl = databaseUrl
}
if (householdId !== undefined) {
runtime.householdId = householdId
}
if (telegramHouseholdChatId !== undefined) {
runtime.telegramHouseholdChatId = telegramHouseholdChatId
}
if (telegramPurchaseTopicId !== undefined) {
runtime.telegramPurchaseTopicId = telegramPurchaseTopicId
}
return runtime
}

View File

@@ -2,12 +2,37 @@ import { webhookCallback } from 'grammy'
import { createTelegramBot } from './bot'
import { getBotRuntimeConfig } from './config'
import {
createPurchaseMessageRepository,
registerPurchaseTopicIngestion
} from './purchase-topic-ingestion'
import { createBotWebhookServer } from './server'
// Resolve configuration, then build the bot and its webhook handler at module load.
const runtime = getBotRuntimeConfig()
const bot = createTelegramBot(runtime.telegramBotToken)
const webhookHandler = webhookCallback(bot, 'std/http')
// Stays undefined while ingestion is disabled; otherwise closes the repository's database client.
let closePurchaseRepository: (() => Promise<void>) | undefined
if (runtime.purchaseTopicIngestionEnabled) {
// The non-null assertions below rely on the flag being true only when all four settings are defined.
const purchaseRepositoryClient = createPurchaseMessageRepository(runtime.databaseUrl!)
closePurchaseRepository = purchaseRepositoryClient.close
registerPurchaseTopicIngestion(
bot,
{
householdId: runtime.householdId!,
householdChatId: runtime.telegramHouseholdChatId!,
purchaseTopicId: runtime.telegramPurchaseTopicId!
},
purchaseRepositoryClient.repository
)
} else {
// Missing settings are not fatal: the bot still starts, only ingestion is skipped.
console.warn(
'Purchase topic ingestion is disabled. Set DATABASE_URL, HOUSEHOLD_ID, TELEGRAM_HOUSEHOLD_CHAT_ID, and TELEGRAM_PURCHASE_TOPIC_ID to enable.'
)
}
const server = createBotWebhookServer({
webhookPath: runtime.telegramWebhookPath,
webhookSecret: runtime.telegramWebhookSecret,
@@ -23,6 +48,10 @@ if (import.meta.main) {
console.log(
`@household/bot webhook server started on :${runtime.port} path=${runtime.telegramWebhookPath}`
)
// On SIGTERM, close the purchase repository if one was created.
process.on('SIGTERM', () => {
  // A failing close is logged rather than left as an unhandled promise rejection.
  void closePurchaseRepository?.().catch((error: unknown) => {
    console.error('Failed to close purchase repository', error)
  })
})
}
export { server }

View File

@@ -0,0 +1,53 @@
import { describe, expect, test } from 'bun:test'
import {
extractPurchaseTopicCandidate,
type PurchaseTopicCandidate
} from './purchase-topic-ingestion'
// Ingestion settings shared by the test cases; the default candidate is built to match them.
const config = {
householdId: '11111111-1111-4111-8111-111111111111',
householdChatId: '-10012345',
purchaseTopicId: 777
}
/**
 * Builds a candidate whose chat and thread match `config`, with optional
 * per-test field overrides applied on top of the defaults.
 */
function candidate(overrides: Partial<PurchaseTopicCandidate> = {}): PurchaseTopicCandidate {
  const defaults: PurchaseTopicCandidate = {
    updateId: 1,
    chatId: '-10012345',
    messageId: '10',
    threadId: '777',
    senderTelegramUserId: '10002',
    rawText: 'Bought toilet paper 30 gel',
    messageSentAt: new Date('2026-03-05T00:00:00.000Z')
  }
  return { ...defaults, ...overrides }
}
// Covers the filter: a matching message is accepted; wrong chat, wrong topic and blank text are skipped.
describe('extractPurchaseTopicCandidate', () => {
  test('returns record when message belongs to configured topic', () => {
    const accepted = extractPurchaseTopicCandidate(candidate(), config)

    expect(accepted).not.toBeNull()
    expect(accepted?.householdId).toBe(config.householdId)
    expect(accepted?.rawText).toBe('Bought toilet paper 30 gel')
  })

  test('skips message from other chat', () => {
    const fromOtherChat = candidate({ chatId: '-10099999' })

    expect(extractPurchaseTopicCandidate(fromOtherChat, config)).toBeNull()
  })

  test('skips message from other topic', () => {
    const fromOtherTopic = candidate({ threadId: '778' })

    expect(extractPurchaseTopicCandidate(fromOtherTopic, config)).toBeNull()
  })

  test('skips blank text after trim', () => {
    const blankMessage = candidate({ rawText: ' ' })

    expect(extractPurchaseTopicCandidate(blankMessage, config)).toBeNull()
  })
})

View File

@@ -0,0 +1,179 @@
import { and, eq } from 'drizzle-orm'
import type { Bot, Context } from 'grammy'
import { createDbClient, schema } from '@household/db'
/** Settings that decide which Telegram messages are ingested and which household owns them. */
export interface PurchaseTopicIngestionConfig {
/** Household id attached to every accepted record. */
householdId: string
/** Telegram chat id in string form; a candidate's `chatId` must equal it exactly. */
householdChatId: string
/** Telegram topic id; compared to a candidate's `threadId` after conversion to a string. */
purchaseTopicId: number
}
/**
 * A text message taken from a Telegram update, before it has been checked
 * against the configured chat and topic. Telegram ids other than `updateId`
 * are carried as strings.
 */
export interface PurchaseTopicCandidate {
/** `update_id` of the Telegram update that delivered the message. */
updateId: number
chatId: string
messageId: string
/** Id of the topic (message thread) the message was posted in. */
threadId: string
senderTelegramUserId: string
/** Sender's non-blank first and last name joined by a space; omitted when both are empty. */
senderDisplayName?: string
/** Message text as received; trimming happens when the candidate is accepted. */
rawText: string
/** Telegram's message date (seconds since the epoch) converted to a `Date`. */
messageSentAt: Date
}
/** A candidate accepted for the configured chat and topic, tagged with its household. */
export interface PurchaseTopicRecord extends PurchaseTopicCandidate {
householdId: string
}
/** Persistence port for accepted purchase topic messages. */
export interface PurchaseMessageIngestionRepository {
/**
 * Stores the record.
 *
 * @returns `'created'` when a new row was stored, `'duplicate'` when the message was already stored.
 */
save(record: PurchaseTopicRecord): Promise<'created' | 'duplicate'>
}
/**
 * Decides whether a candidate message belongs to the configured purchase topic.
 *
 * @param value - Candidate message extracted from a Telegram update.
 * @param config - Chat, topic and household the ingestion is bound to.
 * @returns The candidate with trimmed text and the configured household id, or
 *   `null` when the chat or topic does not match or the text is blank.
 */
export function extractPurchaseTopicCandidate(
  value: PurchaseTopicCandidate,
  config: PurchaseTopicIngestionConfig
): PurchaseTopicRecord | null {
  const inConfiguredTopic =
    value.chatId === config.householdChatId &&
    value.threadId === String(config.purchaseTopicId)
  if (!inConfiguredTopic) {
    return null
  }

  const text = value.rawText.trim()
  return text.length === 0
    ? null
    : { ...value, rawText: text, householdId: config.householdId }
}
/**
 * Creates a database-backed repository for purchase topic messages.
 *
 * @param databaseUrl - Connection string handed to `createDbClient`.
 * @returns The repository plus a `close` function that ends the underlying
 *   query client.
 */
export function createPurchaseMessageRepository(databaseUrl: string): {
  repository: PurchaseMessageIngestionRepository
  close: () => Promise<void>
} {
  const client = createDbClient(databaseUrl, { max: 5, prepare: false })
  const { members, purchaseMessages } = schema

  const save: PurchaseMessageIngestionRepository['save'] = async (record) => {
    // Link the message to a household member when the sender's Telegram id
    // matches one; otherwise the row is stored with a null member reference.
    const senderIsHouseholdMember = and(
      eq(members.householdId, record.householdId),
      eq(members.telegramUserId, record.senderTelegramUserId)
    )
    const [member] = await client.db
      .select({ id: members.id })
      .from(members)
      .where(senderIsHouseholdMember)
      .limit(1)

    // A conflict on (household, chat, message) means this message is already
    // stored; the insert then returns no rows.
    const insertedRows = await client.db
      .insert(purchaseMessages)
      .values({
        householdId: record.householdId,
        senderMemberId: member?.id ?? null,
        senderTelegramUserId: record.senderTelegramUserId,
        senderDisplayName: record.senderDisplayName,
        rawText: record.rawText,
        telegramChatId: record.chatId,
        telegramMessageId: record.messageId,
        telegramThreadId: record.threadId,
        telegramUpdateId: String(record.updateId),
        messageSentAt: record.messageSentAt,
        processingStatus: 'pending'
      })
      .onConflictDoNothing({
        target: [
          purchaseMessages.householdId,
          purchaseMessages.telegramChatId,
          purchaseMessages.telegramMessageId
        ]
      })
      .returning({ id: purchaseMessages.id })

    return insertedRows.length === 0 ? 'duplicate' : 'created'
  }

  return {
    repository: { save },
    close: async () => {
      await client.queryClient.end({ timeout: 5 })
    }
  }
}
/**
 * Converts an incoming update into a purchase topic candidate.
 *
 * @param ctx - Bot context for the update being handled.
 * @returns The candidate, or `null` when the update has no text message, the
 *   message was not posted in a topic, or the sender is unknown.
 */
function toCandidateFromContext(ctx: Context): PurchaseTopicCandidate | null {
  const message = ctx.message
  if (!message || !('text' in message)) {
    return null
  }

  const threadId = message.message_thread_id
  if (!message.is_topic_message || threadId === undefined) {
    return null
  }

  const sender = ctx.from
  const senderTelegramUserId = sender?.id?.toString()
  if (!senderTelegramUserId) {
    return null
  }

  // Keep only name parts that contain something other than whitespace.
  const nameParts: string[] = []
  for (const part of [sender?.first_name, sender?.last_name]) {
    if (part && part.trim().length > 0) {
      nameParts.push(part)
    }
  }
  const displayName = nameParts.join(' ')

  return {
    updateId: ctx.update.update_id,
    chatId: message.chat.id.toString(),
    messageId: message.message_id.toString(),
    threadId: threadId.toString(),
    senderTelegramUserId,
    rawText: message.text,
    // The message date is in seconds; Date expects milliseconds.
    messageSentAt: new Date(message.date * 1000),
    // The display name is left off entirely when there is nothing to show.
    ...(displayName.length > 0 ? { senderDisplayName: displayName } : {})
  }
}
/**
 * Registers a `message:text` handler that stores messages posted in the
 * configured purchase topic.
 *
 * Messages that are not ingestion candidates are ignored. A failed save is
 * logged and not rethrown.
 *
 * @param bot - Bot to attach the handler to.
 * @param config - Chat, topic and household the ingestion is bound to.
 * @param repository - Storage used to persist accepted messages.
 */
export function registerPurchaseTopicIngestion(
  bot: Bot,
  config: PurchaseTopicIngestionConfig,
  repository: PurchaseMessageIngestionRepository
): void {
  bot.on('message:text', async (ctx) => {
    const candidate = toCandidateFromContext(ctx)
    const record = candidate ? extractPurchaseTopicCandidate(candidate, config) : null
    if (!record) {
      return
    }

    try {
      // Duplicates are expected and stay silent; only new rows are logged.
      if ((await repository.save(record)) !== 'created') {
        return
      }
      console.log(
        `purchase topic message ingested chat=${record.chatId} thread=${record.threadId} message=${record.messageId}`
      )
    } catch (error) {
      console.error('Failed to ingest purchase topic message', error)
    }
  })
}

View File

@@ -15,6 +15,8 @@
"apps/bot": {
"name": "@household/bot",
"dependencies": {
"@household/db": "workspace:*",
"drizzle-orm": "^0.44.7",
"grammy": "1.41.1",
},
},
@@ -51,7 +53,6 @@
"packages/db": {
"name": "@household/db",
"dependencies": {
"@household/config": "workspace:*",
"drizzle-orm": "^0.44.5",
"postgres": "^3.4.7",
},

View File

@@ -0,0 +1,73 @@
# HOUSEBOT-021: Purchase Topic Ingestion
## Summary
Ingest messages from the configured Telegram household purchase topic (`Общие покупки`) and persist the raw message text and its metadata idempotently.
## Goals
- Process only configured chat/topic.
- Persist sender + raw message + Telegram metadata.
- Make ingestion idempotent for duplicate Telegram deliveries.
## Non-goals
- Purchase amount parsing.
- Settlement impact calculations.
## Scope
- In: bot middleware for topic filtering, persistence repository, DB schema for raw inbox records.
- Out: parser pipeline and command responses.
## Interfaces and Contracts
- Telegram webhook receives update.
- Bot middleware extracts candidate from `message:text` updates.
- DB write target: `purchase_messages`.
## Domain Rules
- Only configured `TELEGRAM_HOUSEHOLD_CHAT_ID` + `TELEGRAM_PURCHASE_TOPIC_ID` are accepted.
- Empty/blank messages are ignored.
- Duplicate message IDs are ignored via unique constraints.
## Data Model Changes
- Add `purchase_messages` with:
- sender metadata
- raw text
- Telegram IDs (chat/message/thread/update)
- processing status (`pending` default)
## Security and Privacy
- No PII is stored beyond the Telegram sender's user ID and display name, which are needed for household accounting.
- Webhook auth remains enforced by secret token header.
## Observability
- Log successful ingestion with chat/thread/message IDs.
- Log ingestion failures without crashing bot process.
## Edge Cases and Failure Modes
- Missing ingestion env config -> ingestion disabled.
- Unknown sender member -> stored with null member mapping.
- Duplicate webhook delivery -> ignored as duplicate.
## Test Plan
- Unit tests for topic filter extraction logic.
- Existing endpoint tests continue to pass.
## Acceptance Criteria
- [ ] Only configured topic messages are persisted.
- [ ] Sender + message metadata stored in DB.
- [ ] Duplicate deliveries are idempotent.
## Rollout Plan
- Deploy with ingestion enabled in dev group first.
- Validate rows in `purchase_messages` before enabling parser flow.

View File

@@ -0,0 +1,22 @@
-- Raw inbox of Telegram purchase messages: sender, raw text, Telegram ids and a processing status.
CREATE TABLE "purchase_messages" (
"id" uuid PRIMARY KEY DEFAULT gen_random_uuid() NOT NULL,
"household_id" uuid NOT NULL,
"sender_member_id" uuid,
"sender_telegram_user_id" text NOT NULL,
"sender_display_name" text,
"raw_text" text NOT NULL,
"telegram_chat_id" text NOT NULL,
"telegram_message_id" text NOT NULL,
"telegram_thread_id" text NOT NULL,
"telegram_update_id" text NOT NULL,
"message_sent_at" timestamp with time zone,
"processing_status" text DEFAULT 'pending' NOT NULL,
"ingested_at" timestamp with time zone DEFAULT now() NOT NULL
);
--> statement-breakpoint
-- Rows are removed with their household; deleting a member only clears the sender link.
ALTER TABLE "purchase_messages" ADD CONSTRAINT "purchase_messages_household_id_households_id_fk" FOREIGN KEY ("household_id") REFERENCES "public"."households"("id") ON DELETE cascade ON UPDATE no action;--> statement-breakpoint
ALTER TABLE "purchase_messages" ADD CONSTRAINT "purchase_messages_sender_member_id_members_id_fk" FOREIGN KEY ("sender_member_id") REFERENCES "public"."members"("id") ON DELETE set null ON UPDATE no action;--> statement-breakpoint
CREATE INDEX "purchase_messages_household_thread_idx" ON "purchase_messages" USING btree ("household_id","telegram_thread_id");--> statement-breakpoint
CREATE INDEX "purchase_messages_sender_idx" ON "purchase_messages" USING btree ("sender_telegram_user_id");--> statement-breakpoint
-- Unique per household on (chat, message) and on update id, so a message or update is stored at most once.
CREATE UNIQUE INDEX "purchase_messages_household_tg_message_unique" ON "purchase_messages" USING btree ("household_id","telegram_chat_id","telegram_message_id");--> statement-breakpoint
CREATE UNIQUE INDEX "purchase_messages_household_tg_update_unique" ON "purchase_messages" USING btree ("household_id","telegram_update_id");

File diff suppressed because it is too large Load Diff

View File

@@ -15,6 +15,13 @@
"when": 1772669239939,
"tag": "0001_spicy_sersi",
"breakpoints": true
},
{
"idx": 2,
"version": "7",
"when": 1772670548136,
"tag": "0002_tough_sandman",
"breakpoints": true
}
]
}

View File

@@ -2,6 +2,9 @@
"name": "@household/db",
"private": true,
"type": "module",
"exports": {
".": "./src/index.ts"
},
"scripts": {
"build": "bun build src/index.ts --outdir dist --target bun",
"typecheck": "tsgo --project tsconfig.json --noEmit",
@@ -10,7 +13,6 @@
"seed": "bun run src/seed.ts"
},
"dependencies": {
"@household/config": "workspace:*",
"drizzle-orm": "^0.44.5",
"postgres": "^3.4.7"
}

View File

@@ -1,12 +1,21 @@
import postgres from 'postgres'
import { drizzle } from 'drizzle-orm/postgres-js'
import { env } from '@household/config'
/** Optional tuning for `createDbClient`; both values are forwarded to the `postgres` client. */
export interface DbClientOptions {
/** Forwarded as the `postgres` `max` option; 5 is used when omitted. */
max?: number
/** Forwarded as the `postgres` `prepare` option; `false` is used when omitted. */
prepare?: boolean
}
const queryClient = postgres(env.DATABASE_URL, {
prepare: false,
max: 5
/**
 * Creates a `postgres` query client for the given URL and wraps it with drizzle.
 *
 * @param databaseUrl - Connection string passed to `postgres`.
 * @param options - Optional overrides; `max` defaults to 5 and `prepare` to `false`.
 * @returns The drizzle `db` together with the underlying `queryClient`, which
 *   callers in this change use to end the connection.
 */
export function createDbClient(databaseUrl: string, options: DbClientOptions = {}) {
const queryClient = postgres(databaseUrl, {
max: options.max ?? 5,
prepare: options.prepare ?? false
})
export const db = drizzle(queryClient)
export { queryClient }
const db = drizzle(queryClient)
return {
db,
queryClient
}
}

View File

@@ -1,2 +1,2 @@
export { db, queryClient } from './client'
export { createDbClient } from './client'
export * as schema from './schema'

View File

@@ -180,6 +180,45 @@ export const purchaseEntries = pgTable(
})
)
/**
 * `purchase_messages`: raw Telegram messages taken from the purchase topic.
 *
 * Each row keeps the sender, the raw text, the Telegram chat/message/thread/
 * update ids and a processing status that defaults to `'pending'`.
 */
export const purchaseMessages = pgTable(
'purchase_messages',
{
id: uuid('id').defaultRandom().primaryKey(),
householdId: uuid('household_id')
.notNull()
.references(() => households.id, { onDelete: 'cascade' }),
// Nullable: set to null when the referenced member is deleted.
senderMemberId: uuid('sender_member_id').references(() => members.id, {
onDelete: 'set null'
}),
senderTelegramUserId: text('sender_telegram_user_id').notNull(),
senderDisplayName: text('sender_display_name'),
rawText: text('raw_text').notNull(),
// Telegram ids are stored as text.
telegramChatId: text('telegram_chat_id').notNull(),
telegramMessageId: text('telegram_message_id').notNull(),
telegramThreadId: text('telegram_thread_id').notNull(),
telegramUpdateId: text('telegram_update_id').notNull(),
messageSentAt: timestamp('message_sent_at', { withTimezone: true }),
processingStatus: text('processing_status').default('pending').notNull(),
ingestedAt: timestamp('ingested_at', { withTimezone: true }).defaultNow().notNull()
},
(table) => ({
householdThreadIdx: index('purchase_messages_household_thread_idx').on(
table.householdId,
table.telegramThreadId
),
senderIdx: index('purchase_messages_sender_idx').on(table.senderTelegramUserId),
// A message is unique per household by (chat, message).
tgMessageUnique: uniqueIndex('purchase_messages_household_tg_message_unique').on(
table.householdId,
table.telegramChatId,
table.telegramMessageId
),
// An update id is unique per household.
tgUpdateUnique: uniqueIndex('purchase_messages_household_tg_update_unique').on(
table.householdId,
table.telegramUpdateId
)
})
)
export const processedBotMessages = pgTable(
'processed_bot_messages',
{
@@ -261,4 +300,5 @@ export type Member = typeof members.$inferSelect
export type BillingCycle = typeof billingCycles.$inferSelect
export type UtilityBill = typeof utilityBills.$inferSelect
export type PurchaseEntry = typeof purchaseEntries.$inferSelect
export type PurchaseMessage = typeof purchaseMessages.$inferSelect
export type Settlement = typeof settlements.$inferSelect

View File

@@ -1,7 +1,5 @@
import { and, eq } from 'drizzle-orm'
import { drizzle } from 'drizzle-orm/postgres-js'
import postgres from 'postgres'
import { createDbClient } from './client'
import {
billingCycles,
households,
@@ -20,13 +18,11 @@ if (!databaseUrl) {
throw new Error('DATABASE_URL is required for db seed')
}
const queryClient = postgres(databaseUrl, {
prepare: false,
max: 2
const { db, queryClient } = createDbClient(databaseUrl, {
max: 2,
prepare: false
})
const db = drizzle(queryClient)
const FIXTURE_IDS = {
household: '11111111-1111-4111-8111-111111111111',
cycle: '22222222-2222-4222-8222-222222222222',