feat(WHE-23): add hybrid purchase parser with persisted parse metadata

This commit is contained in:
2026-03-05 04:43:57 +04:00
parent 3b1b6468db
commit ebb6ce4ce6
14 changed files with 1881 additions and 7 deletions

View File

@@ -10,6 +10,7 @@
"lint": "oxlint \"src\""
},
"dependencies": {
"@household/application": "workspace:*",
"@household/db": "workspace:*",
"drizzle-orm": "^0.44.7",
"grammy": "1.41.1"

View File

@@ -8,6 +8,8 @@ export interface BotRuntimeConfig {
telegramHouseholdChatId?: string
telegramPurchaseTopicId?: number
purchaseTopicIngestionEnabled: boolean
openaiApiKey?: string
parserModel: string
}
function parsePort(raw: string | undefined): number {
@@ -66,7 +68,8 @@ export function getBotRuntimeConfig(env: NodeJS.ProcessEnv = process.env): BotRu
telegramBotToken: requireValue(env.TELEGRAM_BOT_TOKEN, 'TELEGRAM_BOT_TOKEN'),
telegramWebhookSecret: requireValue(env.TELEGRAM_WEBHOOK_SECRET, 'TELEGRAM_WEBHOOK_SECRET'),
telegramWebhookPath: env.TELEGRAM_WEBHOOK_PATH ?? '/webhook/telegram',
purchaseTopicIngestionEnabled
purchaseTopicIngestionEnabled,
parserModel: env.PARSER_MODEL?.trim() || 'gpt-4.1-mini'
}
if (databaseUrl !== undefined) {
@@ -81,6 +84,10 @@ export function getBotRuntimeConfig(env: NodeJS.ProcessEnv = process.env): BotRu
if (telegramPurchaseTopicId !== undefined) {
runtime.telegramPurchaseTopicId = telegramPurchaseTopicId
}
const openaiApiKey = parseOptionalValue(env.OPENAI_API_KEY)
if (openaiApiKey !== undefined) {
runtime.openaiApiKey = openaiApiKey
}
return runtime
}

View File

@@ -2,6 +2,7 @@ import { webhookCallback } from 'grammy'
import { createTelegramBot } from './bot'
import { getBotRuntimeConfig } from './config'
import { createOpenAiParserFallback } from './openai-parser-fallback'
import {
createPurchaseMessageRepository,
registerPurchaseTopicIngestion
@@ -17,6 +18,7 @@ let closePurchaseRepository: (() => Promise<void>) | undefined
if (runtime.purchaseTopicIngestionEnabled) {
const purchaseRepositoryClient = createPurchaseMessageRepository(runtime.databaseUrl!)
closePurchaseRepository = purchaseRepositoryClient.close
const llmFallback = createOpenAiParserFallback(runtime.openaiApiKey, runtime.parserModel)
registerPurchaseTopicIngestion(
bot,
@@ -25,7 +27,12 @@ if (runtime.purchaseTopicIngestionEnabled) {
householdChatId: runtime.telegramHouseholdChatId!,
purchaseTopicId: runtime.telegramPurchaseTopicId!
},
purchaseRepositoryClient.repository
purchaseRepositoryClient.repository,
llmFallback
? {
llmFallback
}
: {}
)
} else {
console.warn(

View File

@@ -0,0 +1,119 @@
import type { PurchaseParserLlmFallback } from '@household/application'
interface OpenAiStructuredResult {
amountMinor: string
currency: 'GEL' | 'USD'
itemDescription: string
confidence: number
needsReview: boolean
}
function asBigInt(value: string): bigint | null {
if (!/^[0-9]+$/.test(value)) {
return null
}
const parsed = BigInt(value)
return parsed > 0n ? parsed : null
}
export function createOpenAiParserFallback(
apiKey: string | undefined,
model: string
): PurchaseParserLlmFallback | undefined {
if (!apiKey) {
return undefined
}
return async (rawText: string) => {
const response = await fetch('https://api.openai.com/v1/responses', {
method: 'POST',
headers: {
authorization: `Bearer ${apiKey}`,
'content-type': 'application/json'
},
body: JSON.stringify({
model,
input: [
{
role: 'system',
content:
'Extract a shared household purchase from text. Return only valid JSON with amountMinor, currency, itemDescription, confidence, needsReview.'
},
{
role: 'user',
content: rawText
}
],
text: {
format: {
type: 'json_schema',
name: 'purchase_parse',
schema: {
type: 'object',
additionalProperties: false,
properties: {
amountMinor: {
type: 'string'
},
currency: {
type: 'string',
enum: ['GEL', 'USD']
},
itemDescription: {
type: 'string'
},
confidence: {
type: 'number',
minimum: 0,
maximum: 100
},
needsReview: {
type: 'boolean'
}
},
required: ['amountMinor', 'currency', 'itemDescription', 'confidence', 'needsReview']
}
}
}
})
})
if (!response.ok) {
return null
}
const payload = (await response.json()) as {
output_text?: string
}
if (!payload.output_text) {
return null
}
let parsedJson: OpenAiStructuredResult
try {
parsedJson = JSON.parse(payload.output_text) as OpenAiStructuredResult
} catch {
return null
}
const amountMinor = asBigInt(parsedJson.amountMinor)
if (!amountMinor) {
return null
}
if (parsedJson.itemDescription.trim().length === 0) {
return null
}
return {
amountMinor,
currency: parsedJson.currency,
itemDescription: parsedJson.itemDescription,
confidence: Math.max(0, Math.min(100, Math.round(parsedJson.confidence))),
parserMode: 'llm',
needsReview: parsedJson.needsReview
}
}
}

View File

@@ -1,3 +1,4 @@
import { parsePurchaseMessage, type PurchaseParserLlmFallback } from '@household/application'
import { and, eq } from 'drizzle-orm'
import type { Bot, Context } from 'grammy'
@@ -25,7 +26,10 @@ export interface PurchaseTopicRecord extends PurchaseTopicCandidate {
}
export interface PurchaseMessageIngestionRepository {
save(record: PurchaseTopicRecord): Promise<'created' | 'duplicate'>
save(
record: PurchaseTopicRecord,
llmFallback?: PurchaseParserLlmFallback
): Promise<'created' | 'duplicate'>
}
export function extractPurchaseTopicCandidate(
@@ -52,6 +56,10 @@ export function extractPurchaseTopicCandidate(
}
}
function needsReviewAsInt(value: boolean): number {
return value ? 1 : 0
}
export function createPurchaseMessageRepository(databaseUrl: string): {
repository: PurchaseMessageIngestionRepository
close: () => Promise<void>
@@ -62,7 +70,7 @@ export function createPurchaseMessageRepository(databaseUrl: string): {
})
const repository: PurchaseMessageIngestionRepository = {
async save(record) {
async save(record, llmFallback) {
const matchedMember = await db
.select({ id: schema.members.id })
.from(schema.members)
@@ -75,6 +83,30 @@ export function createPurchaseMessageRepository(databaseUrl: string): {
.limit(1)
const senderMemberId = matchedMember[0]?.id ?? null
let parserError: string | null = null
const parsed = await parsePurchaseMessage(
{
rawText: record.rawText
},
llmFallback
? {
llmFallback
}
: {}
).catch((error) => {
parserError = error instanceof Error ? error.message : 'Unknown parser error'
return null
})
const processingStatus =
parserError !== null
? 'parse_failed'
: parsed === null
? 'needs_review'
: parsed.needsReview
? 'needs_review'
: 'parsed'
const inserted = await db
.insert(schema.purchaseMessages)
@@ -89,7 +121,14 @@ export function createPurchaseMessageRepository(databaseUrl: string): {
telegramThreadId: record.threadId,
telegramUpdateId: String(record.updateId),
messageSentAt: record.messageSentAt,
processingStatus: 'pending'
parsedAmountMinor: parsed?.amountMinor,
parsedCurrency: parsed?.currency,
parsedItemDescription: parsed?.itemDescription,
parserMode: parsed?.parserMode,
parserConfidence: parsed?.confidence,
needsReview: needsReviewAsInt(parsed?.needsReview ?? true),
parserError,
processingStatus
})
.onConflictDoNothing({
target: [
@@ -151,7 +190,10 @@ function toCandidateFromContext(ctx: Context): PurchaseTopicCandidate | null {
export function registerPurchaseTopicIngestion(
bot: Bot,
config: PurchaseTopicIngestionConfig,
repository: PurchaseMessageIngestionRepository
repository: PurchaseMessageIngestionRepository,
options: {
llmFallback?: PurchaseParserLlmFallback
} = {}
): void {
bot.on('message:text', async (ctx) => {
const candidate = toCandidateFromContext(ctx)
@@ -165,7 +207,7 @@ export function registerPurchaseTopicIngestion(
}
try {
const status = await repository.save(record)
const status = await repository.save(record, options.llmFallback)
if (status === 'created') {
console.log(