mirror of
https://github.com/whekin/household-bot.git
synced 2026-03-31 11:54:03 +00:00
feat(WHE-23): add hybrid purchase parser with persisted parse metadata
This commit is contained in:
@@ -10,6 +10,7 @@
|
||||
"lint": "oxlint \"src\""
|
||||
},
|
||||
"dependencies": {
|
||||
"@household/application": "workspace:*",
|
||||
"@household/db": "workspace:*",
|
||||
"drizzle-orm": "^0.44.7",
|
||||
"grammy": "1.41.1"
|
||||
|
||||
@@ -8,6 +8,8 @@ export interface BotRuntimeConfig {
|
||||
telegramHouseholdChatId?: string
|
||||
telegramPurchaseTopicId?: number
|
||||
purchaseTopicIngestionEnabled: boolean
|
||||
openaiApiKey?: string
|
||||
parserModel: string
|
||||
}
|
||||
|
||||
function parsePort(raw: string | undefined): number {
|
||||
@@ -66,7 +68,8 @@ export function getBotRuntimeConfig(env: NodeJS.ProcessEnv = process.env): BotRu
|
||||
telegramBotToken: requireValue(env.TELEGRAM_BOT_TOKEN, 'TELEGRAM_BOT_TOKEN'),
|
||||
telegramWebhookSecret: requireValue(env.TELEGRAM_WEBHOOK_SECRET, 'TELEGRAM_WEBHOOK_SECRET'),
|
||||
telegramWebhookPath: env.TELEGRAM_WEBHOOK_PATH ?? '/webhook/telegram',
|
||||
purchaseTopicIngestionEnabled
|
||||
purchaseTopicIngestionEnabled,
|
||||
parserModel: env.PARSER_MODEL?.trim() || 'gpt-4.1-mini'
|
||||
}
|
||||
|
||||
if (databaseUrl !== undefined) {
|
||||
@@ -81,6 +84,10 @@ export function getBotRuntimeConfig(env: NodeJS.ProcessEnv = process.env): BotRu
|
||||
if (telegramPurchaseTopicId !== undefined) {
|
||||
runtime.telegramPurchaseTopicId = telegramPurchaseTopicId
|
||||
}
|
||||
const openaiApiKey = parseOptionalValue(env.OPENAI_API_KEY)
|
||||
if (openaiApiKey !== undefined) {
|
||||
runtime.openaiApiKey = openaiApiKey
|
||||
}
|
||||
|
||||
return runtime
|
||||
}
|
||||
|
||||
@@ -2,6 +2,7 @@ import { webhookCallback } from 'grammy'
|
||||
|
||||
import { createTelegramBot } from './bot'
|
||||
import { getBotRuntimeConfig } from './config'
|
||||
import { createOpenAiParserFallback } from './openai-parser-fallback'
|
||||
import {
|
||||
createPurchaseMessageRepository,
|
||||
registerPurchaseTopicIngestion
|
||||
@@ -17,6 +18,7 @@ let closePurchaseRepository: (() => Promise<void>) | undefined
|
||||
if (runtime.purchaseTopicIngestionEnabled) {
|
||||
const purchaseRepositoryClient = createPurchaseMessageRepository(runtime.databaseUrl!)
|
||||
closePurchaseRepository = purchaseRepositoryClient.close
|
||||
const llmFallback = createOpenAiParserFallback(runtime.openaiApiKey, runtime.parserModel)
|
||||
|
||||
registerPurchaseTopicIngestion(
|
||||
bot,
|
||||
@@ -25,7 +27,12 @@ if (runtime.purchaseTopicIngestionEnabled) {
|
||||
householdChatId: runtime.telegramHouseholdChatId!,
|
||||
purchaseTopicId: runtime.telegramPurchaseTopicId!
|
||||
},
|
||||
purchaseRepositoryClient.repository
|
||||
purchaseRepositoryClient.repository,
|
||||
llmFallback
|
||||
? {
|
||||
llmFallback
|
||||
}
|
||||
: {}
|
||||
)
|
||||
} else {
|
||||
console.warn(
|
||||
|
||||
119
apps/bot/src/openai-parser-fallback.ts
Normal file
119
apps/bot/src/openai-parser-fallback.ts
Normal file
@@ -0,0 +1,119 @@
|
||||
import type { PurchaseParserLlmFallback } from '@household/application'
|
||||
|
||||
interface OpenAiStructuredResult {
|
||||
amountMinor: string
|
||||
currency: 'GEL' | 'USD'
|
||||
itemDescription: string
|
||||
confidence: number
|
||||
needsReview: boolean
|
||||
}
|
||||
|
||||
function asBigInt(value: string): bigint | null {
|
||||
if (!/^[0-9]+$/.test(value)) {
|
||||
return null
|
||||
}
|
||||
|
||||
const parsed = BigInt(value)
|
||||
return parsed > 0n ? parsed : null
|
||||
}
|
||||
|
||||
export function createOpenAiParserFallback(
|
||||
apiKey: string | undefined,
|
||||
model: string
|
||||
): PurchaseParserLlmFallback | undefined {
|
||||
if (!apiKey) {
|
||||
return undefined
|
||||
}
|
||||
|
||||
return async (rawText: string) => {
|
||||
const response = await fetch('https://api.openai.com/v1/responses', {
|
||||
method: 'POST',
|
||||
headers: {
|
||||
authorization: `Bearer ${apiKey}`,
|
||||
'content-type': 'application/json'
|
||||
},
|
||||
body: JSON.stringify({
|
||||
model,
|
||||
input: [
|
||||
{
|
||||
role: 'system',
|
||||
content:
|
||||
'Extract a shared household purchase from text. Return only valid JSON with amountMinor, currency, itemDescription, confidence, needsReview.'
|
||||
},
|
||||
{
|
||||
role: 'user',
|
||||
content: rawText
|
||||
}
|
||||
],
|
||||
text: {
|
||||
format: {
|
||||
type: 'json_schema',
|
||||
name: 'purchase_parse',
|
||||
schema: {
|
||||
type: 'object',
|
||||
additionalProperties: false,
|
||||
properties: {
|
||||
amountMinor: {
|
||||
type: 'string'
|
||||
},
|
||||
currency: {
|
||||
type: 'string',
|
||||
enum: ['GEL', 'USD']
|
||||
},
|
||||
itemDescription: {
|
||||
type: 'string'
|
||||
},
|
||||
confidence: {
|
||||
type: 'number',
|
||||
minimum: 0,
|
||||
maximum: 100
|
||||
},
|
||||
needsReview: {
|
||||
type: 'boolean'
|
||||
}
|
||||
},
|
||||
required: ['amountMinor', 'currency', 'itemDescription', 'confidence', 'needsReview']
|
||||
}
|
||||
}
|
||||
}
|
||||
})
|
||||
})
|
||||
|
||||
if (!response.ok) {
|
||||
return null
|
||||
}
|
||||
|
||||
const payload = (await response.json()) as {
|
||||
output_text?: string
|
||||
}
|
||||
|
||||
if (!payload.output_text) {
|
||||
return null
|
||||
}
|
||||
|
||||
let parsedJson: OpenAiStructuredResult
|
||||
try {
|
||||
parsedJson = JSON.parse(payload.output_text) as OpenAiStructuredResult
|
||||
} catch {
|
||||
return null
|
||||
}
|
||||
|
||||
const amountMinor = asBigInt(parsedJson.amountMinor)
|
||||
if (!amountMinor) {
|
||||
return null
|
||||
}
|
||||
|
||||
if (parsedJson.itemDescription.trim().length === 0) {
|
||||
return null
|
||||
}
|
||||
|
||||
return {
|
||||
amountMinor,
|
||||
currency: parsedJson.currency,
|
||||
itemDescription: parsedJson.itemDescription,
|
||||
confidence: Math.max(0, Math.min(100, Math.round(parsedJson.confidence))),
|
||||
parserMode: 'llm',
|
||||
needsReview: parsedJson.needsReview
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,3 +1,4 @@
|
||||
import { parsePurchaseMessage, type PurchaseParserLlmFallback } from '@household/application'
|
||||
import { and, eq } from 'drizzle-orm'
|
||||
import type { Bot, Context } from 'grammy'
|
||||
|
||||
@@ -25,7 +26,10 @@ export interface PurchaseTopicRecord extends PurchaseTopicCandidate {
|
||||
}
|
||||
|
||||
export interface PurchaseMessageIngestionRepository {
|
||||
save(record: PurchaseTopicRecord): Promise<'created' | 'duplicate'>
|
||||
save(
|
||||
record: PurchaseTopicRecord,
|
||||
llmFallback?: PurchaseParserLlmFallback
|
||||
): Promise<'created' | 'duplicate'>
|
||||
}
|
||||
|
||||
export function extractPurchaseTopicCandidate(
|
||||
@@ -52,6 +56,10 @@ export function extractPurchaseTopicCandidate(
|
||||
}
|
||||
}
|
||||
|
||||
function needsReviewAsInt(value: boolean): number {
|
||||
return value ? 1 : 0
|
||||
}
|
||||
|
||||
export function createPurchaseMessageRepository(databaseUrl: string): {
|
||||
repository: PurchaseMessageIngestionRepository
|
||||
close: () => Promise<void>
|
||||
@@ -62,7 +70,7 @@ export function createPurchaseMessageRepository(databaseUrl: string): {
|
||||
})
|
||||
|
||||
const repository: PurchaseMessageIngestionRepository = {
|
||||
async save(record) {
|
||||
async save(record, llmFallback) {
|
||||
const matchedMember = await db
|
||||
.select({ id: schema.members.id })
|
||||
.from(schema.members)
|
||||
@@ -75,6 +83,30 @@ export function createPurchaseMessageRepository(databaseUrl: string): {
|
||||
.limit(1)
|
||||
|
||||
const senderMemberId = matchedMember[0]?.id ?? null
|
||||
let parserError: string | null = null
|
||||
|
||||
const parsed = await parsePurchaseMessage(
|
||||
{
|
||||
rawText: record.rawText
|
||||
},
|
||||
llmFallback
|
||||
? {
|
||||
llmFallback
|
||||
}
|
||||
: {}
|
||||
).catch((error) => {
|
||||
parserError = error instanceof Error ? error.message : 'Unknown parser error'
|
||||
return null
|
||||
})
|
||||
|
||||
const processingStatus =
|
||||
parserError !== null
|
||||
? 'parse_failed'
|
||||
: parsed === null
|
||||
? 'needs_review'
|
||||
: parsed.needsReview
|
||||
? 'needs_review'
|
||||
: 'parsed'
|
||||
|
||||
const inserted = await db
|
||||
.insert(schema.purchaseMessages)
|
||||
@@ -89,7 +121,14 @@ export function createPurchaseMessageRepository(databaseUrl: string): {
|
||||
telegramThreadId: record.threadId,
|
||||
telegramUpdateId: String(record.updateId),
|
||||
messageSentAt: record.messageSentAt,
|
||||
processingStatus: 'pending'
|
||||
parsedAmountMinor: parsed?.amountMinor,
|
||||
parsedCurrency: parsed?.currency,
|
||||
parsedItemDescription: parsed?.itemDescription,
|
||||
parserMode: parsed?.parserMode,
|
||||
parserConfidence: parsed?.confidence,
|
||||
needsReview: needsReviewAsInt(parsed?.needsReview ?? true),
|
||||
parserError,
|
||||
processingStatus
|
||||
})
|
||||
.onConflictDoNothing({
|
||||
target: [
|
||||
@@ -151,7 +190,10 @@ function toCandidateFromContext(ctx: Context): PurchaseTopicCandidate | null {
|
||||
export function registerPurchaseTopicIngestion(
|
||||
bot: Bot,
|
||||
config: PurchaseTopicIngestionConfig,
|
||||
repository: PurchaseMessageIngestionRepository
|
||||
repository: PurchaseMessageIngestionRepository,
|
||||
options: {
|
||||
llmFallback?: PurchaseParserLlmFallback
|
||||
} = {}
|
||||
): void {
|
||||
bot.on('message:text', async (ctx) => {
|
||||
const candidate = toCandidateFromContext(ctx)
|
||||
@@ -165,7 +207,7 @@ export function registerPurchaseTopicIngestion(
|
||||
}
|
||||
|
||||
try {
|
||||
const status = await repository.save(record)
|
||||
const status = await repository.save(record, options.llmFallback)
|
||||
|
||||
if (status === 'created') {
|
||||
console.log(
|
||||
|
||||
Reference in New Issue
Block a user