mirror of
https://github.com/whekin/household-bot.git
synced 2026-03-31 17:44:03 +00:00
feat(WHE-23): add hybrid purchase parser with persisted parse metadata
This commit is contained in:
@@ -1 +1,9 @@
|
||||
export { calculateMonthlySettlement } from './settlement-engine'
|
||||
export {
|
||||
parsePurchaseMessage,
|
||||
type ParsedPurchaseResult,
|
||||
type ParsePurchaseInput,
|
||||
type ParsePurchaseOptions,
|
||||
type PurchaseParserLlmFallback,
|
||||
type PurchaseParserMode
|
||||
} from './purchase-parser'
|
||||
|
||||
63
packages/application/src/purchase-parser.test.ts
Normal file
63
packages/application/src/purchase-parser.test.ts
Normal file
@@ -0,0 +1,63 @@
|
||||
import { describe, expect, test } from 'bun:test'
|
||||
|
||||
import { parsePurchaseMessage } from './purchase-parser'
|
||||
|
||||
// Behavioural tests for the hybrid purchase parser: deterministic rules first,
// optional LLM fallback second.
describe('parsePurchaseMessage', () => {
  test('parses explicit currency with rules', async () => {
    const result = await parsePurchaseMessage({
      rawText: 'Купил туалетную бумагу 30 gel'
    })

    expect(result).not.toBeNull()
    expect(result?.amountMinor).toBe(3000n)
    expect(result?.currency).toBe('GEL')
    // The matched "30 gel" fragment is stripped from the description.
    expect(result?.itemDescription).toBe('Купил туалетную бумагу')
    expect(result?.confidence).toBe(92)
    expect(result?.parserMode).toBe('rules')
    expect(result?.needsReview).toBe(false)
  })

  test('defaults to GEL when currency is omitted and marks review', async () => {
    const result = await parsePurchaseMessage({
      rawText: 'Bought soap 12.5'
    })

    expect(result).not.toBeNull()
    expect(result?.amountMinor).toBe(1250n)
    expect(result?.currency).toBe('GEL')
    expect(result?.itemDescription).toBe('Bought soap')
    expect(result?.confidence).toBe(78)
    expect(result?.needsReview).toBe(true)
  })

  test('does not consult llm fallback when rules succeed', async () => {
    let fallbackCalls = 0

    const result = await parsePurchaseMessage(
      {
        rawText: 'Bought soap 12.5 usd'
      },
      {
        llmFallback: async () => {
          fallbackCalls += 1
          return null
        }
      }
    )

    expect(result?.parserMode).toBe('rules')
    expect(result?.amountMinor).toBe(1250n)
    expect(result?.currency).toBe('USD')
    expect(fallbackCalls).toBe(0)
  })

  test('uses llm fallback for ambiguous message with multiple amounts', async () => {
    const result = await parsePurchaseMessage(
      {
        rawText: 'Купил пасту 10 и мыло 5'
      },
      {
        llmFallback: async () => ({
          amountMinor: 1500n,
          currency: 'GEL',
          itemDescription: 'паста и мыло',
          confidence: 67,
          parserMode: 'llm',
          needsReview: true
        })
      }
    )

    expect(result).not.toBeNull()
    expect(result?.parserMode).toBe('llm')
    expect(result?.amountMinor).toBe(1500n)
  })

  test('returns null for ambiguous message when no llm fallback is configured', async () => {
    const result = await parsePurchaseMessage({
      rawText: 'Купил пасту 10 и мыло 5'
    })

    expect(result).toBeNull()
  })

  test('rejects llm result with a non-positive amount', async () => {
    const result = await parsePurchaseMessage(
      {
        rawText: 'Купил пасту 10 и мыло 5'
      },
      {
        llmFallback: async () => ({
          amountMinor: 0n,
          currency: 'GEL',
          itemDescription: 'паста и мыло',
          confidence: 67,
          parserMode: 'llm',
          needsReview: true
        })
      }
    )

    expect(result).toBeNull()
  })

  test('returns null when both rules and llm fail', async () => {
    const result = await parsePurchaseMessage(
      {
        rawText: 'без суммы вообще'
      },
      {
        llmFallback: async () => null
      }
    )

    expect(result).toBeNull()
  })

  test('returns null for blank input', async () => {
    const result = await parsePurchaseMessage({
      rawText: '   '
    })

    expect(result).toBeNull()
  })
})
|
||||
132
packages/application/src/purchase-parser.ts
Normal file
132
packages/application/src/purchase-parser.ts
Normal file
@@ -0,0 +1,132 @@
|
||||
/** Which strategy produced a parse result: the deterministic rules or the LLM fallback. */
export type PurchaseParserMode = 'rules' | 'llm'

/** Structured purchase data extracted from a free-text message. */
export interface ParsedPurchaseResult {
  /** Amount in minor units, i.e. hundredths of the currency ("12.5" becomes `1250n`). */
  amountMinor: bigint
  /** Currency of the amount; only GEL and USD are representable. */
  currency: 'GEL' | 'USD'
  /** What was bought, as free text. */
  itemDescription: string
  /** Parser confidence on a 0-100 scale (the rule-based parser emits 92 or 78). */
  confidence: number
  /** Strategy that produced this result. */
  parserMode: PurchaseParserMode
  /** Whether a human should double-check the result, e.g. because the currency was assumed. */
  needsReview: boolean
}

/**
 * Optional second-stage parser that receives the trimmed message text and resolves
 * to a result, or to `null` when it cannot extract a purchase either.
 */
export type PurchaseParserLlmFallback = (rawText: string) => Promise<ParsedPurchaseResult | null>

/** Input of `parsePurchaseMessage`. */
export interface ParsePurchaseInput {
  /** Message text exactly as received; it is trimmed before parsing. */
  rawText: string
}

/** Tuning knobs of `parsePurchaseMessage`. */
export interface ParsePurchaseOptions {
  /** Consulted only when the rule-based parser yields no result. */
  llmFallback?: PurchaseParserLlmFallback
}
|
||||
|
||||
/**
 * Currency tokens recognised right after an amount (matched case-insensitively).
 *
 * Symbols (`₾`, `$`) may be followed by anything. Word tokens must not run into
 * another letter or digit, otherwise ordinary words that merely start with a
 * currency name ("20 Ларисе", "12 gelato") would be read as an explicit GEL
 * amount. Any inflected form beginning with "доллар" is consumed as one token.
 */
const CURRENCY_PATTERN = '(?:₾|\\$|(?:gel|lari|лари|usd|доллар\\p{L}*)(?![\\p{L}\\p{N}]))'

/**
 * Finds every amount in a message: an integer or a decimal with one or two
 * fraction digits (`.` or `,` separator), optionally followed by whitespace and
 * a currency token. Exposes the named groups `amount` and `currency`.
 * The `u` flag is required for the `\p{…}` classes used by the currency pattern.
 */
const AMOUNT_WITH_OPTIONAL_CURRENCY = new RegExp(
  `(?<amount>\\d+(?:[.,]\\d{1,2})?)\\s*(?<currency>${CURRENCY_PATTERN})?`,
  'giu'
)
|
||||
|
||||
/**
 * Maps a raw currency token to its currency code.
 *
 * @param raw - Token as it appeared in the message; may be absent.
 * @returns `'GEL'` or `'USD'` for a recognised token, otherwise `null`
 *   (including for a missing, empty or whitespace-only token).
 */
function normalizeCurrency(raw: string | undefined): 'GEL' | 'USD' | null {
  const token = raw ? raw.trim().toLowerCase() : ''

  switch (token) {
    case '₾':
    case 'gel':
    case 'lari':
    case 'лари':
      return 'GEL'
    case 'usd':
    case '$':
      return 'USD'
    default:
      // Covers the inflected Russian forms ("доллара", "долларов", ...).
      return token.startsWith('доллар') ? 'USD' : null
  }
}
|
||||
|
||||
/**
 * Converts a decimal amount string into minor units (hundredths).
 *
 * Accepts `.` or `,` as the decimal separator. The fraction is right-padded
 * with zeros to two digits, and any digits beyond the second are dropped.
 *
 * @param rawAmount - Amount text such as `"30"`, `"12.5"` or `"12,50"`.
 * @returns The amount in minor units, e.g. `1250n` for `"12.5"`.
 */
function toMinorUnits(rawAmount: string): bigint {
  const segments = rawAmount.replace(',', '.').split('.')
  const units = segments[0]
  const fraction = segments[1] ?? ''
  const hundredths = `${fraction}00`.slice(0, 2)

  return BigInt(`${units}${hundredths}`)
}
|
||||
|
||||
/**
 * Derives the item description by removing the amount fragment from the message.
 *
 * Only the first occurrence of `matchedFragment` is removed; whitespace runs are
 * then collapsed to single spaces and the ends are trimmed.
 *
 * @param rawText - Full message text.
 * @param matchedFragment - Exact text of the amount (and currency) match.
 * @returns The remaining text, or `'shared purchase'` when nothing is left.
 */
function normalizeDescription(rawText: string, matchedFragment: string): string {
  const withoutAmount = rawText.replace(matchedFragment, ' ')
  const words = withoutAmount.split(/\s+/).filter((word) => word.length > 0)

  return words.length > 0 ? words.join(' ') : 'shared purchase'
}
|
||||
|
||||
/**
 * Deterministic first-stage parser.
 *
 * Succeeds only when the message contains exactly one positive amount. The
 * currency is taken from the token following the amount; when there is none,
 * GEL is assumed, the confidence is lowered and the result is flagged for review.
 *
 * @param rawText - Trimmed message text.
 * @returns The parsed purchase, or `null` when the message has no amount,
 *   several amounts (ambiguous), or a zero amount.
 */
function parseWithRules(rawText: string): ParsedPurchaseResult | null {
  const matches = Array.from(rawText.matchAll(AMOUNT_WITH_OPTIONAL_CURRENCY))

  // No amount at all, or several candidates: the rules cannot decide.
  if (matches.length !== 1) {
    return null
  }

  const [match] = matches
  if (!match?.groups?.amount) {
    return null
  }

  const amountMinor = toMinorUnits(match.groups.amount)

  // A purchase of nothing ("0", "0.00") is not a usable result; LLM results
  // are held to the same rule in validateLlmResult.
  if (amountMinor <= 0n) {
    return null
  }

  const currency = normalizeCurrency(match.groups.currency)
  const explicitCurrency = currency !== null

  return {
    amountMinor,
    // GEL is the default when the message names no currency.
    currency: currency ?? 'GEL',
    itemDescription: normalizeDescription(rawText, match[0] ?? ''),
    confidence: explicitCurrency ? 92 : 78,
    parserMode: 'rules',
    // An assumed currency must be confirmed by a human.
    needsReview: !explicitCurrency
  }
}
|
||||
|
||||
/**
 * Sanity-checks the output of the LLM fallback before it is returned to callers.
 *
 * The static type cannot be trusted here because the value originates from a
 * model, so the fields are checked at runtime as well.
 *
 * @param result - Value produced by the fallback, or `null`.
 * @returns `result` unchanged when it is plausible; `null` when it is missing,
 *   has a non-bigint or non-positive amount, an unsupported currency, a
 *   confidence outside 0-100 (including `NaN`), or a blank description.
 */
function validateLlmResult(result: ParsedPurchaseResult | null): ParsedPurchaseResult | null {
  if (!result) {
    return null
  }

  if (typeof result.amountMinor !== 'bigint' || result.amountMinor <= 0n) {
    return null
  }

  if (result.currency !== 'GEL' && result.currency !== 'USD') {
    return null
  }

  // Written as a positive range test so that NaN is rejected too.
  const { confidence } = result
  if (typeof confidence !== 'number' || !(confidence >= 0 && confidence <= 100)) {
    return null
  }

  if (typeof result.itemDescription !== 'string' || result.itemDescription.trim().length === 0) {
    return null
  }

  return result
}
|
||||
|
||||
/**
 * Extracts a purchase from a free-text message.
 *
 * The deterministic rules run first. Only when they yield nothing is the
 * optional LLM fallback consulted, and its answer is validated before being
 * returned.
 *
 * @param input - The message to parse; its text is trimmed first.
 * @param options - Optional LLM fallback.
 * @returns The parsed purchase, or `null` when the text is blank, the rules
 *   fail and no fallback is configured, or the fallback result is rejected.
 */
export async function parsePurchaseMessage(
  input: ParsePurchaseInput,
  options: ParsePurchaseOptions = {}
): Promise<ParsedPurchaseResult | null> {
  const message = input.rawText.trim()
  if (message.length === 0) {
    return null
  }

  const fromRules = parseWithRules(message)
  if (fromRules) {
    return fromRules
  }

  return options.llmFallback ? validateLlmResult(await options.llmFallback(message)) : null
}
|
||||
7
packages/db/drizzle/0003_mature_roulette.sql
Normal file
7
packages/db/drizzle/0003_mature_roulette.sql
Normal file
@@ -0,0 +1,7 @@
|
||||
ALTER TABLE "purchase_messages" ADD COLUMN "parsed_amount_minor" bigint;--> statement-breakpoint
|
||||
ALTER TABLE "purchase_messages" ADD COLUMN "parsed_currency" text;--> statement-breakpoint
|
||||
ALTER TABLE "purchase_messages" ADD COLUMN "parsed_item_description" text;--> statement-breakpoint
|
||||
ALTER TABLE "purchase_messages" ADD COLUMN "parser_mode" text;--> statement-breakpoint
|
||||
ALTER TABLE "purchase_messages" ADD COLUMN "parser_confidence" integer;--> statement-breakpoint
|
||||
ALTER TABLE "purchase_messages" ADD COLUMN "needs_review" integer DEFAULT 1 NOT NULL;--> statement-breakpoint
|
||||
ALTER TABLE "purchase_messages" ADD COLUMN "parser_error" text;
|
||||
1393
packages/db/drizzle/meta/0003_snapshot.json
Normal file
1393
packages/db/drizzle/meta/0003_snapshot.json
Normal file
File diff suppressed because it is too large
Load Diff
@@ -22,6 +22,13 @@
|
||||
"when": 1772670548136,
|
||||
"tag": "0002_tough_sandman",
|
||||
"breakpoints": true
|
||||
},
|
||||
{
|
||||
"idx": 3,
|
||||
"version": "7",
|
||||
"when": 1772671128084,
|
||||
"tag": "0003_mature_roulette",
|
||||
"breakpoints": true
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
@@ -198,6 +198,13 @@ export const purchaseMessages = pgTable(
|
||||
telegramThreadId: text('telegram_thread_id').notNull(),
|
||||
telegramUpdateId: text('telegram_update_id').notNull(),
|
||||
messageSentAt: timestamp('message_sent_at', { withTimezone: true }),
|
||||
parsedAmountMinor: bigint('parsed_amount_minor', { mode: 'bigint' }),
|
||||
parsedCurrency: text('parsed_currency'),
|
||||
parsedItemDescription: text('parsed_item_description'),
|
||||
parserMode: text('parser_mode'),
|
||||
parserConfidence: integer('parser_confidence'),
|
||||
needsReview: integer('needs_review').default(1).notNull(),
|
||||
parserError: text('parser_error'),
|
||||
processingStatus: text('processing_status').default('pending').notNull(),
|
||||
ingestedAt: timestamp('ingested_at', { withTimezone: true }).defaultNow().notNull()
|
||||
},
|
||||
|
||||
Reference in New Issue
Block a user