feat(WHE-23): add hybrid purchase parser with persisted parse metadata

This commit is contained in:
2026-03-05 04:43:57 +04:00
parent 3b1b6468db
commit ebb6ce4ce6
14 changed files with 1881 additions and 7 deletions

View File

@@ -1 +1,9 @@
// Public entry points of this package: the monthly settlement calculator and
// the purchase-message parser together with its supporting types.
export { calculateMonthlySettlement } from './settlement-engine'
// `type` modifiers mark the type-only names so they are erased from the emitted
// JavaScript; only `parsePurchaseMessage` is a runtime export of this group.
export {
parsePurchaseMessage,
type ParsedPurchaseResult,
type ParsePurchaseInput,
type ParsePurchaseOptions,
type PurchaseParserLlmFallback,
type PurchaseParserMode
} from './purchase-parser'

View File

@@ -0,0 +1,63 @@
import { describe, expect, test } from 'bun:test'
import { parsePurchaseMessage } from './purchase-parser'
// Behavioural coverage for parsePurchaseMessage: the rule-based happy path,
// the implicit-currency default, delegation to the LLM fallback, and the case
// where neither strategy yields a result.
describe('parsePurchaseMessage', () => {
  test('parses explicit currency with rules', async () => {
    const input = { rawText: 'Купил туалетную бумагу 30 gel' }

    const parsed = await parsePurchaseMessage(input)

    expect(parsed).not.toBeNull()
    expect(parsed?.amountMinor).toBe(3000n)
    expect(parsed?.currency).toBe('GEL')
    expect(parsed?.parserMode).toBe('rules')
    expect(parsed?.needsReview).toBe(false)
  })

  test('defaults to GEL when currency is omitted and marks review', async () => {
    const input = { rawText: 'Bought soap 12.5' }

    const parsed = await parsePurchaseMessage(input)

    expect(parsed).not.toBeNull()
    expect(parsed?.amountMinor).toBe(1250n)
    expect(parsed?.currency).toBe('GEL')
    expect(parsed?.needsReview).toBe(true)
  })

  test('uses llm fallback for ambiguous message with multiple amounts', async () => {
    // Two bare amounts make the rule-based pass give up, so the stubbed
    // fallback below is what produces the result.
    const input = { rawText: 'Купил пасту 10 и мыло 5' }

    const parsed = await parsePurchaseMessage(input, {
      llmFallback: async () => ({
        amountMinor: 1500n,
        currency: 'GEL',
        itemDescription: 'паста и мыло',
        confidence: 67,
        parserMode: 'llm',
        needsReview: true
      })
    })

    expect(parsed).not.toBeNull()
    expect(parsed?.parserMode).toBe('llm')
    expect(parsed?.amountMinor).toBe(1500n)
  })

  test('returns null when both rules and llm fail', async () => {
    // No digits at all, and the fallback declines as well.
    const input = { rawText: 'без суммы вообще' }

    const parsed = await parsePurchaseMessage(input, {
      llmFallback: async () => null
    })

    expect(parsed).toBeNull()
  })
})

View File

@@ -0,0 +1,132 @@
/** Identifies which strategy produced a parse result: the regex rules or the LLM fallback. */
export type PurchaseParserMode = 'rules' | 'llm'
/** Structured outcome of parsing one free-text purchase message. */
export interface ParsedPurchaseResult {
/** Amount in minor units (hundredths of the currency unit), e.g. `1250n` for "12.5". */
amountMinor: bigint
/** Currency code; the rule-based parser falls back to 'GEL' when the message names none. */
currency: 'GEL' | 'USD'
/** Human-readable description of what was bought. */
itemDescription: string
/** Confidence score; LLM results outside the 0..100 range are rejected by this module. */
confidence: number
/** Strategy that produced this result. */
parserMode: PurchaseParserMode
/** True when the result should be checked by a person (e.g. the currency was assumed). */
needsReview: boolean
}
/**
 * Caller-supplied fallback invoked with the trimmed message text when the
 * rule-based pass yields nothing. Resolve to `null` to signal "could not parse".
 */
export type PurchaseParserLlmFallback = (rawText: string) => Promise<ParsedPurchaseResult | null>
/** Input accepted by `parsePurchaseMessage`. */
export interface ParsePurchaseInput {
/** The message text as received; it is trimmed before parsing. */
rawText: string
}
/** Optional collaborators for `parsePurchaseMessage`. */
export interface ParsePurchaseOptions {
/** When omitted, a message the rules cannot handle resolves to `null`. */
llmFallback?: PurchaseParserLlmFallback
}
/**
 * Alternation of every currency marker the rule-based parser recognises
 * (Georgian lari and US dollar spellings/symbols). Kept as a string so it can
 * be embedded into larger patterns.
 */
const CURRENCY_PATTERN = '(?:₾|gel|lari|лари|usd|\\$|доллар(?:а|ov)?)'.replace('ov', 'ов')

/**
 * Matches a decimal amount (`.` or `,` separator, at most two fraction digits)
 * optionally followed by a currency marker.
 *
 * The currency marker must not be directly followed by another letter;
 * otherwise ordinary words that merely start with a marker ("30 gelato",
 * "5 usdt") would be read as an explicit currency and have their prefix cut
 * out of the item description. When the lookahead fails, the amount still
 * matches, just without a `currency` group.
 *
 * Flags: `g` for `matchAll`, `i` for case-insensitive markers, `u` so that
 * `\p{L}` and Cyrillic case folding work.
 */
const AMOUNT_WITH_OPTIONAL_CURRENCY = new RegExp(
  `(?<amount>\\d+(?:[.,]\\d{1,2})?)\\s*(?:(?<currency>${CURRENCY_PATTERN})(?!\\p{L}))?`,
  'giu'
)
/**
 * Maps a raw currency marker captured from a message to a currency code.
 *
 * @param raw - The captured marker, or `undefined` when none was captured.
 * @returns 'GEL' or 'USD' for a recognised marker; `null` for a missing,
 *   empty or unrecognised one.
 */
function normalizeCurrency(raw: string | undefined): 'GEL' | 'USD' | null {
  if (!raw) {
    return null
  }

  // Comparison is done on a trimmed, lower-cased copy of the marker.
  const token = raw.trim().toLowerCase()

  switch (token) {
    case '₾':
    case 'gel':
    case 'lari':
    case 'лари':
      return 'GEL'
    case 'usd':
    case '$':
      return 'USD'
    default:
      // Any inflected form of the Russian word for "dollar" counts as USD.
      return token.startsWith('доллар') ? 'USD' : null
  }
}
/**
 * Converts a decimal amount string into an integer count of minor units
 * (hundredths).
 *
 * @param rawAmount - Digits with an optional `.` or `,` fraction, e.g. "12,5".
 * @returns The amount scaled by 100 as a bigint; fraction digits beyond the
 *   second are dropped, and a missing or short fraction is zero-padded.
 */
function toMinorUnits(rawAmount: string): bigint {
  // Only the first comma is treated as a decimal separator.
  const pieces = rawAmount.replace(',', '.').split('.')
  const units = pieces[0]
  const fraction = pieces[1] ?? ''

  // Right-pad with zeros and keep exactly two digits.
  const hundredths = `${fraction}00`.slice(0, 2)

  return BigInt(`${units}${hundredths}`)
}
/**
 * Builds the item description by removing the matched amount fragment from the
 * message and collapsing whitespace.
 *
 * @param rawText - The full message text.
 * @param matchedFragment - The amount (and currency) text to cut out; only its
 *   first occurrence is removed.
 * @returns The cleaned text, or 'shared purchase' when nothing remains.
 */
function normalizeDescription(rawText: string, matchedFragment: string): string {
  const at = rawText.indexOf(matchedFragment)

  // Swap the first occurrence of the fragment for a single space so the words
  // on either side of it do not get glued together.
  const withoutFragment =
    at < 0 ? rawText : `${rawText.slice(0, at)} ${rawText.slice(at + matchedFragment.length)}`

  // Splitting on whitespace runs and dropping empty pieces both collapses
  // inner runs and trims the ends.
  const words = withoutFragment.split(/\s+/).filter((word) => word.length > 0)

  return words.length > 0 ? words.join(' ') : 'shared purchase'
}
/**
 * Rule-based parse of a purchase message using the amount/currency regex.
 *
 * Succeeds only when the text contains exactly one amount; zero or several
 * amounts are treated as ambiguous so the caller can try another strategy.
 *
 * @param rawText - Message text (the caller passes it already trimmed).
 * @returns A result tagged `parserMode: 'rules'`, or `null` when the message
 *   is ambiguous or the amount is not a positive value. When no currency is
 *   named, GEL is assumed, confidence is lowered and `needsReview` is set.
 */
function parseWithRules(rawText: string): ParsedPurchaseResult | null {
  const matches = Array.from(rawText.matchAll(AMOUNT_WITH_OPTIONAL_CURRENCY))
  if (matches.length !== 1) {
    return null
  }

  const [match] = matches
  if (!match?.groups?.amount) {
    return null
  }

  const amountMinor = toMinorUnits(match.groups.amount)
  // A zero amount is not a purchase. LLM results are rejected for the same
  // reason, so the rule path must not accept what the fallback path refuses.
  if (amountMinor <= 0n) {
    return null
  }

  const currency = normalizeCurrency(match.groups.currency)
  const explicitCurrency = currency !== null

  return {
    amountMinor,
    // GEL is the assumed default when the message names no currency.
    currency: currency ?? 'GEL',
    itemDescription: normalizeDescription(rawText, match[0] ?? ''),
    confidence: explicitCurrency ? 92 : 78,
    parserMode: 'rules',
    // An assumed currency should be confirmed by a person.
    needsReview: !explicitCurrency
  }
}
/**
 * Sanity-checks a result produced by the LLM fallback before it is returned
 * to callers.
 *
 * @param result - The fallback's output, or `null` when it could not parse.
 * @returns The same object when it passes every check, otherwise `null`.
 *   A result is rejected when the amount is not positive, the confidence is
 *   not a number within 0..100 (this includes `NaN`), or the description is
 *   blank. `currency` and `parserMode` are not checked at runtime here.
 */
function validateLlmResult(result: ParsedPurchaseResult | null): ParsedPurchaseResult | null {
  if (!result) {
    return null
  }

  if (result.amountMinor <= 0n) {
    return null
  }

  // Written as a positive range test so that NaN, for which every comparison
  // is false, is rejected instead of slipping through.
  const confidenceInRange = result.confidence >= 0 && result.confidence <= 100
  if (!confidenceInRange) {
    return null
  }

  if (result.itemDescription.trim().length === 0) {
    return null
  }

  return result
}
/**
 * Parses a free-text purchase message into a structured result.
 *
 * The deterministic rule-based parser runs first. Only when it yields nothing
 * is the optional LLM fallback consulted, and its output is validated before
 * being returned.
 *
 * @param input - Carries the raw message text; it is trimmed before parsing.
 * @param options - Optional `llmFallback` used when the rules cannot decide.
 * @returns The parsed purchase, or `null` when the text is blank, the rules
 *   fail and no fallback is configured, or the fallback's result is missing
 *   or fails validation.
 */
export async function parsePurchaseMessage(
  input: ParsePurchaseInput,
  options: ParsePurchaseOptions = {}
): Promise<ParsedPurchaseResult | null> {
  const text = input.rawText.trim()
  if (text.length === 0) {
    return null
  }

  const fromRules = parseWithRules(text)
  if (fromRules) {
    return fromRules
  }

  // The fallback is invoked as a member of `options`, exactly once, and only
  // after the rules have given up.
  return options.llmFallback ? validateLlmResult(await options.llmFallback(text)) : null
}