perf(bot): trim dm assistant response latency

This commit is contained in:
2026-03-11 03:51:40 +04:00
parent 4dd469a0fe
commit b5630c0241
2 changed files with 69 additions and 0 deletions

View File

@@ -0,0 +1,62 @@
import { describe, expect, test } from 'bun:test'
import { createOpenAiChatAssistant } from './openai-chat-assistant'
/**
 * Shape of the JSON request body the assistant POSTs to the model API,
 * as captured by the fetch stub in the test below.
 */
interface CapturedAssistantRequest {
  model: string
  // Per-request output-token ceiling; the test pins this to 220.
  max_output_tokens: number
  // Chat-style message list; the test asserts input[0] is the system prompt.
  input: Array<{ role: string; content: string }>
}
/**
 * Wraps the given payload in an HTTP 200 JSON Response, mimicking a
 * successful model-API reply for the fetch stub.
 */
function successfulResponse(payload: unknown): Response {
  const body = JSON.stringify(payload)
  const headers = { 'content-type': 'application/json' }
  return new Response(body, { status: 200, headers })
}
describe('createOpenAiChatAssistant', () => {
  test('caps output tokens and asks for concise replies', async () => {
    const assistant = createOpenAiChatAssistant('test-key', 'gpt-5-mini', 20_000)
    expect(assistant).toBeDefined()

    const realFetch = globalThis.fetch
    let requestBody: CapturedAssistantRequest | null = null

    // Stub fetch: capture the outgoing JSON body, reply with a minimal success payload.
    globalThis.fetch = (async (_target: Request | string | URL, init?: RequestInit) => {
      requestBody = init?.body ? (JSON.parse(String(init.body)) as CapturedAssistantRequest) : null
      return successfulResponse({
        output_text: 'Hi.',
        usage: {
          input_tokens: 100,
          output_tokens: 1,
          total_tokens: 101
        }
      })
    }) as unknown as typeof fetch

    try {
      const reply = await assistant!.respond({
        locale: 'en',
        householdContext: 'Household: Kojori House',
        memorySummary: null,
        recentTurns: [],
        userMessage: 'Hello'
      })

      expect(reply.text).toBe('Hi.')
      expect(requestBody).not.toBeNull()
      expect(requestBody!.max_output_tokens).toBe(220)
      expect(requestBody!.model).toBe('gpt-5-mini')
      expect(requestBody!.input[0]).toMatchObject({
        role: 'system',
        content: expect.stringContaining('Default to one to three short sentences.')
      })
    } finally {
      // Always restore the real fetch, even when an assertion above throws.
      globalThis.fetch = realFetch
    }
  })
})

View File

@@ -1,5 +1,7 @@
import { extractOpenAiResponseText, type OpenAiResponsePayload } from './openai-responses'
// Hard cap on tokens the assistant may generate per reply — sent as
// max_output_tokens on every request to keep DM responses short and fast
// (this commit's stated goal: trim response latency).
const ASSISTANT_MAX_OUTPUT_TOKENS = 220
export interface AssistantUsage {
inputTokens: number
outputTokens: number
@@ -31,6 +33,10 @@ const ASSISTANT_SYSTEM_PROMPT = [
'If the user asks you to mutate household state, do not claim the action is complete unless the system explicitly says it was confirmed and saved.',
'For unsupported writes, explain the limitation briefly and suggest the explicit command or confirmation flow.',
'Prefer concise, practical answers.',
'Default to one to three short sentences.',
'For simple greetings or small talk, reply in a single short sentence unless the user asks for more.',
'Do not restate the full household context unless the user explicitly asks for details.',
'Avoid bullet lists unless the user asked for a list or several distinct items.',
'Reply in the user language inferred from the latest user message and locale context.'
].join(' ')
@@ -58,6 +64,7 @@ export function createOpenAiChatAssistant(
},
body: JSON.stringify({
model,
max_output_tokens: ASSISTANT_MAX_OUTPUT_TOKENS,
input: [
{
role: 'system',