mirror of
https://github.com/whekin/household-bot.git
synced 2026-03-31 13:54:02 +00:00
perf(bot): trim dm assistant response latency
This commit is contained in:
62
apps/bot/src/openai-chat-assistant.test.ts
Normal file
62
apps/bot/src/openai-chat-assistant.test.ts
Normal file
@@ -0,0 +1,62 @@
|
||||
import { describe, expect, test } from 'bun:test'
|
||||
|
||||
import { createOpenAiChatAssistant } from './openai-chat-assistant'
|
||||
|
||||
/**
 * Shape of the JSON request body the assistant sends to the OpenAI
 * Responses API, as captured by the fetch stub in this test file.
 * Only the fields the assertions below inspect are declared.
 */
interface CapturedAssistantRequest {
  // Model identifier forwarded verbatim from createOpenAiChatAssistant.
  model: string
  // Hard cap on generated tokens — the latency-trimming knob under test.
  max_output_tokens: number
  // Responses-API style message list; index 0 is expected to be the
  // system prompt in the test below.
  input: Array<{ role: string; content: string }>
}
|
||||
|
||||
function successfulResponse(payload: unknown): Response {
|
||||
return new Response(JSON.stringify(payload), {
|
||||
status: 200,
|
||||
headers: {
|
||||
'content-type': 'application/json'
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
describe('createOpenAiChatAssistant', () => {
  // Pins the two latency-trimming behaviors this commit introduces:
  // a hard cap on output tokens and a system-prompt instruction to
  // keep replies short.
  test('caps output tokens and asks for concise replies', async () => {
    const assistant = createOpenAiChatAssistant('test-key', 'gpt-5-mini', 20_000)
    expect(assistant).toBeDefined()

    // Stub the global fetch so the outgoing OpenAI request body can be
    // captured and inspected without any network traffic.
    const originalFetch = globalThis.fetch
    let capturedBody: CapturedAssistantRequest | null = null

    globalThis.fetch = (async (_input: Request | string | URL, init?: RequestInit) => {
      // Record the JSON request body for the assertions below.
      capturedBody = init?.body ? (JSON.parse(String(init.body)) as CapturedAssistantRequest) : null

      // Minimal successful Responses-API payload with token usage.
      return successfulResponse({
        output_text: 'Hi.',
        usage: {
          input_tokens: 100,
          output_tokens: 1,
          total_tokens: 101
        }
      })
    }) as unknown as typeof fetch

    try {
      const reply = await assistant!.respond({
        locale: 'en',
        householdContext: 'Household: Kojori House',
        memorySummary: null,
        recentTurns: [],
        userMessage: 'Hello'
      })

      // The reply text is passed straight through from the stubbed
      // output_text field.
      expect(reply.text).toBe('Hi.')
      expect(capturedBody).not.toBeNull()
      // 220 mirrors ASSISTANT_MAX_OUTPUT_TOKENS in the implementation
      // file — keep the two in sync.
      expect(capturedBody!.max_output_tokens).toBe(220)
      expect(capturedBody!.model).toBe('gpt-5-mini')
      // The first input message must be the system prompt carrying the
      // conciseness instruction added by this commit.
      expect(capturedBody!.input[0]).toMatchObject({
        role: 'system',
        content: expect.stringContaining('Default to one to three short sentences.')
      })
    } finally {
      // Always restore the real fetch so sibling tests are unaffected,
      // even when an assertion above throws.
      globalThis.fetch = originalFetch
    }
  })
})
|
||||
@@ -1,5 +1,7 @@
|
||||
import { extractOpenAiResponseText, type OpenAiResponsePayload } from './openai-responses'
|
||||
|
||||
const ASSISTANT_MAX_OUTPUT_TOKENS = 220
|
||||
|
||||
export interface AssistantUsage {
|
||||
inputTokens: number
|
||||
outputTokens: number
|
||||
@@ -31,6 +33,10 @@ const ASSISTANT_SYSTEM_PROMPT = [
|
||||
'If the user asks you to mutate household state, do not claim the action is complete unless the system explicitly says it was confirmed and saved.',
|
||||
'For unsupported writes, explain the limitation briefly and suggest the explicit command or confirmation flow.',
|
||||
'Prefer concise, practical answers.',
|
||||
'Default to one to three short sentences.',
|
||||
'For simple greetings or small talk, reply in a single short sentence unless the user asks for more.',
|
||||
'Do not restate the full household context unless the user explicitly asks for details.',
|
||||
'Avoid bullet lists unless the user asked for a list or several distinct items.',
|
||||
'Reply in the user language inferred from the latest user message and locale context.'
|
||||
].join(' ')
|
||||
|
||||
@@ -58,6 +64,7 @@ export function createOpenAiChatAssistant(
|
||||
},
|
||||
body: JSON.stringify({
|
||||
model,
|
||||
max_output_tokens: ASSISTANT_MAX_OUTPUT_TOKENS,
|
||||
input: [
|
||||
{
|
||||
role: 'system',
|
||||
|
||||
Reference in New Issue
Block a user