mirror of
https://github.com/whekin/household-bot.git
synced 2026-03-31 14:04:04 +00:00
perf(bot): trim dm assistant response latency
This commit is contained in:
62
apps/bot/src/openai-chat-assistant.test.ts
Normal file
62
apps/bot/src/openai-chat-assistant.test.ts
Normal file
@@ -0,0 +1,62 @@
|
|||||||
|
import { describe, expect, test } from 'bun:test'
|
||||||
|
|
||||||
|
import { createOpenAiChatAssistant } from './openai-chat-assistant'
|
||||||
|
|
||||||
|
// Shape of the JSON request body the assistant sends to the OpenAI API,
// as captured by the fetch stub in the test below. Only the fields the
// assertions inspect are declared here; the real payload may carry more.
interface CapturedAssistantRequest {
  // Model identifier forwarded verbatim from the factory argument.
  model: string
  // Hard cap on generated tokens — the latency-trim knob under test.
  max_output_tokens: number
  // Conversation messages; index 0 is expected to be the system prompt.
  input: Array<{ role: string; content: string }>
}
|
||||||
|
|
||||||
|
function successfulResponse(payload: unknown): Response {
|
||||||
|
return new Response(JSON.stringify(payload), {
|
||||||
|
status: 200,
|
||||||
|
headers: {
|
||||||
|
'content-type': 'application/json'
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
describe('createOpenAiChatAssistant', () => {
|
||||||
|
test('caps output tokens and asks for concise replies', async () => {
|
||||||
|
const assistant = createOpenAiChatAssistant('test-key', 'gpt-5-mini', 20_000)
|
||||||
|
expect(assistant).toBeDefined()
|
||||||
|
|
||||||
|
const originalFetch = globalThis.fetch
|
||||||
|
let capturedBody: CapturedAssistantRequest | null = null
|
||||||
|
|
||||||
|
globalThis.fetch = (async (_input: Request | string | URL, init?: RequestInit) => {
|
||||||
|
capturedBody = init?.body ? (JSON.parse(String(init.body)) as CapturedAssistantRequest) : null
|
||||||
|
|
||||||
|
return successfulResponse({
|
||||||
|
output_text: 'Hi.',
|
||||||
|
usage: {
|
||||||
|
input_tokens: 100,
|
||||||
|
output_tokens: 1,
|
||||||
|
total_tokens: 101
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}) as unknown as typeof fetch
|
||||||
|
|
||||||
|
try {
|
||||||
|
const reply = await assistant!.respond({
|
||||||
|
locale: 'en',
|
||||||
|
householdContext: 'Household: Kojori House',
|
||||||
|
memorySummary: null,
|
||||||
|
recentTurns: [],
|
||||||
|
userMessage: 'Hello'
|
||||||
|
})
|
||||||
|
|
||||||
|
expect(reply.text).toBe('Hi.')
|
||||||
|
expect(capturedBody).not.toBeNull()
|
||||||
|
expect(capturedBody!.max_output_tokens).toBe(220)
|
||||||
|
expect(capturedBody!.model).toBe('gpt-5-mini')
|
||||||
|
expect(capturedBody!.input[0]).toMatchObject({
|
||||||
|
role: 'system',
|
||||||
|
content: expect.stringContaining('Default to one to three short sentences.')
|
||||||
|
})
|
||||||
|
} finally {
|
||||||
|
globalThis.fetch = originalFetch
|
||||||
|
}
|
||||||
|
})
|
||||||
|
})
|
||||||
@@ -1,5 +1,7 @@
|
|||||||
import { extractOpenAiResponseText, type OpenAiResponsePayload } from './openai-responses'
|
import { extractOpenAiResponseText, type OpenAiResponsePayload } from './openai-responses'
|
||||||
|
|
||||||
|
const ASSISTANT_MAX_OUTPUT_TOKENS = 220
|
||||||
|
|
||||||
export interface AssistantUsage {
|
export interface AssistantUsage {
|
||||||
inputTokens: number
|
inputTokens: number
|
||||||
outputTokens: number
|
outputTokens: number
|
||||||
@@ -31,6 +33,10 @@ const ASSISTANT_SYSTEM_PROMPT = [
|
|||||||
'If the user asks you to mutate household state, do not claim the action is complete unless the system explicitly says it was confirmed and saved.',
|
'If the user asks you to mutate household state, do not claim the action is complete unless the system explicitly says it was confirmed and saved.',
|
||||||
'For unsupported writes, explain the limitation briefly and suggest the explicit command or confirmation flow.',
|
'For unsupported writes, explain the limitation briefly and suggest the explicit command or confirmation flow.',
|
||||||
'Prefer concise, practical answers.',
|
'Prefer concise, practical answers.',
|
||||||
|
'Default to one to three short sentences.',
|
||||||
|
'For simple greetings or small talk, reply in a single short sentence unless the user asks for more.',
|
||||||
|
'Do not restate the full household context unless the user explicitly asks for details.',
|
||||||
|
'Avoid bullet lists unless the user asked for a list or several distinct items.',
|
||||||
'Reply in the user language inferred from the latest user message and locale context.'
|
'Reply in the user language inferred from the latest user message and locale context.'
|
||||||
].join(' ')
|
].join(' ')
|
||||||
|
|
||||||
@@ -58,6 +64,7 @@ export function createOpenAiChatAssistant(
|
|||||||
},
|
},
|
||||||
body: JSON.stringify({
|
body: JSON.stringify({
|
||||||
model,
|
model,
|
||||||
|
max_output_tokens: ASSISTANT_MAX_OUTPUT_TOKENS,
|
||||||
input: [
|
input: [
|
||||||
{
|
{
|
||||||
role: 'system',
|
role: 'system',
|
||||||
|
|||||||
Reference in New Issue
Block a user