mirror of
https://github.com/whekin/household-bot.git
synced 2026-03-31 14:04:04 +00:00
perf(bot): trim dm assistant response latency
This commit is contained in:
62
apps/bot/src/openai-chat-assistant.test.ts
Normal file
62
apps/bot/src/openai-chat-assistant.test.ts
Normal file
@@ -0,0 +1,62 @@
|
|||||||
|
import { describe, expect, test } from 'bun:test'
|
||||||
|
|
||||||
|
import { createOpenAiChatAssistant } from './openai-chat-assistant'
|
||||||
|
|
||||||
|
// Shape of the JSON request body the assistant sends to the OpenAI API,
// as captured by the fetch stub in the test below. Only the fields the
// assertions inspect are declared here; the real payload may carry more.
interface CapturedAssistantRequest {
  // Model identifier forwarded verbatim from the factory argument.
  model: string
  // Hard cap on generated tokens — the latency-trim knob under test.
  max_output_tokens: number
  // Conversation messages; index 0 is expected to be the system prompt.
  input: Array<{ role: string; content: string }>
}
|
||||||
|
|
||||||
|
function successfulResponse(payload: unknown): Response {
|
||||||
|
return new Response(JSON.stringify(payload), {
|
||||||
|
status: 200,
|
||||||
|
headers: {
|
||||||
|
'content-type': 'application/json'
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
describe('createOpenAiChatAssistant', () => {
|
||||||
|
test('caps output tokens and asks for concise replies', async () => {
|
||||||
|
const assistant = createOpenAiChatAssistant('test-key', 'gpt-5-mini', 20_000)
|
||||||
|
expect(assistant).toBeDefined()
|
||||||
|
|
||||||
|
const originalFetch = globalThis.fetch
|
||||||
|
let capturedBody: CapturedAssistantRequest | null = null
|
||||||
|
|
||||||
|
globalThis.fetch = (async (_input: Request | string | URL, init?: RequestInit) => {
|
||||||
|
capturedBody = init?.body ? (JSON.parse(String(init.body)) as CapturedAssistantRequest) : null
|
||||||
|
|
||||||
|
return successfulResponse({
|
||||||
|
output_text: 'Hi.',
|
||||||
|
usage: {
|
||||||
|
input_tokens: 100,
|
||||||
|
output_tokens: 1,
|
||||||
|
total_tokens: 101
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}) as unknown as typeof fetch
|
||||||
|
|
||||||
|
try {
|
||||||
|
const reply = await assistant!.respond({
|
||||||
|
locale: 'en',
|
||||||
|
householdContext: 'Household: Kojori House',
|
||||||
|
memorySummary: null,
|
||||||
|
recentTurns: [],
|
||||||
|
userMessage: 'Hello'
|
||||||
|
})
|
||||||
|
|
||||||
|
expect(reply.text).toBe('Hi.')
|
||||||
|
expect(capturedBody).not.toBeNull()
|
||||||
|
expect(capturedBody!.max_output_tokens).toBe(220)
|
||||||
|
expect(capturedBody!.model).toBe('gpt-5-mini')
|
||||||
|
expect(capturedBody!.input[0]).toMatchObject({
|
||||||
|
role: 'system',
|
||||||
|
content: expect.stringContaining('Default to one to three short sentences.')
|
||||||
|
})
|
||||||
|
} finally {
|
||||||
|
globalThis.fetch = originalFetch
|
||||||
|
}
|
||||||
|
})
|
||||||
|
})
|
||||||
@@ -1,5 +1,7 @@
|
|||||||
import { extractOpenAiResponseText, type OpenAiResponsePayload } from './openai-responses'
|
import { extractOpenAiResponseText, type OpenAiResponsePayload } from './openai-responses'
|
||||||
|
|
||||||
|
const ASSISTANT_MAX_OUTPUT_TOKENS = 220
|
||||||
|
|
||||||
export interface AssistantUsage {
|
export interface AssistantUsage {
|
||||||
inputTokens: number
|
inputTokens: number
|
||||||
outputTokens: number
|
outputTokens: number
|
||||||
@@ -31,6 +33,10 @@ const ASSISTANT_SYSTEM_PROMPT = [
|
|||||||
'If the user asks you to mutate household state, do not claim the action is complete unless the system explicitly says it was confirmed and saved.',
|
'If the user asks you to mutate household state, do not claim the action is complete unless the system explicitly says it was confirmed and saved.',
|
||||||
'For unsupported writes, explain the limitation briefly and suggest the explicit command or confirmation flow.',
|
'For unsupported writes, explain the limitation briefly and suggest the explicit command or confirmation flow.',
|
||||||
'Prefer concise, practical answers.',
|
'Prefer concise, practical answers.',
|
||||||
|
'Default to one to three short sentences.',
|
||||||
|
'For simple greetings or small talk, reply in a single short sentence unless the user asks for more.',
|
||||||
|
'Do not restate the full household context unless the user explicitly asks for details.',
|
||||||
|
'Avoid bullet lists unless the user asked for a list or several distinct items.',
|
||||||
'Reply in the user language inferred from the latest user message and locale context.'
|
'Reply in the user language inferred from the latest user message and locale context.'
|
||||||
].join(' ')
|
].join(' ')
|
||||||
|
|
||||||
@@ -58,6 +64,7 @@ export function createOpenAiChatAssistant(
|
|||||||
},
|
},
|
||||||
body: JSON.stringify({
|
body: JSON.stringify({
|
||||||
model,
|
model,
|
||||||
|
max_output_tokens: ASSISTANT_MAX_OUTPUT_TOKENS,
|
||||||
input: [
|
input: [
|
||||||
{
|
{
|
||||||
role: 'system',
|
role: 'system',
|
||||||
|
|||||||
Reference in New Issue
Block a user