From b5630c0241d316ab0b1e183b5639b7e12448d5b5 Mon Sep 17 00:00:00 2001
From: whekin <stanislavkalishin@gmail.com>
Date: Wed, 11 Mar 2026 03:51:40 +0400
Subject: [PATCH] perf(bot): trim dm assistant response latency

---
 apps/bot/src/openai-chat-assistant.test.ts | 62 ++++++++++++++++++++++
 apps/bot/src/openai-chat-assistant.ts      |  7 +++
 2 files changed, 69 insertions(+)
 create mode 100644 apps/bot/src/openai-chat-assistant.test.ts

diff --git a/apps/bot/src/openai-chat-assistant.test.ts b/apps/bot/src/openai-chat-assistant.test.ts
new file mode 100644
index 0000000..8f0e824
--- /dev/null
+++ b/apps/bot/src/openai-chat-assistant.test.ts
@@ -0,0 +1,62 @@
+import { describe, expect, test } from 'bun:test'
+
+import { createOpenAiChatAssistant } from './openai-chat-assistant'
+
+interface CapturedAssistantRequest { // fields of the JSON request body that this test inspects
+  model: string // compared against the model name passed to the factory
+  max_output_tokens: number // compared against the expected output-token cap
+  input: { role: string; content: string }[] // first entry is checked for the system prompt text
+}
+
+/** Builds an HTTP 200 response whose body is `payload` serialized as JSON. */
+function successfulResponse(payload: unknown): Response {
+  const init: ResponseInit = {
+    status: 200,
+    headers: { 'content-type': 'application/json' }
+  }
+  return new Response(JSON.stringify(payload), init)
+}
+
+describe('createOpenAiChatAssistant', () => {
+  // Stubs global fetch, records the JSON request body, and checks the token cap, model and system prompt.
+  test('caps output tokens and asks for concise replies', async () => {
+    const assistant = createOpenAiChatAssistant('test-key', 'gpt-5-mini', 20_000)
+    // Runtime guard instead of `assistant!`: also rejects null and narrows the type for the call below.
+    if (!assistant) throw new Error('createOpenAiChatAssistant returned no assistant')
+
+    const originalFetch = globalThis.fetch
+    // Object property, not a closure-assigned `let`: the latter stays narrowed to `null`, so `x!` becomes `never`.
+    const captured: { body: CapturedAssistantRequest | null } = { body: null }
+
+    globalThis.fetch = (async (_input: Request | string | URL, init?: RequestInit) => {
+      captured.body = init?.body ? (JSON.parse(String(init.body)) as CapturedAssistantRequest) : null
+
+      return successfulResponse({
+        output_text: 'Hi.',
+        usage: { input_tokens: 100, output_tokens: 1, total_tokens: 101 }
+      })
+    }) as unknown as typeof fetch
+
+    try {
+      const reply = await assistant.respond({
+        locale: 'en',
+        householdContext: 'Household: Kojori House',
+        memorySummary: null,
+        recentTurns: [],
+        userMessage: 'Hello'
+      })
+
+      expect(reply.text).toBe('Hi.')
+      expect(captured.body).not.toBeNull()
+      expect(captured.body?.max_output_tokens).toBe(220)
+      expect(captured.body?.model).toBe('gpt-5-mini')
+      expect(captured.body?.input[0]).toMatchObject({
+        role: 'system',
+        content: expect.stringContaining('Default to one to three short sentences.')
+      })
+    } finally {
+      // Always restore the real fetch so the stub cannot leak into other tests.
+      globalThis.fetch = originalFetch
+    }
+  })
+})
diff --git a/apps/bot/src/openai-chat-assistant.ts b/apps/bot/src/openai-chat-assistant.ts
index 3b17d1a..a72dc19 100644
--- a/apps/bot/src/openai-chat-assistant.ts
+++ b/apps/bot/src/openai-chat-assistant.ts
@@ -1,5 +1,7 @@
 import { extractOpenAiResponseText, type OpenAiResponsePayload } from './openai-responses'
 
+const ASSISTANT_MAX_OUTPUT_TOKENS = 220 // sent as `max_output_tokens` in the assistant request body
+
 export interface AssistantUsage {
   inputTokens: number
   outputTokens: number
@@ -31,6 +33,10 @@ const ASSISTANT_SYSTEM_PROMPT = [
   'If the user asks you to mutate household state, do not claim the action is complete unless the system explicitly says it was confirmed and saved.',
   'For unsupported writes, explain the limitation briefly and suggest the explicit command or confirmation flow.',
   'Prefer concise, practical answers.',
+  'Default to one to three short sentences.',
+  'For simple greetings or small talk, reply in a single short sentence unless the user asks for more.',
+  'Do not restate the full household context unless the user explicitly asks for details.',
+  'Avoid bullet lists unless the user asked for a list or several distinct items.',
   'Reply in the user language inferred from the latest user message and locale context.'
 ].join(' ')
 
@@ -58,6 +64,7 @@ export function createOpenAiChatAssistant(
           },
           body: JSON.stringify({
             model,
+            max_output_tokens: ASSISTANT_MAX_OUTPUT_TOKENS,
             input: [
               {
                 role: 'system',