Fix Ollama thinking mode with /nothink chat message

Send /nothink as a separate message before the actual prompt,
with a mock assistant response. This properly disables thinking
mode for Qwen3/DeepSeek models.

Remove the ineffective think:false API parameter.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
clucraft 2026-01-25 19:40:02 -05:00
parent aad5a797b6
commit f1a055c3b9

View file

@@ -286,13 +286,20 @@ async function extractWithOllama(
       {
         model: model,
         messages: [
+          {
+            role: 'user',
+            content: '/nothink', // Disable thinking mode for Qwen3/DeepSeek
+          },
+          {
+            role: 'assistant',
+            content: 'Ok.',
+          },
           {
             role: 'user',
             content: EXTRACTION_PROMPT + preparedHtml,
           },
         ],
         stream: false,
-        think: false, // Disable thinking mode for Qwen3/DeepSeek models
         options: {
           num_ctx: 16384, // Increase context window for large HTML content
         },
@@ -388,9 +395,12 @@ async function verifyWithOllama(
       `${baseUrl}/api/chat`,
       {
         model: model,
-        messages: [{ role: 'user', content: prompt }],
+        messages: [
+          { role: 'user', content: '/nothink' },
+          { role: 'assistant', content: 'Ok.' },
+          { role: 'user', content: prompt },
+        ],
         stream: false,
-        think: false, // Disable thinking mode for Qwen3/DeepSeek models
         options: {
           num_ctx: 16384, // Increase context window for large HTML content
         },
@@ -484,9 +494,12 @@ async function verifyStockStatusWithOllama(
       `${baseUrl}/api/chat`,
       {
         model: model,
-        messages: [{ role: 'user', content: prompt }],
+        messages: [
+          { role: 'user', content: '/nothink' },
+          { role: 'assistant', content: 'Ok.' },
+          { role: 'user', content: prompt },
+        ],
         stream: false,
-        think: false, // Disable thinking mode for Qwen3/DeepSeek models
         options: {
           num_ctx: 16384, // Increase context window for large HTML content
         },
@@ -943,9 +956,12 @@ async function arbitrateWithOllama(
       `${baseUrl}/api/chat`,
       {
         model: model,
-        messages: [{ role: 'user', content: prompt }],
+        messages: [
+          { role: 'user', content: '/nothink' },
+          { role: 'assistant', content: 'Ok.' },
+          { role: 'user', content: prompt },
+        ],
         stream: false,
-        think: false, // Disable thinking mode for Qwen3/DeepSeek models
         options: {
           num_ctx: 16384, // Increase context window for large HTML content
         },