From f1a055c3b9fa37a4a573b1074df3ccd8ca05ceca Mon Sep 17 00:00:00 2001
From: clucraft <clucraft@users.noreply.github.com>
Date: Sun, 25 Jan 2026 19:40:02 -0500
Subject: [PATCH] Fix disabling of Ollama thinking mode via /nothink message

Send /nothink as its own user message ahead of the actual prompt,
followed by a mock assistant reply ("Ok."), in all four Ollama chat
requests. This properly disables thinking mode for Qwen3/DeepSeek
models.

Remove the ineffective `think: false` API parameter.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 backend/src/services/ai-extractor.ts | 30 +++++++++++++++++++++-------
 1 file changed, 23 insertions(+), 7 deletions(-)

diff --git a/backend/src/services/ai-extractor.ts b/backend/src/services/ai-extractor.ts
index 989c29b..3245b75 100644
--- a/backend/src/services/ai-extractor.ts
+++ b/backend/src/services/ai-extractor.ts
@@ -286,13 +286,20 @@ async function extractWithOllama(
     {
       model: model,
       messages: [
+        {
+          role: 'user',
+          content: '/nothink', // Disable thinking mode for Qwen3/DeepSeek
+        },
+        {
+          role: 'assistant',
+          content: 'Ok.',
+        },
         {
           role: 'user',
           content: EXTRACTION_PROMPT + preparedHtml,
         },
       ],
       stream: false,
-      think: false, // Disable thinking mode for Qwen3/DeepSeek models
       options: {
         num_ctx: 16384, // Increase context window for large HTML content
       },
@@ -388,9 +395,12 @@ async function verifyWithOllama(
     `${baseUrl}/api/chat`,
     {
       model: model,
-      messages: [{ role: 'user', content: prompt }],
+      messages: [
+        { role: 'user', content: '/nothink' },
+        { role: 'assistant', content: 'Ok.' },
+        { role: 'user', content: prompt },
+      ],
       stream: false,
-      think: false, // Disable thinking mode for Qwen3/DeepSeek models
       options: {
         num_ctx: 16384, // Increase context window for large HTML content
       },
@@ -484,9 +494,12 @@ async function verifyStockStatusWithOllama(
     `${baseUrl}/api/chat`,
     {
       model: model,
-      messages: [{ role: 'user', content: prompt }],
+      messages: [
+        { role: 'user', content: '/nothink' },
+        { role: 'assistant', content: 'Ok.' },
+        { role: 'user', content: prompt },
+      ],
       stream: false,
-      think: false, // Disable thinking mode for Qwen3/DeepSeek models
       options: {
         num_ctx: 16384, // Increase context window for large HTML content
       },
@@ -943,9 +956,12 @@ async function arbitrateWithOllama(
     `${baseUrl}/api/chat`,
     {
       model: model,
-      messages: [{ role: 'user', content: prompt }],
+      messages: [
+        { role: 'user', content: '/nothink' },
+        { role: 'assistant', content: 'Ok.' },
+        { role: 'user', content: prompt },
+      ],
       stream: false,
-      think: false, // Disable thinking mode for Qwen3/DeepSeek models
       options: {
         num_ctx: 16384, // Increase context window for large HTML content
       },