fix: simplify dashscope variant and route API calls through variants (#1012)

Replace the client.post()/httpx bypass with standard SDK extra_body, confirmed working against DashScope. Make DashScope the base variant with Qwen as a subclass alias. Route all API calls through variant create_completion/create_completion_stream methods.
2026-07-03 15:01:00 +02:00 · 2026-07-02 09:12:55 +01:00 · 2026-07-02 09:12:55 +01:00 · f18d48dc39
commit f18d48dc39
parent 6887076ce0
2 changed files with 53 additions and 58 deletions
--- a/trustgraph-flow/trustgraph/model/text_completion/openai/llm.py
+++ b/trustgraph-flow/trustgraph/model/text_completion/openai/llm.py
@ -96,20 +96,20 @@ class Processor(LlmService):
            api_kwargs = self._build_kwargs(model_name, effective_temperature)
-            resp = self.openai.chat.completions.create(
+            messages = [
-                model=model_name,
+                {
-                messages=[
+                    "role": "user",
-                    {
+                    "content": [
-                        "role": "user",
+                        {
-                        "content": [
+                            "type": "text",
-                            {
+                            "text": prompt
-                                "type": "text",
+                        }
-                                "text": prompt
+                    ]
-                            }
+                }
-                        ]
+            ]
-                    }
+
-                ],
+            resp = self.variant.create_completion(
-                **api_kwargs,
+                self.openai, model_name, messages, **api_kwargs,
            )
            inputtokens = resp.usage.prompt_tokens
@ -176,28 +176,24 @@ class Processor(LlmService):
        try:
            api_kwargs = self._build_kwargs(model_name, effective_temperature)
-            response = self.openai.chat.completions.create(
+            messages = [
-                model=model_name,
+                {
-                messages=[
+                    "role": "user",
-                    {
+                    "content": [
-                        "role": "user",
+                        {
-                        "content": [
+                            "type": "text",
-                            {
+                            "text": prompt
-                                "type": "text",
+                        }
-                                "text": prompt
+                    ]
-                            }
+                }
-                        ]
+            ]
                    }
                ],
                stream=True,
                stream_options={"include_usage": True},
                **api_kwargs,
            )
            total_input_tokens = 0
            total_output_tokens = 0
-            for chunk in response:
+            async for chunk in self.variant.create_completion_stream(
                self.openai, model_name, messages, **api_kwargs,
            ):
                if chunk.choices and chunk.choices[0].delta.content:
                    yield LlmChunk(
                        text=chunk.choices[0].delta.content,
--- a/trustgraph-flow/trustgraph/model/text_completion/openai/variants.py
+++ b/trustgraph-flow/trustgraph/model/text_completion/openai/variants.py
@ -62,6 +62,20 @@ class Variant:
        """Extract thinking content from a streaming delta."""
        return getattr(delta, "reasoning_content", None)
    def create_completion(self, client, model, messages, **kwargs):
        """Call the completions API. Override for non-standard SDKs."""
        return client.chat.completions.create(
            model=model, messages=messages, **kwargs,
        )
    async def create_completion_stream(self, client, model, messages, **kwargs):
        """Call the streaming completions API. Override for non-standard SDKs."""
        for chunk in client.chat.completions.create(
            model=model, messages=messages, stream=True,
            stream_options={"include_usage": True}, **kwargs,
        ):
            yield chunk
 class OpenAIVariant(Variant):
    """Standard OpenAI API (GPT-4o, o1, o3, etc.)."""
@ -96,30 +110,8 @@ class DeepSeekVariant(Variant):
        return {}
 class QwenVariant(Variant):
    """Qwen / Alibaba Cloud API."""
    name = "qwen"
    token_param = "max_completion_tokens"
    temperature_with_thinking = True
    def completion_kwargs(self, max_output, temperature, thinking):
        enabled = thinking != "off"
        kwargs = {
            self.token_param: max_output,
            "temperature": temperature,
            "extra_body": {
                "enable_thinking": enabled,
            },
        }
        return kwargs
    def thinking_kwargs(self, effort):
        return {}
 class DashScopeVariant(Variant):
-    """Alibaba Cloud DashScope API (Qwen models via DashScope)."""
+    """Alibaba Cloud DashScope API (Qwen models)."""
    name = "dashscope"
    token_param = "max_completion_tokens"
@ -127,17 +119,24 @@ class DashScopeVariant(Variant):
    def completion_kwargs(self, max_output, temperature, thinking):
        enabled = thinking != "off"
-        kwargs = {
+        return {
            self.token_param: max_output,
            "temperature": temperature,
-            "enable_thinking": enabled,
+            "extra_body": {
                "enable_thinking": enabled,
            },
        }
        return kwargs
    def thinking_kwargs(self, effort):
        return {}
 class QwenVariant(DashScopeVariant):
    """Qwen — alias for DashScope."""
    name = "qwen"
 class MistralVariant(Variant):
    """Mistral API (Mistral Large, etc.)."""