fix: simplify dashscope variant and route API calls through variants (#1012)

Replace the client.post()/httpx bypass with standard SDK extra_body,
confirmed working against DashScope. Make DashScope the base variant
with Qwen as a subclass alias. Route all API calls through variant
create_completion/create_completion_stream methods.
This commit is contained in:
cybermaggedon 2026-07-02 09:12:55 +01:00 committed by GitHub
parent 6887076ce0
commit f18d48dc39
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 53 additions and 58 deletions

View file

@ -96,20 +96,20 @@ class Processor(LlmService):
api_kwargs = self._build_kwargs(model_name, effective_temperature)
resp = self.openai.chat.completions.create(
model=model_name,
messages=[
{
"role": "user",
"content": [
{
"type": "text",
"text": prompt
}
]
}
],
**api_kwargs,
messages = [
{
"role": "user",
"content": [
{
"type": "text",
"text": prompt
}
]
}
]
resp = self.variant.create_completion(
self.openai, model_name, messages, **api_kwargs,
)
inputtokens = resp.usage.prompt_tokens
@ -176,28 +176,24 @@ class Processor(LlmService):
try:
api_kwargs = self._build_kwargs(model_name, effective_temperature)
response = self.openai.chat.completions.create(
model=model_name,
messages=[
{
"role": "user",
"content": [
{
"type": "text",
"text": prompt
}
]
}
],
stream=True,
stream_options={"include_usage": True},
**api_kwargs,
)
messages = [
{
"role": "user",
"content": [
{
"type": "text",
"text": prompt
}
]
}
]
total_input_tokens = 0
total_output_tokens = 0
for chunk in response:
async for chunk in self.variant.create_completion_stream(
self.openai, model_name, messages, **api_kwargs,
):
if chunk.choices and chunk.choices[0].delta.content:
yield LlmChunk(
text=chunk.choices[0].delta.content,

View file

@ -62,6 +62,20 @@ class Variant:
"""Extract thinking content from a streaming delta."""
return getattr(delta, "reasoning_content", None)
def create_completion(self, client, model, messages, **kwargs):
    """Issue a non-streaming chat-completion request through *client*.

    *model*, *messages* and every extra keyword argument are forwarded
    unchanged to ``client.chat.completions.create``; whatever that call
    returns is handed back to the caller.  Variants whose SDK is
    non-standard can override this method.
    """
    completions_api = client.chat.completions
    request = dict(kwargs, model=model, messages=messages)
    return completions_api.create(**request)
async def create_completion_stream(self, client, model, messages, **kwargs):
    """Stream chat-completion chunks from *client* as an async generator.

    Calls ``client.chat.completions.create`` with ``stream=True`` and
    ``stream_options={"include_usage": True}`` in addition to *model*,
    *messages* and any extra ``kwargs``, then yields every chunk the
    resulting stream produces.  Override for non-standard SDKs.

    Both synchronous and asynchronous clients are accepted: if the call
    returns an awaitable it is awaited first, and a stream that is only
    asynchronously iterable is consumed with ``async for``.  A stream that
    supports plain iteration is consumed with a regular ``for`` loop.

    NOTE(review): iterating a synchronous stream inside this coroutine
    blocks the event loop while waiting for each chunk -- confirm that is
    acceptable for the callers.
    """
    # Local import keeps this method from adding a module-level dependency.
    import inspect

    stream = client.chat.completions.create(
        model=model,
        messages=messages,
        stream=True,
        stream_options={"include_usage": True},
        **kwargs,
    )

    # An async client hands back an awaitable that resolves to the stream.
    if inspect.isawaitable(stream):
        stream = await stream

    # Prefer synchronous iteration whenever the stream offers it, so
    # existing synchronous clients behave exactly as they did before.
    if hasattr(stream, "__iter__"):
        for chunk in stream:
            yield chunk
    else:
        async for chunk in stream:
            yield chunk
class OpenAIVariant(Variant):
"""Standard OpenAI API (GPT-4o, o1, o3, etc.)."""
@ -96,30 +110,8 @@ class DeepSeekVariant(Variant):
return {}
class QwenVariant(Variant):
    """Variant describing the Qwen / Alibaba Cloud API."""

    name = "qwen"
    token_param = "max_completion_tokens"
    temperature_with_thinking = True

    def completion_kwargs(self, max_output, temperature, thinking):
        """Build the keyword arguments for a completion request.

        The token limit is stored under ``self.token_param``, the
        temperature is passed through as given, and ``extra_body`` carries
        an ``enable_thinking`` flag that is true unless *thinking* equals
        ``"off"``.
        """
        thinking_on = thinking != "off"
        extra_body = {"enable_thinking": thinking_on}
        return {
            self.token_param: max_output,
            "temperature": temperature,
            "extra_body": extra_body,
        }

    def thinking_kwargs(self, effort):
        """Return an empty dict; *effort* is not used by this variant."""
        return {}
class DashScopeVariant(Variant):
"""Alibaba Cloud DashScope API (Qwen models via DashScope)."""
"""Alibaba Cloud DashScope API (Qwen models)."""
name = "dashscope"
token_param = "max_completion_tokens"
@ -127,17 +119,24 @@ class DashScopeVariant(Variant):
def completion_kwargs(self, max_output, temperature, thinking):
enabled = thinking != "off"
kwargs = {
return {
self.token_param: max_output,
"temperature": temperature,
"enable_thinking": enabled,
"extra_body": {
"enable_thinking": enabled,
},
}
return kwargs
def thinking_kwargs(self, effort):
return {}
class QwenVariant(DashScopeVariant):
    """Alias of the DashScope variant, registered under the name ``qwen``.

    Only ``name`` is overridden; everything else is inherited from
    ``DashScopeVariant``.
    """

    name = "qwen"
class MistralVariant(Variant):
"""Mistral API (Mistral Large, etc.)."""