mirror of
https://github.com/trustgraph-ai/trustgraph.git
synced 2026-07-03 15:01:00 +02:00
fix: simplify dashscope variant and route API calls through variants (#1012)
Replace the `client.post()`/httpx bypass with the standard SDK `extra_body` parameter, which has been confirmed to work against DashScope. Make DashScope the base variant, with Qwen as a subclass alias. Route all API calls through the variant's `create_completion`/`create_completion_stream` methods.
This commit is contained in:
parent
6887076ce0
commit
f18d48dc39
2 changed files with 53 additions and 58 deletions
|
|
@@ -96,20 +96,20 @@ class Processor(LlmService):
|
||||||
|
|
||||||
api_kwargs = self._build_kwargs(model_name, effective_temperature)
|
api_kwargs = self._build_kwargs(model_name, effective_temperature)
|
||||||
|
|
||||||
resp = self.openai.chat.completions.create(
|
messages = [
|
||||||
model=model_name,
|
{
|
||||||
messages=[
|
"role": "user",
|
||||||
{
|
"content": [
|
||||||
"role": "user",
|
{
|
||||||
"content": [
|
"type": "text",
|
||||||
{
|
"text": prompt
|
||||||
"type": "text",
|
}
|
||||||
"text": prompt
|
]
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
}
|
|
||||||
],
|
resp = self.variant.create_completion(
|
||||||
**api_kwargs,
|
self.openai, model_name, messages, **api_kwargs,
|
||||||
)
|
)
|
||||||
|
|
||||||
inputtokens = resp.usage.prompt_tokens
|
inputtokens = resp.usage.prompt_tokens
|
||||||
|
|
@@ -176,28 +176,24 @@ class Processor(LlmService):
|
||||||
try:
|
try:
|
||||||
api_kwargs = self._build_kwargs(model_name, effective_temperature)
|
api_kwargs = self._build_kwargs(model_name, effective_temperature)
|
||||||
|
|
||||||
response = self.openai.chat.completions.create(
|
messages = [
|
||||||
model=model_name,
|
{
|
||||||
messages=[
|
"role": "user",
|
||||||
{
|
"content": [
|
||||||
"role": "user",
|
{
|
||||||
"content": [
|
"type": "text",
|
||||||
{
|
"text": prompt
|
||||||
"type": "text",
|
}
|
||||||
"text": prompt
|
]
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
}
|
|
||||||
],
|
|
||||||
stream=True,
|
|
||||||
stream_options={"include_usage": True},
|
|
||||||
**api_kwargs,
|
|
||||||
)
|
|
||||||
|
|
||||||
total_input_tokens = 0
|
total_input_tokens = 0
|
||||||
total_output_tokens = 0
|
total_output_tokens = 0
|
||||||
|
|
||||||
for chunk in response:
|
async for chunk in self.variant.create_completion_stream(
|
||||||
|
self.openai, model_name, messages, **api_kwargs,
|
||||||
|
):
|
||||||
if chunk.choices and chunk.choices[0].delta.content:
|
if chunk.choices and chunk.choices[0].delta.content:
|
||||||
yield LlmChunk(
|
yield LlmChunk(
|
||||||
text=chunk.choices[0].delta.content,
|
text=chunk.choices[0].delta.content,
|
||||||
|
|
|
||||||
|
|
@@ -62,6 +62,20 @@ class Variant:
|
||||||
"""Extract thinking content from a streaming delta."""
|
"""Extract thinking content from a streaming delta."""
|
||||||
return getattr(delta, "reasoning_content", None)
|
return getattr(delta, "reasoning_content", None)
|
||||||
|
|
||||||
|
def create_completion(self, client, model, messages, **kwargs):
|
||||||
|
"""Call the completions API. Override for non-standard SDKs."""
|
||||||
|
return client.chat.completions.create(
|
||||||
|
model=model, messages=messages, **kwargs,
|
||||||
|
)
|
||||||
|
|
||||||
|
async def create_completion_stream(self, client, model, messages, **kwargs):
|
||||||
|
"""Call the streaming completions API. Override for non-standard SDKs."""
|
||||||
|
for chunk in client.chat.completions.create(
|
||||||
|
model=model, messages=messages, stream=True,
|
||||||
|
stream_options={"include_usage": True}, **kwargs,
|
||||||
|
):
|
||||||
|
yield chunk
|
||||||
|
|
||||||
|
|
||||||
class OpenAIVariant(Variant):
|
class OpenAIVariant(Variant):
|
||||||
"""Standard OpenAI API (GPT-4o, o1, o3, etc.)."""
|
"""Standard OpenAI API (GPT-4o, o1, o3, etc.)."""
|
||||||
|
|
@ -96,30 +110,8 @@ class DeepSeekVariant(Variant):
|
||||||
return {}
|
return {}
|
||||||
|
|
||||||
|
|
||||||
class QwenVariant(Variant):
|
|
||||||
"""Qwen / Alibaba Cloud API."""
|
|
||||||
|
|
||||||
name = "qwen"
|
|
||||||
token_param = "max_completion_tokens"
|
|
||||||
temperature_with_thinking = True
|
|
||||||
|
|
||||||
def completion_kwargs(self, max_output, temperature, thinking):
|
|
||||||
enabled = thinking != "off"
|
|
||||||
kwargs = {
|
|
||||||
self.token_param: max_output,
|
|
||||||
"temperature": temperature,
|
|
||||||
"extra_body": {
|
|
||||||
"enable_thinking": enabled,
|
|
||||||
},
|
|
||||||
}
|
|
||||||
return kwargs
|
|
||||||
|
|
||||||
def thinking_kwargs(self, effort):
|
|
||||||
return {}
|
|
||||||
|
|
||||||
|
|
||||||
class DashScopeVariant(Variant):
|
class DashScopeVariant(Variant):
|
||||||
"""Alibaba Cloud DashScope API (Qwen models via DashScope)."""
|
"""Alibaba Cloud DashScope API (Qwen models)."""
|
||||||
|
|
||||||
name = "dashscope"
|
name = "dashscope"
|
||||||
token_param = "max_completion_tokens"
|
token_param = "max_completion_tokens"
|
||||||
|
|
@@ -127,17 +119,24 @@ class DashScopeVariant(Variant):
|
||||||
|
|
||||||
def completion_kwargs(self, max_output, temperature, thinking):
|
def completion_kwargs(self, max_output, temperature, thinking):
|
||||||
enabled = thinking != "off"
|
enabled = thinking != "off"
|
||||||
kwargs = {
|
return {
|
||||||
self.token_param: max_output,
|
self.token_param: max_output,
|
||||||
"temperature": temperature,
|
"temperature": temperature,
|
||||||
"enable_thinking": enabled,
|
"extra_body": {
|
||||||
|
"enable_thinking": enabled,
|
||||||
|
},
|
||||||
}
|
}
|
||||||
return kwargs
|
|
||||||
|
|
||||||
def thinking_kwargs(self, effort):
|
def thinking_kwargs(self, effort):
|
||||||
return {}
|
return {}
|
||||||
|
|
||||||
|
|
||||||
|
class QwenVariant(DashScopeVariant):
|
||||||
|
"""Qwen — alias for DashScope."""
|
||||||
|
|
||||||
|
name = "qwen"
|
||||||
|
|
||||||
|
|
||||||
class MistralVariant(Variant):
|
class MistralVariant(Variant):
|
||||||
"""Mistral API (Mistral Large, etc.)."""
|
"""Mistral API (Mistral Large, etc.)."""
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue