Fix OpenAI compatibility issues for newer models and Azure config (#727)

Use max_completion_tokens for OpenAI and Azure OpenAI providers:
The OpenAI API deprecated max_tokens in favor of
max_completion_tokens for chat completions. Newer models
(gpt-4o, o1, o3) reject the old parameter with a 400 error.

AZURE_API_VERSION env var now overrides the default API version
(falling back to 2024-12-01-preview).

Update tests to assert the expected request structures.
This commit is contained in:
cybermaggedon 2026-03-28 11:19:45 +00:00 committed by GitHub
parent a634520509
commit 20204d87c3
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 14 additions and 14 deletions

View file

@@ -20,7 +20,7 @@ default_ident = "text-completion"
default_temperature = 0.0
default_max_output = 4192
default_api = "2024-12-01-preview"
default_api = os.getenv("AZURE_API_VERSION", "2024-12-01-preview")
default_endpoint = os.getenv("AZURE_ENDPOINT", None)
default_token = os.getenv("AZURE_TOKEN", None)
default_model = os.getenv("AZURE_MODEL", None)
@@ -90,7 +90,7 @@ class Processor(LlmService):
}
],
temperature=effective_temperature,
max_tokens=self.max_output,
max_completion_tokens=self.max_output,
top_p=1,
)
@@ -159,7 +159,7 @@ class Processor(LlmService):
}
],
temperature=effective_temperature,
max_tokens=self.max_output,
max_completion_tokens=self.max_output,
top_p=1,
stream=True,
stream_options={"include_usage": True}

View file

@@ -86,7 +86,7 @@ class Processor(LlmService):
}
],
temperature=effective_temperature,
max_tokens=self.max_output,
max_completion_tokens=self.max_output,
)
inputtokens = resp.usage.prompt_tokens
@@ -152,7 +152,7 @@ class Processor(LlmService):
}
],
temperature=effective_temperature,
max_tokens=self.max_output,
max_completion_tokens=self.max_output,
stream=True,
stream_options={"include_usage": True}
)