Feature/streaming llm phase 1 (#566)

* Tidy up duplicate tech specs in doc directory

* Streaming LLM text-completion service tech spec.

* text-completion and prompt interfaces

* streaming change applied to all LLMs, so far tested with VertexAI

* Skip Pinecone unit tests — an upstream module issue is affecting things; tests are passing again

* Added agent streaming, not working and has broken tests
This commit is contained in:
cybermaggedon 2025-11-26 09:59:10 +00:00 committed by GitHub
parent 943a9d83b0
commit 310a2deb06
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
44 changed files with 2684 additions and 937 deletions

View file

@@ -282,10 +282,11 @@ class TestTextCompletionIntegration:
# Assert
# Verify OpenAI API call parameters
call_args = mock_openai_client.chat.completions.create.call_args
assert call_args.kwargs['response_format'] == {"type": "text"}
assert call_args.kwargs['top_p'] == 1
assert call_args.kwargs['frequency_penalty'] == 0
assert call_args.kwargs['presence_penalty'] == 0
# Note: response_format, top_p, frequency_penalty, and presence_penalty
# were removed in #561 as unnecessary parameters
assert 'model' in call_args.kwargs
assert 'temperature' in call_args.kwargs
assert 'max_tokens' in call_args.kwargs
# Verify result structure
assert hasattr(result, 'text')
@@ -362,9 +363,8 @@ class TestTextCompletionIntegration:
assert call_args.kwargs['model'] == "gpt-4"
assert call_args.kwargs['temperature'] == 0.8
assert call_args.kwargs['max_tokens'] == 2048
assert call_args.kwargs['top_p'] == 1
assert call_args.kwargs['frequency_penalty'] == 0
assert call_args.kwargs['presence_penalty'] == 0
# Note: top_p, frequency_penalty, and presence_penalty
# were removed in #561 as unnecessary parameters
@pytest.mark.asyncio
@pytest.mark.slow