Feature/streaming llm phase 1 (#566)

* Tidy up duplicate tech specs in doc directory

* Streaming LLM text-completion service tech spec.

* Add text-completion and prompt interfaces

* Streaming change applied to all LLMs; so far tested only with VertexAI

* Skip Pinecone unit tests while an upstream module issue is affecting them; tests are passing again

* Added agent streaming; not yet working, and has broken tests
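
The commits above mention the new streaming interfaces only by name. As a reading aid, here is a minimal sketch of the chunk type the diff below yields. This is hypothetical: the field names are taken from the `LlmChunk(...)` call sites in the diff, and the actual definition, which lives in the unshown `base` module, may differ.

```python
# Hypothetical sketch of LlmChunk, inferred from the yield sites in the
# diff below; the real definition in the base module is not shown here.
from dataclasses import dataclass
from typing import Optional

@dataclass
class LlmChunk:
    text: str                        # incremental text delta
    in_token: Optional[int] = None   # input token count, when reported
    out_token: Optional[int] = None  # output token count, when reported
    model: Optional[str] = None      # model that produced the chunk
    is_final: bool = False           # True only on the terminating sentinel
```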
cybermaggedon 2025-11-26 09:59:10 +00:00 committed by GitHub
parent 943a9d83b0
commit 310a2deb06
44 changed files with 2684 additions and 937 deletions


@@ -12,7 +12,7 @@ import logging

 logger = logging.getLogger(__name__)

 from .... exceptions import TooManyRequests
-from .... base import LlmService, LlmResult
+from .... base import LlmService, LlmResult, LlmChunk

 default_ident = "text-completion"
@@ -120,6 +120,67 @@ class Processor(LlmService):
             logger.error(f"Mistral LLM exception ({type(e).__name__}): {e}", exc_info=True)
             raise e

+    def supports_streaming(self):
+        """Mistral supports streaming"""
+        return True
+
+    async def generate_content_stream(self, system, prompt, model=None, temperature=None):
+        """Stream content generation from Mistral"""
+
+        model_name = model or self.default_model
+        effective_temperature = temperature if temperature is not None else self.temperature
+
+        logger.debug(f"Using model (streaming): {model_name}")
+        logger.debug(f"Using temperature: {effective_temperature}")
+
+        prompt = system + "\n\n" + prompt
+
+        try:
+
+            stream = self.mistral.chat.stream(
+                model=model_name,
+                messages=[
+                    {
+                        "role": "user",
+                        "content": [
+                            {
+                                "type": "text",
+                                "text": prompt
+                            }
+                        ]
+                    }
+                ],
+                temperature=effective_temperature,
+                max_tokens=self.max_output,
+                top_p=1,
+                frequency_penalty=0,
+                presence_penalty=0,
+                response_format={"type": "text"}
+            )
+
+            for chunk in stream:
+                if chunk.data.choices and chunk.data.choices[0].delta.content:
+                    yield LlmChunk(
+                        text=chunk.data.choices[0].delta.content,
+                        in_token=None,
+                        out_token=None,
+                        model=model_name,
+                        is_final=False
+                    )
+
+            # Send final chunk
+            yield LlmChunk(
+                text="",
+                in_token=None,
+                out_token=None,
+                model=model_name,
+                is_final=True
+            )
+
+            logger.debug("Streaming complete")
+
+        except Exception as e:
+            logger.error(f"Mistral streaming exception ({type(e).__name__}): {e}", exc_info=True)
+            raise e
+
     @staticmethod
     def add_args(parser):
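
The diff only shows the producer side, so a consumer sketch may help. This is hypothetical: `proc` stands for an already-constructed `Processor` instance (construction is not shown in this commit), and the system/prompt strings are placeholders. The method is an async generator, so it is driven with `async for`; the empty `is_final` chunk acts as an end-of-stream sentinel.

```python
# Hypothetical consumer of the streaming API added in this commit.
# `proc` is assumed to be an already-constructed Processor instance.
import asyncio

async def stream_completion(proc) -> str:
    if not proc.supports_streaming():
        raise RuntimeError("processor does not support streaming")

    parts = []
    async for chunk in proc.generate_content_stream(
        system="You are a helpful assistant.",
        prompt="Explain unified diff format in one sentence.",
    ):
        if chunk.is_final:
            break                                # sentinel ends the stream
        parts.append(chunk.text)
        print(chunk.text, end="", flush=True)    # incremental display

    return "".join(parts)

# asyncio.run(stream_completion(proc))
```

One design observation: the producer iterates `self.mistral.chat.stream(...)` with a plain `for` inside an async generator, which blocks the event loop while each network chunk arrives; if the installed `mistralai` SDK version provides the async streaming variant, driving that with `async for` would avoid the blocking.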