mirror of
https://github.com/trustgraph-ai/trustgraph.git
synced 2026-04-27 09:26:22 +02:00
Feature/streaming llm phase 1 (#566)
* Tidy up duplicate tech specs in doc directory * Streaming LLM text-completion service tech spec. * text-completion and prompt interfaces * streaming change applied to all LLMs, so far tested with VertexAI * Skip Pinecone unit tests, upstream module issue is affecting things, tests are passing again * Added agent streaming, not working and has broken tests
This commit is contained in:
parent
943a9d83b0
commit
310a2deb06
44 changed files with 2684 additions and 937 deletions
|
|
@ -9,7 +9,7 @@ import os
|
|||
import logging
|
||||
|
||||
from .... exceptions import TooManyRequests
|
||||
from .... base import LlmService, LlmResult
|
||||
from .... base import LlmService, LlmResult, LlmChunk
|
||||
|
||||
# Module logger
|
||||
logger = logging.getLogger(__name__)
|
||||
|
|
@ -106,6 +106,65 @@ class Processor(LlmService):
|
|||
logger.error(f"Claude LLM exception ({type(e).__name__}): {e}", exc_info=True)
|
||||
raise e
|
||||
|
||||
def supports_streaming(self):
|
||||
"""Claude/Anthropic supports streaming"""
|
||||
return True
|
||||
|
||||
async def generate_content_stream(self, system, prompt, model=None, temperature=None):
|
||||
"""Stream content generation from Claude"""
|
||||
model_name = model or self.default_model
|
||||
effective_temperature = temperature if temperature is not None else self.temperature
|
||||
|
||||
logger.debug(f"Using model (streaming): {model_name}")
|
||||
logger.debug(f"Using temperature: {effective_temperature}")
|
||||
|
||||
try:
|
||||
with self.claude.messages.stream(
|
||||
model=model_name,
|
||||
max_tokens=self.max_output,
|
||||
temperature=effective_temperature,
|
||||
system=system,
|
||||
messages=[
|
||||
{
|
||||
"role": "user",
|
||||
"content": [
|
||||
{
|
||||
"type": "text",
|
||||
"text": prompt
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
) as stream:
|
||||
for text in stream.text_stream:
|
||||
yield LlmChunk(
|
||||
text=text,
|
||||
in_token=None,
|
||||
out_token=None,
|
||||
model=model_name,
|
||||
is_final=False
|
||||
)
|
||||
|
||||
# Get final message for token counts
|
||||
final_message = stream.get_final_message()
|
||||
yield LlmChunk(
|
||||
text="",
|
||||
in_token=final_message.usage.input_tokens,
|
||||
out_token=final_message.usage.output_tokens,
|
||||
model=model_name,
|
||||
is_final=True
|
||||
)
|
||||
|
||||
logger.debug("Streaming complete")
|
||||
|
||||
except anthropic.RateLimitError:
|
||||
logger.warning("Rate limit exceeded during streaming")
|
||||
raise TooManyRequests()
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Claude streaming exception ({type(e).__name__}): {e}", exc_info=True)
|
||||
raise e
|
||||
|
||||
@staticmethod
|
||||
def add_args(parser):
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue