trustgraph/trustgraph-base/trustgraph/clients/llm_client.py


import _pulsar

from .. schema import TextCompletionRequest, TextCompletionResponse
from .. schema import text_completion_request_queue
from .. schema import text_completion_response_queue
from . base import BaseClient
from .. exceptions import LlmError

# Short module-level aliases for the Pulsar client's logger levels.
# ERROR is used as the default ``log_level`` of LlmClient.
ERROR, WARN, INFO, DEBUG = (
    _pulsar.LoggerLevel.Error,
    _pulsar.LoggerLevel.Warn,
    _pulsar.LoggerLevel.Info,
    _pulsar.LoggerLevel.Debug,
)

class LlmClient(BaseClient):
    """
    Client for the text-completion (LLM) service.

    Configures BaseClient with the TextCompletionRequest /
    TextCompletionResponse schemas and, unless the caller overrides them,
    the default text-completion request and response queues.
    """

    def __init__(
            self, log_level=ERROR,
            subscriber=None,
            input_queue=None,
            output_queue=None,
            pulsar_host="pulsar://pulsar:6650",
            pulsar_api_key=None,
    ):
        """
        Create the client.

        All arguments are forwarded to BaseClient.__init__ unchanged,
        except that an ``input_queue`` / ``output_queue`` of None is
        replaced by the default text-completion request / response queue.
        """

        # Fall back to the default queues declared in the schema module
        if input_queue is None:
            input_queue = text_completion_request_queue
        if output_queue is None:
            output_queue = text_completion_response_queue

        super().__init__(
            log_level=log_level,
            subscriber=subscriber,
            input_queue=input_queue,
            output_queue=output_queue,
            pulsar_host=pulsar_host,
            pulsar_api_key=pulsar_api_key,
            input_schema=TextCompletionRequest,
            output_schema=TextCompletionResponse,
        )

    def request(self, system, prompt, timeout=300, streaming=False):
        """
        Non-streaming request (backward compatible).

        Sends ``system`` and ``prompt`` through ``self.call`` and returns
        the ``response`` attribute of the result.

        ``streaming`` is accepted only so that callers passing it get a
        clear error: a true value raises ValueError, because streamed
        responses must be consumed through request_stream().
        """
        if streaming:
            raise ValueError("Use request_stream() for streaming requests")
        return self.call(
            system=system, prompt=prompt, streaming=False, timeout=timeout
        ).response

    def request_stream(self, system, prompt, timeout=300):
        """
        Streaming request generator.

        Sends a request with ``streaming=True`` and yields each response
        value carrying this request's id as it arrives, stopping after the
        chunk flagged ``end_of_stream``.  A value with no ``end_of_stream``
        attribute is treated as final.

        Usage:
            for chunk in client.request_stream(system, prompt):
                print(chunk.response, end='', flush=True)

        ``timeout`` is the overall budget in seconds, measured from just
        before the request is sent.  It is only checked between polls of
        the consumer, so the deadline can be overrun by up to one poll
        interval (2.5 seconds) and is not checked while the generator is
        suspended at a yield.

        Raises:
            LlmError: the response carries an error of type "llm-error".
            RuntimeError: the response carries any other error type.
            TimeoutError: the deadline passed before the final chunk.
        """
        import time
        import uuid

        request_id = str(uuid.uuid4())
        request = TextCompletionRequest(
            system=system, prompt=prompt, streaming=True
        )

        end_time = time.time() + timeout
        self.producer.send(request, properties={"id": request_id})

        # Collect responses until end_of_stream or the deadline
        while time.time() < end_time:

            try:
                msg = self.consumer.receive(timeout_millis=2500)
            except _pulsar.Timeout:
                # Nothing arrived within this poll; re-check the deadline.
                # Any other receive failure propagates instead of being
                # retried until the deadline and reported as a timeout.
                continue

            # Messages for another request, or carrying no id at all, are
            # acknowledged and skipped
            if msg.properties().get("id") != request_id:
                self.consumer.acknowledge(msg)
                continue

            value = msg.value()
            self.consumer.acknowledge(msg)

            # Handle errors
            if value.error:
                if value.error.type == "llm-error":
                    raise LlmError(value.error.message)
                raise RuntimeError(
                    f"{value.error.type}: {value.error.message}"
                )

            yield value

            # Final chunk: finish here so that a stream which completes
            # close to the deadline is not reported as a timeout
            if getattr(value, 'end_of_stream', True):
                return

        # The loop only runs out when the deadline has passed
        raise TimeoutError("Timed out waiting for response")
Trustgraph initial code drop 2024-07-10 23:20:06 +01:00
			`import _pulsar`

- Change flawed _client timeout logic which was causing major lags - Moved clients to trustgraph.clients to tidy the parent directory - Version bump 2024-08-20 17:54:11 +01:00			`from .. schema import TextCompletionRequest, TextCompletionResponse`
			`from .. schema import text_completion_request_queue`
			`from .. schema import text_completion_response_queue`
Improve request/response handling (#18) * Request/response error handling with common client * Fixup error handling change 2024-08-22 17:02:18 +01:00			`from . base import BaseClient`
Feature/streaming llm phase 1 (#566) * Tidy up duplicate tech specs in doc directory * Streaming LLM text-completion service tech spec. * text-completion and prompt interfaces * streaming change applied to all LLMs, so far tested with VertexAI * Skip Pinecone unit tests, upstream module issue is affecting things, tests are passing again * Added agent streaming, not working and has broken tests 2025-11-26 09:59:10 +00:00			`from .. exceptions import LlmError`
Refactor names (#4) - Downsize embeddings model to mini-lm in docker-compose files - Rename for structure - Default queues defined in schema file - Standardize naming: graph embeddings, chunk embeddings, triples 2024-07-23 21:34:03 +01:00
Trustgraph initial code drop 2024-07-10 23:20:06 +01:00			`# Ugly`
			`ERROR=_pulsar.LoggerLevel.Error`
			`WARN=_pulsar.LoggerLevel.Warn`
			`INFO=_pulsar.LoggerLevel.Info`
			`DEBUG=_pulsar.LoggerLevel.Debug`

Improve request/response handling (#18) * Request/response error handling with common client * Fixup error handling change 2024-08-22 17:02:18 +01:00			`class LlmClient(BaseClient):`
Trustgraph initial code drop 2024-07-10 23:20:06 +01:00
			`def __init__(`
Have configurable input/output queues on call from kg-definitions 2024-08-05 22:05:28 +01:00			`self, log_level=ERROR,`
			`subscriber=None,`
			`input_queue=None,`
			`output_queue=None,`
Trustgraph initial code drop 2024-07-10 23:20:06 +01:00			`pulsar_host="pulsar://pulsar:6650",`
Feature/pulsar api key support (#308) * Add pulsar API token check * Added missing api_key references --------- Co-authored-by: Tyler O <4535788+toliver38@users.noreply.github.com> 2025-02-15 11:22:48 +00:00			`pulsar_api_key=None,`
Trustgraph initial code drop 2024-07-10 23:20:06 +01:00			`):`

Improve request/response handling (#18) * Request/response error handling with common client * Fixup error handling change 2024-08-22 17:02:18 +01:00			`if input_queue is None: input_queue = text_completion_request_queue`
			`if output_queue is None: output_queue = text_completion_response_queue`

			`super(LlmClient, self).__init__(`
			`log_level=log_level,`
			`subscriber=subscriber,`
			`input_queue=input_queue,`
			`output_queue=output_queue,`
			`pulsar_host=pulsar_host,`
Feature/pulsar api key support (#308) * Add pulsar API token check * Added missing api_key references --------- Co-authored-by: Tyler O <4535788+toliver38@users.noreply.github.com> 2025-02-15 11:22:48 +00:00			`pulsar_api_key=pulsar_api_key,`
Improve request/response handling (#18) * Request/response error handling with common client * Fixup error handling change 2024-08-22 17:02:18 +01:00			`input_schema=TextCompletionRequest,`
			`output_schema=TextCompletionResponse,`
Trustgraph initial code drop 2024-07-10 23:20:06 +01:00			`)`

Feature/streaming llm phase 1 (#566) * Tidy up duplicate tech specs in doc directory * Streaming LLM text-completion service tech spec. * text-completion and prompt interfaces * streaming change applied to all LLMs, so far tested with VertexAI * Skip Pinecone unit tests, upstream module issue is affecting things, tests are passing again * Added agent streaming, not working and has broken tests 2025-11-26 09:59:10 +00:00			`def request(self, system, prompt, timeout=300, streaming=False):`
			`"""`
			`Non-streaming request (backward compatible).`
			`Returns complete response string.`
			`"""`
			`if streaming:`
			`raise ValueError("Use request_stream() for streaming requests")`
Implement system in text completion API (#137) * Add system prompt to LLM invocation * Added system parameter to LLMs * Added to Bedrock and VertexAI 2024-11-05 22:46:17 +00:00			`return self.call(`
Feature/streaming llm phase 1 (#566) * Tidy up duplicate tech specs in doc directory * Streaming LLM text-completion service tech spec. * text-completion and prompt interfaces * streaming change applied to all LLMs, so far tested with VertexAI * Skip Pinecone unit tests, upstream module issue is affecting things, tests are passing again * Added agent streaming, not working and has broken tests 2025-11-26 09:59:10 +00:00			`system=system, prompt=prompt, streaming=False, timeout=timeout`
Implement system in text completion API (#137) * Add system prompt to LLM invocation * Added system parameter to LLMs * Added to Bedrock and VertexAI 2024-11-05 22:46:17 +00:00			`).response`
Trustgraph initial code drop 2024-07-10 23:20:06 +01:00
Feature/streaming llm phase 1 (#566) * Tidy up duplicate tech specs in doc directory * Streaming LLM text-completion service tech spec. * text-completion and prompt interfaces * streaming change applied to all LLMs, so far tested with VertexAI * Skip Pinecone unit tests, upstream module issue is affecting things, tests are passing again * Added agent streaming, not working and has broken tests 2025-11-26 09:59:10 +00:00			`def request_stream(self, system, prompt, timeout=300):`
			`"""`
			`Streaming request generator.`
			`Yields response chunks as they arrive.`
			`Usage:`
			`for chunk in client.request_stream(system, prompt):`
			`print(chunk.response, end='', flush=True)`
			`"""`
			`import time`
			`import uuid`

			`id = str(uuid.uuid4())`
			`request = TextCompletionRequest(`
			`system=system, prompt=prompt, streaming=True`
			`)`

			`end_time = time.time() + timeout`
			`self.producer.send(request, properties={"id": id})`

			`# Collect responses until end_of_stream`
			`while time.time() < end_time:`
			`try:`
			`msg = self.consumer.receive(timeout_millis=2500)`
			`except Exception:`
			`continue`

			`mid = msg.properties()["id"]`

			`if mid == id:`
			`value = msg.value()`

			`# Handle errors`
			`if value.error:`
			`self.consumer.acknowledge(msg)`
			`if value.error.type == "llm-error":`
			`raise LlmError(value.error.message)`
			`else:`
			`raise RuntimeError(`
			`f"{value.error.type}: {value.error.message}"`
			`)`

			`self.consumer.acknowledge(msg)`
			`yield value`

			`# Check if this is the final chunk`
			`if getattr(value, 'end_of_stream', True):`
			`break`
			`else:`
			`# Ignore messages with wrong ID`
			`self.consumer.acknowledge(msg)`

			`if time.time() >= end_time:`
			`raise TimeoutError("Timed out waiting for response")`