release/v1.4 -> master (#548)

This commit is contained in:
cybermaggedon 2025-10-06 17:54:26 +01:00 committed by GitHub
parent 3ec2cd54f9
commit 2bd68ed7f4
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
94 changed files with 8571 additions and 1740 deletions

View file

@@ -12,6 +12,7 @@ requires-python = ">=3.8"
dependencies = [
"pulsar-client",
"prometheus-client",
"requests",
]
classifiers = [
"Programming Language :: Python :: 3",

View file

@@ -87,7 +87,7 @@ class Flow:
return json.loads(self.request(request = input)["flow"])
def start(self, class_name, id, description):
def start(self, class_name, id, description, parameters=None):
# The input consists of system and prompt strings
input = {
@ -97,6 +97,9 @@ class Flow:
"description": description,
}
if parameters:
input["parameters"] = parameters
self.request(request = input)
def stop(self, id):

View file

@@ -8,11 +8,12 @@ from . subscriber import Subscriber
from . metrics import ProcessorMetrics, ConsumerMetrics, ProducerMetrics
from . flow_processor import FlowProcessor
from . consumer_spec import ConsumerSpec
from . setting_spec import SettingSpec
from . parameter_spec import ParameterSpec
from . producer_spec import ProducerSpec
from . subscriber_spec import SubscriberSpec
from . request_response_spec import RequestResponseSpec
from . llm_service import LlmService, LlmResult
from . chunking_service import ChunkingService
from . embeddings_service import EmbeddingsService
from . embeddings_client import EmbeddingsClientSpec
from . text_completion_client import TextCompletionClientSpec

View file

@@ -0,0 +1,62 @@
"""
Base chunking service that provides parameter specification functionality
for chunk-size and chunk-overlap parameters
"""
import logging
from .flow_processor import FlowProcessor
from .parameter_spec import ParameterSpec
# Module logger
logger = logging.getLogger(__name__)
class ChunkingService(FlowProcessor):
    """Base service for chunking processors with parameter specification support"""

    def __init__(self, **params):
        # Let FlowProcessor set up the processor machinery first
        super().__init__(**params)

        # Expose the two chunking knobs as flow-configurable parameters
        for param_name in ("chunk-size", "chunk-overlap"):
            self.register_specification(ParameterSpec(name=param_name))

        logger.debug("ChunkingService initialized with parameter specifications")

    async def chunk_document(self, msg, consumer, flow, default_chunk_size, default_chunk_overlap):
        """
        Resolve the effective chunking parameters for a flow.

        Args:
            msg: the message containing the document to chunk
            consumer: the consumer spec the message arrived on
            flow: the flow context (callable lookup for parameters)
            default_chunk_size: fallback chunk size from processor config
            default_chunk_overlap: fallback chunk overlap from processor config

        Returns:
            tuple: (chunk_size, chunk_overlap) — effective values to use
        """
        # Flow-level settings win over processor-level defaults; a flow
        # returns None when the parameter was not supplied.
        requested_size = flow("chunk-size")
        requested_overlap = flow("chunk-overlap")

        size = default_chunk_size if requested_size is None else requested_size
        overlap = default_chunk_overlap if requested_overlap is None else requested_overlap

        logger.debug(f"Using chunk-size: {size}")
        logger.debug(f"Using chunk-overlap: {overlap}")

        return size, overlap

    @staticmethod
    def add_args(parser):
        """Add chunking service arguments to parser"""
        FlowProcessor.add_args(parser)

View file

@@ -12,7 +12,7 @@ class Flow:
# Consumers and publishers. Is this a bit untidy?
self.consumer = {}
self.setting = {}
self.parameter = {}
for spec in processor.specifications:
spec.add(self, processor, defn)
@ -28,5 +28,5 @@ class Flow:
def __call__(self, key):
if key in self.producer: return self.producer[key]
if key in self.consumer: return self.consumer[key]
if key in self.setting: return self.setting[key].value
if key in self.parameter: return self.parameter[key].value
return None

View file

@@ -35,7 +35,7 @@ class FlowProcessor(AsyncProcessor):
# These can be overriden by a derived class:
# Array of specifications: ConsumerSpec, ProducerSpec, SettingSpec
# Array of specifications: ConsumerSpec, ProducerSpec, ParameterSpec
self.specifications = []
logger.info("Service initialised.")

View file

@@ -5,11 +5,11 @@ LLM text completion base class
import time
import logging
from prometheus_client import Histogram
from prometheus_client import Histogram, Info
from .. schema import TextCompletionRequest, TextCompletionResponse, Error
from .. exceptions import TooManyRequests
from .. base import FlowProcessor, ConsumerSpec, ProducerSpec
from .. base import FlowProcessor, ConsumerSpec, ProducerSpec, ParameterSpec
# Module logger
logger = logging.getLogger(__name__)
@ -32,7 +32,7 @@ class LlmService(FlowProcessor):
def __init__(self, **params):
id = params.get("id")
id = params.get("id", default_ident)
concurrency = params.get("concurrency", 1)
super(LlmService, self).__init__(**params | {
@ -56,6 +56,18 @@ class LlmService(FlowProcessor):
)
)
self.register_specification(
ParameterSpec(
name = "model",
)
)
self.register_specification(
ParameterSpec(
name = "temperature",
)
)
if not hasattr(__class__, "text_completion_metric"):
__class__.text_completion_metric = Histogram(
'text_completion_duration',
@ -70,6 +82,13 @@ class LlmService(FlowProcessor):
]
)
if not hasattr(__class__, "text_completion_model_metric"):
__class__.text_completion_model_metric = Info(
'text_completion_model',
'Text completion model',
["processor", "flow"]
)
async def on_request(self, msg, consumer, flow):
try:
@ -85,10 +104,21 @@ class LlmService(FlowProcessor):
flow=f"{flow.name}-{consumer.name}",
).time():
model = flow("model")
temperature = flow("temperature")
response = await self.generate_content(
request.system, request.prompt
request.system, request.prompt, model, temperature
)
__class__.text_completion_model_metric.labels(
processor = self.id,
flow = flow.name
).info({
"model": str(model) if model is not None else "",
"temperature": str(temperature) if temperature is not None else "",
})
await flow("response").send(
TextCompletionResponse(
error=None,

View file

@@ -1,7 +1,7 @@
from . spec import Spec
class Parameter:
    """Holds the value of a single flow-configurable parameter."""

    def __init__(self, value):
        # Value may be None when the flow definition omits the parameter
        self.value = value

    # BUG FIX: start/stop were declared without `self`, so calling them on
    # an instance (`await parameter.start()`) raised TypeError. They are
    # lifecycle no-ops for a plain value holder.
    async def start(self):
        pass

    async def stop(self):
        pass
class ParameterSpec(Spec):
    """Specification for a named, flow-configurable parameter."""

    def __init__(self, name):
        self.name = name

    def add(self, flow, processor, definition):
        # A definition that omits this parameter registers as None
        flow.parameter[self.name] = Parameter(definition.get(self.name))

View file

@@ -49,8 +49,6 @@ class RequestResponse(Subscriber):
id = str(uuid.uuid4())
logger.debug(f"Sending request {id}...")
q = await self.subscribe(id)
try:
@ -75,8 +73,6 @@ class RequestResponse(Subscriber):
timeout=timeout
)
logger.debug("Received response")
if recipient is None:
# If no recipient handler, just return the first

View file

@@ -12,12 +12,13 @@ class FlowRequestTranslator(MessageTranslator):
class_name=data.get("class-name"),
class_definition=data.get("class-definition"),
description=data.get("description"),
flow_id=data.get("flow-id")
flow_id=data.get("flow-id"),
parameters=data.get("parameters")
)
def from_pulsar(self, obj: FlowRequest) -> Dict[str, Any]:
result = {}
if obj.operation is not None:
result["operation"] = obj.operation
if obj.class_name is not None:
@ -28,7 +29,9 @@ class FlowRequestTranslator(MessageTranslator):
result["description"] = obj.description
if obj.flow_id is not None:
result["flow-id"] = obj.flow_id
if obj.parameters is not None:
result["parameters"] = obj.parameters
return result
@ -40,7 +43,7 @@ class FlowResponseTranslator(MessageTranslator):
def from_pulsar(self, obj: FlowResponse) -> Dict[str, Any]:
result = {}
if obj.class_names is not None:
result["class-names"] = obj.class_names
if obj.flow_ids is not None:
@ -51,7 +54,9 @@ class FlowResponseTranslator(MessageTranslator):
result["flow"] = obj.flow
if obj.description is not None:
result["description"] = obj.description
if obj.parameters is not None:
result["parameters"] = obj.parameters
return result
def from_response_with_completion(self, obj: FlowResponse) -> Tuple[Dict[str, Any], bool]:

View file

@@ -35,6 +35,9 @@ class FlowRequest(Record):
# get_flow, start_flow, stop_flow
flow_id = String()
# start_flow - optional parameters for flow customization
parameters = Map(String())
class FlowResponse(Record):
# list_classes
@ -52,6 +55,9 @@ class FlowResponse(Record):
# get_flow
description = String()
# get_flow - parameters used when flow was started
parameters = Map(String())
# Everything
error = Error()