Mirror of https://github.com/trustgraph-ai/trustgraph.git (synced 2026-04-27 01:16:22 +02:00)

release/v1.4 -> master (#548)

Parent: 3ec2cd54f9
Commit: 2bd68ed7f4

94 changed files with 8571 additions and 1740 deletions

@@ -12,6 +12,7 @@ requires-python = ">=3.8"
 dependencies = [
     "pulsar-client",
     "prometheus-client",
+    "requests",
 ]
 classifiers = [
     "Programming Language :: Python :: 3",

@@ -87,7 +87,7 @@ class Flow:

         return json.loads(self.request(request = input)["flow"])

-    def start(self, class_name, id, description):
+    def start(self, class_name, id, description, parameters=None):

         # The input consists of system and prompt strings
         input = {

@@ -97,6 +97,9 @@ class Flow:
             "description": description,
         }

+        if parameters:
+            input["parameters"] = parameters
+
         self.request(request = input)

     def stop(self, id):

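Note: a minimal sketch of how a caller might use the extended start()
signature. The flow handle and all values are illustrative, and the
parameter values are strings because the schema below declares
parameters as Map(String()).

    # "flow_api" stands in for however a Flow client instance is obtained
    flow_api.start(
        class_name = "document-rag",        # hypothetical flow class
        id = "my-flow",
        description = "RAG flow with custom chunking",
        parameters = {                      # new optional argument
            "chunk-size": "2000",
            "chunk-overlap": "100",
        },
    )
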
@@ -8,11 +8,12 @@ from . subscriber import Subscriber
 from . metrics import ProcessorMetrics, ConsumerMetrics, ProducerMetrics
 from . flow_processor import FlowProcessor
 from . consumer_spec import ConsumerSpec
-from . setting_spec import SettingSpec
+from . parameter_spec import ParameterSpec
 from . producer_spec import ProducerSpec
 from . subscriber_spec import SubscriberSpec
 from . request_response_spec import RequestResponseSpec
 from . llm_service import LlmService, LlmResult
+from . chunking_service import ChunkingService
 from . embeddings_service import EmbeddingsService
 from . embeddings_client import EmbeddingsClientSpec
 from . text_completion_client import TextCompletionClientSpec

trustgraph-base/trustgraph/base/chunking_service.py (new file, 62 lines)

@@ -0,0 +1,62 @@
+"""
+Base chunking service that provides parameter specification functionality
+for chunk-size and chunk-overlap parameters
+"""
+
+import logging
+
+from .flow_processor import FlowProcessor
+from .parameter_spec import ParameterSpec
+
+# Module logger
+logger = logging.getLogger(__name__)
+
+class ChunkingService(FlowProcessor):
+    """Base service for chunking processors with parameter specification support"""
+
+    def __init__(self, **params):
+
+        # Call parent constructor
+        super(ChunkingService, self).__init__(**params)
+
+        # Register parameter specifications for chunk-size and chunk-overlap
+        self.register_specification(
+            ParameterSpec(name="chunk-size")
+        )
+
+        self.register_specification(
+            ParameterSpec(name="chunk-overlap")
+        )
+
+        logger.debug("ChunkingService initialized with parameter specifications")
+
+    async def chunk_document(self, msg, consumer, flow, default_chunk_size, default_chunk_overlap):
+        """
+        Extract chunk parameters from flow and return effective values
+
+        Args:
+            msg: The message containing the document to chunk
+            consumer: The consumer spec
+            flow: The flow context
+            default_chunk_size: Default chunk size from processor config
+            default_chunk_overlap: Default chunk overlap from processor config
+
+        Returns:
+            tuple: (chunk_size, chunk_overlap) - effective values to use
+        """
+        # Extract parameters from flow (flow-configurable parameters)
+        chunk_size = flow("chunk-size")
+        chunk_overlap = flow("chunk-overlap")
+
+        # Use provided values or fall back to defaults
+        effective_chunk_size = chunk_size if chunk_size is not None else default_chunk_size
+        effective_chunk_overlap = chunk_overlap if chunk_overlap is not None else default_chunk_overlap
+
+        logger.debug(f"Using chunk-size: {effective_chunk_size}")
+        logger.debug(f"Using chunk-overlap: {effective_chunk_overlap}")
+
+        return effective_chunk_size, effective_chunk_overlap
+
+    @staticmethod
+    def add_args(parser):
+        """Add chunking service arguments to parser"""
+        FlowProcessor.add_args(parser)

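Note: a minimal sketch of how a concrete chunker might build on this base
class; the subclass name, handler name, and default values are
illustrative, not part of the commit.

    # Hypothetical subclass: resolve per-flow chunk parameters, falling
    # back to processor defaults when the flow supplies none
    class RecursiveChunker(ChunkingService):

        async def on_message(self, msg, consumer, flow):

            size, overlap = await self.chunk_document(
                msg, consumer, flow,
                default_chunk_size = 2000,
                default_chunk_overlap = 100,
            )
            # ... split the document into chunks of `size` characters
            # with `overlap` characters of overlap ...
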
@@ -12,7 +12,7 @@ class Flow:
         # Consumers and publishers. Is this a bit untidy?
         self.consumer = {}

-        self.setting = {}
+        self.parameter = {}

         for spec in processor.specifications:
             spec.add(self, processor, defn)

@@ -28,5 +28,5 @@ class Flow:
     def __call__(self, key):
         if key in self.producer: return self.producer[key]
         if key in self.consumer: return self.consumer[key]
-        if key in self.setting: return self.setting[key].value
+        if key in self.parameter: return self.parameter[key].value
         return None

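Note: with this change a flow lookup resolves producers first, then
consumers, then parameters, and an unset key yields None rather than
raising. The key name below is illustrative.

    chunk_size = flow("chunk-size")   # Parameter value, or None if unset
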
@@ -35,7 +35,7 @@ class FlowProcessor(AsyncProcessor):

         # These can be overriden by a derived class:

-        # Array of specifications: ConsumerSpec, ProducerSpec, SettingSpec
+        # Array of specifications: ConsumerSpec, ProducerSpec, ParameterSpec
         self.specifications = []

         logger.info("Service initialised.")

@@ -5,11 +5,11 @@ LLM text completion base class
 import time
 import logging

-from prometheus_client import Histogram
+from prometheus_client import Histogram, Info

 from .. schema import TextCompletionRequest, TextCompletionResponse, Error
 from .. exceptions import TooManyRequests
-from .. base import FlowProcessor, ConsumerSpec, ProducerSpec
+from .. base import FlowProcessor, ConsumerSpec, ProducerSpec, ParameterSpec

 # Module logger
 logger = logging.getLogger(__name__)

@@ -32,7 +32,7 @@ class LlmService(FlowProcessor):

     def __init__(self, **params):

-        id = params.get("id")
+        id = params.get("id", default_ident)
         concurrency = params.get("concurrency", 1)

         super(LlmService, self).__init__(**params | {

@@ -56,6 +56,18 @@ class LlmService(FlowProcessor):
             )
         )

+        self.register_specification(
+            ParameterSpec(
+                name = "model",
+            )
+        )
+
+        self.register_specification(
+            ParameterSpec(
+                name = "temperature",
+            )
+        )
+
         if not hasattr(__class__, "text_completion_metric"):
             __class__.text_completion_metric = Histogram(
                 'text_completion_duration',

@@ -70,6 +82,13 @@ class LlmService(FlowProcessor):
             ]
         )

+        if not hasattr(__class__, "text_completion_model_metric"):
+            __class__.text_completion_model_metric = Info(
+                'text_completion_model',
+                'Text completion model',
+                ["processor", "flow"]
+            )
+
     async def on_request(self, msg, consumer, flow):

         try:

@@ -85,10 +104,21 @@ class LlmService(FlowProcessor):
                 flow=f"{flow.name}-{consumer.name}",
             ).time():

+                model = flow("model")
+                temperature = flow("temperature")
+
                 response = await self.generate_content(
-                    request.system, request.prompt
+                    request.system, request.prompt, model, temperature
                 )

+            __class__.text_completion_model_metric.labels(
+                processor = self.id,
+                flow = flow.name
+            ).info({
+                "model": str(model) if model is not None else "",
+                "temperature": str(temperature) if temperature is not None else "",
+            })
+
             await flow("response").send(
                 TextCompletionResponse(
                     error=None,

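Note: concrete LLM processors override generate_content(); a sketch of the
shape the override now takes. Only the extra model/temperature arguments
come from this diff; the fallback logic and attribute names are assumptions.

    async def generate_content(self, system, prompt, model=None, temperature=None):

        # Per-flow values win; otherwise fall back to processor-level
        # defaults (default_model/default_temperature are hypothetical)
        model = model if model is not None else self.default_model
        temperature = temperature if temperature is not None else self.default_temperature

        # ... invoke the underlying LLM and return an LlmResult ...
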
@@ -1,7 +1,7 @@

 from . spec import Spec

-class Setting:
+class Parameter:
     def __init__(self, value):
         self.value = value
     async def start():

@@ -9,11 +9,13 @@ class Setting:
     async def stop():
         pass

-class SettingSpec(Spec):
+class ParameterSpec(Spec):
     def __init__(self, name):
         self.name = name

     def add(self, flow, processor, definition):

-        flow.config[self.name] = Setting(definition[self.name])
+        value = definition.get(self.name, None)
+
+        flow.parameter[self.name] = Parameter(value)

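Note: the behavioural change worth flagging is that the old SettingSpec
indexed definition[self.name] and so raised KeyError when a flow definition
omitted the key, whereas ParameterSpec defaults the value to None. An
illustrative check (flow, processor, and values are hypothetical):

    spec = ParameterSpec(name = "temperature")
    spec.add(flow, processor, {"model": "gemma2"})   # no "temperature" key
    assert flow.parameter["temperature"].value is None
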
@@ -49,8 +49,6 @@ class RequestResponse(Subscriber):

         id = str(uuid.uuid4())

-        logger.debug(f"Sending request {id}...")
-
         q = await self.subscribe(id)

         try:

@@ -75,8 +73,6 @@ class RequestResponse(Subscriber):
                 timeout=timeout
             )

-            logger.debug("Received response")
-
             if recipient is None:

                 # If no recipient handler, just return the first

@@ -12,12 +12,13 @@ class FlowRequestTranslator(MessageTranslator):
             class_name=data.get("class-name"),
             class_definition=data.get("class-definition"),
             description=data.get("description"),
-            flow_id=data.get("flow-id")
+            flow_id=data.get("flow-id"),
+            parameters=data.get("parameters")
         )

     def from_pulsar(self, obj: FlowRequest) -> Dict[str, Any]:
         result = {}

         if obj.operation is not None:
             result["operation"] = obj.operation
         if obj.class_name is not None:

@@ -28,7 +29,9 @@ class FlowRequestTranslator(MessageTranslator):
             result["description"] = obj.description
         if obj.flow_id is not None:
             result["flow-id"] = obj.flow_id

+        if obj.parameters is not None:
+            result["parameters"] = obj.parameters

         return result

@@ -40,7 +43,7 @@ class FlowResponseTranslator(MessageTranslator):

     def from_pulsar(self, obj: FlowResponse) -> Dict[str, Any]:
         result = {}

         if obj.class_names is not None:
             result["class-names"] = obj.class_names
         if obj.flow_ids is not None:

@@ -51,7 +54,9 @@ class FlowResponseTranslator(MessageTranslator):
             result["flow"] = obj.flow
         if obj.description is not None:
             result["description"] = obj.description

+        if obj.parameters is not None:
+            result["parameters"] = obj.parameters

         return result

     def from_response_with_completion(self, obj: FlowResponse) -> Tuple[Dict[str, Any], bool]:

@@ -35,6 +35,9 @@ class FlowRequest(Record):
     # get_flow, start_flow, stop_flow
     flow_id = String()

+    # start_flow - optional parameters for flow customization
+    parameters = Map(String())
+
 class FlowResponse(Record):

     # list_classes

@@ -52,6 +55,9 @@ class FlowResponse(Record):
     # get_flow
     description = String()

+    # get_flow - parameters used when flow was started
+    parameters = Map(String())
+
     # Everything
     error = Error()

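Note: putting the schema and translator changes together, a start-flow
request carrying parameters would look roughly like this; the operation
value, class name, and parameter values are assumptions for illustration.

    request = {
        "operation": "start-flow",
        "class-name": "document-rag",
        "flow-id": "my-flow",
        "description": "RAG flow with custom chunking",
        "parameters": {                   # Map(String()): values are strings
            "chunk-size": "2000",
            "temperature": "0.7",
        },
    }
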