mirror of
https://github.com/trustgraph-ai/trustgraph.git
synced 2026-05-03 20:32:38 +02:00
Release/v1.2 (#457)
* Bump setup.py versions for 1.1 * PoC MCP server (#419) * Very initial MCP server PoC for TrustGraph * Put service on port 8000 * Add MCP container and packages to buildout * Update docs for API/CLI changes in 1.0 (#421) * Update some API basics for the 0.23/1.0 API change * Add MCP container push (#425) * Add command args to the MCP server (#426) * Host and port parameters * Added websocket arg * More docs * MCP client support (#427) - MCP client service - Tool request/response schema - API gateway support for mcp-tool - Message translation for tool request & response - Make mcp-tool use the configuration service for information about where the MCP services are. * Feature/react call mcp (#428) Key Features - MCP Tool Integration: Added core MCP tool support with ToolClientSpec and ToolClient classes - API Enhancement: New mcp_tool method for flow-specific tool invocation - CLI Tooling: New tg-invoke-mcp-tool command for testing MCP integration - React Agent Enhancement: Fixed and improved multi-tool invocation capabilities - Tool Management: Enhanced CLI for tool configuration and management Changes - Added MCP tool invocation to API with flow-specific integration - Implemented ToolClientSpec and ToolClient for tool call handling - Updated agent-manager-react to invoke MCP tools with configurable types - Enhanced CLI with new commands and improved help text - Added comprehensive documentation for new CLI commands - Improved tool configuration management Testing - Added tg-invoke-mcp-tool CLI command for isolated MCP integration testing - Enhanced agent capability to invoke multiple tools simultaneously * Test suite executed from CI pipeline (#433) * Test strategy & test cases * Unit tests * Integration tests * Extending test coverage (#434) * Contract tests * Testing embeddings * Agent unit tests * Knowledge pipeline tests * Turn on contract tests * Increase storage test coverage (#435) * Fixing storage and adding tests * PR pipeline only runs quick tests * Empty 
configuration is returned as empty list, previously was not in response (#436) * Update config util to take files as well as command-line text (#437) * Updated CLI invocation and config model for tools and mcp (#438) * Updated CLI invocation and config model for tools and mcp * CLI anomalies * Tweaked the MCP tool implementation for new model * Update agent implementation to match the new model * Fix agent tools, now all tested * Fixed integration tests * Fix MCP delete tool params * Update Python deps to 1.2 * Update to enable knowledge extraction using the agent framework (#439) * Implement KG extraction agent (kg-extract-agent) * Using ReAct framework (agent-manager-react) * ReAct manager had an issue when emitting JSON, which conflicts with ReAct manager's own JSON messages, so refactored ReAct manager to use traditional ReAct messages, non-JSON structure. * Minor refactor to take the prompt template client out of prompt-template so it can be more readily used by other modules. kg-extract-agent uses this framework. 
* Migrate from setup.py to pyproject.toml (#440) * Converted setup.py to pyproject.toml * Modern package infrastructure as recommended by py docs * Install missing build deps (#441) * Install missing build deps (#442) * Implement logging strategy (#444) * Logging strategy and convert all prints() to logging invocations * Fix/startup failure (#445) * Fix logging startup problems * Fix logging startup problems (#446) * Fix logging startup problems (#447) * Fixed Mistral OCR to use current API (#448) * Fixed Mistral OCR to use current API * Added PDF decoder tests * Fix Mistral OCR ident to be standard pdf-decoder (#450) * Fix Mistral OCR ident to be standard pdf-decoder * Correct test * Schema structure refactor (#451) * Write schema refactor spec * Implemented schema refactor spec * Structure data mvp (#452) * Structured data tech spec * Architecture principles * New schemas * Updated schemas and specs * Object extractor * Add .coveragerc * New tests * Cassandra object storage * Trying to get object extraction working, issues exist * Validate librarian collection (#453) * Fix token chunker, broken API invocation (#454) * Fix token chunker, broken API invocation (#455) * Knowledge load utility CLI (#456) * Knowledge loader * More tests
This commit is contained in:
parent
c85ba197be
commit
89be656990
509 changed files with 49632 additions and 5159 deletions
28
trustgraph-base/pyproject.toml
Normal file
28
trustgraph-base/pyproject.toml
Normal file
|
|
@ -0,0 +1,28 @@
|
|||
[build-system]
|
||||
requires = ["setuptools>=61.0", "wheel"]
|
||||
build-backend = "setuptools.build_meta"
|
||||
|
||||
[project]
|
||||
name = "trustgraph-base"
|
||||
dynamic = ["version"]
|
||||
authors = [{name = "trustgraph.ai", email = "security@trustgraph.ai"}]
|
||||
description = "TrustGraph provides a means to run a pipeline of flexible AI processing components in a flexible means to achieve a processing pipeline."
|
||||
readme = "README.md"
|
||||
requires-python = ">=3.8"
|
||||
dependencies = [
|
||||
"pulsar-client",
|
||||
"prometheus-client",
|
||||
]
|
||||
classifiers = [
|
||||
"Programming Language :: Python :: 3",
|
||||
"Operating System :: OS Independent",
|
||||
]
|
||||
|
||||
[project.urls]
|
||||
Homepage = "https://github.com/trustgraph-ai/trustgraph"
|
||||
|
||||
[tool.setuptools.packages.find]
|
||||
include = ["trustgraph*"]
|
||||
|
||||
[tool.setuptools.dynamic]
|
||||
version = {attr = "trustgraph.base_version.__version__"}
|
||||
|
|
@ -1,42 +0,0 @@
|
|||
import setuptools
|
||||
import os
|
||||
import importlib
|
||||
|
||||
with open("README.md", "r") as fh:
|
||||
long_description = fh.read()
|
||||
|
||||
# Load a version number module
|
||||
spec = importlib.util.spec_from_file_location(
|
||||
'version', 'trustgraph/base_version.py'
|
||||
)
|
||||
version_module = importlib.util.module_from_spec(spec)
|
||||
spec.loader.exec_module(version_module)
|
||||
|
||||
version = version_module.__version__
|
||||
|
||||
setuptools.setup(
|
||||
name="trustgraph-base",
|
||||
version=version,
|
||||
author="trustgraph.ai",
|
||||
author_email="security@trustgraph.ai",
|
||||
description="TrustGraph provides a means to run a pipeline of flexible AI processing components in a flexible means to achieve a processing pipeline.",
|
||||
long_description=long_description,
|
||||
long_description_content_type="text/markdown",
|
||||
url="https://github.com/trustgraph-ai/trustgraph",
|
||||
packages=setuptools.find_namespace_packages(
|
||||
where='./',
|
||||
),
|
||||
classifiers=[
|
||||
"Programming Language :: Python :: 3",
|
||||
"License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)",
|
||||
"Operating System :: OS Independent",
|
||||
],
|
||||
python_requires='>=3.8',
|
||||
download_url = "https://github.com/trustgraph-ai/trustgraph/archive/refs/tags/v" + version + ".tar.gz",
|
||||
install_requires=[
|
||||
"pulsar-client",
|
||||
"prometheus-client",
|
||||
],
|
||||
scripts=[
|
||||
]
|
||||
)
|
||||
|
|
@ -49,9 +49,6 @@ class Api:
|
|||
|
||||
url = f"{self.url}{path}"
|
||||
|
||||
# print("uri:", url)
|
||||
# print(json.dumps(request, indent=4))
|
||||
|
||||
# Invoke the API, input is passed as JSON
|
||||
resp = requests.post(url, json=request, timeout=self.timeout)
|
||||
|
||||
|
|
@ -59,8 +56,6 @@ class Api:
|
|||
if resp.status_code != 200:
|
||||
raise ProtocolException(f"Status code {resp.status_code}")
|
||||
|
||||
# print(resp.text)
|
||||
|
||||
try:
|
||||
# Parse the response as JSON
|
||||
object = resp.json()
|
||||
|
|
|
|||
|
|
@ -1,7 +1,11 @@
|
|||
|
||||
import logging
|
||||
|
||||
from . exceptions import *
|
||||
from . types import ConfigValue
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class Config:
|
||||
|
||||
def __init__(self, api):
|
||||
|
|
@ -33,7 +37,7 @@ class Config:
|
|||
for v in object["values"]
|
||||
]
|
||||
except Exception as e:
|
||||
print(e)
|
||||
logger.error("Failed to parse config get response", exc_info=True)
|
||||
raise ProtocolException("Response not formatted correctly")
|
||||
|
||||
def put(self, values):
|
||||
|
|
@ -49,6 +53,19 @@ class Config:
|
|||
|
||||
self.request(input)
|
||||
|
||||
def delete(self, keys):
    """Delete configuration entries.

    Builds a single ``"delete"`` operation and submits it through
    ``self.request``.  The reply is not inspected and nothing is
    returned.

    Args:
        keys: Iterable of objects exposing ``type`` and ``key``
            attributes, one per entry to remove.
            NOTE(review): presumably the package's config-key type --
            confirm against callers.
    """

    # Each entry to remove is identified by its (type, key) pair.
    request = {
        "operation": "delete",
        "keys": [
            {"type": k.type, "key": k.key}
            for k in keys
        ],
    }

    self.request(request)
|
||||
|
||||
def list(self, type):
|
||||
|
||||
# The input consists of system and prompt strings
|
||||
|
|
@ -67,7 +84,7 @@ class Config:
|
|||
"type": type,
|
||||
}
|
||||
|
||||
object = self.request(input)["directory"]
|
||||
object = self.request(input)
|
||||
|
||||
try:
|
||||
return [
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@ import base64
|
|||
|
||||
from .. knowledge import hash, Uri, Literal
|
||||
from . types import Triple
|
||||
from . exceptions import ProtocolException
|
||||
|
||||
def to_value(x):
|
||||
if x["e"]: return Uri(x["v"])
|
||||
|
|
@ -197,7 +198,6 @@ class FlowInstance:
|
|||
|
||||
def prompt(self, id, variables):
|
||||
|
||||
# The input consists of system and prompt strings
|
||||
input = {
|
||||
"id": id,
|
||||
"variables": variables
|
||||
|
|
@ -221,12 +221,37 @@ class FlowInstance:
|
|||
|
||||
raise ProtocolException("Response not formatted correctly")
|
||||
|
||||
def mcp_tool(self, name, parameters=None):
    """Invoke an MCP tool via the ``service/mcp-tool`` endpoint.

    Args:
        name: Name of the tool to invoke.
        parameters: Mapping of parameters passed to the tool.  When
            omitted (or ``None``) an empty mapping is sent.

    Returns:
        The ``"text"`` field of the response when present; otherwise
        the ``"object"`` field, returned as-is (no JSON decoding is
        applied here).

    Raises:
        ProtocolException: If the response carries neither a
            ``"text"`` nor an ``"object"`` field.
    """

    # A fresh dict per call: a ``{}`` default would be one object shared
    # by every invocation that omits the argument.
    if parameters is None:
        parameters = {}

    # The request consists of the tool name and its parameters
    request = {
        "name": name,
        "parameters": parameters,
    }

    response = self.request(
        "service/mcp-tool",
        request
    )

    # "text" takes precedence when both fields are present
    if "text" in response:
        return response["text"]

    if "object" in response:
        return response["object"]

    raise ProtocolException("Response not formatted correctly")
|
||||
|
||||
def triples_query(
|
||||
self, s=None, p=None, o=None,
|
||||
user=None, collection=None, limit=10000
|
||||
):
|
||||
|
||||
# The input consists of system and prompt strings
|
||||
input = {
|
||||
"limit": limit
|
||||
}
|
||||
|
|
|
|||
|
|
@ -2,11 +2,14 @@
|
|||
import datetime
|
||||
import time
|
||||
import base64
|
||||
import logging
|
||||
|
||||
from . types import DocumentMetadata, ProcessingMetadata, Triple
|
||||
from .. knowledge import hash, Uri, Literal
|
||||
from . exceptions import *
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
def to_value(x):
|
||||
if x["e"]: return Uri(x["v"])
|
||||
return Literal(x["v"])
|
||||
|
|
@ -112,7 +115,7 @@ class Library:
|
|||
for v in object["document-metadatas"]
|
||||
]
|
||||
except Exception as e:
|
||||
print(e)
|
||||
logger.error("Failed to parse document list response", exc_info=True)
|
||||
raise ProtocolException(f"Response not formatted correctly")
|
||||
|
||||
def get_document(self, user, id):
|
||||
|
|
@ -145,7 +148,7 @@ class Library:
|
|||
tags = doc["tags"]
|
||||
)
|
||||
except Exception as e:
|
||||
print(e)
|
||||
logger.error("Failed to parse document response", exc_info=True)
|
||||
raise ProtocolException(f"Response not formatted correctly")
|
||||
|
||||
def update_document(self, user, id, metadata):
|
||||
|
|
@ -192,7 +195,7 @@ class Library:
|
|||
tags = doc["tags"]
|
||||
)
|
||||
except Exception as e:
|
||||
print(e)
|
||||
logger.error("Failed to parse document update response", exc_info=True)
|
||||
raise ProtocolException(f"Response not formatted correctly")
|
||||
|
||||
def remove_document(self, user, id):
|
||||
|
|
@ -266,6 +269,6 @@ class Library:
|
|||
for v in object["processing-metadatas"]
|
||||
]
|
||||
except Exception as e:
|
||||
print(e)
|
||||
logger.error("Failed to parse processing list response", exc_info=True)
|
||||
raise ProtocolException(f"Response not formatted correctly")
|
||||
|
||||
|
|
|
|||
|
|
@ -28,4 +28,7 @@ from . triples_client import TriplesClientSpec
|
|||
from . document_embeddings_client import DocumentEmbeddingsClientSpec
|
||||
from . agent_service import AgentService
|
||||
from . graph_rag_client import GraphRagClientSpec
|
||||
from . tool_service import ToolService
|
||||
from . tool_client import ToolClientSpec
|
||||
from . agent_client import AgentClientSpec
|
||||
|
||||
|
|
|
|||
|
|
@ -4,9 +4,9 @@ from .. schema import AgentRequest, AgentResponse
|
|||
from .. knowledge import Uri, Literal
|
||||
|
||||
class AgentClient(RequestResponse):
|
||||
async def request(self, recipient, question, plan=None, state=None,
|
||||
async def invoke(self, recipient, question, plan=None, state=None,
|
||||
history=[], timeout=300):
|
||||
|
||||
|
||||
resp = await self.request(
|
||||
AgentRequest(
|
||||
question = question,
|
||||
|
|
@ -18,22 +18,20 @@ class AgentClient(RequestResponse):
|
|||
timeout=timeout,
|
||||
)
|
||||
|
||||
print(resp, flush=True)
|
||||
|
||||
if resp.error:
|
||||
raise RuntimeError(resp.error.message)
|
||||
|
||||
return resp
|
||||
return resp.answer
|
||||
|
||||
class GraphEmbeddingsClientSpec(RequestResponseSpec):
|
||||
class AgentClientSpec(RequestResponseSpec):
|
||||
def __init__(
|
||||
self, request_name, response_name,
|
||||
):
|
||||
super(GraphEmbeddingsClientSpec, self).__init__(
|
||||
super(AgentClientSpec, self).__init__(
|
||||
request_name = request_name,
|
||||
request_schema = GraphEmbeddingsRequest,
|
||||
request_schema = AgentRequest,
|
||||
response_name = response_name,
|
||||
response_schema = GraphEmbeddingsResponse,
|
||||
impl = GraphEmbeddingsClient,
|
||||
response_schema = AgentResponse,
|
||||
impl = AgentClient,
|
||||
)
|
||||
|
||||
|
|
|
|||
|
|
@ -4,12 +4,16 @@ Agent manager service completion base class
|
|||
"""
|
||||
|
||||
import time
|
||||
import logging
|
||||
from prometheus_client import Histogram
|
||||
|
||||
from .. schema import AgentRequest, AgentResponse, Error
|
||||
from .. exceptions import TooManyRequests
|
||||
from .. base import FlowProcessor, ConsumerSpec, ProducerSpec
|
||||
|
||||
# Module logger
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
default_ident = "agent-manager"
|
||||
|
||||
class AgentService(FlowProcessor):
|
||||
|
|
@ -76,9 +80,9 @@ class AgentService(FlowProcessor):
|
|||
except Exception as e:
|
||||
|
||||
# Apart from rate limits, treat all exceptions as unrecoverable
|
||||
print(f"on_request Exception: {e}")
|
||||
logger.error(f"Exception in agent service on_request: {e}", exc_info=True)
|
||||
|
||||
print("Send error response...", flush=True)
|
||||
logger.info("Sending error response...")
|
||||
|
||||
await flow.producer["response"].send(
|
||||
AgentResponse(
|
||||
|
|
|
|||
|
|
@ -9,6 +9,8 @@ import argparse
|
|||
import _pulsar
|
||||
import time
|
||||
import uuid
|
||||
import logging
|
||||
import os
|
||||
from prometheus_client import start_http_server, Info
|
||||
|
||||
from .. schema import ConfigPush, config_push_queue
|
||||
|
|
@ -20,6 +22,9 @@ from . metrics import ProcessorMetrics, ConsumerMetrics
|
|||
|
||||
default_config_queue = config_push_queue
|
||||
|
||||
# Module logger
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Async processor
|
||||
class AsyncProcessor:
|
||||
|
||||
|
|
@ -113,7 +118,7 @@ class AsyncProcessor:
|
|||
version = message.value().version
|
||||
|
||||
# Invoke message handlers
|
||||
print("Config change event", version, flush=True)
|
||||
logger.info(f"Config change event: version={version}")
|
||||
for ch in self.config_handlers:
|
||||
await ch(config, version)
|
||||
|
||||
|
|
@ -156,9 +161,23 @@ class AsyncProcessor:
|
|||
|
||||
# This is here to output a debug message, shouldn't be needed.
|
||||
except Exception as e:
|
||||
print("Exception, closing taskgroup", flush=True)
|
||||
logger.error("Exception, closing taskgroup", exc_info=True)
|
||||
raise e
|
||||
|
||||
@classmethod
def setup_logging(cls, log_level='INFO'):
    """Configure logging for the entire application.

    The ``TRUSTGRAPH_LOG_LEVEL`` environment variable, when set, takes
    precedence over ``log_level``.  Level names are matched
    case-insensitively against the constants of the ``logging`` module.
    An unrecognised name falls back to INFO (and is reported with a
    warning) rather than raising at startup.

    Args:
        log_level: Level name used when the environment variable is
            not set, e.g. ``'DEBUG'`` or ``'INFO'``.
    """
    # Support environment variable override
    env_log_level = os.environ.get('TRUSTGRAPH_LOG_LEVEL', log_level)

    # Resolve the name to a numeric level.  A plain getattr() would raise
    # AttributeError on a typo, and could also pick up non-level
    # attributes of the logging module, so the result is validated.
    level = getattr(logging, str(env_log_level).upper(), None)
    unknown = isinstance(level, bool) or not isinstance(level, int)
    if unknown:
        level = logging.INFO

    # Configure logging
    logging.basicConfig(
        level=level,
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
        handlers=[logging.StreamHandler()]
    )

    if unknown:
        # Reported only after basicConfig so the message is emitted
        # through the configured handler.
        logger.warning(
            f"Unknown log level {env_log_level!r}, falling back to INFO"
        )
        env_log_level = 'INFO'

    logger.info(f"Logging configured with level: {env_log_level}")
|
||||
|
||||
# Startup fabric. launch calls launch_async in async mode.
|
||||
@classmethod
|
||||
def launch(cls, ident, doc):
|
||||
|
|
@ -183,8 +202,11 @@ class AsyncProcessor:
|
|||
args = parser.parse_args()
|
||||
args = vars(args)
|
||||
|
||||
# Setup logging before anything else
|
||||
cls.setup_logging(args.get('log_level', 'INFO').upper())
|
||||
|
||||
# Debug
|
||||
print(args, flush=True)
|
||||
logger.debug(f"Arguments: {args}")
|
||||
|
||||
# Start the Prometheus metrics service if needed
|
||||
if args["metrics"]:
|
||||
|
|
@ -193,7 +215,7 @@ class AsyncProcessor:
|
|||
# Loop forever, exception handler
|
||||
while True:
|
||||
|
||||
print("Starting...", flush=True)
|
||||
logger.info("Starting...")
|
||||
|
||||
try:
|
||||
|
||||
|
|
@ -203,30 +225,30 @@ class AsyncProcessor:
|
|||
))
|
||||
|
||||
except KeyboardInterrupt:
|
||||
print("Keyboard interrupt.", flush=True)
|
||||
logger.info("Keyboard interrupt.")
|
||||
return
|
||||
|
||||
except _pulsar.Interrupted:
|
||||
print("Pulsar Interrupted.", flush=True)
|
||||
logger.info("Pulsar Interrupted.")
|
||||
return
|
||||
|
||||
# Exceptions from a taskgroup come in as an exception group
|
||||
except ExceptionGroup as e:
|
||||
|
||||
print("Exception group:", flush=True)
|
||||
logger.error("Exception group:")
|
||||
|
||||
for se in e.exceptions:
|
||||
print(" Type:", type(se), flush=True)
|
||||
print(f" Exception: {se}", flush=True)
|
||||
logger.error(f" Type: {type(se)}")
|
||||
logger.error(f" Exception: {se}", exc_info=se)
|
||||
|
||||
except Exception as e:
|
||||
print("Type:", type(e), flush=True)
|
||||
print("Exception:", e, flush=True)
|
||||
logger.error(f"Type: {type(e)}")
|
||||
logger.error(f"Exception: {e}", exc_info=True)
|
||||
|
||||
# Retry occurs here
|
||||
print("Will retry...", flush=True)
|
||||
logger.warning("Will retry...")
|
||||
time.sleep(4)
|
||||
print("Retrying...", flush=True)
|
||||
logger.info("Retrying...")
|
||||
|
||||
# The command-line arguments are built using a stack of add_args
|
||||
# invocations
|
||||
|
|
@ -254,3 +276,4 @@ class AsyncProcessor:
|
|||
default=8000,
|
||||
help=f'Pulsar host (default: 8000)',
|
||||
)
|
||||
|
||||
|
|
|
|||
|
|
@ -14,9 +14,13 @@ import pulsar
|
|||
import _pulsar
|
||||
import asyncio
|
||||
import time
|
||||
import logging
|
||||
|
||||
from .. exceptions import TooManyRequests
|
||||
|
||||
# Module logger
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class Consumer:
|
||||
|
||||
def __init__(
|
||||
|
|
@ -90,7 +94,7 @@ class Consumer:
|
|||
|
||||
try:
|
||||
|
||||
print(self.topic, "subscribing...", flush=True)
|
||||
logger.info(f"Subscribing to topic: {self.topic}")
|
||||
|
||||
if self.start_of_messages:
|
||||
pos = pulsar.InitialPosition.Earliest
|
||||
|
|
@ -108,21 +112,18 @@ class Consumer:
|
|||
|
||||
except Exception as e:
|
||||
|
||||
print("consumer subs Exception:", e, flush=True)
|
||||
logger.error(f"Consumer subscription exception: {e}", exc_info=True)
|
||||
await asyncio.sleep(self.reconnect_time)
|
||||
continue
|
||||
|
||||
print(self.topic, "subscribed", flush=True)
|
||||
logger.info(f"Successfully subscribed to topic: {self.topic}")
|
||||
|
||||
if self.metrics:
|
||||
self.metrics.state("running")
|
||||
|
||||
try:
|
||||
|
||||
print(
|
||||
"Starting", self.concurrency, "receiver threads",
|
||||
flush=True
|
||||
)
|
||||
logger.info(f"Starting {self.concurrency} receiver threads")
|
||||
|
||||
async with asyncio.TaskGroup() as tg:
|
||||
|
||||
|
|
@ -138,7 +139,7 @@ class Consumer:
|
|||
|
||||
except Exception as e:
|
||||
|
||||
print("consumer loop exception:", e, flush=True)
|
||||
logger.error(f"Consumer loop exception: {e}", exc_info=True)
|
||||
self.consumer.unsubscribe()
|
||||
self.consumer.close()
|
||||
self.consumer = None
|
||||
|
|
@ -174,7 +175,7 @@ class Consumer:
|
|||
|
||||
if time.time() > expiry:
|
||||
|
||||
print("Gave up waiting for rate-limit retry", flush=True)
|
||||
logger.warning("Gave up waiting for rate-limit retry")
|
||||
|
||||
# Message failed to be processed, this causes it to
|
||||
# be retried
|
||||
|
|
@ -188,7 +189,7 @@ class Consumer:
|
|||
|
||||
try:
|
||||
|
||||
print("Handle...", flush=True)
|
||||
logger.debug("Processing message...")
|
||||
|
||||
if self.metrics:
|
||||
|
||||
|
|
@ -198,7 +199,7 @@ class Consumer:
|
|||
else:
|
||||
await self.handler(msg, self, self.flow)
|
||||
|
||||
print("Handled.", flush=True)
|
||||
logger.debug("Message processed successfully")
|
||||
|
||||
# Acknowledge successful processing of the message
|
||||
self.consumer.acknowledge(msg)
|
||||
|
|
@ -211,7 +212,7 @@ class Consumer:
|
|||
|
||||
except TooManyRequests:
|
||||
|
||||
print("TooManyRequests: will retry...", flush=True)
|
||||
logger.warning("Rate limit exceeded, will retry...")
|
||||
|
||||
if self.metrics:
|
||||
self.metrics.rate_limit()
|
||||
|
|
@ -224,7 +225,7 @@ class Consumer:
|
|||
|
||||
except Exception as e:
|
||||
|
||||
print("consume exception:", e, flush=True)
|
||||
logger.error(f"Message processing exception: {e}", exc_info=True)
|
||||
|
||||
# Message failed to be processed, this causes it to
|
||||
# be retried
|
||||
|
|
|
|||
|
|
@ -1,8 +1,13 @@
|
|||
|
||||
import logging
|
||||
|
||||
from . request_response_spec import RequestResponse, RequestResponseSpec
|
||||
from .. schema import DocumentEmbeddingsRequest, DocumentEmbeddingsResponse
|
||||
from .. knowledge import Uri, Literal
|
||||
|
||||
# Module logger
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class DocumentEmbeddingsClient(RequestResponse):
|
||||
async def query(self, vectors, limit=20, user="trustgraph",
|
||||
collection="default", timeout=30):
|
||||
|
|
@ -17,7 +22,7 @@ class DocumentEmbeddingsClient(RequestResponse):
|
|||
timeout=timeout
|
||||
)
|
||||
|
||||
print(resp, flush=True)
|
||||
logger.debug(f"Document embeddings response: {resp}")
|
||||
|
||||
if resp.error:
|
||||
raise RuntimeError(resp.error.message)
|
||||
|
|
|
|||
|
|
@ -4,6 +4,8 @@ Document embeddings query service. Input is vectors. Output is list of
|
|||
embeddings.
|
||||
"""
|
||||
|
||||
import logging
|
||||
|
||||
from .. schema import DocumentEmbeddingsRequest, DocumentEmbeddingsResponse
|
||||
from .. schema import Error, Value
|
||||
|
||||
|
|
@ -11,6 +13,9 @@ from . flow_processor import FlowProcessor
|
|||
from . consumer_spec import ConsumerSpec
|
||||
from . producer_spec import ProducerSpec
|
||||
|
||||
# Module logger
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
default_ident = "ge-query"
|
||||
|
||||
class DocumentEmbeddingsQueryService(FlowProcessor):
|
||||
|
|
@ -47,21 +52,21 @@ class DocumentEmbeddingsQueryService(FlowProcessor):
|
|||
# Sender-produced ID
|
||||
id = msg.properties()["id"]
|
||||
|
||||
print(f"Handling input {id}...", flush=True)
|
||||
logger.debug(f"Handling document embeddings query request {id}...")
|
||||
|
||||
docs = await self.query_document_embeddings(request)
|
||||
|
||||
print("Send response...", flush=True)
|
||||
logger.debug("Sending document embeddings query response...")
|
||||
r = DocumentEmbeddingsResponse(documents=docs, error=None)
|
||||
await flow("response").send(r, properties={"id": id})
|
||||
|
||||
print("Done.", flush=True)
|
||||
logger.debug("Document embeddings query request completed")
|
||||
|
||||
except Exception as e:
|
||||
|
||||
print(f"Exception: {e}")
|
||||
logger.error(f"Exception in document embeddings query service: {e}", exc_info=True)
|
||||
|
||||
print("Send error response...", flush=True)
|
||||
logger.info("Sending error response...")
|
||||
|
||||
r = DocumentEmbeddingsResponse(
|
||||
error=Error(
|
||||
|
|
|
|||
|
|
@ -3,10 +3,15 @@
|
|||
Document embeddings store base class
|
||||
"""
|
||||
|
||||
import logging
|
||||
|
||||
from .. schema import DocumentEmbeddings
|
||||
from .. base import FlowProcessor, ConsumerSpec
|
||||
from .. exceptions import TooManyRequests
|
||||
|
||||
# Module logger
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
default_ident = "document-embeddings-write"
|
||||
|
||||
class DocumentEmbeddingsStoreService(FlowProcessor):
|
||||
|
|
@ -40,7 +45,7 @@ class DocumentEmbeddingsStoreService(FlowProcessor):
|
|||
|
||||
except Exception as e:
|
||||
|
||||
print(f"Exception: {e}")
|
||||
logger.error(f"Exception in document embeddings store service: {e}", exc_info=True)
|
||||
raise e
|
||||
|
||||
@staticmethod
|
||||
|
|
|
|||
|
|
@ -4,12 +4,16 @@ Embeddings resolution base class
|
|||
"""
|
||||
|
||||
import time
|
||||
import logging
|
||||
from prometheus_client import Histogram
|
||||
|
||||
from .. schema import EmbeddingsRequest, EmbeddingsResponse, Error
|
||||
from .. exceptions import TooManyRequests
|
||||
from .. base import FlowProcessor, ConsumerSpec, ProducerSpec
|
||||
|
||||
# Module logger
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
default_ident = "embeddings"
|
||||
default_concurrency = 1
|
||||
|
||||
|
|
@ -51,7 +55,7 @@ class EmbeddingsService(FlowProcessor):
|
|||
|
||||
id = msg.properties()["id"]
|
||||
|
||||
print("Handling request", id, "...", flush=True)
|
||||
logger.debug(f"Handling embeddings request {id}...")
|
||||
|
||||
vectors = await self.on_embeddings(request.text)
|
||||
|
||||
|
|
@ -63,7 +67,7 @@ class EmbeddingsService(FlowProcessor):
|
|||
properties={"id": id}
|
||||
)
|
||||
|
||||
print("Handled.", flush=True)
|
||||
logger.debug("Embeddings request handled successfully")
|
||||
|
||||
except TooManyRequests as e:
|
||||
raise e
|
||||
|
|
@ -72,9 +76,9 @@ class EmbeddingsService(FlowProcessor):
|
|||
|
||||
# Apart from rate limits, treat all exceptions as unrecoverable
|
||||
|
||||
print(f"Exception: {e}", flush=True)
|
||||
logger.error(f"Exception in embeddings service: {e}", exc_info=True)
|
||||
|
||||
print("Send error response...", flush=True)
|
||||
logger.info("Sending error response...")
|
||||
|
||||
await flow.producer["response"].send(
|
||||
EmbeddingsResponse(
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@
|
|||
# configuration service which can't manage itself.
|
||||
|
||||
import json
|
||||
import logging
|
||||
|
||||
from pulsar.schema import JsonSchema
|
||||
|
||||
|
|
@ -14,6 +15,9 @@ from .. log_level import LogLevel
|
|||
from . async_processor import AsyncProcessor
|
||||
from . flow import Flow
|
||||
|
||||
# Module logger
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Parent class for configurable processors, configured with flows by
|
||||
# the config service
|
||||
class FlowProcessor(AsyncProcessor):
|
||||
|
|
@ -34,7 +38,7 @@ class FlowProcessor(AsyncProcessor):
|
|||
# Array of specifications: ConsumerSpec, ProducerSpec, SettingSpec
|
||||
self.specifications = []
|
||||
|
||||
print("Service initialised.")
|
||||
logger.info("Service initialised.")
|
||||
|
||||
# Register a configuration variable
|
||||
def register_specification(self, spec):
|
||||
|
|
@ -44,19 +48,19 @@ class FlowProcessor(AsyncProcessor):
|
|||
async def start_flow(self, flow, defn):
|
||||
self.flows[flow] = Flow(self.id, flow, self, defn)
|
||||
await self.flows[flow].start()
|
||||
print("Started flow: ", flow)
|
||||
logger.info(f"Started flow: {flow}")
|
||||
|
||||
# Stop processing for a new flow
|
||||
async def stop_flow(self, flow):
|
||||
if flow in self.flows:
|
||||
await self.flows[flow].stop()
|
||||
del self.flows[flow]
|
||||
print("Stopped flow: ", flow, flush=True)
|
||||
logger.info(f"Stopped flow: {flow}")
|
||||
|
||||
# Event handler - called for a configuration change
|
||||
async def on_configure_flows(self, config, version):
|
||||
|
||||
print("Got config version", version, flush=True)
|
||||
logger.info(f"Got config version {version}")
|
||||
|
||||
# Skip over invalid data
|
||||
if "flows-active" not in config: return
|
||||
|
|
@ -69,7 +73,7 @@ class FlowProcessor(AsyncProcessor):
|
|||
|
||||
else:
|
||||
|
||||
print("No configuration settings for me.", flush=True)
|
||||
logger.debug("No configuration settings for me.")
|
||||
flow_config = {}
|
||||
|
||||
# Get list of flows which should be running and are currently
|
||||
|
|
@ -88,7 +92,7 @@ class FlowProcessor(AsyncProcessor):
|
|||
if flow not in wanted_flows:
|
||||
await self.stop_flow(flow)
|
||||
|
||||
print("Handled config update")
|
||||
logger.info("Handled config update")
|
||||
|
||||
# Start threads, just call parent
|
||||
async def start(self):
|
||||
|
|
|
|||
|
|
@ -1,8 +1,13 @@
|
|||
|
||||
import logging
|
||||
|
||||
from . request_response_spec import RequestResponse, RequestResponseSpec
|
||||
from .. schema import GraphEmbeddingsRequest, GraphEmbeddingsResponse
|
||||
from .. knowledge import Uri, Literal
|
||||
|
||||
# Module logger
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
def to_value(x):
|
||||
if x.is_uri: return Uri(x.value)
|
||||
return Literal(x.value)
|
||||
|
|
@ -21,7 +26,7 @@ class GraphEmbeddingsClient(RequestResponse):
|
|||
timeout=timeout
|
||||
)
|
||||
|
||||
print(resp, flush=True)
|
||||
logger.debug(f"Graph embeddings response: {resp}")
|
||||
|
||||
if resp.error:
|
||||
raise RuntimeError(resp.error.message)
|
||||
|
|
|
|||
|
|
@ -4,6 +4,8 @@ Graph embeddings query service. Input is vectors. Output is list of
|
|||
embeddings.
|
||||
"""
|
||||
|
||||
import logging
|
||||
|
||||
from .. schema import GraphEmbeddingsRequest, GraphEmbeddingsResponse
|
||||
from .. schema import Error, Value
|
||||
|
||||
|
|
@ -11,6 +13,9 @@ from . flow_processor import FlowProcessor
|
|||
from . consumer_spec import ConsumerSpec
|
||||
from . producer_spec import ProducerSpec
|
||||
|
||||
# Module logger
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
default_ident = "ge-query"
|
||||
|
||||
class GraphEmbeddingsQueryService(FlowProcessor):
|
||||
|
|
@ -47,21 +52,21 @@ class GraphEmbeddingsQueryService(FlowProcessor):
|
|||
# Sender-produced ID
|
||||
id = msg.properties()["id"]
|
||||
|
||||
print(f"Handling input {id}...", flush=True)
|
||||
logger.debug(f"Handling graph embeddings query request {id}...")
|
||||
|
||||
entities = await self.query_graph_embeddings(request)
|
||||
|
||||
print("Send response...", flush=True)
|
||||
logger.debug("Sending graph embeddings query response...")
|
||||
r = GraphEmbeddingsResponse(entities=entities, error=None)
|
||||
await flow("response").send(r, properties={"id": id})
|
||||
|
||||
print("Done.", flush=True)
|
||||
logger.debug("Graph embeddings query request completed")
|
||||
|
||||
except Exception as e:
|
||||
|
||||
print(f"Exception: {e}")
|
||||
logger.error(f"Exception in graph embeddings query service: {e}", exc_info=True)
|
||||
|
||||
print("Send error response...", flush=True)
|
||||
logger.info("Sending error response...")
|
||||
|
||||
r = GraphEmbeddingsResponse(
|
||||
error=Error(
|
||||
|
|
|
|||
|
|
@ -3,10 +3,15 @@
|
|||
Graph embeddings store base class
|
||||
"""
|
||||
|
||||
import logging
|
||||
|
||||
from .. schema import GraphEmbeddings
|
||||
from .. base import FlowProcessor, ConsumerSpec
|
||||
from .. exceptions import TooManyRequests
|
||||
|
||||
# Module logger
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
default_ident = "graph-embeddings-write"
|
||||
|
||||
class GraphEmbeddingsStoreService(FlowProcessor):
|
||||
|
|
@ -40,7 +45,7 @@ class GraphEmbeddingsStoreService(FlowProcessor):
|
|||
|
||||
except Exception as e:
|
||||
|
||||
print(f"Exception: {e}")
|
||||
logger.error(f"Exception in graph embeddings store service: {e}", exc_info=True)
|
||||
raise e
|
||||
|
||||
@staticmethod
|
||||
|
|
|
|||
|
|
@ -4,12 +4,16 @@ LLM text completion base class
|
|||
"""
|
||||
|
||||
import time
|
||||
import logging
|
||||
from prometheus_client import Histogram
|
||||
|
||||
from .. schema import TextCompletionRequest, TextCompletionResponse, Error
|
||||
from .. exceptions import TooManyRequests
|
||||
from .. base import FlowProcessor, ConsumerSpec, ProducerSpec
|
||||
|
||||
# Module logger
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
default_ident = "text-completion"
|
||||
default_concurrency = 1
|
||||
|
||||
|
|
@ -103,9 +107,9 @@ class LlmService(FlowProcessor):
|
|||
|
||||
# Apart from rate limits, treat all exceptions as unrecoverable
|
||||
|
||||
print(f"Exception: {e}")
|
||||
logger.error(f"LLM service exception: {e}", exc_info=True)
|
||||
|
||||
print("Send error response...", flush=True)
|
||||
logger.debug("Sending error response...")
|
||||
|
||||
await flow.producer["response"].send(
|
||||
TextCompletionResponse(
|
||||
|
|
|
|||
|
|
@ -1,6 +1,10 @@
|
|||
|
||||
from pulsar.schema import JsonSchema
|
||||
import asyncio
|
||||
import logging
|
||||
|
||||
# Module logger
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class Producer:
|
||||
|
||||
|
|
@ -39,15 +43,15 @@ class Producer:
|
|||
while self.running and self.producer is None:
|
||||
|
||||
try:
|
||||
print("Connect publisher to", self.topic, "...", flush=True)
|
||||
logger.info(f"Connecting publisher to {self.topic}...")
|
||||
self.producer = self.client.create_producer(
|
||||
topic = self.topic,
|
||||
schema = JsonSchema(self.schema),
|
||||
chunking_enabled = self.chunking_enabled,
|
||||
)
|
||||
print("Connected to", self.topic, flush=True)
|
||||
logger.info(f"Connected publisher to {self.topic}")
|
||||
except Exception as e:
|
||||
print("Exception:", e, flush=True)
|
||||
logger.error(f"Exception connecting publisher: {e}", exc_info=True)
|
||||
await asyncio.sleep(2)
|
||||
|
||||
if not self.running: break
|
||||
|
|
@ -68,7 +72,7 @@ class Producer:
|
|||
break
|
||||
|
||||
except Exception as e:
|
||||
print("Exception:", e, flush=True)
|
||||
logger.error(f"Exception sending message: {e}", exc_info=True)
|
||||
self.producer.close()
|
||||
self.producer = None
|
||||
|
||||
|
|
|
|||
|
|
@ -40,6 +40,13 @@ class PromptClient(RequestResponse):
|
|||
timeout = timeout,
|
||||
)
|
||||
|
||||
async def extract_objects(self, text, schema, timeout=600):
|
||||
return await self.prompt(
|
||||
id = "extract-rows",
|
||||
variables = { "text": text, "schema": schema, },
|
||||
timeout = timeout,
|
||||
)
|
||||
|
||||
async def kg_prompt(self, query, kg, timeout=600):
|
||||
return await self.prompt(
|
||||
id = "kg-prompt",
|
||||
|
|
|
|||
|
|
@ -4,6 +4,10 @@ from pulsar.schema import JsonSchema
|
|||
import asyncio
|
||||
import time
|
||||
import pulsar
|
||||
import logging
|
||||
|
||||
# Module logger
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class Publisher:
|
||||
|
||||
|
|
@ -62,7 +66,7 @@ class Publisher:
|
|||
producer.send(item)
|
||||
|
||||
except Exception as e:
|
||||
print("Exception:", e, flush=True)
|
||||
logger.error(f"Exception in publisher: {e}", exc_info=True)
|
||||
|
||||
if not self.running:
|
||||
return
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
|
||||
import os
|
||||
import pulsar
|
||||
import _pulsar
|
||||
import uuid
|
||||
from pulsar.schema import JsonSchema
|
||||
|
||||
|
|
@ -21,7 +22,7 @@ class PulsarClient:
|
|||
"pulsar_api_key",
|
||||
self.default_pulsar_api_key
|
||||
)
|
||||
log_level = params.get("log_level", LogLevel.INFO)
|
||||
# Hard-code Pulsar logging to ERROR level to minimize noise
|
||||
|
||||
self.pulsar_host = pulsar_host
|
||||
self.pulsar_api_key = pulsar_api_key
|
||||
|
|
@ -31,13 +32,13 @@ class PulsarClient:
|
|||
self.client = pulsar.Client(
|
||||
pulsar_host,
|
||||
authentication=auth,
|
||||
logger=pulsar.ConsoleLogger(log_level.to_pulsar())
|
||||
logger=pulsar.ConsoleLogger(_pulsar.LoggerLevel.Error)
|
||||
)
|
||||
else:
|
||||
self.client = pulsar.Client(
|
||||
pulsar_host,
|
||||
listener_name=pulsar_listener,
|
||||
logger=pulsar.ConsoleLogger(log_level.to_pulsar())
|
||||
logger=pulsar.ConsoleLogger(_pulsar.LoggerLevel.Error)
|
||||
)
|
||||
|
||||
self.pulsar_listener = pulsar_listener
|
||||
|
|
@ -73,8 +74,7 @@ class PulsarClient:
|
|||
|
||||
parser.add_argument(
|
||||
'-l', '--log-level',
|
||||
type=LogLevel,
|
||||
default=LogLevel.INFO,
|
||||
choices=list(LogLevel),
|
||||
help=f'Output queue (default: info)'
|
||||
default='INFO',
|
||||
choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'],
|
||||
help=f'Log level (default: INFO)'
|
||||
)
|
||||
|
|
|
|||
|
|
@ -1,12 +1,16 @@
|
|||
|
||||
import uuid
|
||||
import asyncio
|
||||
import logging
|
||||
|
||||
from . subscriber import Subscriber
|
||||
from . producer import Producer
|
||||
from . spec import Spec
|
||||
from . metrics import ConsumerMetrics, ProducerMetrics, SubscriberMetrics
|
||||
|
||||
# Module logger
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class RequestResponse(Subscriber):
|
||||
|
||||
def __init__(
|
||||
|
|
@ -45,7 +49,7 @@ class RequestResponse(Subscriber):
|
|||
|
||||
id = str(uuid.uuid4())
|
||||
|
||||
print("Request", id, "...", flush=True)
|
||||
logger.debug(f"Sending request {id}...")
|
||||
|
||||
q = await self.subscribe(id)
|
||||
|
||||
|
|
@ -58,7 +62,7 @@ class RequestResponse(Subscriber):
|
|||
|
||||
except Exception as e:
|
||||
|
||||
print("Exception:", e)
|
||||
logger.error(f"Exception sending request: {e}", exc_info=True)
|
||||
raise e
|
||||
|
||||
|
||||
|
|
@ -71,7 +75,7 @@ class RequestResponse(Subscriber):
|
|||
timeout=timeout
|
||||
)
|
||||
|
||||
print("Got response.", flush=True)
|
||||
logger.debug("Received response")
|
||||
|
||||
if recipient is None:
|
||||
|
||||
|
|
@ -93,7 +97,7 @@ class RequestResponse(Subscriber):
|
|||
|
||||
except Exception as e:
|
||||
|
||||
print("Exception:", e)
|
||||
logger.error(f"Exception processing response: {e}", exc_info=True)
|
||||
raise e
|
||||
|
||||
finally:
|
||||
|
|
|
|||
|
|
@ -7,6 +7,10 @@ from pulsar.schema import JsonSchema
|
|||
import asyncio
|
||||
import _pulsar
|
||||
import time
|
||||
import logging
|
||||
|
||||
# Module logger
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class Subscriber:
|
||||
|
||||
|
|
@ -66,7 +70,7 @@ class Subscriber:
|
|||
if self.metrics:
|
||||
self.metrics.state("running")
|
||||
|
||||
print("Subscriber running...", flush=True)
|
||||
logger.info("Subscriber running...")
|
||||
|
||||
while self.running:
|
||||
|
||||
|
|
@ -78,8 +82,7 @@ class Subscriber:
|
|||
except _pulsar.Timeout:
|
||||
continue
|
||||
except Exception as e:
|
||||
print("Exception:", e, flush=True)
|
||||
print(type(e))
|
||||
logger.error(f"Exception in subscriber receive: {e}", exc_info=True)
|
||||
raise e
|
||||
|
||||
if self.metrics:
|
||||
|
|
@ -110,7 +113,7 @@ class Subscriber:
|
|||
|
||||
except Exception as e:
|
||||
self.metrics.dropped()
|
||||
print("Q Put:", e, flush=True)
|
||||
logger.warning(f"Failed to put message in queue: {e}")
|
||||
|
||||
for q in self.full.values():
|
||||
try:
|
||||
|
|
@ -121,10 +124,10 @@ class Subscriber:
|
|||
)
|
||||
except Exception as e:
|
||||
self.metrics.dropped()
|
||||
print("Q Put:", e, flush=True)
|
||||
logger.warning(f"Failed to put message in full queue: {e}")
|
||||
|
||||
except Exception as e:
|
||||
print("Subscriber exception:", e, flush=True)
|
||||
logger.error(f"Subscriber exception: {e}", exc_info=True)
|
||||
|
||||
finally:
|
||||
|
||||
|
|
|
|||
40
trustgraph-base/trustgraph/base/tool_client.py
Normal file
40
trustgraph-base/trustgraph/base/tool_client.py
Normal file
|
|
@ -0,0 +1,40 @@
|
|||
|
||||
import json
|
||||
|
||||
from . request_response_spec import RequestResponse, RequestResponseSpec
|
||||
from .. schema import ToolRequest, ToolResponse
|
||||
|
||||
class ToolClient(RequestResponse):
    """Request/response client that invokes a named tool via ToolRequest."""

    async def invoke(self, name, parameters=None, timeout=600):
        """Invoke the tool called *name* and return its result.

        Args:
            name: Tool name placed in the request.
            parameters: JSON-serialisable tool arguments. ``None`` (the
                default) is sent as an empty JSON object.
            timeout: Passed through unchanged to ``self.request``.

        Returns:
            The response ``text`` when it is non-empty, otherwise the
            JSON-decoded ``object`` payload. When the response carries
            neither, the (empty) ``text`` field is returned as-is.

        Raises:
            RuntimeError: If the response carries an error; the message is
                taken from ``resp.error.message``.
        """

        # A None default avoids sharing one mutable dict between calls.
        if parameters is None:
            parameters = {}

        resp = await self.request(
            ToolRequest(
                name = name,
                parameters = json.dumps(parameters),
            ),
            timeout=timeout
        )

        if resp.error:
            raise RuntimeError(resp.error.message)

        if resp.text:
            return resp.text

        # A response with empty text and no object payload (e.g. a tool
        # that returned an empty string) has nothing to decode; passing
        # that to json.loads would raise, so return the text field instead.
        if not resp.object:
            return resp.text

        return json.loads(resp.object)
|
||||
|
||||
class ToolClientSpec(RequestResponseSpec):
    """Request/response spec wiring ToolRequest/ToolResponse to ToolClient."""

    def __init__(
            self, request_name, response_name,
    ):
        """Create the spec for the given request and response names."""
        spec_args = dict(
            request_name = request_name,
            request_schema = ToolRequest,
            response_name = response_name,
            response_schema = ToolResponse,
            impl = ToolClient,
        )
        super().__init__(**spec_args)
|
||||
|
||||
125
trustgraph-base/trustgraph/base/tool_service.py
Normal file
125
trustgraph-base/trustgraph/base/tool_service.py
Normal file
|
|
@ -0,0 +1,125 @@
|
|||
|
||||
"""
|
||||
Tool invocation base class
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
from prometheus_client import Counter
|
||||
|
||||
from .. schema import ToolRequest, ToolResponse, Error
|
||||
from .. exceptions import TooManyRequests
|
||||
from .. base import FlowProcessor, ConsumerSpec, ProducerSpec
|
||||
|
||||
# Module logger
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
default_concurrency = 1
|
||||
|
||||
class ToolService(FlowProcessor):
    """Base class for services that invoke a tool for each request.

    Registers a "request" consumer (ToolRequest) and a "response" producer
    (ToolResponse). ``on_request`` calls ``self.invoke_tool(name,
    parameters)``, which is not defined in this class and is expected to be
    supplied by subclasses.
    """

    def __init__(self, **params):
        """Register the request/response specifications and the metric.

        Args:
            **params: Processor parameters. ``id`` and ``concurrency`` are
                read here (``concurrency`` falls back to
                ``default_concurrency``); everything is forwarded to
                ``FlowProcessor``.
        """

        ident = params.get("id")
        concurrency = params.get("concurrency", default_concurrency)

        super().__init__(**params | {
            "id": ident,
            "concurrency": concurrency,
        })

        self.register_specification(
            ConsumerSpec(
                name = "request",
                schema = ToolRequest,
                handler = self.on_request,
                concurrency = concurrency,
            )
        )

        self.register_specification(
            ProducerSpec(
                name = "response",
                schema = ToolResponse
            )
        )

        # The counter is created once and stored on the class so that
        # further instances reuse it instead of registering it again.
        if not hasattr(__class__, "tool_invocation_metric"):
            __class__.tool_invocation_metric = Counter(
                'tool_invocation_count', 'Tool invocation count',
                ["id", "flow", "name"],
            )

    async def on_request(self, msg, consumer, flow):
        """Handle one ToolRequest message and send a ToolResponse.

        A string result is sent in ``text``; any other result is
        JSON-encoded into ``object``. On failure an error response of type
        "tool-error" is sent with the same request id.

        Args:
            msg: Incoming message; ``value()`` yields the request and
                ``properties()["id"]`` the sender-produced request id.
            consumer: Unused here.
            flow: Flow context used to reach the "response" producer.

        Raises:
            TooManyRequests: Re-raised unchanged.
            Exception: Re-raised when the failure happened before the
                request id could be read, because no error response can be
                correlated with the request in that case.
        """

        # Bound before the try block so the error path never trips over an
        # unbound local when the failure happens before the id is read.
        request_id = None

        try:

            request = msg.value()

            # Sender-produced ID
            request_id = msg.properties()["id"]

            response = await self.invoke_tool(
                request.name,
                json.loads(request.parameters) if request.parameters else {},
            )

            if isinstance(response, str):
                reply = ToolResponse(
                    error=None,
                    text=response,
                    object=None,
                )
            else:
                reply = ToolResponse(
                    error=None,
                    text=None,
                    object=json.dumps(response),
                )

            await flow("response").send(
                reply,
                properties={"id": request_id}
            )

            __class__.tool_invocation_metric.labels(
                id = self.id, flow = flow.name, name = request.name,
            ).inc()

        except TooManyRequests:
            raise

        except Exception as e:

            # Apart from rate limits, treat all exceptions as unrecoverable

            logger.error(f"Exception in tool service: {e}", exc_info=True)

            if request_id is None:
                # Without the request id the error response cannot be
                # matched to a request; surface the original failure.
                raise

            logger.info("Sending error response...")

            await flow.producer["response"].send(
                ToolResponse(
                    error=Error(
                        type = "tool-error",
                        message = str(e),
                    ),
                    text=None,
                    object=None,
                ),
                properties={"id": request_id}
            )

    @staticmethod
    def add_args(parser):
        """Add the concurrency option, then the FlowProcessor arguments."""

        parser.add_argument(
            '-c', '--concurrency',
            type=int,
            default=default_concurrency,
            help=f'Concurrent processing threads (default: {default_concurrency})'
        )

        FlowProcessor.add_args(parser)
|
||||
|
||||
|
|
@ -4,6 +4,8 @@ Triples query service. Input is a (s, p, o) triple, some values may be
|
|||
null. Output is a list of triples.
|
||||
"""
|
||||
|
||||
import logging
|
||||
|
||||
from .. schema import TriplesQueryRequest, TriplesQueryResponse, Error
|
||||
from .. schema import Value, Triple
|
||||
|
||||
|
|
@ -11,6 +13,9 @@ from . flow_processor import FlowProcessor
|
|||
from . consumer_spec import ConsumerSpec
|
||||
from . producer_spec import ProducerSpec
|
||||
|
||||
# Module logger
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
default_ident = "triples-query"
|
||||
|
||||
class TriplesQueryService(FlowProcessor):
|
||||
|
|
@ -45,21 +50,21 @@ class TriplesQueryService(FlowProcessor):
|
|||
# Sender-produced ID
|
||||
id = msg.properties()["id"]
|
||||
|
||||
print(f"Handling input {id}...", flush=True)
|
||||
logger.debug(f"Handling triples query request {id}...")
|
||||
|
||||
triples = await self.query_triples(request)
|
||||
|
||||
print("Send response...", flush=True)
|
||||
logger.debug("Sending triples query response...")
|
||||
r = TriplesQueryResponse(triples=triples, error=None)
|
||||
await flow("response").send(r, properties={"id": id})
|
||||
|
||||
print("Done.", flush=True)
|
||||
logger.debug("Triples query request completed")
|
||||
|
||||
except Exception as e:
|
||||
|
||||
print(f"Exception: {e}")
|
||||
logger.error(f"Exception in triples query service: {e}", exc_info=True)
|
||||
|
||||
print("Send error response...", flush=True)
|
||||
logger.info("Sending error response...")
|
||||
|
||||
r = TriplesQueryResponse(
|
||||
error = Error(
|
||||
|
|
|
|||
|
|
@ -3,10 +3,15 @@
|
|||
Triples store base class
|
||||
"""
|
||||
|
||||
import logging
|
||||
|
||||
from .. schema import Triples
|
||||
from .. base import FlowProcessor, ConsumerSpec
|
||||
from .. exceptions import TooManyRequests
|
||||
|
||||
# Module logger
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
default_ident = "triples-write"
|
||||
|
||||
class TriplesStoreService(FlowProcessor):
|
||||
|
|
@ -38,7 +43,7 @@ class TriplesStoreService(FlowProcessor):
|
|||
|
||||
except Exception as e:
|
||||
|
||||
print(f"Exception: {e}")
|
||||
logger.error(f"Exception in triples store service: {e}", exc_info=True)
|
||||
raise e
|
||||
|
||||
@staticmethod
|
||||
|
|
|
|||
|
|
@ -16,6 +16,7 @@ from .translators.document_loading import DocumentTranslator, TextDocumentTransl
|
|||
from .translators.config import ConfigRequestTranslator, ConfigResponseTranslator
|
||||
from .translators.flow import FlowRequestTranslator, FlowResponseTranslator
|
||||
from .translators.prompt import PromptRequestTranslator, PromptResponseTranslator
|
||||
from .translators.tool import ToolRequestTranslator, ToolResponseTranslator
|
||||
from .translators.embeddings_query import (
|
||||
DocumentEmbeddingsRequestTranslator, DocumentEmbeddingsResponseTranslator,
|
||||
GraphEmbeddingsRequestTranslator, GraphEmbeddingsResponseTranslator
|
||||
|
|
@ -88,6 +89,12 @@ TranslatorRegistry.register_service(
|
|||
PromptResponseTranslator()
|
||||
)
|
||||
|
||||
TranslatorRegistry.register_service(
|
||||
"tool",
|
||||
ToolRequestTranslator(),
|
||||
ToolResponseTranslator()
|
||||
)
|
||||
|
||||
TranslatorRegistry.register_service(
|
||||
"document-embeddings-query",
|
||||
DocumentEmbeddingsRequestTranslator(),
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
from .base import Translator, MessageTranslator
|
||||
from .primitives import ValueTranslator, TripleTranslator, SubgraphTranslator
|
||||
from .primitives import ValueTranslator, TripleTranslator, SubgraphTranslator, RowSchemaTranslator, FieldTranslator, row_schema_translator, field_translator
|
||||
from .metadata import DocumentMetadataTranslator, ProcessingMetadataTranslator
|
||||
from .agent import AgentRequestTranslator, AgentResponseTranslator
|
||||
from .embeddings import EmbeddingsRequestTranslator, EmbeddingsResponseTranslator
|
||||
|
|
|
|||
|
|
@ -38,12 +38,13 @@ class ConfigRequestTranslator(MessageTranslator):
|
|||
def from_pulsar(self, obj: ConfigRequest) -> Dict[str, Any]:
|
||||
result = {}
|
||||
|
||||
if obj.operation:
|
||||
if obj.operation is not None:
|
||||
result["operation"] = obj.operation
|
||||
if obj.type:
|
||||
|
||||
if obj.type is not None:
|
||||
result["type"] = obj.type
|
||||
|
||||
if obj.keys:
|
||||
if obj.keys is not None:
|
||||
result["keys"] = [
|
||||
{
|
||||
"type": k.type,
|
||||
|
|
@ -52,7 +53,7 @@ class ConfigRequestTranslator(MessageTranslator):
|
|||
for k in obj.keys
|
||||
]
|
||||
|
||||
if obj.values:
|
||||
if obj.values is not None:
|
||||
result["values"] = [
|
||||
{
|
||||
"type": v.type,
|
||||
|
|
@ -77,7 +78,7 @@ class ConfigResponseTranslator(MessageTranslator):
|
|||
if obj.version is not None:
|
||||
result["version"] = obj.version
|
||||
|
||||
if obj.values:
|
||||
if obj.values is not None:
|
||||
result["values"] = [
|
||||
{
|
||||
"type": v.type,
|
||||
|
|
@ -87,14 +88,14 @@ class ConfigResponseTranslator(MessageTranslator):
|
|||
for v in obj.values
|
||||
]
|
||||
|
||||
if obj.directory:
|
||||
if obj.directory is not None:
|
||||
result["directory"] = obj.directory
|
||||
|
||||
if obj.config:
|
||||
if obj.config is not None:
|
||||
result["config"] = obj.config
|
||||
|
||||
return result
|
||||
|
||||
def from_response_with_completion(self, obj: ConfigResponse) -> Tuple[Dict[str, Any], bool]:
|
||||
"""Returns (response_dict, is_final)"""
|
||||
return self.from_pulsar(obj), True
|
||||
return self.from_pulsar(obj), True
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
from typing import Dict, Any, List
|
||||
from ...schema import Value, Triple
|
||||
from ...schema import Value, Triple, RowSchema, Field
|
||||
from .base import Translator
|
||||
|
||||
|
||||
|
|
@ -44,4 +44,97 @@ class SubgraphTranslator(Translator):
|
|||
return [self.triple_translator.to_pulsar(t) for t in data]
|
||||
|
||||
def from_pulsar(self, obj: List[Triple]) -> List[Dict[str, Any]]:
|
||||
return [self.triple_translator.from_pulsar(t) for t in obj]
|
||||
return [self.triple_translator.from_pulsar(t) for t in obj]
|
||||
|
||||
|
||||
class RowSchemaTranslator(Translator):
    """Converts between plain dicts and RowSchema Pulsar objects."""

    def to_pulsar(self, data: Dict[str, Any]) -> RowSchema:
        """Build a RowSchema from *data*, filling defaults for missing keys."""
        fields = [
            Field(
                name=spec.get("name", ""),
                type=spec.get("type", "string"),
                size=spec.get("size", 0),
                primary=spec.get("primary", False),
                description=spec.get("description", ""),
                required=spec.get("required", False),
                indexed=spec.get("indexed", False),
                enum_values=spec.get("enum_values", [])
            )
            for spec in data.get("fields", [])
        ]

        return RowSchema(
            name=data.get("name", ""),
            description=data.get("description", ""),
            fields=fields
        )

    def from_pulsar(self, obj: RowSchema) -> Dict[str, Any]:
        """Render a RowSchema as a JSON-serializable dictionary."""

        def describe(field):
            # One dict per field; enum_values is only emitted when non-empty.
            entry = {
                "name": field.name,
                "type": field.type,
                "size": field.size,
                "primary": field.primary,
                "description": field.description,
                "required": field.required,
                "indexed": field.indexed
            }
            if field.enum_values:
                entry["enum_values"] = list(field.enum_values)
            return entry

        return {
            "name": obj.name,
            "description": obj.description,
            "fields": [describe(field) for field in obj.fields]
        }
|
||||
|
||||
|
||||
class FieldTranslator(Translator):
    """Converts between plain dicts and Field Pulsar objects."""

    def to_pulsar(self, data: Dict[str, Any]) -> Field:
        """Build a Field from *data*, filling defaults for missing keys."""
        lookup = data.get
        return Field(
            name=lookup("name", ""),
            type=lookup("type", "string"),
            size=lookup("size", 0),
            primary=lookup("primary", False),
            description=lookup("description", ""),
            required=lookup("required", False),
            indexed=lookup("indexed", False),
            enum_values=lookup("enum_values", [])
        )

    def from_pulsar(self, obj: Field) -> Dict[str, Any]:
        """Render a Field as a JSON-serializable dictionary."""
        always_present = (
            "name", "type", "size", "primary",
            "description", "required", "indexed",
        )
        result = {attr: getattr(obj, attr) for attr in always_present}

        # enum_values is only emitted when the field actually has some.
        enum_values = obj.enum_values
        if enum_values:
            result["enum_values"] = list(enum_values)

        return result
|
||||
|
||||
|
||||
# Create singleton instances for easy access
|
||||
row_schema_translator = RowSchemaTranslator()
|
||||
field_translator = FieldTranslator()
|
||||
51
trustgraph-base/trustgraph/messaging/translators/tool.py
Normal file
51
trustgraph-base/trustgraph/messaging/translators/tool.py
Normal file
|
|
@ -0,0 +1,51 @@
|
|||
import json
|
||||
from typing import Dict, Any, Tuple
|
||||
from ...schema import ToolRequest, ToolResponse
|
||||
from .base import MessageTranslator
|
||||
|
||||
class ToolRequestTranslator(MessageTranslator):
    """Converts between plain dicts and ToolRequest schema objects."""

    def to_pulsar(self, data: Dict[str, Any]) -> ToolRequest:
        """Build a ToolRequest from the "name" and "parameters" keys.

        "parameters" is JSON-encoded when present; when the key is absent
        the request carries ``None`` instead.
        """
        name = data.get("name", "")
        parameters = (
            json.dumps(data["parameters"]) if "parameters" in data else None
        )

        return ToolRequest(
            name = name,
            parameters = parameters,
        )

    def from_pulsar(self, obj: ToolRequest) -> Dict[str, Any]:
        """Render a ToolRequest as a dict, decoding the JSON parameters."""
        result = {}

        name = obj.name
        if name:
            result["name"] = name

        encoded = obj.parameters
        if encoded is not None:
            result["parameters"] = json.loads(encoded)

        return result
|
||||
|
||||
class ToolResponseTranslator(MessageTranslator):
    """Converts ToolResponse schema objects into plain dicts."""

    def to_pulsar(self, data: Dict[str, Any]) -> ToolResponse:
        """Not supported; always raises NotImplementedError."""
        raise NotImplementedError("Response translation to Pulsar not typically needed")

    def from_pulsar(self, obj: ToolResponse) -> Dict[str, Any]:
        """Render a ToolResponse as a dict.

        "text" is included when non-empty; "object" is included, decoded
        from JSON, when non-empty.
        """
        result = {}

        text = obj.text
        if text:
            result["text"] = text

        encoded = obj.object
        if encoded:
            result["object"] = json.loads(encoded)

        return result

    def from_response_with_completion(self, obj: ToolResponse) -> Tuple[Dict[str, Any], bool]:
        """Returns (response_dict, is_final); is_final is always True."""
        translated = self.from_pulsar(obj)
        return translated, True
|
||||
35
trustgraph-base/trustgraph/schema/README.flows
Normal file
35
trustgraph-base/trustgraph/schema/README.flows
Normal file
|
|
@ -0,0 +1,35 @@
|
|||
|
||||
pdf-
|
||||
decoder
|
||||
|
||||
|
|
||||
v
|
||||
|
||||
chunker
|
||||
|
||||
|
|
||||
,------------------+----------- . . .
|
||||
| |
|
||||
v v
|
||||
|
||||
extract- extract-
|
||||
relationships definitions
|
||||
|
||||
| | |
|
||||
+----------------' |
|
||||
| v
|
||||
v
|
||||
vectorize
|
||||
triple-
|
||||
store |
|
||||
v
|
||||
|
||||
ge-write
|
||||
|
||||
Refactor:
|
||||
|
||||
[] Change vectorize
|
||||
[] Re-route chunker to extract-*
|
||||
[] Re-route vectorize to ge-write*
|
||||
[] Re-route extract-definitions to ge-write*
|
||||
[] Remove extract-relationships to ge-write routing
|
||||
|
|
@ -1,17 +1,10 @@
|
|||
|
||||
from . types import *
|
||||
from . prompt import *
|
||||
from . documents import *
|
||||
from . models import *
|
||||
from . object import *
|
||||
from . topic import *
|
||||
from . graph import *
|
||||
from . retrieval import *
|
||||
from . metadata import *
|
||||
from . agent import *
|
||||
from . lookup import *
|
||||
from . library import *
|
||||
from . config import *
|
||||
from . flows import *
|
||||
from . knowledge import *
|
||||
# Import core types and primitives
|
||||
from .core import *
|
||||
|
||||
# Import knowledge schemas
|
||||
from .knowledge import *
|
||||
|
||||
# Import service schemas
|
||||
from .services import *
|
||||
|
||||
|
|
|
|||
3
trustgraph-base/trustgraph/schema/core/__init__.py
Normal file
3
trustgraph-base/trustgraph/schema/core/__init__.py
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
from .primitives import *
|
||||
from .metadata import *
|
||||
from .topic import *
|
||||
|
|
@ -1,6 +1,6 @@
|
|||
|
||||
from pulsar.schema import Record, String, Array
|
||||
from . types import Triple
|
||||
from .primitives import Triple
|
||||
|
||||
class Metadata(Record):
|
||||
|
||||
|
|
@ -17,11 +17,15 @@ class Triple(Record):
|
|||
|
||||
class Field(Record):
|
||||
name = String()
|
||||
# int, string, long, bool, float, double
|
||||
# int, string, long, bool, float, double, timestamp
|
||||
type = String()
|
||||
size = Integer()
|
||||
primary = Boolean()
|
||||
description = String()
|
||||
# NEW FIELDS for structured data:
|
||||
required = Boolean() # Whether field is required
|
||||
enum_values = Array(String()) # For enum type fields
|
||||
indexed = Boolean() # Whether field should be indexed
|
||||
|
||||
class RowSchema(Record):
|
||||
name = String()
|
||||
|
|
@ -1,56 +0,0 @@
|
|||
|
||||
from pulsar.schema import Record, Bytes, String, Boolean, Integer, Array, Double
|
||||
from . topic import topic
|
||||
from . types import Error
|
||||
from . metadata import Metadata
|
||||
|
||||
############################################################################
|
||||
|
||||
# PDF docs etc.
|
||||
class Document(Record):
|
||||
metadata = Metadata()
|
||||
data = Bytes()
|
||||
|
||||
############################################################################
|
||||
|
||||
# Text documents / text from PDF
|
||||
|
||||
class TextDocument(Record):
|
||||
metadata = Metadata()
|
||||
text = Bytes()
|
||||
|
||||
############################################################################
|
||||
|
||||
# Chunks of text
|
||||
|
||||
class Chunk(Record):
|
||||
metadata = Metadata()
|
||||
chunk = Bytes()
|
||||
|
||||
############################################################################
|
||||
|
||||
# Document embeddings are embeddings associated with a chunk
|
||||
|
||||
class ChunkEmbeddings(Record):
|
||||
chunk = Bytes()
|
||||
vectors = Array(Array(Double()))
|
||||
|
||||
# This is a 'batching' mechanism for the above data
|
||||
class DocumentEmbeddings(Record):
|
||||
metadata = Metadata()
|
||||
chunks = Array(ChunkEmbeddings())
|
||||
|
||||
############################################################################
|
||||
|
||||
# Doc embeddings query
|
||||
|
||||
class DocumentEmbeddingsRequest(Record):
|
||||
vectors = Array(Array(Double()))
|
||||
limit = Integer()
|
||||
user = String()
|
||||
collection = String()
|
||||
|
||||
class DocumentEmbeddingsResponse(Record):
|
||||
error = Error()
|
||||
documents = Array(Bytes())
|
||||
|
||||
8
trustgraph-base/trustgraph/schema/knowledge/__init__.py
Normal file
8
trustgraph-base/trustgraph/schema/knowledge/__init__.py
Normal file
|
|
@ -0,0 +1,8 @@
|
|||
from .graph import *
|
||||
from .document import *
|
||||
from .embeddings import *
|
||||
from .knowledge import *
|
||||
from .nlp import *
|
||||
from .rows import *
|
||||
from .structured import *
|
||||
from .object import *
|
||||
29
trustgraph-base/trustgraph/schema/knowledge/document.py
Normal file
29
trustgraph-base/trustgraph/schema/knowledge/document.py
Normal file
|
|
@ -0,0 +1,29 @@
|
|||
from pulsar.schema import Record, Bytes
|
||||
|
||||
from ..core.metadata import Metadata
|
||||
from ..core.topic import topic
|
||||
|
||||
############################################################################
|
||||
|
||||
# PDF docs etc.
|
||||
class Document(Record):
|
||||
metadata = Metadata()
|
||||
data = Bytes()
|
||||
|
||||
############################################################################
|
||||
|
||||
# Text documents / text from PDF
|
||||
|
||||
class TextDocument(Record):
|
||||
metadata = Metadata()
|
||||
text = Bytes()
|
||||
|
||||
############################################################################
|
||||
|
||||
# Chunks of text
|
||||
|
||||
class Chunk(Record):
|
||||
metadata = Metadata()
|
||||
chunk = Bytes()
|
||||
|
||||
############################################################################
|
||||
|
|
@ -1,22 +1,8 @@
|
|||
from pulsar.schema import Record, Bytes, String, Boolean, Integer, Array, Double, Map
|
||||
|
||||
from pulsar.schema import Record, Bytes, String, Boolean, Integer, Array, Double
|
||||
|
||||
from . types import Error, Value, Triple
|
||||
from . topic import topic
|
||||
from . metadata import Metadata
|
||||
|
||||
############################################################################
|
||||
|
||||
# Entity context are an entity associated with textual context
|
||||
|
||||
class EntityContext(Record):
|
||||
entity = Value()
|
||||
context = String()
|
||||
|
||||
# This is a 'batching' mechanism for the above data
|
||||
class EntityContexts(Record):
|
||||
metadata = Metadata()
|
||||
entities = Array(EntityContext())
|
||||
from ..core.metadata import Metadata
|
||||
from ..core.primitives import Value, RowSchema
|
||||
from ..core.topic import topic
|
||||
|
||||
############################################################################
|
||||
|
||||
|
|
@ -33,39 +19,38 @@ class GraphEmbeddings(Record):
|
|||
|
||||
############################################################################
|
||||
|
||||
# Graph embeddings query
|
||||
# Document embeddings are embeddings associated with a chunk
|
||||
|
||||
class GraphEmbeddingsRequest(Record):
|
||||
class ChunkEmbeddings(Record):
|
||||
chunk = Bytes()
|
||||
vectors = Array(Array(Double()))
|
||||
limit = Integer()
|
||||
user = String()
|
||||
collection = String()
|
||||
|
||||
class GraphEmbeddingsResponse(Record):
|
||||
error = Error()
|
||||
entities = Array(Value())
|
||||
|
||||
############################################################################
|
||||
|
||||
# Graph triples
|
||||
|
||||
class Triples(Record):
|
||||
# This is a 'batching' mechanism for the above data
|
||||
class DocumentEmbeddings(Record):
|
||||
metadata = Metadata()
|
||||
triples = Array(Triple())
|
||||
chunks = Array(ChunkEmbeddings())
|
||||
|
||||
############################################################################
|
||||
|
||||
# Triples query
|
||||
# Object embeddings are embeddings associated with the primary key of an
|
||||
# object
|
||||
|
||||
class TriplesQueryRequest(Record):
|
||||
s = Value()
|
||||
p = Value()
|
||||
o = Value()
|
||||
limit = Integer()
|
||||
user = String()
|
||||
collection = String()
|
||||
class ObjectEmbeddings(Record):
|
||||
metadata = Metadata()
|
||||
vectors = Array(Array(Double()))
|
||||
name = String()
|
||||
key_name = String()
|
||||
id = String()
|
||||
|
||||
class TriplesQueryResponse(Record):
|
||||
error = Error()
|
||||
triples = Array(Triple())
|
||||
############################################################################
|
||||
|
||||
# Structured object embeddings with enhanced capabilities
|
||||
|
||||
class StructuredObjectEmbedding(Record):
|
||||
metadata = Metadata()
|
||||
vectors = Array(Array(Double()))
|
||||
schema_name = String()
|
||||
object_id = String() # Primary key value
|
||||
field_embeddings = Map(Array(Double())) # Per-field embeddings
|
||||
|
||||
############################################################################
|
||||
28
trustgraph-base/trustgraph/schema/knowledge/graph.py
Normal file
28
trustgraph-base/trustgraph/schema/knowledge/graph.py
Normal file
|
|
@ -0,0 +1,28 @@
|
|||
from pulsar.schema import Record, String, Array
|
||||
|
||||
from ..core.primitives import Value, Triple
|
||||
from ..core.metadata import Metadata
|
||||
from ..core.topic import topic
|
||||
|
||||
############################################################################
|
||||
|
||||
# Entity context are an entity associated with textual context
|
||||
|
||||
class EntityContext(Record):
|
||||
entity = Value()
|
||||
context = String()
|
||||
|
||||
# This is a 'batching' mechanism for the above data
|
||||
class EntityContexts(Record):
|
||||
metadata = Metadata()
|
||||
entities = Array(EntityContext())
|
||||
|
||||
############################################################################
|
||||
|
||||
# Graph triples
|
||||
|
||||
class Triples(Record):
|
||||
metadata = Metadata()
|
||||
triples = Array(Triple())
|
||||
|
||||
############################################################################
|
||||
|
|
@ -1,11 +1,11 @@
|
|||
|
||||
from pulsar.schema import Record, Bytes, String, Array, Long, Boolean
|
||||
from . types import Triple
|
||||
from . topic import topic
|
||||
from . types import Error
|
||||
from . metadata import Metadata
|
||||
from . documents import Document, TextDocument
|
||||
from . graph import Triples, GraphEmbeddings
|
||||
from ..core.primitives import Triple, Error
|
||||
from ..core.topic import topic
|
||||
from ..core.metadata import Metadata
|
||||
from .document import Document, TextDocument
|
||||
from .graph import Triples
|
||||
from .embeddings import GraphEmbeddings
|
||||
|
||||
# get-kg-core
|
||||
# -> (???)
|
||||
26
trustgraph-base/trustgraph/schema/knowledge/nlp.py
Normal file
26
trustgraph-base/trustgraph/schema/knowledge/nlp.py
Normal file
|
|
@ -0,0 +1,26 @@
|
|||
from pulsar.schema import Record, String, Boolean
|
||||
|
||||
from ..core.topic import topic
|
||||
|
||||
############################################################################
|
||||
|
||||
# NLP extraction data types
|
||||
|
||||
class Definition(Record):
|
||||
name = String()
|
||||
definition = String()
|
||||
|
||||
class Topic(Record):
|
||||
name = String()
|
||||
definition = String()
|
||||
|
||||
class Relationship(Record):
|
||||
s = String()
|
||||
p = String()
|
||||
o = String()
|
||||
o_entity = Boolean()
|
||||
|
||||
class Fact(Record):
|
||||
s = String()
|
||||
p = String()
|
||||
o = String()
|
||||
17
trustgraph-base/trustgraph/schema/knowledge/object.py
Normal file
17
trustgraph-base/trustgraph/schema/knowledge/object.py
Normal file
|
|
@ -0,0 +1,17 @@
|
|||
from pulsar.schema import Record, String, Map, Double
|
||||
|
||||
from ..core.metadata import Metadata
|
||||
from ..core.topic import topic
|
||||
|
||||
############################################################################
|
||||
|
||||
# Extracted object from text processing
|
||||
|
||||
class ExtractedObject(Record):
|
||||
metadata = Metadata()
|
||||
schema_name = String() # Which schema this object belongs to
|
||||
values = Map(String()) # Field name -> value
|
||||
confidence = Double()
|
||||
source_span = String() # Text span where object was found
|
||||
|
||||
############################################################################
|
||||
16
trustgraph-base/trustgraph/schema/knowledge/rows.py
Normal file
16
trustgraph-base/trustgraph/schema/knowledge/rows.py
Normal file
|
|
@ -0,0 +1,16 @@
|
|||
from pulsar.schema import Record, Array, Map, String
|
||||
|
||||
from ..core.metadata import Metadata
|
||||
from ..core.primitives import RowSchema
|
||||
from ..core.topic import topic
|
||||
|
||||
############################################################################
|
||||
|
||||
# Stores rows of information
|
||||
|
||||
class Rows(Record):
|
||||
metadata = Metadata()
|
||||
row_schema = RowSchema()
|
||||
rows = Array(Map(String()))
|
||||
|
||||
############################################################################
|
||||
17
trustgraph-base/trustgraph/schema/knowledge/structured.py
Normal file
17
trustgraph-base/trustgraph/schema/knowledge/structured.py
Normal file
|
|
@ -0,0 +1,17 @@
|
|||
from pulsar.schema import Record, String, Bytes, Map
|
||||
|
||||
from ..core.metadata import Metadata
|
||||
from ..core.topic import topic
|
||||
|
||||
############################################################################
|
||||
|
||||
# Structured data submission for fire-and-forget processing
|
||||
|
||||
class StructuredDataSubmission(Record):
|
||||
metadata = Metadata()
|
||||
format = String() # "json", "csv", "xml"
|
||||
schema_name = String() # Reference to schema in config
|
||||
data = Bytes() # Raw data to ingest
|
||||
options = Map(String()) # Format-specific options
|
||||
|
||||
############################################################################
|
||||
|
|
@ -1,31 +0,0 @@
|
|||
|
||||
from pulsar.schema import Record, Bytes, String, Boolean, Integer, Array
|
||||
from pulsar.schema import Double, Map
|
||||
|
||||
from . metadata import Metadata
|
||||
from . types import Value, RowSchema
|
||||
from . topic import topic
|
||||
|
||||
############################################################################
|
||||
|
||||
# Object embeddings are embeddings associated with the primary key of an
|
||||
# object
|
||||
|
||||
class ObjectEmbeddings(Record):
|
||||
metadata = Metadata()
|
||||
vectors = Array(Array(Double()))
|
||||
name = String()
|
||||
key_name = String()
|
||||
id = String()
|
||||
|
||||
############################################################################
|
||||
|
||||
# Stores rows of information
|
||||
|
||||
class Rows(Record):
|
||||
metadata = Metadata()
|
||||
row_schema = RowSchema()
|
||||
rows = Array(Map(String()))
|
||||
|
||||
|
||||
|
||||
11
trustgraph-base/trustgraph/schema/services/__init__.py
Normal file
11
trustgraph-base/trustgraph/schema/services/__init__.py
Normal file
|
|
@ -0,0 +1,11 @@
|
|||
from .llm import *
|
||||
from .retrieval import *
|
||||
from .query import *
|
||||
from .agent import *
|
||||
from .flow import *
|
||||
from .prompt import *
|
||||
from .config import *
|
||||
from .library import *
|
||||
from .lookup import *
|
||||
from .nlp_query import *
|
||||
from .structured_query import *
|
||||
|
|
@ -1,8 +1,8 @@
|
|||
|
||||
from pulsar.schema import Record, String, Array, Map
|
||||
|
||||
from . topic import topic
|
||||
from . types import Error
|
||||
from ..core.topic import topic
|
||||
from ..core.primitives import Error
|
||||
|
||||
############################################################################
|
||||
|
||||
|
|
@ -1,8 +1,8 @@
|
|||
|
||||
from pulsar.schema import Record, Bytes, String, Boolean, Array, Map, Integer
|
||||
|
||||
from . topic import topic
|
||||
from . types import Error
|
||||
from ..core.topic import topic
|
||||
from ..core.primitives import Error
|
||||
|
||||
############################################################################
|
||||
|
||||
|
|
@ -1,8 +1,8 @@
|
|||
|
||||
from pulsar.schema import Record, Bytes, String, Boolean, Array, Map, Integer
|
||||
|
||||
from . topic import topic
|
||||
from . types import Error
|
||||
from ..core.topic import topic
|
||||
from ..core.primitives import Error
|
||||
|
||||
############################################################################
|
||||
|
||||
|
|
@ -1,10 +1,9 @@
|
|||
|
||||
from pulsar.schema import Record, Bytes, String, Array, Long
|
||||
from . types import Triple
|
||||
from . topic import topic
|
||||
from . types import Error
|
||||
from . metadata import Metadata
|
||||
from . documents import Document, TextDocument
|
||||
from ..core.primitives import Triple, Error
|
||||
from ..core.topic import topic
|
||||
from ..core.metadata import Metadata
|
||||
from ..knowledge.document import Document, TextDocument
|
||||
|
||||
# add-document
|
||||
# -> (document_id, document_metadata, content)
|
||||
|
|
@ -1,8 +1,8 @@
|
|||
|
||||
from pulsar.schema import Record, String, Array, Double, Integer
|
||||
|
||||
from . topic import topic
|
||||
from . types import Error
|
||||
from ..core.topic import topic
|
||||
from ..core.primitives import Error
|
||||
|
||||
############################################################################
|
||||
|
||||
|
|
@ -30,3 +30,22 @@ class EmbeddingsResponse(Record):
|
|||
error = Error()
|
||||
vectors = Array(Array(Double()))
|
||||
|
||||
############################################################################
|
||||
|
||||
# Tool request/response
|
||||
|
||||
class ToolRequest(Record):
|
||||
name = String()
|
||||
|
||||
# Parameters are JSON encoded
|
||||
parameters = String()
|
||||
|
||||
class ToolResponse(Record):
|
||||
error = Error()
|
||||
|
||||
# Plain text aka "unstructured"
|
||||
text = String()
|
||||
|
||||
# JSON-encoded object aka "structured"
|
||||
object = String()
|
||||
|
||||
|
|
@ -1,9 +1,9 @@
|
|||
|
||||
from pulsar.schema import Record, String
|
||||
|
||||
from . types import Error, Value, Triple
|
||||
from . topic import topic
|
||||
from . metadata import Metadata
|
||||
from ..core.primitives import Error, Value, Triple
|
||||
from ..core.topic import topic
|
||||
from ..core.metadata import Metadata
|
||||
|
||||
############################################################################
|
||||
|
||||
22
trustgraph-base/trustgraph/schema/services/nlp_query.py
Normal file
22
trustgraph-base/trustgraph/schema/services/nlp_query.py
Normal file
|
|
@ -0,0 +1,22 @@
|
|||
from pulsar.schema import Record, String, Array, Map, Integer, Double
|
||||
|
||||
from ..core.primitives import Error
|
||||
from ..core.topic import topic
|
||||
|
||||
############################################################################
|
||||
|
||||
# NLP to Structured Query Service - converts natural language to GraphQL
|
||||
|
||||
class NLPToStructuredQueryRequest(Record):
|
||||
natural_language_query = String()
|
||||
max_results = Integer()
|
||||
context_hints = Map(String()) # Optional context for query generation
|
||||
|
||||
class NLPToStructuredQueryResponse(Record):
|
||||
error = Error()
|
||||
graphql_query = String() # Generated GraphQL query
|
||||
variables = Map(String()) # GraphQL variables if any
|
||||
detected_schemas = Array(String()) # Which schemas the query targets
|
||||
confidence = Double()
|
||||
|
||||
############################################################################
|
||||
|
|
@ -1,32 +1,12 @@
|
|||
from pulsar.schema import Record, String, Map
|
||||
|
||||
from pulsar.schema import Record, Bytes, String, Boolean, Array, Map, Integer
|
||||
|
||||
from . topic import topic
|
||||
from . types import Error, RowSchema
|
||||
from ..core.primitives import Error
|
||||
from ..core.topic import topic
|
||||
|
||||
############################################################################
|
||||
|
||||
# Prompt services, abstract the prompt generation
|
||||
|
||||
class Definition(Record):
|
||||
name = String()
|
||||
definition = String()
|
||||
|
||||
class Topic(Record):
|
||||
name = String()
|
||||
definition = String()
|
||||
|
||||
class Relationship(Record):
|
||||
s = String()
|
||||
p = String()
|
||||
o = String()
|
||||
o_entity = Boolean()
|
||||
|
||||
class Fact(Record):
|
||||
s = String()
|
||||
p = String()
|
||||
o = String()
|
||||
|
||||
# extract-definitions:
|
||||
# chunk -> definitions
|
||||
# extract-relationships:
|
||||
|
|
@ -55,5 +35,4 @@ class PromptResponse(Record):
|
|||
# JSON encoded
|
||||
object = String()
|
||||
|
||||
############################################################################
|
||||
|
||||
############################################################################
|
||||
48
trustgraph-base/trustgraph/schema/services/query.py
Normal file
48
trustgraph-base/trustgraph/schema/services/query.py
Normal file
|
|
@ -0,0 +1,48 @@
|
|||
from pulsar.schema import Record, String, Integer, Array, Double
|
||||
|
||||
from ..core.primitives import Error, Value, Triple
|
||||
from ..core.topic import topic
|
||||
|
||||
############################################################################
|
||||
|
||||
# Graph embeddings query
|
||||
|
||||
class GraphEmbeddingsRequest(Record):
|
||||
vectors = Array(Array(Double()))
|
||||
limit = Integer()
|
||||
user = String()
|
||||
collection = String()
|
||||
|
||||
class GraphEmbeddingsResponse(Record):
|
||||
error = Error()
|
||||
entities = Array(Value())
|
||||
|
||||
############################################################################
|
||||
|
||||
# Graph triples query
|
||||
|
||||
class TriplesQueryRequest(Record):
|
||||
user = String()
|
||||
collection = String()
|
||||
s = Value()
|
||||
p = Value()
|
||||
o = Value()
|
||||
limit = Integer()
|
||||
|
||||
class TriplesQueryResponse(Record):
|
||||
error = Error()
|
||||
triples = Array(Triple())
|
||||
|
||||
############################################################################
|
||||
|
||||
# Doc embeddings query
|
||||
|
||||
class DocumentEmbeddingsRequest(Record):
|
||||
vectors = Array(Array(Double()))
|
||||
limit = Integer()
|
||||
user = String()
|
||||
collection = String()
|
||||
|
||||
class DocumentEmbeddingsResponse(Record):
|
||||
error = Error()
|
||||
chunks = Array(String())
|
||||
|
|
@ -1,7 +1,7 @@
|
|||
|
||||
from pulsar.schema import Record, Bytes, String, Boolean, Integer, Array, Double
|
||||
from . topic import topic
|
||||
from . types import Error, Value
|
||||
from ..core.topic import topic
|
||||
from ..core.primitives import Error, Value
|
||||
|
||||
############################################################################
|
||||
|
||||
|
|
@ -0,0 +1,20 @@
|
|||
from pulsar.schema import Record, String, Map, Array
|
||||
|
||||
from ..core.primitives import Error
|
||||
from ..core.topic import topic
|
||||
|
||||
############################################################################
|
||||
|
||||
# Structured Query Service - executes GraphQL queries
|
||||
|
||||
class StructuredQueryRequest(Record):
|
||||
query = String() # GraphQL query
|
||||
variables = Map(String()) # GraphQL variables
|
||||
operation_name = String() # Optional operation name for multi-operation documents
|
||||
|
||||
class StructuredQueryResponse(Record):
|
||||
error = Error()
|
||||
data = String() # JSON-encoded GraphQL response data
|
||||
errors = Array(String()) # GraphQL errors if any
|
||||
|
||||
############################################################################
|
||||
Loading…
Add table
Add a link
Reference in a new issue