mirror of
https://github.com/trustgraph-ai/trustgraph.git
synced 2026-04-29 18:36:22 +02:00
Feature/pkgsplit (#83)
* Starting to spawn base package * More package hacking * Bedrock and VertexAI * Parquet split * Updated templates * Utils
This commit is contained in:
parent
3fb75c617b
commit
9b91d5eee3
262 changed files with 630 additions and 420 deletions
6
trustgraph-base/trustgraph/base/__init__.py
Normal file
6
trustgraph-base/trustgraph/base/__init__.py
Normal file
|
|
@ -0,0 +1,6 @@
|
|||
|
||||
from . base_processor import BaseProcessor
|
||||
from . consumer import Consumer
|
||||
from . producer import Producer
|
||||
from . consumer_producer import ConsumerProducer
|
||||
|
||||
119
trustgraph-base/trustgraph/base/base_processor.py
Normal file
119
trustgraph-base/trustgraph/base/base_processor.py
Normal file
|
|
@ -0,0 +1,119 @@
|
|||
|
||||
import os
|
||||
import argparse
|
||||
import pulsar
|
||||
import _pulsar
|
||||
import time
|
||||
from prometheus_client import start_http_server, Info
|
||||
|
||||
from .. log_level import LogLevel
|
||||
|
||||
class BaseProcessor:
|
||||
|
||||
default_pulsar_host = os.getenv("PULSAR_HOST", 'pulsar://pulsar:6650')
|
||||
|
||||
def __init__(self, **params):
|
||||
|
||||
self.client = None
|
||||
|
||||
if not hasattr(__class__, "params_metric"):
|
||||
__class__.params_metric = Info(
|
||||
'params', 'Parameters configuration'
|
||||
)
|
||||
|
||||
# FIXME: Maybe outputs information it should not
|
||||
__class__.params_metric.info({
|
||||
k: str(params[k])
|
||||
for k in params
|
||||
})
|
||||
|
||||
pulsar_host = params.get("pulsar_host", self.default_pulsar_host)
|
||||
log_level = params.get("log_level", LogLevel.INFO)
|
||||
|
||||
self.pulsar_host = pulsar_host
|
||||
|
||||
self.client = pulsar.Client(
|
||||
pulsar_host,
|
||||
logger=pulsar.ConsoleLogger(log_level.to_pulsar())
|
||||
)
|
||||
|
||||
def __del__(self):
|
||||
|
||||
if self.client:
|
||||
self.client.close()
|
||||
|
||||
@staticmethod
|
||||
def add_args(parser):
|
||||
|
||||
parser.add_argument(
|
||||
'-p', '--pulsar-host',
|
||||
default=__class__.default_pulsar_host,
|
||||
help=f'Pulsar host (default: {__class__.default_pulsar_host})',
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
'-l', '--log-level',
|
||||
type=LogLevel,
|
||||
default=LogLevel.INFO,
|
||||
choices=list(LogLevel),
|
||||
help=f'Output queue (default: info)'
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
'--metrics',
|
||||
action=argparse.BooleanOptionalAction,
|
||||
default=True,
|
||||
help=f'Metrics enabled (default: true)',
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
'-P', '--metrics-port',
|
||||
type=int,
|
||||
default=8000,
|
||||
help=f'Pulsar host (default: 8000)',
|
||||
)
|
||||
|
||||
def run(self):
|
||||
raise RuntimeError("Something should have implemented the run method")
|
||||
|
||||
@classmethod
|
||||
def start(cls, prog, doc):
|
||||
|
||||
parser = argparse.ArgumentParser(
|
||||
prog=prog,
|
||||
description=doc
|
||||
)
|
||||
|
||||
cls.add_args(parser)
|
||||
|
||||
args = parser.parse_args()
|
||||
args = vars(args)
|
||||
|
||||
print(args)
|
||||
|
||||
if args["metrics"]:
|
||||
start_http_server(args["metrics_port"])
|
||||
|
||||
while True:
|
||||
|
||||
try:
|
||||
|
||||
p = cls(**args)
|
||||
p.run()
|
||||
|
||||
except KeyboardInterrupt:
|
||||
print("Keyboard interrupt.")
|
||||
return
|
||||
|
||||
except _pulsar.Interrupted:
|
||||
print("Pulsar Interrupted.")
|
||||
return
|
||||
|
||||
except Exception as e:
|
||||
|
||||
print(type(e))
|
||||
|
||||
print("Exception:", e, flush=True)
|
||||
print("Will retry...", flush=True)
|
||||
|
||||
time.sleep(4)
|
||||
107
trustgraph-base/trustgraph/base/consumer.py
Normal file
107
trustgraph-base/trustgraph/base/consumer.py
Normal file
|
|
@ -0,0 +1,107 @@
|
|||
|
||||
from pulsar.schema import JsonSchema
|
||||
from prometheus_client import Histogram, Info, Counter, Enum
|
||||
import time
|
||||
|
||||
from . base_processor import BaseProcessor
|
||||
from .. exceptions import TooManyRequests
|
||||
|
||||
class Consumer(BaseProcessor):
|
||||
|
||||
def __init__(self, **params):
|
||||
|
||||
if not hasattr(__class__, "state_metric"):
|
||||
__class__.state_metric = Enum(
|
||||
'processor_state', 'Processor state',
|
||||
states=['starting', 'running', 'stopped']
|
||||
)
|
||||
__class__.state_metric.state('starting')
|
||||
|
||||
__class__.state_metric.state('starting')
|
||||
|
||||
super(Consumer, self).__init__(**params)
|
||||
|
||||
input_queue = params.get("input_queue")
|
||||
subscriber = params.get("subscriber")
|
||||
input_schema = params.get("input_schema")
|
||||
|
||||
if input_schema == None:
|
||||
raise RuntimeError("input_schema must be specified")
|
||||
|
||||
if not hasattr(__class__, "request_metric"):
|
||||
__class__.request_metric = Histogram(
|
||||
'request_latency', 'Request latency (seconds)'
|
||||
)
|
||||
|
||||
if not hasattr(__class__, "pubsub_metric"):
|
||||
__class__.pubsub_metric = Info(
|
||||
'pubsub', 'Pub/sub configuration'
|
||||
)
|
||||
|
||||
if not hasattr(__class__, "processing_metric"):
|
||||
__class__.processing_metric = Counter(
|
||||
'processing_count', 'Processing count', ["status"]
|
||||
)
|
||||
|
||||
__class__.pubsub_metric.info({
|
||||
"input_queue": input_queue,
|
||||
"subscriber": subscriber,
|
||||
"input_schema": input_schema.__name__,
|
||||
})
|
||||
|
||||
self.consumer = self.client.subscribe(
|
||||
input_queue, subscriber,
|
||||
schema=JsonSchema(input_schema),
|
||||
)
|
||||
|
||||
def run(self):
|
||||
|
||||
__class__.state_metric.state('running')
|
||||
|
||||
while True:
|
||||
|
||||
msg = self.consumer.receive()
|
||||
|
||||
try:
|
||||
|
||||
with __class__.request_metric.time():
|
||||
self.handle(msg)
|
||||
|
||||
# Acknowledge successful processing of the message
|
||||
self.consumer.acknowledge(msg)
|
||||
|
||||
__class__.processing_metric.labels(status="success").inc()
|
||||
|
||||
except TooManyRequests:
|
||||
self.consumer.negative_acknowledge(msg)
|
||||
print("TooManyRequests: will retry")
|
||||
__class__.processing_metric.labels(status="rate-limit").inc()
|
||||
time.sleep(5)
|
||||
continue
|
||||
|
||||
except Exception as e:
|
||||
|
||||
print("Exception:", e, flush=True)
|
||||
|
||||
# Message failed to be processed
|
||||
self.consumer.negative_acknowledge(msg)
|
||||
|
||||
__class__.processing_metric.labels(status="error").inc()
|
||||
|
||||
@staticmethod
|
||||
def add_args(parser, default_input_queue, default_subscriber):
|
||||
|
||||
BaseProcessor.add_args(parser)
|
||||
|
||||
parser.add_argument(
|
||||
'-i', '--input-queue',
|
||||
default=default_input_queue,
|
||||
help=f'Input queue (default: {default_input_queue})'
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
'-s', '--subscriber',
|
||||
default=default_subscriber,
|
||||
help=f'Queue subscriber name (default: {default_subscriber})'
|
||||
)
|
||||
|
||||
139
trustgraph-base/trustgraph/base/consumer_producer.py
Normal file
139
trustgraph-base/trustgraph/base/consumer_producer.py
Normal file
|
|
@ -0,0 +1,139 @@
|
|||
|
||||
from pulsar.schema import JsonSchema
|
||||
from prometheus_client import Histogram, Info, Counter, Enum
|
||||
import time
|
||||
|
||||
from . base_processor import BaseProcessor
|
||||
from .. exceptions import TooManyRequests
|
||||
|
||||
# FIXME: Derive from consumer? And producer?
|
||||
|
||||
class ConsumerProducer(BaseProcessor):
|
||||
|
||||
def __init__(self, **params):
|
||||
|
||||
if not hasattr(__class__, "state_metric"):
|
||||
__class__.state_metric = Enum(
|
||||
'processor_state', 'Processor state',
|
||||
states=['starting', 'running', 'stopped']
|
||||
)
|
||||
__class__.state_metric.state('starting')
|
||||
|
||||
__class__.state_metric.state('starting')
|
||||
|
||||
input_queue = params.get("input_queue")
|
||||
output_queue = params.get("output_queue")
|
||||
subscriber = params.get("subscriber")
|
||||
input_schema = params.get("input_schema")
|
||||
output_schema = params.get("output_schema")
|
||||
|
||||
if not hasattr(__class__, "request_metric"):
|
||||
__class__.request_metric = Histogram(
|
||||
'request_latency', 'Request latency (seconds)'
|
||||
)
|
||||
|
||||
if not hasattr(__class__, "output_metric"):
|
||||
__class__.output_metric = Counter(
|
||||
'output_count', 'Output items created'
|
||||
)
|
||||
|
||||
if not hasattr(__class__, "pubsub_metric"):
|
||||
__class__.pubsub_metric = Info(
|
||||
'pubsub', 'Pub/sub configuration'
|
||||
)
|
||||
|
||||
if not hasattr(__class__, "processing_metric"):
|
||||
__class__.processing_metric = Counter(
|
||||
'processing_count', 'Processing count', ["status"]
|
||||
)
|
||||
|
||||
__class__.pubsub_metric.info({
|
||||
"input_queue": input_queue,
|
||||
"output_queue": output_queue,
|
||||
"subscriber": subscriber,
|
||||
"input_schema": input_schema.__name__,
|
||||
"output_schema": output_schema.__name__,
|
||||
})
|
||||
|
||||
super(ConsumerProducer, self).__init__(**params)
|
||||
|
||||
if input_schema == None:
|
||||
raise RuntimeError("input_schema must be specified")
|
||||
|
||||
if output_schema == None:
|
||||
raise RuntimeError("output_schema must be specified")
|
||||
|
||||
self.producer = self.client.create_producer(
|
||||
topic=output_queue,
|
||||
schema=JsonSchema(output_schema),
|
||||
)
|
||||
|
||||
self.consumer = self.client.subscribe(
|
||||
input_queue, subscriber,
|
||||
schema=JsonSchema(input_schema),
|
||||
)
|
||||
|
||||
def run(self):
|
||||
|
||||
__class__.state_metric.state('running')
|
||||
|
||||
while True:
|
||||
|
||||
msg = self.consumer.receive()
|
||||
|
||||
try:
|
||||
|
||||
with __class__.request_metric.time():
|
||||
resp = self.handle(msg)
|
||||
|
||||
# Acknowledge successful processing of the message
|
||||
self.consumer.acknowledge(msg)
|
||||
|
||||
__class__.processing_metric.labels(status="success").inc()
|
||||
|
||||
except TooManyRequests:
|
||||
self.consumer.negative_acknowledge(msg)
|
||||
print("TooManyRequests: will retry")
|
||||
__class__.processing_metric.labels(status="rate-limit").inc()
|
||||
time.sleep(5)
|
||||
continue
|
||||
|
||||
except Exception as e:
|
||||
|
||||
print("Exception:", e, flush=True)
|
||||
|
||||
# Message failed to be processed
|
||||
self.consumer.negative_acknowledge(msg)
|
||||
|
||||
__class__.processing_metric.labels(status="error").inc()
|
||||
|
||||
def send(self, msg, properties={}):
|
||||
self.producer.send(msg, properties)
|
||||
__class__.output_metric.inc()
|
||||
|
||||
@staticmethod
|
||||
def add_args(
|
||||
parser, default_input_queue, default_subscriber,
|
||||
default_output_queue,
|
||||
):
|
||||
|
||||
BaseProcessor.add_args(parser)
|
||||
|
||||
parser.add_argument(
|
||||
'-i', '--input-queue',
|
||||
default=default_input_queue,
|
||||
help=f'Input queue (default: {default_input_queue})'
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
'-s', '--subscriber',
|
||||
default=default_subscriber,
|
||||
help=f'Queue subscriber name (default: {default_subscriber})'
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
'-o', '--output-queue',
|
||||
default=default_output_queue,
|
||||
help=f'Output queue (default: {default_output_queue})'
|
||||
)
|
||||
|
||||
55
trustgraph-base/trustgraph/base/producer.py
Normal file
55
trustgraph-base/trustgraph/base/producer.py
Normal file
|
|
@ -0,0 +1,55 @@
|
|||
|
||||
from pulsar.schema import JsonSchema
|
||||
from prometheus_client import Info, Counter
|
||||
|
||||
from . base_processor import BaseProcessor
|
||||
|
||||
class Producer(BaseProcessor):
|
||||
|
||||
def __init__(self, **params):
|
||||
|
||||
output_queue = params.get("output_queue")
|
||||
output_schema = params.get("output_schema")
|
||||
|
||||
if not hasattr(__class__, "output_metric"):
|
||||
__class__.output_metric = Counter(
|
||||
'output_count', 'Output items created'
|
||||
)
|
||||
|
||||
if not hasattr(__class__, "pubsub_metric"):
|
||||
__class__.pubsub_metric = Info(
|
||||
'pubsub', 'Pub/sub configuration'
|
||||
)
|
||||
|
||||
__class__.pubsub_metric.info({
|
||||
"output_queue": output_queue,
|
||||
"output_schema": output_schema.__name__,
|
||||
})
|
||||
|
||||
super(Producer, self).__init__(**params)
|
||||
|
||||
if output_schema == None:
|
||||
raise RuntimeError("output_schema must be specified")
|
||||
|
||||
self.producer = self.client.create_producer(
|
||||
topic=output_queue,
|
||||
schema=JsonSchema(output_schema),
|
||||
)
|
||||
|
||||
def send(self, msg, properties={}):
|
||||
self.producer.send(msg, properties)
|
||||
__class__.output_metric.inc()
|
||||
|
||||
@staticmethod
|
||||
def add_args(
|
||||
parser, default_input_queue, default_subscriber,
|
||||
default_output_queue,
|
||||
):
|
||||
|
||||
BaseProcessor.add_args(parser)
|
||||
|
||||
parser.add_argument(
|
||||
'-o', '--output-queue',
|
||||
default=default_output_queue,
|
||||
help=f'Output queue (default: {default_output_queue})'
|
||||
)
|
||||
0
trustgraph-base/trustgraph/clients/__init__.py
Normal file
0
trustgraph-base/trustgraph/clients/__init__.py
Normal file
125
trustgraph-base/trustgraph/clients/base.py
Normal file
125
trustgraph-base/trustgraph/clients/base.py
Normal file
|
|
@ -0,0 +1,125 @@
|
|||
|
||||
import pulsar
|
||||
import _pulsar
|
||||
import hashlib
|
||||
import uuid
|
||||
import time
|
||||
from pulsar.schema import JsonSchema
|
||||
|
||||
from .. exceptions import *
|
||||
|
||||
# Default timeout for a request/response. In seconds.
|
||||
DEFAULT_TIMEOUT=300
|
||||
|
||||
# Ugly
|
||||
ERROR=_pulsar.LoggerLevel.Error
|
||||
WARN=_pulsar.LoggerLevel.Warn
|
||||
INFO=_pulsar.LoggerLevel.Info
|
||||
DEBUG=_pulsar.LoggerLevel.Debug
|
||||
|
||||
class BaseClient:
|
||||
|
||||
def __init__(
|
||||
self, log_level=ERROR,
|
||||
subscriber=None,
|
||||
input_queue=None,
|
||||
output_queue=None,
|
||||
input_schema=None,
|
||||
output_schema=None,
|
||||
pulsar_host="pulsar://pulsar:6650",
|
||||
):
|
||||
|
||||
if input_queue == None: raise RuntimeError("Need input_queue")
|
||||
if output_queue == None: raise RuntimeError("Need output_queue")
|
||||
if input_schema == None: raise RuntimeError("Need input_schema")
|
||||
if output_schema == None: raise RuntimeError("Need output_schema")
|
||||
|
||||
if subscriber == None:
|
||||
subscriber = str(uuid.uuid4())
|
||||
|
||||
self.client = pulsar.Client(
|
||||
pulsar_host,
|
||||
logger=pulsar.ConsoleLogger(log_level),
|
||||
)
|
||||
|
||||
self.producer = self.client.create_producer(
|
||||
topic=input_queue,
|
||||
schema=JsonSchema(input_schema),
|
||||
chunking_enabled=True,
|
||||
)
|
||||
|
||||
self.consumer = self.client.subscribe(
|
||||
output_queue, subscriber,
|
||||
schema=JsonSchema(output_schema),
|
||||
)
|
||||
|
||||
self.input_schema = input_schema
|
||||
self.output_schema = output_schema
|
||||
|
||||
def call(self, **args):
|
||||
|
||||
timeout = args.get("timeout", DEFAULT_TIMEOUT)
|
||||
|
||||
if "timeout" in args:
|
||||
del args["timeout"]
|
||||
|
||||
id = str(uuid.uuid4())
|
||||
|
||||
r = self.input_schema(**args)
|
||||
|
||||
end_time = time.time() + timeout
|
||||
|
||||
self.producer.send(r, properties={ "id": id })
|
||||
|
||||
while time.time() < end_time:
|
||||
|
||||
try:
|
||||
msg = self.consumer.receive(timeout_millis=2500)
|
||||
except pulsar.exceptions.Timeout:
|
||||
continue
|
||||
|
||||
mid = msg.properties()["id"]
|
||||
|
||||
if mid == id:
|
||||
|
||||
value = msg.value()
|
||||
|
||||
if value.error:
|
||||
|
||||
self.consumer.acknowledge(msg)
|
||||
|
||||
if value.error.type == "llm-error":
|
||||
raise LlmError(value.error.message)
|
||||
|
||||
elif value.error.type == "too-many-requests":
|
||||
raise TooManyRequests(value.error.message)
|
||||
|
||||
elif value.error.type == "ParseError":
|
||||
raise ParseError(value.error.message)
|
||||
|
||||
else:
|
||||
|
||||
raise RuntimeError(
|
||||
f"{value.error.type}: {value.error.message}"
|
||||
)
|
||||
|
||||
resp = msg.value()
|
||||
self.consumer.acknowledge(msg)
|
||||
return resp
|
||||
|
||||
# Ignore messages with wrong ID
|
||||
self.consumer.acknowledge(msg)
|
||||
|
||||
raise TimeoutError("Timed out waiting for response")
|
||||
|
||||
def __del__(self):
|
||||
|
||||
if hasattr(self, "consumer"):
|
||||
self.consumer.close()
|
||||
|
||||
if hasattr(self, "producer"):
|
||||
self.producer.flush()
|
||||
self.producer.close()
|
||||
|
||||
self.client.close()
|
||||
|
||||
|
|
@ -0,0 +1,45 @@
|
|||
|
||||
import _pulsar
|
||||
|
||||
from .. schema import DocumentEmbeddingsRequest, DocumentEmbeddingsResponse
|
||||
from .. schema import document_embeddings_request_queue
|
||||
from .. schema import document_embeddings_response_queue
|
||||
from . base import BaseClient
|
||||
|
||||
# Ugly
|
||||
ERROR=_pulsar.LoggerLevel.Error
|
||||
WARN=_pulsar.LoggerLevel.Warn
|
||||
INFO=_pulsar.LoggerLevel.Info
|
||||
DEBUG=_pulsar.LoggerLevel.Debug
|
||||
|
||||
class DocumentEmbeddingsClient(BaseClient):
|
||||
|
||||
def __init__(
|
||||
self, log_level=ERROR,
|
||||
subscriber=None,
|
||||
input_queue=None,
|
||||
output_queue=None,
|
||||
pulsar_host="pulsar://pulsar:6650",
|
||||
):
|
||||
|
||||
if input_queue == None:
|
||||
input_queue = document_embeddings_request_queue
|
||||
|
||||
if output_queue == None:
|
||||
output_queue = document_embeddings_response_queue
|
||||
|
||||
super(DocumentEmbeddingsClient, self).__init__(
|
||||
log_level=log_level,
|
||||
subscriber=subscriber,
|
||||
input_queue=input_queue,
|
||||
output_queue=output_queue,
|
||||
pulsar_host=pulsar_host,
|
||||
input_schema=DocumentEmbeddingsRequest,
|
||||
output_schema=DocumentEmbeddingsResponse,
|
||||
)
|
||||
|
||||
def request(self, vectors, limit=10, timeout=300):
|
||||
return self.call(
|
||||
vectors=vectors, limit=limit, timeout=timeout
|
||||
).documents
|
||||
|
||||
46
trustgraph-base/trustgraph/clients/document_rag_client.py
Normal file
46
trustgraph-base/trustgraph/clients/document_rag_client.py
Normal file
|
|
@ -0,0 +1,46 @@
|
|||
|
||||
import _pulsar
|
||||
|
||||
from .. schema import DocumentRagQuery, DocumentRagResponse
|
||||
from .. schema import document_rag_request_queue, document_rag_response_queue
|
||||
from . base import BaseClient
|
||||
|
||||
# Ugly
|
||||
ERROR=_pulsar.LoggerLevel.Error
|
||||
WARN=_pulsar.LoggerLevel.Warn
|
||||
INFO=_pulsar.LoggerLevel.Info
|
||||
DEBUG=_pulsar.LoggerLevel.Debug
|
||||
|
||||
class DocumentRagClient(BaseClient):
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
log_level=ERROR,
|
||||
subscriber=None,
|
||||
input_queue=None,
|
||||
output_queue=None,
|
||||
pulsar_host="pulsar://pulsar:6650",
|
||||
):
|
||||
|
||||
if input_queue == None:
|
||||
input_queue = document_rag_request_queue
|
||||
|
||||
if output_queue == None:
|
||||
output_queue = document_rag_response_queue
|
||||
|
||||
super(DocumentRagClient, self).__init__(
|
||||
log_level=log_level,
|
||||
subscriber=subscriber,
|
||||
input_queue=input_queue,
|
||||
output_queue=output_queue,
|
||||
pulsar_host=pulsar_host,
|
||||
input_schema=DocumentRagQuery,
|
||||
output_schema=DocumentRagResponse,
|
||||
)
|
||||
|
||||
def request(self, query, timeout=500):
|
||||
|
||||
return self.call(
|
||||
query=query, timeout=timeout
|
||||
).response
|
||||
|
||||
44
trustgraph-base/trustgraph/clients/embeddings_client.py
Normal file
44
trustgraph-base/trustgraph/clients/embeddings_client.py
Normal file
|
|
@ -0,0 +1,44 @@
|
|||
|
||||
from pulsar.schema import JsonSchema
|
||||
from .. schema import EmbeddingsRequest, EmbeddingsResponse
|
||||
from .. schema import embeddings_request_queue, embeddings_response_queue
|
||||
from . base import BaseClient
|
||||
|
||||
import _pulsar
|
||||
|
||||
# Ugly
|
||||
ERROR=_pulsar.LoggerLevel.Error
|
||||
WARN=_pulsar.LoggerLevel.Warn
|
||||
INFO=_pulsar.LoggerLevel.Info
|
||||
DEBUG=_pulsar.LoggerLevel.Debug
|
||||
|
||||
class EmbeddingsClient(BaseClient):
|
||||
|
||||
def __init__(
|
||||
self, log_level=ERROR,
|
||||
input_queue=None,
|
||||
output_queue=None,
|
||||
subscriber=None,
|
||||
pulsar_host="pulsar://pulsar:6650",
|
||||
):
|
||||
|
||||
if input_queue == None:
|
||||
input_queue=embeddings_request_queue
|
||||
|
||||
if output_queue == None:
|
||||
output_queue=embeddings_response_queue
|
||||
|
||||
super(EmbeddingsClient, self).__init__(
|
||||
log_level=log_level,
|
||||
subscriber=subscriber,
|
||||
input_queue=input_queue,
|
||||
output_queue=output_queue,
|
||||
pulsar_host=pulsar_host,
|
||||
input_schema=EmbeddingsRequest,
|
||||
output_schema=EmbeddingsResponse,
|
||||
)
|
||||
|
||||
def request(self, text, timeout=300):
|
||||
return self.call(text=text, timeout=timeout).vectors
|
||||
|
||||
|
||||
|
|
@ -0,0 +1,45 @@
|
|||
|
||||
import _pulsar
|
||||
|
||||
from .. schema import GraphEmbeddingsRequest, GraphEmbeddingsResponse
|
||||
from .. schema import graph_embeddings_request_queue
|
||||
from .. schema import graph_embeddings_response_queue
|
||||
from . base import BaseClient
|
||||
|
||||
# Ugly
|
||||
ERROR=_pulsar.LoggerLevel.Error
|
||||
WARN=_pulsar.LoggerLevel.Warn
|
||||
INFO=_pulsar.LoggerLevel.Info
|
||||
DEBUG=_pulsar.LoggerLevel.Debug
|
||||
|
||||
class GraphEmbeddingsClient(BaseClient):
|
||||
|
||||
def __init__(
|
||||
self, log_level=ERROR,
|
||||
subscriber=None,
|
||||
input_queue=None,
|
||||
output_queue=None,
|
||||
pulsar_host="pulsar://pulsar:6650",
|
||||
):
|
||||
|
||||
if input_queue == None:
|
||||
input_queue = graph_embeddings_request_queue
|
||||
|
||||
if output_queue == None:
|
||||
output_queue = graph_embeddings_response_queue
|
||||
|
||||
super(GraphEmbeddingsClient, self).__init__(
|
||||
log_level=log_level,
|
||||
subscriber=subscriber,
|
||||
input_queue=input_queue,
|
||||
output_queue=output_queue,
|
||||
pulsar_host=pulsar_host,
|
||||
input_schema=GraphEmbeddingsRequest,
|
||||
output_schema=GraphEmbeddingsResponse,
|
||||
)
|
||||
|
||||
def request(self, vectors, limit=10, timeout=300):
|
||||
return self.call(
|
||||
vectors=vectors, limit=limit, timeout=timeout
|
||||
).entities
|
||||
|
||||
46
trustgraph-base/trustgraph/clients/graph_rag_client.py
Normal file
46
trustgraph-base/trustgraph/clients/graph_rag_client.py
Normal file
|
|
@ -0,0 +1,46 @@
|
|||
|
||||
import _pulsar
|
||||
|
||||
from .. schema import GraphRagQuery, GraphRagResponse
|
||||
from .. schema import graph_rag_request_queue, graph_rag_response_queue
|
||||
from . base import BaseClient
|
||||
|
||||
# Ugly
|
||||
ERROR=_pulsar.LoggerLevel.Error
|
||||
WARN=_pulsar.LoggerLevel.Warn
|
||||
INFO=_pulsar.LoggerLevel.Info
|
||||
DEBUG=_pulsar.LoggerLevel.Debug
|
||||
|
||||
class GraphRagClient(BaseClient):
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
log_level=ERROR,
|
||||
subscriber=None,
|
||||
input_queue=None,
|
||||
output_queue=None,
|
||||
pulsar_host="pulsar://pulsar:6650",
|
||||
):
|
||||
|
||||
if input_queue == None:
|
||||
input_queue = graph_rag_request_queue
|
||||
|
||||
if output_queue == None:
|
||||
output_queue = graph_rag_response_queue
|
||||
|
||||
super(GraphRagClient, self).__init__(
|
||||
log_level=log_level,
|
||||
subscriber=subscriber,
|
||||
input_queue=input_queue,
|
||||
output_queue=output_queue,
|
||||
pulsar_host=pulsar_host,
|
||||
input_schema=GraphRagQuery,
|
||||
output_schema=GraphRagResponse,
|
||||
)
|
||||
|
||||
def request(self, query, timeout=500):
|
||||
|
||||
return self.call(
|
||||
query=query, timeout=timeout
|
||||
).response
|
||||
|
||||
40
trustgraph-base/trustgraph/clients/llm_client.py
Normal file
40
trustgraph-base/trustgraph/clients/llm_client.py
Normal file
|
|
@ -0,0 +1,40 @@
|
|||
|
||||
import _pulsar
|
||||
|
||||
from .. schema import TextCompletionRequest, TextCompletionResponse
|
||||
from .. schema import text_completion_request_queue
|
||||
from .. schema import text_completion_response_queue
|
||||
from . base import BaseClient
|
||||
|
||||
# Ugly
|
||||
ERROR=_pulsar.LoggerLevel.Error
|
||||
WARN=_pulsar.LoggerLevel.Warn
|
||||
INFO=_pulsar.LoggerLevel.Info
|
||||
DEBUG=_pulsar.LoggerLevel.Debug
|
||||
|
||||
class LlmClient(BaseClient):
|
||||
|
||||
def __init__(
|
||||
self, log_level=ERROR,
|
||||
subscriber=None,
|
||||
input_queue=None,
|
||||
output_queue=None,
|
||||
pulsar_host="pulsar://pulsar:6650",
|
||||
):
|
||||
|
||||
if input_queue is None: input_queue = text_completion_request_queue
|
||||
if output_queue is None: output_queue = text_completion_response_queue
|
||||
|
||||
super(LlmClient, self).__init__(
|
||||
log_level=log_level,
|
||||
subscriber=subscriber,
|
||||
input_queue=input_queue,
|
||||
output_queue=output_queue,
|
||||
pulsar_host=pulsar_host,
|
||||
input_schema=TextCompletionRequest,
|
||||
output_schema=TextCompletionResponse,
|
||||
)
|
||||
|
||||
def request(self, prompt, timeout=300):
|
||||
return self.call(prompt=prompt, timeout=timeout).response
|
||||
|
||||
100
trustgraph-base/trustgraph/clients/prompt_client.py
Normal file
100
trustgraph-base/trustgraph/clients/prompt_client.py
Normal file
|
|
@ -0,0 +1,100 @@
|
|||
|
||||
import _pulsar
|
||||
|
||||
from .. schema import PromptRequest, PromptResponse, Fact, RowSchema, Field
|
||||
from .. schema import prompt_request_queue
|
||||
from .. schema import prompt_response_queue
|
||||
from . base import BaseClient
|
||||
|
||||
# Ugly
|
||||
ERROR=_pulsar.LoggerLevel.Error
|
||||
WARN=_pulsar.LoggerLevel.Warn
|
||||
INFO=_pulsar.LoggerLevel.Info
|
||||
DEBUG=_pulsar.LoggerLevel.Debug
|
||||
|
||||
class PromptClient(BaseClient):
|
||||
|
||||
def __init__(
|
||||
self, log_level=ERROR,
|
||||
subscriber=None,
|
||||
input_queue=None,
|
||||
output_queue=None,
|
||||
pulsar_host="pulsar://pulsar:6650",
|
||||
):
|
||||
|
||||
if input_queue == None:
|
||||
input_queue = prompt_request_queue
|
||||
|
||||
if output_queue == None:
|
||||
output_queue = prompt_response_queue
|
||||
|
||||
super(PromptClient, self).__init__(
|
||||
log_level=log_level,
|
||||
subscriber=subscriber,
|
||||
input_queue=input_queue,
|
||||
output_queue=output_queue,
|
||||
pulsar_host=pulsar_host,
|
||||
input_schema=PromptRequest,
|
||||
output_schema=PromptResponse,
|
||||
)
|
||||
|
||||
def request_definitions(self, chunk, timeout=300):
|
||||
|
||||
return self.call(
|
||||
kind="extract-definitions", chunk=chunk,
|
||||
timeout=timeout
|
||||
).definitions
|
||||
|
||||
def request_topics(self, chunk, timeout=300):
|
||||
|
||||
return self.call(
|
||||
kind="extract-topics", chunk=chunk,
|
||||
timeout=timeout
|
||||
).topics
|
||||
|
||||
def request_relationships(self, chunk, timeout=300):
|
||||
|
||||
return self.call(
|
||||
kind="extract-relationships", chunk=chunk,
|
||||
timeout=timeout
|
||||
).relationships
|
||||
|
||||
def request_rows(self, schema, chunk, timeout=300):
|
||||
|
||||
return self.call(
|
||||
kind="extract-rows", chunk=chunk,
|
||||
row_schema=RowSchema(
|
||||
name=schema.name,
|
||||
description=schema.description,
|
||||
fields=[
|
||||
Field(
|
||||
name=f.name, type=str(f.type), size=f.size,
|
||||
primary=f.primary, description=f.description,
|
||||
)
|
||||
for f in schema.fields
|
||||
]
|
||||
),
|
||||
timeout=timeout
|
||||
).rows
|
||||
|
||||
def request_kg_prompt(self, query, kg, timeout=300):
|
||||
|
||||
return self.call(
|
||||
kind="kg-prompt",
|
||||
query=query,
|
||||
kg=[
|
||||
Fact(s=v[0], p=v[1], o=v[2])
|
||||
for v in kg
|
||||
],
|
||||
timeout=timeout
|
||||
).answer
|
||||
|
||||
def request_document_prompt(self, query, documents, timeout=300):
|
||||
|
||||
return self.call(
|
||||
kind="document-prompt",
|
||||
query=query,
|
||||
documents=documents,
|
||||
timeout=timeout
|
||||
).answer
|
||||
|
||||
59
trustgraph-base/trustgraph/clients/triples_query_client.py
Normal file
59
trustgraph-base/trustgraph/clients/triples_query_client.py
Normal file
|
|
@ -0,0 +1,59 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
import _pulsar
|
||||
|
||||
from .. schema import TriplesQueryRequest, TriplesQueryResponse, Value
|
||||
from .. schema import triples_request_queue
|
||||
from .. schema import triples_response_queue
|
||||
from . base import BaseClient
|
||||
|
||||
# Ugly
|
||||
ERROR=_pulsar.LoggerLevel.Error
|
||||
WARN=_pulsar.LoggerLevel.Warn
|
||||
INFO=_pulsar.LoggerLevel.Info
|
||||
DEBUG=_pulsar.LoggerLevel.Debug
|
||||
|
||||
class TriplesQueryClient(BaseClient):
|
||||
|
||||
def __init__(
|
||||
self, log_level=ERROR,
|
||||
subscriber=None,
|
||||
input_queue=None,
|
||||
output_queue=None,
|
||||
pulsar_host="pulsar://pulsar:6650",
|
||||
):
|
||||
|
||||
if input_queue == None:
|
||||
input_queue = triples_request_queue
|
||||
|
||||
if output_queue == None:
|
||||
output_queue = triples_response_queue
|
||||
|
||||
super(TriplesQueryClient, self).__init__(
|
||||
log_level=log_level,
|
||||
subscriber=subscriber,
|
||||
input_queue=input_queue,
|
||||
output_queue=output_queue,
|
||||
pulsar_host=pulsar_host,
|
||||
input_schema=TriplesQueryRequest,
|
||||
output_schema=TriplesQueryResponse,
|
||||
)
|
||||
|
||||
def create_value(self, ent):
|
||||
|
||||
if ent == None: return None
|
||||
|
||||
if ent.startswith("http://") or ent.startswith("https://"):
|
||||
return Value(value=ent, is_uri=True)
|
||||
|
||||
return Value(value=ent, is_uri=False)
|
||||
|
||||
def request(self, s, p, o, limit=10, timeout=60):
|
||||
return self.call(
|
||||
s=self.create_value(s),
|
||||
p=self.create_value(p),
|
||||
o=self.create_value(o),
|
||||
limit=limit,
|
||||
timeout=timeout,
|
||||
).triples
|
||||
|
||||
14
trustgraph-base/trustgraph/exceptions.py
Normal file
14
trustgraph-base/trustgraph/exceptions.py
Normal file
|
|
@ -0,0 +1,14 @@
|
|||
|
||||
class TooManyRequests(Exception):
|
||||
pass
|
||||
|
||||
class LlmError(Exception):
|
||||
pass
|
||||
|
||||
class ParseError(Exception):
|
||||
pass
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
20
trustgraph-base/trustgraph/log_level.py
Normal file
20
trustgraph-base/trustgraph/log_level.py
Normal file
|
|
@ -0,0 +1,20 @@
|
|||
|
||||
from enum import Enum
|
||||
import _pulsar
|
||||
|
||||
class LogLevel(Enum):
|
||||
DEBUG = 'debug'
|
||||
INFO = 'info'
|
||||
WARN = 'warn'
|
||||
ERROR = 'error'
|
||||
|
||||
def __str__(self):
|
||||
return self.value
|
||||
|
||||
def to_pulsar(self):
|
||||
if self == LogLevel.DEBUG: return _pulsar.LoggerLevel.Debug
|
||||
if self == LogLevel.INFO: return _pulsar.LoggerLevel.Info
|
||||
if self == LogLevel.WARN: return _pulsar.LoggerLevel.Warn
|
||||
if self == LogLevel.ERROR: return _pulsar.LoggerLevel.Error
|
||||
raise RuntimeError("Log level mismatch")
|
||||
|
||||
0
trustgraph-base/trustgraph/objects/__init__.py
Normal file
0
trustgraph-base/trustgraph/objects/__init__.py
Normal file
72
trustgraph-base/trustgraph/objects/field.py
Normal file
72
trustgraph-base/trustgraph/objects/field.py
Normal file
|
|
@ -0,0 +1,72 @@
|
|||
|
||||
from dataclasses import dataclass
|
||||
from enum import Enum
|
||||
|
||||
class FieldType(Enum):
|
||||
STRING = 0
|
||||
INT = 1
|
||||
LONG = 2
|
||||
BOOL = 3
|
||||
FLOAT = 4
|
||||
DOUBLE = 5
|
||||
|
||||
def __str__(self):
|
||||
return self.name.lower()
|
||||
|
||||
@dataclass
|
||||
class Field:
|
||||
name: str
|
||||
size: int = -1
|
||||
primary: bool = False
|
||||
type: str = "undefined"
|
||||
description: str = ""
|
||||
|
||||
@staticmethod
|
||||
def parse(defn):
|
||||
|
||||
if defn == "" or defn is None:
|
||||
raise RuntimeError("Field definition cannot be empty")
|
||||
|
||||
parts = defn.split(":")
|
||||
|
||||
if len(parts) == 0:
|
||||
raise RuntimeError("Field definition cannot be empty")
|
||||
|
||||
if len(parts) == 1: parts.append("string")
|
||||
if len(parts) == 2: parts.append("0")
|
||||
if len(parts) == 3: parts.append("")
|
||||
if len(parts) == 4: parts.append("")
|
||||
|
||||
name, type, size, pri, description = parts
|
||||
|
||||
size = int(size)
|
||||
|
||||
try:
|
||||
type = FieldType[type.upper()]
|
||||
except:
|
||||
raise RuntimeError(f"Field type {type} is not known")
|
||||
|
||||
pri = True if pri == "pri" else False
|
||||
|
||||
return Field(
|
||||
name=name, type=type, size=size, primary=pri,
|
||||
description=description
|
||||
)
|
||||
|
||||
def __repr__(self):
|
||||
name = self.name
|
||||
type = self.type
|
||||
size = self.size
|
||||
pri = "pri" if self.primary else ""
|
||||
description = self.description
|
||||
|
||||
return f"{name}:{type}:{size}:{pri}:{description}"
|
||||
|
||||
def __str__(self):
|
||||
name = self.name
|
||||
type = self.type
|
||||
size = self.size
|
||||
pri = "pri" if self.primary else ""
|
||||
description = self.description
|
||||
|
||||
return f"{name}:{type}:{size}:{pri}:{description}"
|
||||
8
trustgraph-base/trustgraph/objects/object.py
Normal file
8
trustgraph-base/trustgraph/objects/object.py
Normal file
|
|
@ -0,0 +1,8 @@
|
|||
|
||||
class Schema:
|
||||
def __init__(self, name, description, fields):
|
||||
self.name = name
|
||||
self.description = description
|
||||
self.fields = fields
|
||||
|
||||
|
||||
6
trustgraph-base/trustgraph/rdf.py
Normal file
6
trustgraph-base/trustgraph/rdf.py
Normal file
|
|
@ -0,0 +1,6 @@
|
|||
|
||||
RDF_LABEL = "http://www.w3.org/2000/01/rdf-schema#label"
|
||||
DEFINITION = "http://www.w3.org/2004/02/skos/core#definition"
|
||||
|
||||
TRUSTGRAPH_ENTITIES = "http://trustgraph.ai/e/"
|
||||
|
||||
12
trustgraph-base/trustgraph/schema/__init__.py
Normal file
12
trustgraph-base/trustgraph/schema/__init__.py
Normal file
|
|
@ -0,0 +1,12 @@
|
|||
|
||||
from . types import *
|
||||
from . prompt import *
|
||||
from . documents import *
|
||||
from . models import *
|
||||
from . object import *
|
||||
from . topic import *
|
||||
from . graph import *
|
||||
from . retrieval import *
|
||||
|
||||
|
||||
|
||||
68
trustgraph-base/trustgraph/schema/documents.py
Normal file
68
trustgraph-base/trustgraph/schema/documents.py
Normal file
|
|
@ -0,0 +1,68 @@
|
|||
|
||||
from pulsar.schema import Record, Bytes, String, Boolean, Integer, Array, Double
|
||||
from . topic import topic
|
||||
from . types import Error
|
||||
|
||||
class Source(Record):
|
||||
source = String()
|
||||
id = String()
|
||||
title = String()
|
||||
|
||||
############################################################################
|
||||
|
||||
# PDF docs etc.
|
||||
class Document(Record):
|
||||
source = Source()
|
||||
data = Bytes()
|
||||
|
||||
document_ingest_queue = topic('document-load')
|
||||
|
||||
############################################################################
|
||||
|
||||
# Text documents / text from PDF
|
||||
|
||||
class TextDocument(Record):
|
||||
source = Source()
|
||||
text = Bytes()
|
||||
|
||||
text_ingest_queue = topic('text-document-load')
|
||||
|
||||
############################################################################
|
||||
|
||||
# Chunks of text
|
||||
|
||||
class Chunk(Record):
|
||||
source = Source()
|
||||
chunk = Bytes()
|
||||
|
||||
chunk_ingest_queue = topic('chunk-load')
|
||||
|
||||
############################################################################
|
||||
|
||||
# Chunk embeddings are an embeddings associated with a text chunk
|
||||
|
||||
class ChunkEmbeddings(Record):
|
||||
source = Source()
|
||||
vectors = Array(Array(Double()))
|
||||
chunk = Bytes()
|
||||
|
||||
chunk_embeddings_ingest_queue = topic('chunk-embeddings-load')
|
||||
|
||||
############################################################################
|
||||
|
||||
# Doc embeddings query
|
||||
|
||||
class DocumentEmbeddingsRequest(Record):
|
||||
vectors = Array(Array(Double()))
|
||||
limit = Integer()
|
||||
|
||||
class DocumentEmbeddingsResponse(Record):
|
||||
error = Error()
|
||||
documents = Array(Bytes())
|
||||
|
||||
document_embeddings_request_queue = topic(
|
||||
'doc-embeddings', kind='non-persistent', namespace='request'
|
||||
)
|
||||
document_embeddings_response_queue = topic(
|
||||
'doc-embeddings-response', kind='non-persistent', namespace='response',
|
||||
)
|
||||
69
trustgraph-base/trustgraph/schema/graph.py
Normal file
69
trustgraph-base/trustgraph/schema/graph.py
Normal file
|
|
@ -0,0 +1,69 @@
|
|||
|
||||
from pulsar.schema import Record, Bytes, String, Boolean, Integer, Array, Double
|
||||
|
||||
from . documents import Source
|
||||
from . types import Error, Value
|
||||
from . topic import topic
|
||||
|
||||
############################################################################
|
||||
|
||||
# Graph embeddings are embeddings associated with a graph entity
|
||||
|
||||
class GraphEmbeddings(Record):
|
||||
source = Source()
|
||||
vectors = Array(Array(Double()))
|
||||
entity = Value()
|
||||
|
||||
graph_embeddings_store_queue = topic('graph-embeddings-store')
|
||||
|
||||
############################################################################
|
||||
|
||||
# Graph embeddings query
|
||||
|
||||
class GraphEmbeddingsRequest(Record):
|
||||
vectors = Array(Array(Double()))
|
||||
limit = Integer()
|
||||
|
||||
class GraphEmbeddingsResponse(Record):
|
||||
error = Error()
|
||||
entities = Array(Value())
|
||||
|
||||
graph_embeddings_request_queue = topic(
|
||||
'graph-embeddings', kind='non-persistent', namespace='request'
|
||||
)
|
||||
graph_embeddings_response_queue = topic(
|
||||
'graph-embeddings-response', kind='non-persistent', namespace='response',
|
||||
)
|
||||
|
||||
############################################################################
|
||||
|
||||
# Graph triples
|
||||
|
||||
class Triple(Record):
|
||||
source = Source()
|
||||
s = Value()
|
||||
p = Value()
|
||||
o = Value()
|
||||
|
||||
triples_store_queue = topic('triples-store')
|
||||
|
||||
############################################################################
|
||||
|
||||
# Triples query
|
||||
|
||||
class TriplesQueryRequest(Record):
|
||||
s = Value()
|
||||
p = Value()
|
||||
o = Value()
|
||||
limit = Integer()
|
||||
|
||||
class TriplesQueryResponse(Record):
|
||||
error = Error()
|
||||
triples = Array(Triple())
|
||||
|
||||
triples_request_queue = topic(
|
||||
'triples', kind='non-persistent', namespace='request'
|
||||
)
|
||||
triples_response_queue = topic(
|
||||
'triples-response', kind='non-persistent', namespace='response',
|
||||
)
|
||||
44
trustgraph-base/trustgraph/schema/models.py
Normal file
44
trustgraph-base/trustgraph/schema/models.py
Normal file
|
|
@ -0,0 +1,44 @@
|
|||
|
||||
from pulsar.schema import Record, String, Array, Double, Integer
|
||||
|
||||
from . topic import topic
|
||||
from . types import Error
|
||||
|
||||
############################################################################
|
||||
|
||||
# LLM text completion
|
||||
|
||||
class TextCompletionRequest(Record):
|
||||
prompt = String()
|
||||
|
||||
class TextCompletionResponse(Record):
|
||||
error = Error()
|
||||
response = String()
|
||||
in_token = Integer()
|
||||
out_token = Integer()
|
||||
model = String()
|
||||
|
||||
text_completion_request_queue = topic(
|
||||
'text-completion', kind='non-persistent', namespace='request'
|
||||
)
|
||||
text_completion_response_queue = topic(
|
||||
'text-completion-response', kind='non-persistent', namespace='response',
|
||||
)
|
||||
|
||||
############################################################################
|
||||
|
||||
# Embeddings
|
||||
|
||||
class EmbeddingsRequest(Record):
|
||||
text = String()
|
||||
|
||||
class EmbeddingsResponse(Record):
|
||||
error = Error()
|
||||
vectors = Array(Array(Double()))
|
||||
|
||||
embeddings_request_queue = topic(
|
||||
'embeddings', kind='non-persistent', namespace='request'
|
||||
)
|
||||
embeddings_response_queue = topic(
|
||||
'embeddings-response', kind='non-persistent', namespace='response'
|
||||
)
|
||||
33
trustgraph-base/trustgraph/schema/object.py
Normal file
33
trustgraph-base/trustgraph/schema/object.py
Normal file
|
|
@ -0,0 +1,33 @@
|
|||
|
||||
from pulsar.schema import Record, Bytes, String, Boolean, Integer, Array
|
||||
from pulsar.schema import Double, Map
|
||||
|
||||
from . documents import Source
|
||||
from . types import Value, RowSchema
|
||||
from . topic import topic
|
||||
|
||||
############################################################################
|
||||
|
||||
# Object embeddings are embeddings associated with the primary key of an
|
||||
# object
|
||||
|
||||
class ObjectEmbeddings(Record):
|
||||
source = Source()
|
||||
vectors = Array(Array(Double()))
|
||||
name = String()
|
||||
key_name = String()
|
||||
id = String()
|
||||
|
||||
object_embeddings_store_queue = topic('object-embeddings-store')
|
||||
|
||||
############################################################################
|
||||
|
||||
# Stores rows of information
|
||||
|
||||
class Rows(Record):
|
||||
source = Source()
|
||||
row_schema = RowSchema()
|
||||
rows = Array(Map(String()))
|
||||
|
||||
rows_store_queue = topic('rows-store')
|
||||
|
||||
65
trustgraph-base/trustgraph/schema/prompt.py
Normal file
65
trustgraph-base/trustgraph/schema/prompt.py
Normal file
|
|
@ -0,0 +1,65 @@
|
|||
|
||||
from pulsar.schema import Record, Bytes, String, Boolean, Array, Map, Integer
|
||||
|
||||
from . topic import topic
|
||||
from . types import Error, RowSchema
|
||||
|
||||
############################################################################
|
||||
|
||||
# Prompt services, abstract the prompt generation
|
||||
|
||||
class Definition(Record):
|
||||
name = String()
|
||||
definition = String()
|
||||
|
||||
class Topic(Record):
|
||||
name = String()
|
||||
definition = String()
|
||||
|
||||
class Relationship(Record):
|
||||
s = String()
|
||||
p = String()
|
||||
o = String()
|
||||
o_entity = Boolean()
|
||||
|
||||
class Fact(Record):
|
||||
s = String()
|
||||
p = String()
|
||||
o = String()
|
||||
|
||||
# extract-definitions:
|
||||
# chunk -> definitions
|
||||
# extract-relationships:
|
||||
# chunk -> relationships
|
||||
# kg-prompt:
|
||||
# query, triples -> answer
|
||||
# document-prompt:
|
||||
# query, documents -> answer
|
||||
# extract-rows
|
||||
# schema, chunk -> rows
|
||||
|
||||
class PromptRequest(Record):
|
||||
kind = String()
|
||||
chunk = String()
|
||||
query = String()
|
||||
kg = Array(Fact())
|
||||
documents = Array(Bytes())
|
||||
row_schema = RowSchema()
|
||||
|
||||
class PromptResponse(Record):
|
||||
error = Error()
|
||||
answer = String()
|
||||
definitions = Array(Definition())
|
||||
topics = Array(Topic())
|
||||
relationships = Array(Relationship())
|
||||
rows = Array(Map(String()))
|
||||
|
||||
prompt_request_queue = topic(
|
||||
'prompt', kind='non-persistent', namespace='request'
|
||||
)
|
||||
prompt_response_queue = topic(
|
||||
'prompt-response', kind='non-persistent', namespace='response'
|
||||
)
|
||||
|
||||
############################################################################
|
||||
|
||||
40
trustgraph-base/trustgraph/schema/retrieval.py
Normal file
40
trustgraph-base/trustgraph/schema/retrieval.py
Normal file
|
|
@ -0,0 +1,40 @@
|
|||
|
||||
from pulsar.schema import Record, Bytes, String, Boolean, Integer, Array, Double
|
||||
from . topic import topic
|
||||
from . types import Error, Value
|
||||
|
||||
############################################################################
|
||||
|
||||
# Graph RAG text retrieval
|
||||
|
||||
class GraphRagQuery(Record):
|
||||
query = String()
|
||||
|
||||
class GraphRagResponse(Record):
|
||||
error = Error()
|
||||
response = String()
|
||||
|
||||
graph_rag_request_queue = topic(
|
||||
'graph-rag', kind='non-persistent', namespace='request'
|
||||
)
|
||||
graph_rag_response_queue = topic(
|
||||
'graph-rag-response', kind='non-persistent', namespace='response'
|
||||
)
|
||||
|
||||
############################################################################
|
||||
|
||||
# Document RAG text retrieval
|
||||
|
||||
class DocumentRagQuery(Record):
|
||||
query = String()
|
||||
|
||||
class DocumentRagResponse(Record):
|
||||
error = Error()
|
||||
response = String()
|
||||
|
||||
document_rag_request_queue = topic(
|
||||
'doc-rag', kind='non-persistent', namespace='request'
|
||||
)
|
||||
document_rag_response_queue = topic(
|
||||
'doc-rag-response', kind='non-persistent', namespace='response'
|
||||
)
|
||||
4
trustgraph-base/trustgraph/schema/topic.py
Normal file
4
trustgraph-base/trustgraph/schema/topic.py
Normal file
|
|
@ -0,0 +1,4 @@
|
|||
|
||||
def topic(topic, kind='persistent', tenant='tg', namespace='flow'):
|
||||
return f"{kind}://{tenant}/{namespace}/{topic}"
|
||||
|
||||
25
trustgraph-base/trustgraph/schema/types.py
Normal file
25
trustgraph-base/trustgraph/schema/types.py
Normal file
|
|
@ -0,0 +1,25 @@
|
|||
|
||||
from pulsar.schema import Record, String, Boolean, Array, Integer
|
||||
|
||||
class Error(Record):
|
||||
type = String()
|
||||
message = String()
|
||||
|
||||
class Value(Record):
|
||||
value = String()
|
||||
is_uri = Boolean()
|
||||
type = String()
|
||||
|
||||
class Field(Record):
|
||||
name = String()
|
||||
# int, string, long, bool, float, double
|
||||
type = String()
|
||||
size = Integer()
|
||||
primary = Boolean()
|
||||
description = String()
|
||||
|
||||
class RowSchema(Record):
|
||||
name = String()
|
||||
description = String()
|
||||
fields = Array(Field())
|
||||
|
||||
Loading…
Add table
Add a link
Reference in a new issue