mirror of
https://github.com/trustgraph-ai/trustgraph.git
synced 2026-05-20 04:45:12 +02:00
Feature/pkgsplit (#83)
* Starting to spawn base package * More package hacking * Bedrock and VertexAI * Parquet split * Updated templates * Utils
This commit is contained in:
parent
3fb75c617b
commit
9b91d5eee3
262 changed files with 630 additions and 420 deletions
0
trustgraph-base/trustgraph/clients/__init__.py
Normal file
0
trustgraph-base/trustgraph/clients/__init__.py
Normal file
125
trustgraph-base/trustgraph/clients/base.py
Normal file
125
trustgraph-base/trustgraph/clients/base.py
Normal file
|
|
@ -0,0 +1,125 @@
|
|||
|
||||
import pulsar
|
||||
import _pulsar
|
||||
import hashlib
|
||||
import uuid
|
||||
import time
|
||||
from pulsar.schema import JsonSchema
|
||||
|
||||
from .. exceptions import *
|
||||
|
||||
# Default timeout for a request/response. In seconds.
|
||||
DEFAULT_TIMEOUT=300
|
||||
|
||||
# Ugly
|
||||
ERROR=_pulsar.LoggerLevel.Error
|
||||
WARN=_pulsar.LoggerLevel.Warn
|
||||
INFO=_pulsar.LoggerLevel.Info
|
||||
DEBUG=_pulsar.LoggerLevel.Debug
|
||||
|
||||
class BaseClient:
|
||||
|
||||
def __init__(
|
||||
self, log_level=ERROR,
|
||||
subscriber=None,
|
||||
input_queue=None,
|
||||
output_queue=None,
|
||||
input_schema=None,
|
||||
output_schema=None,
|
||||
pulsar_host="pulsar://pulsar:6650",
|
||||
):
|
||||
|
||||
if input_queue == None: raise RuntimeError("Need input_queue")
|
||||
if output_queue == None: raise RuntimeError("Need output_queue")
|
||||
if input_schema == None: raise RuntimeError("Need input_schema")
|
||||
if output_schema == None: raise RuntimeError("Need output_schema")
|
||||
|
||||
if subscriber == None:
|
||||
subscriber = str(uuid.uuid4())
|
||||
|
||||
self.client = pulsar.Client(
|
||||
pulsar_host,
|
||||
logger=pulsar.ConsoleLogger(log_level),
|
||||
)
|
||||
|
||||
self.producer = self.client.create_producer(
|
||||
topic=input_queue,
|
||||
schema=JsonSchema(input_schema),
|
||||
chunking_enabled=True,
|
||||
)
|
||||
|
||||
self.consumer = self.client.subscribe(
|
||||
output_queue, subscriber,
|
||||
schema=JsonSchema(output_schema),
|
||||
)
|
||||
|
||||
self.input_schema = input_schema
|
||||
self.output_schema = output_schema
|
||||
|
||||
def call(self, **args):
|
||||
|
||||
timeout = args.get("timeout", DEFAULT_TIMEOUT)
|
||||
|
||||
if "timeout" in args:
|
||||
del args["timeout"]
|
||||
|
||||
id = str(uuid.uuid4())
|
||||
|
||||
r = self.input_schema(**args)
|
||||
|
||||
end_time = time.time() + timeout
|
||||
|
||||
self.producer.send(r, properties={ "id": id })
|
||||
|
||||
while time.time() < end_time:
|
||||
|
||||
try:
|
||||
msg = self.consumer.receive(timeout_millis=2500)
|
||||
except pulsar.exceptions.Timeout:
|
||||
continue
|
||||
|
||||
mid = msg.properties()["id"]
|
||||
|
||||
if mid == id:
|
||||
|
||||
value = msg.value()
|
||||
|
||||
if value.error:
|
||||
|
||||
self.consumer.acknowledge(msg)
|
||||
|
||||
if value.error.type == "llm-error":
|
||||
raise LlmError(value.error.message)
|
||||
|
||||
elif value.error.type == "too-many-requests":
|
||||
raise TooManyRequests(value.error.message)
|
||||
|
||||
elif value.error.type == "ParseError":
|
||||
raise ParseError(value.error.message)
|
||||
|
||||
else:
|
||||
|
||||
raise RuntimeError(
|
||||
f"{value.error.type}: {value.error.message}"
|
||||
)
|
||||
|
||||
resp = msg.value()
|
||||
self.consumer.acknowledge(msg)
|
||||
return resp
|
||||
|
||||
# Ignore messages with wrong ID
|
||||
self.consumer.acknowledge(msg)
|
||||
|
||||
raise TimeoutError("Timed out waiting for response")
|
||||
|
||||
def __del__(self):
|
||||
|
||||
if hasattr(self, "consumer"):
|
||||
self.consumer.close()
|
||||
|
||||
if hasattr(self, "producer"):
|
||||
self.producer.flush()
|
||||
self.producer.close()
|
||||
|
||||
self.client.close()
|
||||
|
||||
|
|
@ -0,0 +1,45 @@
|
|||
|
||||
import _pulsar
|
||||
|
||||
from .. schema import DocumentEmbeddingsRequest, DocumentEmbeddingsResponse
|
||||
from .. schema import document_embeddings_request_queue
|
||||
from .. schema import document_embeddings_response_queue
|
||||
from . base import BaseClient
|
||||
|
||||
# Ugly
|
||||
ERROR=_pulsar.LoggerLevel.Error
|
||||
WARN=_pulsar.LoggerLevel.Warn
|
||||
INFO=_pulsar.LoggerLevel.Info
|
||||
DEBUG=_pulsar.LoggerLevel.Debug
|
||||
|
||||
class DocumentEmbeddingsClient(BaseClient):
|
||||
|
||||
def __init__(
|
||||
self, log_level=ERROR,
|
||||
subscriber=None,
|
||||
input_queue=None,
|
||||
output_queue=None,
|
||||
pulsar_host="pulsar://pulsar:6650",
|
||||
):
|
||||
|
||||
if input_queue == None:
|
||||
input_queue = document_embeddings_request_queue
|
||||
|
||||
if output_queue == None:
|
||||
output_queue = document_embeddings_response_queue
|
||||
|
||||
super(DocumentEmbeddingsClient, self).__init__(
|
||||
log_level=log_level,
|
||||
subscriber=subscriber,
|
||||
input_queue=input_queue,
|
||||
output_queue=output_queue,
|
||||
pulsar_host=pulsar_host,
|
||||
input_schema=DocumentEmbeddingsRequest,
|
||||
output_schema=DocumentEmbeddingsResponse,
|
||||
)
|
||||
|
||||
def request(self, vectors, limit=10, timeout=300):
|
||||
return self.call(
|
||||
vectors=vectors, limit=limit, timeout=timeout
|
||||
).documents
|
||||
|
||||
46
trustgraph-base/trustgraph/clients/document_rag_client.py
Normal file
46
trustgraph-base/trustgraph/clients/document_rag_client.py
Normal file
|
|
@ -0,0 +1,46 @@
|
|||
|
||||
import _pulsar
|
||||
|
||||
from .. schema import DocumentRagQuery, DocumentRagResponse
|
||||
from .. schema import document_rag_request_queue, document_rag_response_queue
|
||||
from . base import BaseClient
|
||||
|
||||
# Ugly
|
||||
ERROR=_pulsar.LoggerLevel.Error
|
||||
WARN=_pulsar.LoggerLevel.Warn
|
||||
INFO=_pulsar.LoggerLevel.Info
|
||||
DEBUG=_pulsar.LoggerLevel.Debug
|
||||
|
||||
class DocumentRagClient(BaseClient):
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
log_level=ERROR,
|
||||
subscriber=None,
|
||||
input_queue=None,
|
||||
output_queue=None,
|
||||
pulsar_host="pulsar://pulsar:6650",
|
||||
):
|
||||
|
||||
if input_queue == None:
|
||||
input_queue = document_rag_request_queue
|
||||
|
||||
if output_queue == None:
|
||||
output_queue = document_rag_response_queue
|
||||
|
||||
super(DocumentRagClient, self).__init__(
|
||||
log_level=log_level,
|
||||
subscriber=subscriber,
|
||||
input_queue=input_queue,
|
||||
output_queue=output_queue,
|
||||
pulsar_host=pulsar_host,
|
||||
input_schema=DocumentRagQuery,
|
||||
output_schema=DocumentRagResponse,
|
||||
)
|
||||
|
||||
def request(self, query, timeout=500):
|
||||
|
||||
return self.call(
|
||||
query=query, timeout=timeout
|
||||
).response
|
||||
|
||||
44
trustgraph-base/trustgraph/clients/embeddings_client.py
Normal file
44
trustgraph-base/trustgraph/clients/embeddings_client.py
Normal file
|
|
@ -0,0 +1,44 @@
|
|||
|
||||
from pulsar.schema import JsonSchema
|
||||
from .. schema import EmbeddingsRequest, EmbeddingsResponse
|
||||
from .. schema import embeddings_request_queue, embeddings_response_queue
|
||||
from . base import BaseClient
|
||||
|
||||
import _pulsar
|
||||
|
||||
# Ugly
|
||||
ERROR=_pulsar.LoggerLevel.Error
|
||||
WARN=_pulsar.LoggerLevel.Warn
|
||||
INFO=_pulsar.LoggerLevel.Info
|
||||
DEBUG=_pulsar.LoggerLevel.Debug
|
||||
|
||||
class EmbeddingsClient(BaseClient):
|
||||
|
||||
def __init__(
|
||||
self, log_level=ERROR,
|
||||
input_queue=None,
|
||||
output_queue=None,
|
||||
subscriber=None,
|
||||
pulsar_host="pulsar://pulsar:6650",
|
||||
):
|
||||
|
||||
if input_queue == None:
|
||||
input_queue=embeddings_request_queue
|
||||
|
||||
if output_queue == None:
|
||||
output_queue=embeddings_response_queue
|
||||
|
||||
super(EmbeddingsClient, self).__init__(
|
||||
log_level=log_level,
|
||||
subscriber=subscriber,
|
||||
input_queue=input_queue,
|
||||
output_queue=output_queue,
|
||||
pulsar_host=pulsar_host,
|
||||
input_schema=EmbeddingsRequest,
|
||||
output_schema=EmbeddingsResponse,
|
||||
)
|
||||
|
||||
def request(self, text, timeout=300):
|
||||
return self.call(text=text, timeout=timeout).vectors
|
||||
|
||||
|
||||
|
|
@ -0,0 +1,45 @@
|
|||
|
||||
import _pulsar
|
||||
|
||||
from .. schema import GraphEmbeddingsRequest, GraphEmbeddingsResponse
|
||||
from .. schema import graph_embeddings_request_queue
|
||||
from .. schema import graph_embeddings_response_queue
|
||||
from . base import BaseClient
|
||||
|
||||
# Ugly
|
||||
ERROR=_pulsar.LoggerLevel.Error
|
||||
WARN=_pulsar.LoggerLevel.Warn
|
||||
INFO=_pulsar.LoggerLevel.Info
|
||||
DEBUG=_pulsar.LoggerLevel.Debug
|
||||
|
||||
class GraphEmbeddingsClient(BaseClient):
|
||||
|
||||
def __init__(
|
||||
self, log_level=ERROR,
|
||||
subscriber=None,
|
||||
input_queue=None,
|
||||
output_queue=None,
|
||||
pulsar_host="pulsar://pulsar:6650",
|
||||
):
|
||||
|
||||
if input_queue == None:
|
||||
input_queue = graph_embeddings_request_queue
|
||||
|
||||
if output_queue == None:
|
||||
output_queue = graph_embeddings_response_queue
|
||||
|
||||
super(GraphEmbeddingsClient, self).__init__(
|
||||
log_level=log_level,
|
||||
subscriber=subscriber,
|
||||
input_queue=input_queue,
|
||||
output_queue=output_queue,
|
||||
pulsar_host=pulsar_host,
|
||||
input_schema=GraphEmbeddingsRequest,
|
||||
output_schema=GraphEmbeddingsResponse,
|
||||
)
|
||||
|
||||
def request(self, vectors, limit=10, timeout=300):
|
||||
return self.call(
|
||||
vectors=vectors, limit=limit, timeout=timeout
|
||||
).entities
|
||||
|
||||
46
trustgraph-base/trustgraph/clients/graph_rag_client.py
Normal file
46
trustgraph-base/trustgraph/clients/graph_rag_client.py
Normal file
|
|
@ -0,0 +1,46 @@
|
|||
|
||||
import _pulsar
|
||||
|
||||
from .. schema import GraphRagQuery, GraphRagResponse
|
||||
from .. schema import graph_rag_request_queue, graph_rag_response_queue
|
||||
from . base import BaseClient
|
||||
|
||||
# Ugly
|
||||
ERROR=_pulsar.LoggerLevel.Error
|
||||
WARN=_pulsar.LoggerLevel.Warn
|
||||
INFO=_pulsar.LoggerLevel.Info
|
||||
DEBUG=_pulsar.LoggerLevel.Debug
|
||||
|
||||
class GraphRagClient(BaseClient):
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
log_level=ERROR,
|
||||
subscriber=None,
|
||||
input_queue=None,
|
||||
output_queue=None,
|
||||
pulsar_host="pulsar://pulsar:6650",
|
||||
):
|
||||
|
||||
if input_queue == None:
|
||||
input_queue = graph_rag_request_queue
|
||||
|
||||
if output_queue == None:
|
||||
output_queue = graph_rag_response_queue
|
||||
|
||||
super(GraphRagClient, self).__init__(
|
||||
log_level=log_level,
|
||||
subscriber=subscriber,
|
||||
input_queue=input_queue,
|
||||
output_queue=output_queue,
|
||||
pulsar_host=pulsar_host,
|
||||
input_schema=GraphRagQuery,
|
||||
output_schema=GraphRagResponse,
|
||||
)
|
||||
|
||||
def request(self, query, timeout=500):
|
||||
|
||||
return self.call(
|
||||
query=query, timeout=timeout
|
||||
).response
|
||||
|
||||
40
trustgraph-base/trustgraph/clients/llm_client.py
Normal file
40
trustgraph-base/trustgraph/clients/llm_client.py
Normal file
|
|
@ -0,0 +1,40 @@
|
|||
|
||||
import _pulsar
|
||||
|
||||
from .. schema import TextCompletionRequest, TextCompletionResponse
|
||||
from .. schema import text_completion_request_queue
|
||||
from .. schema import text_completion_response_queue
|
||||
from . base import BaseClient
|
||||
|
||||
# Ugly
|
||||
ERROR=_pulsar.LoggerLevel.Error
|
||||
WARN=_pulsar.LoggerLevel.Warn
|
||||
INFO=_pulsar.LoggerLevel.Info
|
||||
DEBUG=_pulsar.LoggerLevel.Debug
|
||||
|
||||
class LlmClient(BaseClient):
|
||||
|
||||
def __init__(
|
||||
self, log_level=ERROR,
|
||||
subscriber=None,
|
||||
input_queue=None,
|
||||
output_queue=None,
|
||||
pulsar_host="pulsar://pulsar:6650",
|
||||
):
|
||||
|
||||
if input_queue is None: input_queue = text_completion_request_queue
|
||||
if output_queue is None: output_queue = text_completion_response_queue
|
||||
|
||||
super(LlmClient, self).__init__(
|
||||
log_level=log_level,
|
||||
subscriber=subscriber,
|
||||
input_queue=input_queue,
|
||||
output_queue=output_queue,
|
||||
pulsar_host=pulsar_host,
|
||||
input_schema=TextCompletionRequest,
|
||||
output_schema=TextCompletionResponse,
|
||||
)
|
||||
|
||||
def request(self, prompt, timeout=300):
|
||||
return self.call(prompt=prompt, timeout=timeout).response
|
||||
|
||||
100
trustgraph-base/trustgraph/clients/prompt_client.py
Normal file
100
trustgraph-base/trustgraph/clients/prompt_client.py
Normal file
|
|
@ -0,0 +1,100 @@
|
|||
|
||||
import _pulsar
|
||||
|
||||
from .. schema import PromptRequest, PromptResponse, Fact, RowSchema, Field
|
||||
from .. schema import prompt_request_queue
|
||||
from .. schema import prompt_response_queue
|
||||
from . base import BaseClient
|
||||
|
||||
# Ugly
|
||||
ERROR=_pulsar.LoggerLevel.Error
|
||||
WARN=_pulsar.LoggerLevel.Warn
|
||||
INFO=_pulsar.LoggerLevel.Info
|
||||
DEBUG=_pulsar.LoggerLevel.Debug
|
||||
|
||||
class PromptClient(BaseClient):
|
||||
|
||||
def __init__(
|
||||
self, log_level=ERROR,
|
||||
subscriber=None,
|
||||
input_queue=None,
|
||||
output_queue=None,
|
||||
pulsar_host="pulsar://pulsar:6650",
|
||||
):
|
||||
|
||||
if input_queue == None:
|
||||
input_queue = prompt_request_queue
|
||||
|
||||
if output_queue == None:
|
||||
output_queue = prompt_response_queue
|
||||
|
||||
super(PromptClient, self).__init__(
|
||||
log_level=log_level,
|
||||
subscriber=subscriber,
|
||||
input_queue=input_queue,
|
||||
output_queue=output_queue,
|
||||
pulsar_host=pulsar_host,
|
||||
input_schema=PromptRequest,
|
||||
output_schema=PromptResponse,
|
||||
)
|
||||
|
||||
def request_definitions(self, chunk, timeout=300):
|
||||
|
||||
return self.call(
|
||||
kind="extract-definitions", chunk=chunk,
|
||||
timeout=timeout
|
||||
).definitions
|
||||
|
||||
def request_topics(self, chunk, timeout=300):
|
||||
|
||||
return self.call(
|
||||
kind="extract-topics", chunk=chunk,
|
||||
timeout=timeout
|
||||
).topics
|
||||
|
||||
def request_relationships(self, chunk, timeout=300):
|
||||
|
||||
return self.call(
|
||||
kind="extract-relationships", chunk=chunk,
|
||||
timeout=timeout
|
||||
).relationships
|
||||
|
||||
def request_rows(self, schema, chunk, timeout=300):
|
||||
|
||||
return self.call(
|
||||
kind="extract-rows", chunk=chunk,
|
||||
row_schema=RowSchema(
|
||||
name=schema.name,
|
||||
description=schema.description,
|
||||
fields=[
|
||||
Field(
|
||||
name=f.name, type=str(f.type), size=f.size,
|
||||
primary=f.primary, description=f.description,
|
||||
)
|
||||
for f in schema.fields
|
||||
]
|
||||
),
|
||||
timeout=timeout
|
||||
).rows
|
||||
|
||||
def request_kg_prompt(self, query, kg, timeout=300):
|
||||
|
||||
return self.call(
|
||||
kind="kg-prompt",
|
||||
query=query,
|
||||
kg=[
|
||||
Fact(s=v[0], p=v[1], o=v[2])
|
||||
for v in kg
|
||||
],
|
||||
timeout=timeout
|
||||
).answer
|
||||
|
||||
def request_document_prompt(self, query, documents, timeout=300):
|
||||
|
||||
return self.call(
|
||||
kind="document-prompt",
|
||||
query=query,
|
||||
documents=documents,
|
||||
timeout=timeout
|
||||
).answer
|
||||
|
||||
59
trustgraph-base/trustgraph/clients/triples_query_client.py
Normal file
59
trustgraph-base/trustgraph/clients/triples_query_client.py
Normal file
|
|
@ -0,0 +1,59 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
import _pulsar
|
||||
|
||||
from .. schema import TriplesQueryRequest, TriplesQueryResponse, Value
|
||||
from .. schema import triples_request_queue
|
||||
from .. schema import triples_response_queue
|
||||
from . base import BaseClient
|
||||
|
||||
# Ugly
|
||||
ERROR=_pulsar.LoggerLevel.Error
|
||||
WARN=_pulsar.LoggerLevel.Warn
|
||||
INFO=_pulsar.LoggerLevel.Info
|
||||
DEBUG=_pulsar.LoggerLevel.Debug
|
||||
|
||||
class TriplesQueryClient(BaseClient):
|
||||
|
||||
def __init__(
|
||||
self, log_level=ERROR,
|
||||
subscriber=None,
|
||||
input_queue=None,
|
||||
output_queue=None,
|
||||
pulsar_host="pulsar://pulsar:6650",
|
||||
):
|
||||
|
||||
if input_queue == None:
|
||||
input_queue = triples_request_queue
|
||||
|
||||
if output_queue == None:
|
||||
output_queue = triples_response_queue
|
||||
|
||||
super(TriplesQueryClient, self).__init__(
|
||||
log_level=log_level,
|
||||
subscriber=subscriber,
|
||||
input_queue=input_queue,
|
||||
output_queue=output_queue,
|
||||
pulsar_host=pulsar_host,
|
||||
input_schema=TriplesQueryRequest,
|
||||
output_schema=TriplesQueryResponse,
|
||||
)
|
||||
|
||||
def create_value(self, ent):
|
||||
|
||||
if ent == None: return None
|
||||
|
||||
if ent.startswith("http://") or ent.startswith("https://"):
|
||||
return Value(value=ent, is_uri=True)
|
||||
|
||||
return Value(value=ent, is_uri=False)
|
||||
|
||||
def request(self, s, p, o, limit=10, timeout=60):
|
||||
return self.call(
|
||||
s=self.create_value(s),
|
||||
p=self.create_value(p),
|
||||
o=self.create_value(o),
|
||||
limit=limit,
|
||||
timeout=timeout,
|
||||
).triples
|
||||
|
||||
Loading…
Add table
Add a link
Reference in a new issue