mirror of
https://github.com/trustgraph-ai/trustgraph.git
synced 2026-07-05 19:32:11 +02:00
Fix startup
This commit is contained in:
parent
0b08c930da
commit
9216e47da2
13 changed files with 189 additions and 220 deletions
|
|
@ -17,25 +17,22 @@ default_subscriber = 'chunker-recursive'
|
|||
|
||||
class Processor(ConsumerProducer):
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
pulsar_host=None,
|
||||
input_queue=default_input_queue,
|
||||
output_queue=default_output_queue,
|
||||
subscriber=default_subscriber,
|
||||
log_level=LogLevel.INFO,
|
||||
chunk_size=2000,
|
||||
chunk_overlap=100,
|
||||
):
|
||||
def __init__(self, **params):
|
||||
|
||||
input_queue = params.get("input_queue", default_input_queue)
|
||||
output_queue = params.get("output_queue", default_output_queue)
|
||||
subscriber = params.get("subscriber", default_subscriber)
|
||||
chunk_size = params.get("chunk_size", 2000)
|
||||
chunk_overlap = params.get("chunk_overlap", 100)
|
||||
|
||||
super(Processor, self).__init__(
|
||||
pulsar_host=pulsar_host,
|
||||
log_level=log_level,
|
||||
input_queue=input_queue,
|
||||
output_queue=output_queue,
|
||||
subscriber=subscriber,
|
||||
input_schema=TextDocument,
|
||||
output_schema=Chunk,
|
||||
**params | {
|
||||
"input_queue": input_queue,
|
||||
"output_queue": output_queue,
|
||||
"subscriber": subscriber,
|
||||
"input_schema": TextDocument,
|
||||
"output_schema": Chunk,
|
||||
}
|
||||
)
|
||||
|
||||
self.text_splitter = RecursiveCharacterTextSplitter(
|
||||
|
|
|
|||
|
|
@ -18,23 +18,20 @@ default_subscriber = 'pdf-decoder'
|
|||
|
||||
class Processor(ConsumerProducer):
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
pulsar_host=None,
|
||||
input_queue=default_input_queue,
|
||||
output_queue=default_output_queue,
|
||||
subscriber=default_subscriber,
|
||||
log_level=LogLevel.INFO,
|
||||
):
|
||||
def __init__(self, **params):
|
||||
|
||||
input_queue = params.get("input_queue", default_input_queue)
|
||||
output_queue = params.get("output_queue", default_output_queue)
|
||||
subscriber = params.get("subscriber", default_subscriber)
|
||||
|
||||
super(Processor, self).__init__(
|
||||
pulsar_host=pulsar_host,
|
||||
log_level=log_level,
|
||||
input_queue=input_queue,
|
||||
output_queue=output_queue,
|
||||
subscriber=subscriber,
|
||||
input_schema=Document,
|
||||
output_schema=TextDocument,
|
||||
**params | {
|
||||
"input_queue": input_queue,
|
||||
"output_queue": output_queue,
|
||||
"subscriber": subscriber,
|
||||
"input_schema": Document,
|
||||
"output_schema": TextDocument,
|
||||
}
|
||||
)
|
||||
|
||||
print("PDF inited")
|
||||
|
|
|
|||
|
|
@ -17,24 +17,21 @@ default_model="all-MiniLM-L6-v2"
|
|||
|
||||
class Processor(ConsumerProducer):
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
pulsar_host=None,
|
||||
input_queue=default_input_queue,
|
||||
output_queue=default_output_queue,
|
||||
subscriber=default_subscriber,
|
||||
log_level=LogLevel.INFO,
|
||||
model=default_model,
|
||||
):
|
||||
def __init__(self, **params):
|
||||
|
||||
input_queue = params.get("input_queue", default_input_queue)
|
||||
output_queue = params.get("output_queue", default_output_queue)
|
||||
subscriber = params.get("subscriber", default_subscriber)
|
||||
model = params.get("model", default_model)
|
||||
|
||||
super(Processor, self).__init__(
|
||||
pulsar_host=pulsar_host,
|
||||
log_level=log_level,
|
||||
input_queue=input_queue,
|
||||
output_queue=output_queue,
|
||||
subscriber=subscriber,
|
||||
input_schema=EmbeddingsRequest,
|
||||
output_schema=EmbeddingsResponse,
|
||||
**params | {
|
||||
"input_queue": input_queue,
|
||||
"output_queue": output_queue,
|
||||
"subscriber": subscriber,
|
||||
"input_schema": EmbeddingsRequest,
|
||||
"output_schema": EmbeddingsResponse,
|
||||
}
|
||||
)
|
||||
|
||||
self.embeddings = HuggingFaceEmbeddings(model_name=model)
|
||||
|
|
|
|||
|
|
@ -17,25 +17,20 @@ default_ollama = 'http://localhost:11434'
|
|||
|
||||
class Processor(ConsumerProducer):
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
pulsar_host=None,
|
||||
input_queue=default_input_queue,
|
||||
output_queue=default_output_queue,
|
||||
subscriber=default_subscriber,
|
||||
log_level=LogLevel.INFO,
|
||||
model=default_model,
|
||||
ollama=default_ollama,
|
||||
):
|
||||
def __init__(self, **params):
|
||||
|
||||
input_queue = params.get("input_queue", default_input_queue)
|
||||
output_queue = params.get("output_queue", default_output_queue)
|
||||
subscriber = params.get("subscriber", default_subscriber)
|
||||
|
||||
super(Processor, self).__init__(
|
||||
pulsar_host=pulsar_host,
|
||||
log_level=log_level,
|
||||
input_queue=input_queue,
|
||||
output_queue=output_queue,
|
||||
subscriber=subscriber,
|
||||
input_schema=EmbeddingsRequest,
|
||||
output_schema=EmbeddingsResponse,
|
||||
**params | {
|
||||
"input_queue": input_queue,
|
||||
"output_queue": output_queue,
|
||||
"subscriber": subscriber,
|
||||
"input_schema": EmbeddingsRequest,
|
||||
"output_schema": EmbeddingsResponse,
|
||||
}
|
||||
)
|
||||
|
||||
self.embeddings = OllamaEmbeddings(base_url=ollama, model=model)
|
||||
|
|
|
|||
|
|
@ -15,26 +15,23 @@ default_subscriber = 'embeddings-vectorizer'
|
|||
|
||||
class Processor(ConsumerProducer):
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
pulsar_host=None,
|
||||
input_queue=default_input_queue,
|
||||
output_queue=default_output_queue,
|
||||
subscriber=default_subscriber,
|
||||
log_level=LogLevel.INFO,
|
||||
):
|
||||
def __init__(self, **params):
|
||||
|
||||
input_queue = params.get("input_queue", default_input_queue)
|
||||
output_queue = params.get("output_queue", default_output_queue)
|
||||
subscriber = params.get("subscriber", default_subscriber)
|
||||
|
||||
super(Processor, self).__init__(
|
||||
pulsar_host=pulsar_host,
|
||||
log_level=log_level,
|
||||
input_queue=input_queue,
|
||||
output_queue=output_queue,
|
||||
subscriber=subscriber,
|
||||
input_schema=Chunk,
|
||||
output_schema=VectorsChunk,
|
||||
**params | {
|
||||
"input_queue": input_queue,
|
||||
"output_queue": output_queue,
|
||||
"subscriber": subscriber,
|
||||
"input_schema": Chunk,
|
||||
"output_schema": VectorsChunk,
|
||||
}
|
||||
)
|
||||
|
||||
self.embeddings = EmbeddingsClient(pulsar_host=pulsar_host)
|
||||
self.embeddings = EmbeddingsClient(pulsar_host=self.pulsar_host)
|
||||
|
||||
def emit(self, source, chunk, vectors):
|
||||
|
||||
|
|
|
|||
|
|
@ -20,27 +20,22 @@ default_graph_host='localhost'
|
|||
|
||||
class Processor(Consumer):
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
pulsar_host=None,
|
||||
input_queue=default_input_queue,
|
||||
subscriber=default_subscriber,
|
||||
graph_host=default_graph_host,
|
||||
log_level=LogLevel.INFO,
|
||||
):
|
||||
def __init__(self, **params):
|
||||
|
||||
input_queue = params.get("input_queue", default_input_queue)
|
||||
subscriber = params.get("subscriber", default_subscriber)
|
||||
graph_host = params.get("graph_host", default_graph_host)
|
||||
|
||||
super(Processor, self).__init__(
|
||||
pulsar_host=pulsar_host,
|
||||
log_level=log_level,
|
||||
input_queue=input_queue,
|
||||
subscriber=subscriber,
|
||||
input_schema=Triple,
|
||||
**params | {
|
||||
"input_queue": input_queue,
|
||||
"subscriber": subscriber,
|
||||
"input_schema": Triple,
|
||||
}
|
||||
)
|
||||
|
||||
self.tg = TrustGraph([graph_host])
|
||||
|
||||
self.count = 0
|
||||
|
||||
def handle(self, msg):
|
||||
|
||||
v = msg.value()
|
||||
|
|
@ -51,11 +46,6 @@ class Processor(Consumer):
|
|||
v.o.value
|
||||
)
|
||||
|
||||
self.count += 1
|
||||
|
||||
if (self.count % 1000) == 0:
|
||||
print(self.count, "...", flush=True)
|
||||
|
||||
@staticmethod
|
||||
def add_args(parser):
|
||||
|
||||
|
|
|
|||
|
|
@ -22,23 +22,20 @@ default_subscriber = 'kg-extract-definitions'
|
|||
|
||||
class Processor(ConsumerProducer):
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
pulsar_host=None,
|
||||
input_queue=default_input_queue,
|
||||
output_queue=default_output_queue,
|
||||
subscriber=default_subscriber,
|
||||
log_level=LogLevel.INFO,
|
||||
):
|
||||
def __init__(self, **params):
|
||||
|
||||
input_queue = params.get("input_queue", default_input_queue)
|
||||
output_queue = params.get("output_queue", default_output_queue)
|
||||
subscriber = params.get("subscriber", default_subscriber)
|
||||
|
||||
super(Processor, self).__init__(
|
||||
pulsar_host=pulsar_host,
|
||||
log_level=log_level,
|
||||
input_queue=input_queue,
|
||||
output_queue=output_queue,
|
||||
subscriber=subscriber,
|
||||
input_schema=VectorsChunk,
|
||||
output_schema=Triple,
|
||||
**params | {
|
||||
"input_queue": input_queue,
|
||||
"output_queue": output_queue,
|
||||
"subscriber": subscriber,
|
||||
"input_schema": VectorsChunk,
|
||||
"output_schema": Triple,
|
||||
}
|
||||
)
|
||||
|
||||
self.llm = LlmClient(pulsar_host=pulsar_host)
|
||||
|
|
|
|||
|
|
@ -7,6 +7,7 @@ graph edges.
|
|||
|
||||
import urllib.parse
|
||||
import json
|
||||
import os
|
||||
from pulsar.schema import JsonSchema
|
||||
|
||||
from ... schema import VectorsChunk, Triple, VectorsAssociation, Source, Value
|
||||
|
|
@ -25,24 +26,21 @@ default_vector_queue='vectors-load'
|
|||
|
||||
class Processor(ConsumerProducer):
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
pulsar_host=None,
|
||||
input_queue=default_input_queue,
|
||||
vector_queue=default_vector_queue,
|
||||
output_queue=default_output_queue,
|
||||
subscriber=default_subscriber,
|
||||
log_level=LogLevel.INFO,
|
||||
):
|
||||
def __init__(self, **params):
|
||||
|
||||
input_queue = params.get("input_queue", default_input_queue)
|
||||
output_queue = params.get("output_queue", default_output_queue)
|
||||
vector_queue = params.get("vector_queue", default_vector_queue)
|
||||
subscriber = params.get("subscriber", default_subscriber)
|
||||
|
||||
super(Processor, self).__init__(
|
||||
pulsar_host=pulsar_host,
|
||||
log_level=log_level,
|
||||
input_queue=input_queue,
|
||||
output_queue=output_queue,
|
||||
subscriber=subscriber,
|
||||
input_schema=VectorsChunk,
|
||||
output_schema=Triple,
|
||||
**params | {
|
||||
"input_queue": input_queue,
|
||||
"output_queue": output_queue,
|
||||
"subscriber": subscriber,
|
||||
"input_schema": VectorsChunk,
|
||||
"output_schema": Triple,
|
||||
}
|
||||
)
|
||||
|
||||
self.vec_prod = self.client.create_producer(
|
||||
|
|
@ -50,7 +48,17 @@ class Processor(ConsumerProducer):
|
|||
schema=JsonSchema(VectorsAssociation),
|
||||
)
|
||||
|
||||
self.llm = LlmClient(pulsar_host=pulsar_host)
|
||||
__class__.pubsub_metric.info({
|
||||
"input_queue": input_queue,
|
||||
"output_queue": output_queue,
|
||||
"vector_queue": vector_queue,
|
||||
"subscriber": subscriber,
|
||||
"input_schema": VectorsChunk.__name__,
|
||||
"output_schema": Triple.__name__,
|
||||
"vector_schema": VectorsAssociation.__name__,
|
||||
})
|
||||
|
||||
self.llm = LlmClient(pulsar_host=self.pulsar_host)
|
||||
|
||||
def to_uri(self, text):
|
||||
|
||||
|
|
|
|||
|
|
@ -17,25 +17,22 @@ default_subscriber = 'llm-azure-text'
|
|||
|
||||
class Processor(ConsumerProducer):
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
pulsar_host=None,
|
||||
input_queue=default_input_queue,
|
||||
output_queue=default_output_queue,
|
||||
subscriber=default_subscriber,
|
||||
log_level=LogLevel.INFO,
|
||||
endpoint=None,
|
||||
token=None,
|
||||
):
|
||||
def __init__(self, **params):
|
||||
|
||||
input_queue = params.get("input_queue", default_input_queue)
|
||||
output_queue = params.get("output_queue", default_output_queue)
|
||||
subscriber = params.get("subscriber", default_subscriber)
|
||||
endpoint = params.get("endpoint")
|
||||
token = params.get("token")
|
||||
|
||||
super(Processor, self).__init__(
|
||||
pulsar_host=pulsar_host,
|
||||
log_level=log_level,
|
||||
input_queue=input_queue,
|
||||
output_queue=output_queue,
|
||||
subscriber=subscriber,
|
||||
input_schema=TextCompletionRequest,
|
||||
output_schema=TextCompletionResponse,
|
||||
**params | {
|
||||
"input_queue": input_queue,
|
||||
"output_queue": output_queue,
|
||||
"subscriber": subscriber,
|
||||
"input_schema": TextCompletionRequest,
|
||||
"output_schema": TextCompletionResponse,
|
||||
}
|
||||
)
|
||||
|
||||
self.endpoint = endpoint
|
||||
|
|
|
|||
|
|
@ -15,27 +15,25 @@ default_output_queue = 'llm-complete-text-response'
|
|||
default_subscriber = 'llm-claude-text'
|
||||
default_model = 'claude-3-5-sonnet-20240620'
|
||||
|
||||
class Processor:
|
||||
class Processor(ConsumerProducer):
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
pulsar_host=None,
|
||||
input_queue=default_input_queue,
|
||||
output_queue=default_output_queue,
|
||||
subscriber=default_subscriber,
|
||||
log_level=LogLevel.INFO,
|
||||
model=default_model,
|
||||
api_key="",
|
||||
):
|
||||
def __init__(self, **params):
|
||||
|
||||
input_queue = params.get("input_queue", default_input_queue)
|
||||
output_queue = params.get("output_queue", default_output_queue)
|
||||
subscriber = params.get("subscriber", default_subscriber)
|
||||
model = params.get("model", default_model)
|
||||
api_key = params.get("api_key")
|
||||
|
||||
super(Processor, self).__init__(
|
||||
pulsar_host=pulsar_host,
|
||||
log_level=log_level,
|
||||
input_queue=input_queue,
|
||||
output_queue=output_queue,
|
||||
subscriber=subscriber,
|
||||
input_schema=TextCompletionRequest,
|
||||
output_schema=TextCompletionResponse,
|
||||
**params | {
|
||||
"input_queue": input_queue,
|
||||
"output_queue": output_queue,
|
||||
"subscriber": subscriber,
|
||||
"input_schema": TextCompletionRequest,
|
||||
"output_schema": TextCompletionResponse,
|
||||
"model": model,
|
||||
}
|
||||
)
|
||||
|
||||
self.model = model
|
||||
|
|
|
|||
|
|
@ -31,26 +31,23 @@ default_subscriber = 'llm-vertexai-text'
|
|||
|
||||
class Processor(ConsumerProducer):
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
pulsar_host=None,
|
||||
input_queue=default_input_queue,
|
||||
output_queue=default_output_queue,
|
||||
subscriber=default_subscriber,
|
||||
log_level=LogLevel.INFO,
|
||||
region="us-west1",
|
||||
model="gemini-1.0-pro-001",
|
||||
private_key=None,
|
||||
):
|
||||
def __init__(self, **params):
|
||||
|
||||
input_queue = params.get("input_queue", default_input_queue)
|
||||
output_queue = params.get("output_queue", default_output_queue)
|
||||
subscriber = params.get("subscriber", default_subscriber)
|
||||
region = params.get("region", "us-west1")
|
||||
model = params.get("model", "gemini-1.0-pro-001")
|
||||
private_key = params.get("private_key")
|
||||
|
||||
super(Processor, self).__init__(
|
||||
pulsar_host=pulsar_host,
|
||||
log_level=log_level,
|
||||
input_queue=input_queue,
|
||||
output_queue=output_queue,
|
||||
subscriber=subscriber,
|
||||
input_schema=TextCompletionRequest,
|
||||
output_schema=TextCompletionResponse,
|
||||
**params | {
|
||||
"input_queue": input_queue,
|
||||
"output_queue": output_queue,
|
||||
"subscriber": subscriber,
|
||||
"input_schema": TextCompletionRequest,
|
||||
"output_schema": TextCompletionResponse,
|
||||
}
|
||||
)
|
||||
|
||||
self.parameters = {
|
||||
|
|
|
|||
|
|
@ -17,32 +17,32 @@ default_vector_store = 'http://localhost:19530'
|
|||
|
||||
class Processor(ConsumerProducer):
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
pulsar_host=None,
|
||||
input_queue=default_input_queue,
|
||||
output_queue=default_output_queue,
|
||||
subscriber=default_subscriber,
|
||||
log_level=LogLevel.INFO,
|
||||
graph_hosts=default_graph_hosts,
|
||||
vector_store=default_vector_store,
|
||||
entity_limit=50,
|
||||
triple_limit=30,
|
||||
max_subgraph_size=3000,
|
||||
):
|
||||
def __init__(self, **params):
|
||||
|
||||
input_queue = params.get("input_queue", default_input_queue)
|
||||
output_queue = params.get("output_queue", default_output_queue)
|
||||
subscriber = params.get("subscriber", default_subscriber)
|
||||
graph_hosts = params.get("graph_hosts", default_graph_hosts)
|
||||
vector_store = params.get("vector_store", default_vector_store)
|
||||
entity_limit = params.get("entity_limit", 50)
|
||||
triple_limit = params.get("triple_limit", 30)
|
||||
max_subgraph_size = params.get("max_subgraph_size", 3000)
|
||||
|
||||
super(Processor, self).__init__(
|
||||
pulsar_host=pulsar_host,
|
||||
log_level=log_level,
|
||||
input_queue=input_queue,
|
||||
output_queue=output_queue,
|
||||
subscriber=subscriber,
|
||||
input_schema=GraphRagQuery,
|
||||
output_schema=GraphRagResponse,
|
||||
**params | {
|
||||
"input_queue": input_queue,
|
||||
"output_queue": output_queue,
|
||||
"subscriber": subscriber,
|
||||
"input_schema": GraphRagQuery,
|
||||
"output_schema": GraphRagResponse,
|
||||
"entity_limit": entity_limit,
|
||||
"triple_limit": triple_limit,
|
||||
"max_subgraph_size": max_subgraph_size,
|
||||
}
|
||||
)
|
||||
|
||||
self.rag = GraphRag(
|
||||
pulsar_host=pulsar_host,
|
||||
pulsar_host=self.pulsar_host,
|
||||
graph_hosts=graph_hosts.split(","),
|
||||
vector_store=vector_store,
|
||||
verbose=True,
|
||||
|
|
|
|||
|
|
@ -14,21 +14,19 @@ default_store_uri = 'http://localhost:19530'
|
|||
|
||||
class Processor(Consumer):
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
pulsar_host=None,
|
||||
input_queue=default_input_queue,
|
||||
subscriber=default_subscriber,
|
||||
store_uri=default_store_uri,
|
||||
log_level=LogLevel.INFO,
|
||||
):
|
||||
def __init__(self, **params):
|
||||
|
||||
input_queue = params.get("input_queue", default_input_queue)
|
||||
subscriber = params.get("subscriber", default_subscriber)
|
||||
store_uri = params.get("store_uri", default_store_uri)
|
||||
|
||||
super(Processor, self).__init__(
|
||||
pulsar_host=pulsar_host,
|
||||
log_level=log_level,
|
||||
input_queue=input_queue,
|
||||
subscriber=subscriber,
|
||||
input_schema=VectorsAssociation,
|
||||
**params | {
|
||||
"input_queue": input_queue,
|
||||
"subscriber": subscriber,
|
||||
"input_schema": VectorsAssociation,
|
||||
"store_uri": store_uri,
|
||||
}
|
||||
)
|
||||
|
||||
self.vecstore = TripleVectors(store_uri)
|
||||
|
|
@ -40,6 +38,7 @@ class Processor(Consumer):
|
|||
if v.entity.value != "":
|
||||
for vec in v.vectors:
|
||||
self.vecstore.insert(vec, v.entity.value)
|
||||
|
||||
@staticmethod
|
||||
def add_args(parser):
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue