mirror of
https://github.com/trustgraph-ai/trustgraph.git
synced 2026-04-25 00:16:23 +02:00
Refactor names (#4)
- Downsize embeddings model to mini-lm in docker-compose files - Rename for structure - Default queues defined in schema file - Standardize naming: graph embeddings, chunk embeddings, triples
This commit is contained in:
parent
cbddf197ad
commit
3947920ee8
71 changed files with 764 additions and 585 deletions
2
Makefile
2
Makefile
|
|
@ -1,6 +1,6 @@
|
|||
|
||||
# VERSION=$(shell git describe | sed 's/^v//')
|
||||
VERSION=0.4.2
|
||||
VERSION=0.5.1
|
||||
|
||||
all: container
|
||||
|
||||
|
|
|
|||
|
|
@ -119,7 +119,7 @@ services:
|
|||
restart: on-failure:100
|
||||
|
||||
pdf-decoder:
|
||||
image: docker.io/trustgraph/trustgraph-flow:0.4.2
|
||||
image: docker.io/trustgraph/trustgraph-flow:0.5.1
|
||||
command:
|
||||
- "pdf-decoder"
|
||||
- "-p"
|
||||
|
|
@ -127,7 +127,7 @@ services:
|
|||
restart: on-failure:100
|
||||
|
||||
chunker:
|
||||
image: docker.io/trustgraph/trustgraph-flow:0.4.2
|
||||
image: docker.io/trustgraph/trustgraph-flow:0.5.1
|
||||
command:
|
||||
- "chunker-recursive"
|
||||
- "-p"
|
||||
|
|
@ -135,7 +135,7 @@ services:
|
|||
restart: on-failure:100
|
||||
|
||||
vectorize:
|
||||
image: docker.io/trustgraph/trustgraph-flow:0.4.2
|
||||
image: docker.io/trustgraph/trustgraph-flow:0.5.1
|
||||
command:
|
||||
- "embeddings-vectorize"
|
||||
- "-p"
|
||||
|
|
@ -143,17 +143,17 @@ services:
|
|||
restart: on-failure:100
|
||||
|
||||
embeddings:
|
||||
image: docker.io/trustgraph/trustgraph-flow:0.4.2
|
||||
image: docker.io/trustgraph/trustgraph-flow:0.5.1
|
||||
command:
|
||||
- "embeddings-hf"
|
||||
- "-p"
|
||||
- "pulsar://pulsar:6650"
|
||||
- "-m"
|
||||
- "mixedbread-ai/mxbai-embed-large-v1"
|
||||
# - "-m"
|
||||
# - "mixedbread-ai/mxbai-embed-large-v1"
|
||||
restart: on-failure:100
|
||||
|
||||
kg-extract-definitions:
|
||||
image: docker.io/trustgraph/trustgraph-flow:0.4.2
|
||||
image: docker.io/trustgraph/trustgraph-flow:0.5.1
|
||||
command:
|
||||
- "kg-extract-definitions"
|
||||
- "-p"
|
||||
|
|
@ -161,37 +161,37 @@ services:
|
|||
restart: on-failure:100
|
||||
|
||||
kg-extract-relationships:
|
||||
image: docker.io/trustgraph/trustgraph-flow:0.4.2
|
||||
image: docker.io/trustgraph/trustgraph-flow:0.5.1
|
||||
command:
|
||||
- "kg-extract-relationships"
|
||||
- "-p"
|
||||
- "pulsar://pulsar:6650"
|
||||
restart: on-failure:100
|
||||
|
||||
vector-write:
|
||||
image: docker.io/trustgraph/trustgraph-flow:0.4.2
|
||||
store-graph-embeddings:
|
||||
image: docker.io/trustgraph/trustgraph-flow:0.5.1
|
||||
command:
|
||||
- "vector-write-milvus"
|
||||
- "ge-write-milvus"
|
||||
- "-p"
|
||||
- "pulsar://pulsar:6650"
|
||||
- "-t"
|
||||
- "http://milvus:19530"
|
||||
restart: on-failure:100
|
||||
|
||||
graph-write:
|
||||
image: docker.io/trustgraph/trustgraph-flow:0.4.2
|
||||
store-triples:
|
||||
image: docker.io/trustgraph/trustgraph-flow:0.5.1
|
||||
command:
|
||||
- "graph-write-cassandra"
|
||||
- "triples-write-cassandra"
|
||||
- "-p"
|
||||
- "pulsar://pulsar:6650"
|
||||
- "-g"
|
||||
- "cassandra"
|
||||
restart: on-failure:100
|
||||
|
||||
llm:
|
||||
image: docker.io/trustgraph/trustgraph-flow:0.4.2
|
||||
text-completion:
|
||||
image: docker.io/trustgraph/trustgraph-flow:0.5.1
|
||||
command:
|
||||
- "llm-azure-text"
|
||||
- "text-completion-azure"
|
||||
- "-p"
|
||||
- "pulsar://pulsar:6650"
|
||||
- "-k"
|
||||
|
|
@ -201,7 +201,7 @@ services:
|
|||
restart: on-failure:100
|
||||
|
||||
graph-rag:
|
||||
image: docker.io/trustgraph/trustgraph-flow:0.4.2
|
||||
image: docker.io/trustgraph/trustgraph-flow:0.5.1
|
||||
command:
|
||||
- "graph-rag"
|
||||
- "-p"
|
||||
|
|
|
|||
|
|
@ -119,7 +119,7 @@ services:
|
|||
restart: on-failure:100
|
||||
|
||||
pdf-decoder:
|
||||
image: docker.io/trustgraph/trustgraph-flow:0.4.2
|
||||
image: docker.io/trustgraph/trustgraph-flow:0.5.1
|
||||
command:
|
||||
- "pdf-decoder"
|
||||
- "-p"
|
||||
|
|
@ -127,7 +127,7 @@ services:
|
|||
restart: on-failure:100
|
||||
|
||||
chunker:
|
||||
image: docker.io/trustgraph/trustgraph-flow:0.4.2
|
||||
image: docker.io/trustgraph/trustgraph-flow:0.5.1
|
||||
command:
|
||||
- "chunker-recursive"
|
||||
- "-p"
|
||||
|
|
@ -135,7 +135,7 @@ services:
|
|||
restart: on-failure:100
|
||||
|
||||
vectorize:
|
||||
image: docker.io/trustgraph/trustgraph-flow:0.4.2
|
||||
image: docker.io/trustgraph/trustgraph-flow:0.5.1
|
||||
command:
|
||||
- "embeddings-vectorize"
|
||||
- "-p"
|
||||
|
|
@ -143,17 +143,17 @@ services:
|
|||
restart: on-failure:100
|
||||
|
||||
embeddings:
|
||||
image: docker.io/trustgraph/trustgraph-flow:0.4.2
|
||||
image: docker.io/trustgraph/trustgraph-flow:0.5.1
|
||||
command:
|
||||
- "embeddings-hf"
|
||||
- "-p"
|
||||
- "pulsar://pulsar:6650"
|
||||
- "-m"
|
||||
- "mixedbread-ai/mxbai-embed-large-v1"
|
||||
# - "-m"
|
||||
# - "mixedbread-ai/mxbai-embed-large-v1"
|
||||
restart: on-failure:100
|
||||
|
||||
kg-extract-definitions:
|
||||
image: docker.io/trustgraph/trustgraph-flow:0.4.2
|
||||
image: docker.io/trustgraph/trustgraph-flow:0.5.1
|
||||
command:
|
||||
- "kg-extract-definitions"
|
||||
- "-p"
|
||||
|
|
@ -161,37 +161,37 @@ services:
|
|||
restart: on-failure:100
|
||||
|
||||
kg-extract-relationships:
|
||||
image: docker.io/trustgraph/trustgraph-flow:0.4.2
|
||||
image: docker.io/trustgraph/trustgraph-flow:0.5.1
|
||||
command:
|
||||
- "kg-extract-relationships"
|
||||
- "-p"
|
||||
- "pulsar://pulsar:6650"
|
||||
restart: on-failure:100
|
||||
|
||||
vector-write:
|
||||
image: docker.io/trustgraph/trustgraph-flow:0.4.2
|
||||
store-graph-embeddings:
|
||||
image: docker.io/trustgraph/trustgraph-flow:0.5.1
|
||||
command:
|
||||
- "vector-write-milvus"
|
||||
- "ge-write-milvus"
|
||||
- "-p"
|
||||
- "pulsar://pulsar:6650"
|
||||
- "-t"
|
||||
- "http://milvus:19530"
|
||||
restart: on-failure:100
|
||||
|
||||
graph-write:
|
||||
image: docker.io/trustgraph/trustgraph-flow:0.4.2
|
||||
store-triples:
|
||||
image: docker.io/trustgraph/trustgraph-flow:0.5.1
|
||||
command:
|
||||
- "graph-write-cassandra"
|
||||
- "triples-write-cassandra"
|
||||
- "-p"
|
||||
- "pulsar://pulsar:6650"
|
||||
- "-g"
|
||||
- "cassandra"
|
||||
restart: on-failure:100
|
||||
|
||||
llm:
|
||||
image: docker.io/trustgraph/trustgraph-flow:0.4.2
|
||||
text-completion:
|
||||
image: docker.io/trustgraph/trustgraph-flow:0.5.1
|
||||
command:
|
||||
- "llm-claude-text"
|
||||
- "text-completion-claude"
|
||||
- "-p"
|
||||
- "pulsar://pulsar:6650"
|
||||
- "-k"
|
||||
|
|
@ -199,7 +199,7 @@ services:
|
|||
restart: on-failure:100
|
||||
|
||||
graph-rag:
|
||||
image: docker.io/trustgraph/trustgraph-flow:0.4.2
|
||||
image: docker.io/trustgraph/trustgraph-flow:0.5.1
|
||||
command:
|
||||
- "graph-rag"
|
||||
- "-p"
|
||||
|
|
|
|||
|
|
@ -119,7 +119,7 @@ services:
|
|||
restart: on-failure:100
|
||||
|
||||
pdf-decoder:
|
||||
image: docker.io/trustgraph/trustgraph-flow:0.4.2
|
||||
image: docker.io/trustgraph/trustgraph-flow:0.5.1
|
||||
command:
|
||||
- "pdf-decoder"
|
||||
- "-p"
|
||||
|
|
@ -127,7 +127,7 @@ services:
|
|||
restart: on-failure:100
|
||||
|
||||
chunker:
|
||||
image: docker.io/trustgraph/trustgraph-flow:0.4.2
|
||||
image: docker.io/trustgraph/trustgraph-flow:0.5.1
|
||||
command:
|
||||
- "chunker-recursive"
|
||||
- "-p"
|
||||
|
|
@ -135,7 +135,7 @@ services:
|
|||
restart: on-failure:100
|
||||
|
||||
vectorize:
|
||||
image: docker.io/trustgraph/trustgraph-flow:0.4.2
|
||||
image: docker.io/trustgraph/trustgraph-flow:0.5.1
|
||||
command:
|
||||
- "embeddings-vectorize"
|
||||
- "-p"
|
||||
|
|
@ -143,17 +143,17 @@ services:
|
|||
restart: on-failure:100
|
||||
|
||||
embeddings:
|
||||
image: docker.io/trustgraph/trustgraph-flow:0.4.2
|
||||
image: docker.io/trustgraph/trustgraph-flow:0.5.1
|
||||
command:
|
||||
- "embeddings-hf"
|
||||
- "-p"
|
||||
- "pulsar://pulsar:6650"
|
||||
- "-m"
|
||||
- "mixedbread-ai/mxbai-embed-large-v1"
|
||||
# - "-m"
|
||||
# - "mixedbread-ai/mxbai-embed-large-v1"
|
||||
restart: on-failure:100
|
||||
|
||||
kg-extract-definitions:
|
||||
image: docker.io/trustgraph/trustgraph-flow:0.4.2
|
||||
image: docker.io/trustgraph/trustgraph-flow:0.5.1
|
||||
command:
|
||||
- "kg-extract-definitions"
|
||||
- "-p"
|
||||
|
|
@ -161,37 +161,37 @@ services:
|
|||
restart: on-failure:100
|
||||
|
||||
kg-extract-relationships:
|
||||
image: docker.io/trustgraph/trustgraph-flow:0.4.2
|
||||
image: docker.io/trustgraph/trustgraph-flow:0.5.1
|
||||
command:
|
||||
- "kg-extract-relationships"
|
||||
- "-p"
|
||||
- "pulsar://pulsar:6650"
|
||||
restart: on-failure:100
|
||||
|
||||
vector-write:
|
||||
image: docker.io/trustgraph/trustgraph-flow:0.4.2
|
||||
store-graph-embeddings:
|
||||
image: docker.io/trustgraph/trustgraph-flow:0.5.1
|
||||
command:
|
||||
- "vector-write-milvus"
|
||||
- "ge-write-milvus"
|
||||
- "-p"
|
||||
- "pulsar://pulsar:6650"
|
||||
- "-t"
|
||||
- "http://milvus:19530"
|
||||
restart: on-failure:100
|
||||
|
||||
graph-write:
|
||||
image: docker.io/trustgraph/trustgraph-flow:0.4.2
|
||||
store-triples:
|
||||
image: docker.io/trustgraph/trustgraph-flow:0.5.1
|
||||
command:
|
||||
- "graph-write-cassandra"
|
||||
- "triples-write-cassandra"
|
||||
- "-p"
|
||||
- "pulsar://pulsar:6650"
|
||||
- "-g"
|
||||
- "cassandra"
|
||||
restart: on-failure:100
|
||||
|
||||
llm:
|
||||
image: docker.io/trustgraph/trustgraph-flow:0.4.2
|
||||
text-completion:
|
||||
image: docker.io/trustgraph/trustgraph-flow:0.5.1
|
||||
command:
|
||||
- "llm-ollama-text"
|
||||
- "text-completion-ollama"
|
||||
- "-p"
|
||||
- "pulsar://pulsar:6650"
|
||||
- "-r"
|
||||
|
|
@ -199,7 +199,7 @@ services:
|
|||
restart: on-failure:100
|
||||
|
||||
graph-rag:
|
||||
image: docker.io/trustgraph/trustgraph-flow:0.4.2
|
||||
image: docker.io/trustgraph/trustgraph-flow:0.5.1
|
||||
command:
|
||||
- "graph-rag"
|
||||
- "-p"
|
||||
|
|
|
|||
|
|
@ -119,7 +119,7 @@ services:
|
|||
restart: on-failure:100
|
||||
|
||||
pdf-decoder:
|
||||
image: docker.io/trustgraph/trustgraph-flow:0.4.2
|
||||
image: docker.io/trustgraph/trustgraph-flow:0.5.1
|
||||
command:
|
||||
- "pdf-decoder"
|
||||
- "-p"
|
||||
|
|
@ -127,7 +127,7 @@ services:
|
|||
restart: on-failure:100
|
||||
|
||||
chunker:
|
||||
image: docker.io/trustgraph/trustgraph-flow:0.4.2
|
||||
image: docker.io/trustgraph/trustgraph-flow:0.5.1
|
||||
command:
|
||||
- "chunker-recursive"
|
||||
- "-p"
|
||||
|
|
@ -135,7 +135,7 @@ services:
|
|||
restart: on-failure:100
|
||||
|
||||
vectorize:
|
||||
image: docker.io/trustgraph/trustgraph-flow:0.4.2
|
||||
image: docker.io/trustgraph/trustgraph-flow:0.5.1
|
||||
command:
|
||||
- "embeddings-vectorize"
|
||||
- "-p"
|
||||
|
|
@ -143,17 +143,17 @@ services:
|
|||
restart: on-failure:100
|
||||
|
||||
embeddings:
|
||||
image: docker.io/trustgraph/trustgraph-flow:0.4.2
|
||||
image: docker.io/trustgraph/trustgraph-flow:0.5.1
|
||||
command:
|
||||
- "embeddings-hf"
|
||||
- "-p"
|
||||
- "pulsar://pulsar:6650"
|
||||
- "-m"
|
||||
- "mixedbread-ai/mxbai-embed-large-v1"
|
||||
# - "-m"
|
||||
# - "mixedbread-ai/mxbai-embed-large-v1"
|
||||
restart: on-failure:100
|
||||
|
||||
kg-extract-definitions:
|
||||
image: docker.io/trustgraph/trustgraph-flow:0.4.2
|
||||
image: docker.io/trustgraph/trustgraph-flow:0.5.1
|
||||
command:
|
||||
- "kg-extract-definitions"
|
||||
- "-p"
|
||||
|
|
@ -161,37 +161,37 @@ services:
|
|||
restart: on-failure:100
|
||||
|
||||
kg-extract-relationships:
|
||||
image: docker.io/trustgraph/trustgraph-flow:0.4.2
|
||||
image: docker.io/trustgraph/trustgraph-flow:0.5.1
|
||||
command:
|
||||
- "kg-extract-relationships"
|
||||
- "-p"
|
||||
- "pulsar://pulsar:6650"
|
||||
restart: on-failure:100
|
||||
|
||||
vector-write:
|
||||
image: docker.io/trustgraph/trustgraph-flow:0.4.2
|
||||
store-graph-embeddings:
|
||||
image: docker.io/trustgraph/trustgraph-flow:0.5.1
|
||||
command:
|
||||
- "vector-write-milvus"
|
||||
- "ge-write-milvus"
|
||||
- "-p"
|
||||
- "pulsar://pulsar:6650"
|
||||
- "-t"
|
||||
- "http://milvus:19530"
|
||||
restart: on-failure:100
|
||||
|
||||
graph-write:
|
||||
image: docker.io/trustgraph/trustgraph-flow:0.4.2
|
||||
store-triples:
|
||||
image: docker.io/trustgraph/trustgraph-flow:0.5.1
|
||||
command:
|
||||
- "graph-write-cassandra"
|
||||
- "triples-write-cassandra"
|
||||
- "-p"
|
||||
- "pulsar://pulsar:6650"
|
||||
- "-g"
|
||||
- "cassandra"
|
||||
restart: on-failure:100
|
||||
|
||||
llm:
|
||||
image: docker.io/trustgraph/trustgraph-flow:0.4.2
|
||||
text-completion:
|
||||
image: docker.io/trustgraph/trustgraph-flow:0.5.1
|
||||
command:
|
||||
- "llm-vertexai-text"
|
||||
- "text-completion-ollama"
|
||||
- "-p"
|
||||
- "pulsar://pulsar:6650"
|
||||
- "-k"
|
||||
|
|
@ -203,7 +203,7 @@ services:
|
|||
restart: on-failure:100
|
||||
|
||||
graph-rag:
|
||||
image: docker.io/trustgraph/trustgraph-flow:0.4.2
|
||||
image: docker.io/trustgraph/trustgraph-flow:0.5.1
|
||||
command:
|
||||
- "graph-rag"
|
||||
- "-p"
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
from trustgraph.chunker.recursive import run
|
||||
from trustgraph.chunking.recursive import run
|
||||
|
||||
run()
|
||||
|
||||
|
|
|
|||
6
scripts/ge-write-milvus
Executable file
6
scripts/ge-write-milvus
Executable file
|
|
@ -0,0 +1,6 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
from trustgraph.storage.graph_embeddings.milvus import run
|
||||
|
||||
run()
|
||||
|
||||
|
|
@ -1,6 +1,6 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
from trustgraph.rag.graph import run
|
||||
from trustgraph.retrieval.graph_rag import run
|
||||
|
||||
run()
|
||||
|
||||
|
|
|
|||
|
|
@ -1,6 +0,0 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
from trustgraph.graph.cassandra_write import run
|
||||
|
||||
run()
|
||||
|
||||
|
|
@ -1,6 +0,0 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
from trustgraph.llm.azure_text import run
|
||||
|
||||
run()
|
||||
|
||||
|
|
@ -1,6 +0,0 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
from trustgraph.llm.claude_text import run
|
||||
|
||||
run()
|
||||
|
||||
|
|
@ -1,6 +0,0 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
from trustgraph.llm.ollama_text import run
|
||||
|
||||
run()
|
||||
|
||||
|
|
@ -1,6 +0,0 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
from trustgraph.llm.vertexai_text import run
|
||||
|
||||
run()
|
||||
|
||||
|
|
@ -1,6 +1,6 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
from trustgraph.decoder.pdf import run
|
||||
from trustgraph.decoding.pdf import run
|
||||
|
||||
run()
|
||||
|
||||
|
|
|
|||
6
scripts/text-completion-azure
Executable file
6
scripts/text-completion-azure
Executable file
|
|
@ -0,0 +1,6 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
from trustgraph.model.text_completion.azure import run
|
||||
|
||||
run()
|
||||
|
||||
6
scripts/text-completion-claude
Executable file
6
scripts/text-completion-claude
Executable file
|
|
@ -0,0 +1,6 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
from trustgraph.model.text_completion.claude import run
|
||||
|
||||
run()
|
||||
|
||||
6
scripts/text-completion-ollama
Executable file
6
scripts/text-completion-ollama
Executable file
|
|
@ -0,0 +1,6 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
from trustgraph.model.text_completion.ollama import run
|
||||
|
||||
run()
|
||||
|
||||
6
scripts/text-completion-vertexai
Executable file
6
scripts/text-completion-vertexai
Executable file
|
|
@ -0,0 +1,6 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
from trustgraph.model.text_completion.vertexai import run
|
||||
|
||||
run()
|
||||
|
||||
6
scripts/triples-write-cassandra
Executable file
6
scripts/triples-write-cassandra
Executable file
|
|
@ -0,0 +1,6 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
from trustgraph.storage.triples.cassandra import run
|
||||
|
||||
run()
|
||||
|
||||
|
|
@ -1,6 +0,0 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
from trustgraph.vector.milvus_write import run
|
||||
|
||||
run()
|
||||
|
||||
14
setup.py
14
setup.py
|
|
@ -4,7 +4,7 @@ import os
|
|||
with open("README.md", "r") as fh:
|
||||
long_description = fh.read()
|
||||
|
||||
version = "0.4.2"
|
||||
version = "0.5.1"
|
||||
|
||||
setuptools.setup(
|
||||
name="trustgraph",
|
||||
|
|
@ -50,21 +50,21 @@ setuptools.setup(
|
|||
"scripts/embeddings-hf",
|
||||
"scripts/embeddings-ollama",
|
||||
"scripts/embeddings-vectorize",
|
||||
"scripts/ge-write-milvus",
|
||||
"scripts/graph-rag",
|
||||
"scripts/graph-show",
|
||||
"scripts/graph-to-turtle",
|
||||
"scripts/graph-write-cassandra",
|
||||
"scripts/init-pulsar-manager",
|
||||
"scripts/kg-extract-definitions",
|
||||
"scripts/kg-extract-relationships",
|
||||
"scripts/llm-azure-text",
|
||||
"scripts/llm-claude-text",
|
||||
"scripts/llm-ollama-text",
|
||||
"scripts/llm-vertexai-text",
|
||||
"scripts/loader",
|
||||
"scripts/pdf-decoder",
|
||||
"scripts/query",
|
||||
"scripts/run-processing",
|
||||
"scripts/vector-write-milvus",
|
||||
"scripts/text-completion-azure",
|
||||
"scripts/text-completion-claude",
|
||||
"scripts/text-completion-ollama",
|
||||
"scripts/text-completion-vertexai",
|
||||
"scripts/triples-write-cassandra",
|
||||
]
|
||||
)
|
||||
|
|
|
|||
|
|
@ -1,3 +1,6 @@
|
|||
|
||||
from . processor import *
|
||||
from . base_processor import BaseProcessor
|
||||
from . consumer import Consumer
|
||||
from . producer import Producer
|
||||
from . consumer_producer import ConsumerProducer
|
||||
|
||||
|
|
|
|||
117
trustgraph/base/base_processor.py
Normal file
117
trustgraph/base/base_processor.py
Normal file
|
|
@ -0,0 +1,117 @@
|
|||
|
||||
import os
|
||||
import argparse
|
||||
import pulsar
|
||||
import _pulsar
|
||||
import time
|
||||
from prometheus_client import start_http_server, Info
|
||||
|
||||
from .. log_level import LogLevel
|
||||
|
||||
class BaseProcessor:
|
||||
|
||||
default_pulsar_host = os.getenv("PULSAR_HOST", 'pulsar://pulsar:6650')
|
||||
|
||||
def __init__(self, **params):
|
||||
|
||||
self.client = None
|
||||
|
||||
if not hasattr(__class__, "params_metric"):
|
||||
__class__.params_metric = Info(
|
||||
'params', 'Parameters configuration'
|
||||
)
|
||||
|
||||
# FIXME: Maybe outputs information it should not
|
||||
__class__.params_metric.info({
|
||||
k: str(params[k])
|
||||
for k in params
|
||||
})
|
||||
|
||||
pulsar_host = params.get("pulsar_host", self.default_pulsar_host)
|
||||
log_level = params.get("log_level", LogLevel.INFO)
|
||||
|
||||
self.pulsar_host = pulsar_host
|
||||
|
||||
self.client = pulsar.Client(
|
||||
pulsar_host,
|
||||
logger=pulsar.ConsoleLogger(log_level.to_pulsar())
|
||||
)
|
||||
|
||||
def __del__(self):
|
||||
|
||||
if self.client:
|
||||
self.client.close()
|
||||
|
||||
@staticmethod
|
||||
def add_args(parser):
|
||||
|
||||
parser.add_argument(
|
||||
'-p', '--pulsar-host',
|
||||
default=__class__.default_pulsar_host,
|
||||
help=f'Pulsar host (default: {__class__.default_pulsar_host})',
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
'-l', '--log-level',
|
||||
type=LogLevel,
|
||||
default=LogLevel.INFO,
|
||||
choices=list(LogLevel),
|
||||
help=f'Output queue (default: info)'
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
'-M', '--metrics-enabled',
|
||||
type=bool,
|
||||
default=True,
|
||||
help=f'Pulsar host (default: true)',
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
'-P', '--metrics-port',
|
||||
type=int,
|
||||
default=8000,
|
||||
help=f'Pulsar host (default: 8000)',
|
||||
)
|
||||
|
||||
def run(self):
|
||||
raise RuntimeError("Something should have implemented the run method")
|
||||
|
||||
@classmethod
|
||||
def start(cls, prog, doc):
|
||||
|
||||
while True:
|
||||
|
||||
parser = argparse.ArgumentParser(
|
||||
prog=prog,
|
||||
description=doc
|
||||
)
|
||||
|
||||
cls.add_args(parser)
|
||||
|
||||
args = parser.parse_args()
|
||||
args = vars(args)
|
||||
|
||||
if args["metrics_enabled"]:
|
||||
start_http_server(args["metrics_port"])
|
||||
|
||||
try:
|
||||
|
||||
p = cls(**args)
|
||||
p.run()
|
||||
|
||||
except KeyboardInterrupt:
|
||||
print("Keyboard interrupt.")
|
||||
return
|
||||
|
||||
except _pulsar.Interrupted:
|
||||
print("Pulsar Interrupted.")
|
||||
return
|
||||
|
||||
except Exception as e:
|
||||
|
||||
print(type(e))
|
||||
|
||||
print("Exception:", e, flush=True)
|
||||
print("Will retry...", flush=True)
|
||||
|
||||
time.sleep(10)
|
||||
87
trustgraph/base/consumer.py
Normal file
87
trustgraph/base/consumer.py
Normal file
|
|
@ -0,0 +1,87 @@
|
|||
|
||||
from pulsar.schema import JsonSchema
|
||||
from prometheus_client import start_http_server, Histogram, Info, Counter
|
||||
|
||||
from . base_processor import BaseProcessor
|
||||
|
||||
class Consumer(BaseProcessor):
|
||||
|
||||
def __init__(self, **params):
|
||||
|
||||
super(Consumer, self).__init__(**params)
|
||||
|
||||
input_queue = params.get("input_queue")
|
||||
subscriber = params.get("subscriber")
|
||||
input_schema = params.get("input_schema")
|
||||
|
||||
if input_schema == None:
|
||||
raise RuntimeError("input_schema must be specified")
|
||||
|
||||
if not hasattr(__class__, "request_metric"):
|
||||
__class__.request_metric = Histogram(
|
||||
'request_latency', 'Request latency (seconds)'
|
||||
)
|
||||
|
||||
if not hasattr(__class__, "pubsub_metric"):
|
||||
__class__.pubsub_metric = Info(
|
||||
'pubsub', 'Pub/sub configuration'
|
||||
)
|
||||
|
||||
if not hasattr(__class__, "processing_metric"):
|
||||
__class__.processing_metric = Counter(
|
||||
'processing_count', 'Processing count', ["status"]
|
||||
)
|
||||
|
||||
__class__.pubsub_metric.info({
|
||||
"input_queue": input_queue,
|
||||
"subscriber": subscriber,
|
||||
"input_schema": input_schema.__name__,
|
||||
})
|
||||
|
||||
self.consumer = self.client.subscribe(
|
||||
input_queue, subscriber,
|
||||
schema=JsonSchema(input_schema),
|
||||
)
|
||||
|
||||
def run(self):
|
||||
|
||||
while True:
|
||||
|
||||
msg = self.consumer.receive()
|
||||
|
||||
try:
|
||||
|
||||
with __class__.request_metric.time():
|
||||
self.handle(msg)
|
||||
|
||||
# Acknowledge successful processing of the message
|
||||
self.consumer.acknowledge(msg)
|
||||
|
||||
__class__.processing_metric.labels(status="success").inc()
|
||||
|
||||
except Exception as e:
|
||||
|
||||
print("Exception:", e, flush=True)
|
||||
|
||||
# Message failed to be processed
|
||||
self.consumer.negative_acknowledge(msg)
|
||||
|
||||
__class__.processing_metric.labels(status="error").inc()
|
||||
|
||||
@staticmethod
|
||||
def add_args(parser, default_input_queue, default_subscriber):
|
||||
|
||||
BaseProcessor.add_args(parser)
|
||||
|
||||
parser.add_argument(
|
||||
'-i', '--input-queue',
|
||||
default=default_input_queue,
|
||||
help=f'Input queue (default: {default_input_queue})'
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
'-s', '--subscriber',
|
||||
default=default_subscriber,
|
||||
help=f'Queue subscriber name (default: {default_subscriber})'
|
||||
)
|
||||
|
||||
168
trustgraph/base/consumer_producer.py
Normal file
168
trustgraph/base/consumer_producer.py
Normal file
|
|
@ -0,0 +1,168 @@
|
|||
|
||||
from pulsar.schema import JsonSchema
|
||||
from prometheus_client import Histogram, Info, Counter
|
||||
|
||||
from . base_processor import BaseProcessor
|
||||
|
||||
# FIXME: Derive from consumer? And producer?
|
||||
|
||||
class ConsumerProducer(BaseProcessor):
|
||||
|
||||
def __init__(self, **params):
|
||||
|
||||
input_queue = params.get("input_queue")
|
||||
output_queue = params.get("output_queue")
|
||||
subscriber = params.get("subscriber")
|
||||
input_schema = params.get("input_schema")
|
||||
output_schema = params.get("output_schema")
|
||||
|
||||
if not hasattr(__class__, "request_metric"):
|
||||
__class__.request_metric = Histogram(
|
||||
'request_latency', 'Request latency (seconds)'
|
||||
)
|
||||
|
||||
if not hasattr(__class__, "output_metric"):
|
||||
__class__.output_metric = Counter(
|
||||
'output_count', 'Output items created'
|
||||
)
|
||||
|
||||
if not hasattr(__class__, "pubsub_metric"):
|
||||
__class__.pubsub_metric = Info(
|
||||
'pubsub', 'Pub/sub configuration'
|
||||
)
|
||||
|
||||
if not hasattr(__class__, "processing_metric"):
|
||||
__class__.processing_metric = Counter(
|
||||
'processing_count', 'Processing count', ["status"]
|
||||
)
|
||||
|
||||
__class__.pubsub_metric.info({
|
||||
"input_queue": input_queue,
|
||||
"output_queue": output_queue,
|
||||
"subscriber": subscriber,
|
||||
"input_schema": input_schema.__name__,
|
||||
"output_schema": output_schema.__name__,
|
||||
})
|
||||
|
||||
super(ConsumerProducer, self).__init__(**params)
|
||||
|
||||
if input_schema == None:
|
||||
raise RuntimeError("input_schema must be specified")
|
||||
|
||||
if output_schema == None:
|
||||
raise RuntimeError("output_schema must be specified")
|
||||
|
||||
self.consumer = self.client.subscribe(
|
||||
input_queue, subscriber,
|
||||
schema=JsonSchema(input_schema),
|
||||
)
|
||||
|
||||
self.producer = self.client.create_producer(
|
||||
topic=output_queue,
|
||||
schema=JsonSchema(output_schema),
|
||||
)
|
||||
|
||||
def run(self):
|
||||
|
||||
while True:
|
||||
|
||||
msg = self.consumer.receive()
|
||||
|
||||
try:
|
||||
|
||||
with __class__.request_metric.time():
|
||||
resp = self.handle(msg)
|
||||
|
||||
# Acknowledge successful processing of the message
|
||||
self.consumer.acknowledge(msg)
|
||||
|
||||
__class__.processing_metric.labels(status="success").inc()
|
||||
|
||||
except Exception as e:
|
||||
|
||||
print("Exception:", e, flush=True)
|
||||
|
||||
# Message failed to be processed
|
||||
self.consumer.negative_acknowledge(msg)
|
||||
|
||||
__class__.processing_metric.labels(status="error").inc()
|
||||
|
||||
def send(self, msg, properties={}):
|
||||
self.producer.send(msg, properties)
|
||||
__class__.output_metric.inc()
|
||||
|
||||
@staticmethod
|
||||
def add_args(
|
||||
parser, default_input_queue, default_subscriber,
|
||||
default_output_queue,
|
||||
):
|
||||
|
||||
BaseProcessor.add_args(parser)
|
||||
|
||||
parser.add_argument(
|
||||
'-i', '--input-queue',
|
||||
default=default_input_queue,
|
||||
help=f'Input queue (default: {default_input_queue})'
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
'-s', '--subscriber',
|
||||
default=default_subscriber,
|
||||
help=f'Queue subscriber name (default: {default_subscriber})'
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
'-o', '--output-queue',
|
||||
default=default_output_queue,
|
||||
help=f'Output queue (default: {default_output_queue})'
|
||||
)
|
||||
|
||||
class Producer(BaseProcessor):
|
||||
|
||||
def __init__(self, **params):
|
||||
|
||||
output_queue = params.get("output_queue")
|
||||
output_schema = params.get("output_schema")
|
||||
|
||||
if not hasattr(__class__, "output_metric"):
|
||||
__class__.output_metric = Counter(
|
||||
'output_count', 'Output items created'
|
||||
)
|
||||
|
||||
if not hasattr(__class__, "pubsub_metric"):
|
||||
__class__.pubsub_metric = Info(
|
||||
'pubsub', 'Pub/sub configuration'
|
||||
)
|
||||
|
||||
__class__.pubsub_metric.info({
|
||||
"output_queue": output_queue,
|
||||
"output_schema": output_schema.__name__,
|
||||
})
|
||||
|
||||
super(Producer, self).__init__(**params)
|
||||
|
||||
if output_schema == None:
|
||||
raise RuntimeError("output_schema must be specified")
|
||||
|
||||
self.producer = self.client.create_producer(
|
||||
topic=output_queue,
|
||||
schema=JsonSchema(output_schema),
|
||||
)
|
||||
|
||||
def send(self, msg, properties={}):
|
||||
self.producer.send(msg, properties)
|
||||
__class__.output_metric.inc()
|
||||
|
||||
@staticmethod
|
||||
def add_args(
|
||||
parser, default_input_queue, default_subscriber,
|
||||
default_output_queue,
|
||||
):
|
||||
|
||||
BaseProcessor.add_args(parser)
|
||||
|
||||
parser.add_argument(
|
||||
'-o', '--output-queue',
|
||||
default=default_output_queue,
|
||||
help=f'Output queue (default: {default_output_queue})'
|
||||
)
|
||||
|
|
@ -1,360 +0,0 @@
|
|||
|
||||
import os
|
||||
import argparse
|
||||
import pulsar
|
||||
import _pulsar
|
||||
import time
|
||||
from pulsar.schema import JsonSchema
|
||||
from prometheus_client import start_http_server, Histogram, Info, Counter
|
||||
|
||||
from .. log_level import LogLevel
|
||||
|
||||
class BaseProcessor:
|
||||
|
||||
default_pulsar_host = os.getenv("PULSAR_HOST", 'pulsar://pulsar:6650')
|
||||
|
||||
def __init__(self, **params):
|
||||
|
||||
self.client = None
|
||||
|
||||
if not hasattr(__class__, "params_metric"):
|
||||
__class__.params_metric = Info(
|
||||
'params', 'Parameters configuration'
|
||||
)
|
||||
|
||||
# FIXME: Maybe outputs information it should not
|
||||
__class__.params_metric.info({
|
||||
k: str(params[k])
|
||||
for k in params
|
||||
})
|
||||
|
||||
pulsar_host = params.get("pulsar_host", self.default_pulsar_host)
|
||||
log_level = params.get("log_level", LogLevel.INFO)
|
||||
|
||||
self.pulsar_host = pulsar_host
|
||||
|
||||
self.client = pulsar.Client(
|
||||
pulsar_host,
|
||||
logger=pulsar.ConsoleLogger(log_level.to_pulsar())
|
||||
)
|
||||
|
||||
def __del__(self):
|
||||
|
||||
if self.client:
|
||||
self.client.close()
|
||||
|
||||
@staticmethod
|
||||
def add_args(parser):
|
||||
|
||||
parser.add_argument(
|
||||
'-p', '--pulsar-host',
|
||||
default=__class__.default_pulsar_host,
|
||||
help=f'Pulsar host (default: {__class__.default_pulsar_host})',
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
'-l', '--log-level',
|
||||
type=LogLevel,
|
||||
default=LogLevel.INFO,
|
||||
choices=list(LogLevel),
|
||||
help=f'Output queue (default: info)'
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
'-M', '--metrics-enabled',
|
||||
type=bool,
|
||||
default=True,
|
||||
help=f'Pulsar host (default: true)',
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
'-P', '--metrics-port',
|
||||
type=int,
|
||||
default=8000,
|
||||
help=f'Pulsar host (default: 8000)',
|
||||
)
|
||||
|
||||
def run(self):
|
||||
raise RuntimeError("Something should have implemented the run method")
|
||||
|
||||
@classmethod
|
||||
def start(cls, prog, doc):
|
||||
|
||||
while True:
|
||||
|
||||
parser = argparse.ArgumentParser(
|
||||
prog=prog,
|
||||
description=doc
|
||||
)
|
||||
|
||||
cls.add_args(parser)
|
||||
|
||||
args = parser.parse_args()
|
||||
args = vars(args)
|
||||
|
||||
if args["metrics_enabled"]:
|
||||
start_http_server(args["metrics_port"])
|
||||
|
||||
try:
|
||||
|
||||
p = cls(**args)
|
||||
p.run()
|
||||
|
||||
except KeyboardInterrupt:
|
||||
print("Keyboard interrupt.")
|
||||
return
|
||||
|
||||
except _pulsar.Interrupted:
|
||||
print("Pulsar Interrupted.")
|
||||
return
|
||||
|
||||
except Exception as e:
|
||||
|
||||
print(type(e))
|
||||
|
||||
print("Exception:", e, flush=True)
|
||||
print("Will retry...", flush=True)
|
||||
|
||||
time.sleep(10)
|
||||
|
||||
class Consumer(BaseProcessor):
    """Pulsar consumer processor.

    Subscribes to an input queue and dispatches every received message to
    self.handle() (implemented by subclasses), acknowledging on success and
    negatively acknowledging on failure so Pulsar redelivers the message.
    """

    def __init__(self, **params):
        """Connect and subscribe.

        :param input_queue: Pulsar topic to consume from
        :param subscriber: subscription name
        :param input_schema: pulsar Record class for incoming messages
        :raises RuntimeError: if input_schema is not supplied
        """

        super(Consumer, self).__init__(**params)

        input_queue = params.get("input_queue")
        subscriber = params.get("subscriber")
        input_schema = params.get("input_schema")

        if input_schema is None:
            raise RuntimeError("input_schema must be specified")

        # Metrics live on the class so they are registered with the
        # Prometheus client only once per process, not per instance.
        if not hasattr(__class__, "request_metric"):
            __class__.request_metric = Histogram(
                'request_latency', 'Request latency (seconds)'
            )

        if not hasattr(__class__, "pubsub_metric"):
            __class__.pubsub_metric = Info(
                'pubsub', 'Pub/sub configuration'
            )

        if not hasattr(__class__, "processing_metric"):
            __class__.processing_metric = Counter(
                'processing_count', 'Processing count', ["status"]
            )

        __class__.pubsub_metric.info({
            "input_queue": input_queue,
            "subscriber": subscriber,
            "input_schema": input_schema.__name__,
        })

        self.consumer = self.client.subscribe(
            input_queue, subscriber,
            schema=JsonSchema(input_schema),
        )

    def run(self):
        """Main loop: receive, time the handler, ack; nack and count errors."""

        while True:

            msg = self.consumer.receive()

            try:

                with __class__.request_metric.time():
                    self.handle(msg)

                # Acknowledge successful processing of the message
                self.consumer.acknowledge(msg)

                __class__.processing_metric.labels(status="success").inc()

            except Exception as e:

                print("Exception:", e, flush=True)

                # Message failed to be processed; negative-ack so Pulsar
                # redelivers it later
                self.consumer.negative_acknowledge(msg)

                __class__.processing_metric.labels(status="error").inc()

    @staticmethod
    def add_args(parser, default_input_queue, default_subscriber):
        """Add consumer command-line arguments to the parser.

        :param default_input_queue: default for --input-queue
        :param default_subscriber: default for --subscriber
        """

        BaseProcessor.add_args(parser)

        parser.add_argument(
            '-i', '--input-queue',
            default=default_input_queue,
            help=f'Input queue (default: {default_input_queue})'
        )

        parser.add_argument(
            '-s', '--subscriber',
            default=default_subscriber,
            help=f'Queue subscriber name (default: {default_subscriber})'
        )
|
||||
|
||||
class ConsumerProducer(BaseProcessor):
    """Pulsar consumer/producer processor.

    Consumes from an input queue, dispatches to self.handle() (implemented
    by subclasses), and offers send() to publish results on an output queue.
    """

    def __init__(self, **params):
        """Connect, subscribe, and create the output producer.

        :param input_queue: Pulsar topic to consume from
        :param output_queue: Pulsar topic to publish to
        :param subscriber: subscription name
        :param input_schema: pulsar Record class for incoming messages
        :param output_schema: pulsar Record class for outgoing messages
        :raises RuntimeError: if either schema is not supplied
        """

        input_queue = params.get("input_queue")
        output_queue = params.get("output_queue")
        subscriber = params.get("subscriber")
        input_schema = params.get("input_schema")
        output_schema = params.get("output_schema")

        # Validate schemas before any use: accessing .__name__ below on a
        # missing schema would raise a confusing AttributeError instead of
        # this explicit error.
        if input_schema is None:
            raise RuntimeError("input_schema must be specified")

        if output_schema is None:
            raise RuntimeError("output_schema must be specified")

        # Metrics live on the class so they are registered with the
        # Prometheus client only once per process, not per instance.
        if not hasattr(__class__, "request_metric"):
            __class__.request_metric = Histogram(
                'request_latency', 'Request latency (seconds)'
            )

        if not hasattr(__class__, "output_metric"):
            __class__.output_metric = Counter(
                'output_count', 'Output items created'
            )

        if not hasattr(__class__, "pubsub_metric"):
            __class__.pubsub_metric = Info(
                'pubsub', 'Pub/sub configuration'
            )

        if not hasattr(__class__, "processing_metric"):
            __class__.processing_metric = Counter(
                'processing_count', 'Processing count', ["status"]
            )

        __class__.pubsub_metric.info({
            "input_queue": input_queue,
            "output_queue": output_queue,
            "subscriber": subscriber,
            "input_schema": input_schema.__name__,
            "output_schema": output_schema.__name__,
        })

        super(ConsumerProducer, self).__init__(**params)

        self.consumer = self.client.subscribe(
            input_queue, subscriber,
            schema=JsonSchema(input_schema),
        )

        self.producer = self.client.create_producer(
            topic=output_queue,
            schema=JsonSchema(output_schema),
        )

    def run(self):
        """Main loop: receive, time the handler, ack; nack and count errors."""

        while True:

            msg = self.consumer.receive()

            try:

                with __class__.request_metric.time():
                    resp = self.handle(msg)

                # Acknowledge successful processing of the message
                self.consumer.acknowledge(msg)

                __class__.processing_metric.labels(status="success").inc()

            except Exception as e:

                print("Exception:", e, flush=True)

                # Message failed to be processed; negative-ack so Pulsar
                # redelivers it later
                self.consumer.negative_acknowledge(msg)

                __class__.processing_metric.labels(status="error").inc()

    def send(self, msg, properties=None):
        """Publish msg with optional message properties; bumps output count.

        :param msg: record matching output_schema
        :param properties: optional dict of Pulsar message properties
        """
        # None default avoids a shared mutable default argument
        if properties is None:
            properties = {}
        self.producer.send(msg, properties)
        __class__.output_metric.inc()

    @staticmethod
    def add_args(
        parser, default_input_queue, default_subscriber,
        default_output_queue,
    ):
        """Add consumer/producer command-line arguments to the parser."""

        BaseProcessor.add_args(parser)

        parser.add_argument(
            '-i', '--input-queue',
            default=default_input_queue,
            help=f'Input queue (default: {default_input_queue})'
        )

        parser.add_argument(
            '-s', '--subscriber',
            default=default_subscriber,
            help=f'Queue subscriber name (default: {default_subscriber})'
        )

        parser.add_argument(
            '-o', '--output-queue',
            default=default_output_queue,
            help=f'Output queue (default: {default_output_queue})'
        )
|
||||
|
||||
class Producer(BaseProcessor):
    """Pulsar producer processor.

    Creates a producer on an output queue and exposes send() to publish
    messages, counting each published item.
    """

    def __init__(self, **params):
        """Connect and create the output producer.

        :param output_queue: Pulsar topic to publish to
        :param output_schema: pulsar Record class for outgoing messages
        :raises RuntimeError: if output_schema is not supplied
        """

        output_queue = params.get("output_queue")
        output_schema = params.get("output_schema")

        # Validate before use: accessing output_schema.__name__ below on a
        # missing schema would raise a confusing AttributeError instead of
        # this explicit error.
        if output_schema is None:
            raise RuntimeError("output_schema must be specified")

        # Metrics live on the class so they are registered with the
        # Prometheus client only once per process, not per instance.
        if not hasattr(__class__, "output_metric"):
            __class__.output_metric = Counter(
                'output_count', 'Output items created'
            )

        if not hasattr(__class__, "pubsub_metric"):
            __class__.pubsub_metric = Info(
                'pubsub', 'Pub/sub configuration'
            )

        __class__.pubsub_metric.info({
            "output_queue": output_queue,
            "output_schema": output_schema.__name__,
        })

        super(Producer, self).__init__(**params)

        self.producer = self.client.create_producer(
            topic=output_queue,
            schema=JsonSchema(output_schema),
        )

    def send(self, msg, properties=None):
        """Publish msg with optional message properties; bumps output count."""
        # None default avoids a shared mutable default argument
        if properties is None:
            properties = {}
        self.producer.send(msg, properties)
        __class__.output_metric.inc()

    @staticmethod
    def add_args(
        parser, default_input_queue, default_subscriber,
        default_output_queue,
    ):
        """Add producer command-line arguments to the parser.

        default_input_queue and default_subscriber are unused here but kept
        for signature compatibility with the other processor base classes.
        """

        BaseProcessor.add_args(parser)

        parser.add_argument(
            '-o', '--output-queue',
            default=default_output_queue,
            help=f'Output queue (default: {default_output_queue})'
        )
|
||||
55
trustgraph/base/producer.py
Normal file
55
trustgraph/base/producer.py
Normal file
|
|
@ -0,0 +1,55 @@
|
|||
|
||||
from pulsar.schema import JsonSchema
|
||||
from prometheus_client import Info, Counter
|
||||
|
||||
from . base_processor import BaseProcessor
|
||||
|
||||
class Producer(BaseProcessor):
    """Pulsar producer processor.

    Creates a producer on an output queue and exposes send() to publish
    messages, counting each published item.
    """

    def __init__(self, **params):
        """Connect and create the output producer.

        :param output_queue: Pulsar topic to publish to
        :param output_schema: pulsar Record class for outgoing messages
        :raises RuntimeError: if output_schema is not supplied
        """

        output_queue = params.get("output_queue")
        output_schema = params.get("output_schema")

        # Validate before use: accessing output_schema.__name__ below on a
        # missing schema would raise a confusing AttributeError instead of
        # this explicit error.
        if output_schema is None:
            raise RuntimeError("output_schema must be specified")

        # Metrics live on the class so they are registered with the
        # Prometheus client only once per process, not per instance.
        if not hasattr(__class__, "output_metric"):
            __class__.output_metric = Counter(
                'output_count', 'Output items created'
            )

        if not hasattr(__class__, "pubsub_metric"):
            __class__.pubsub_metric = Info(
                'pubsub', 'Pub/sub configuration'
            )

        __class__.pubsub_metric.info({
            "output_queue": output_queue,
            "output_schema": output_schema.__name__,
        })

        super(Producer, self).__init__(**params)

        self.producer = self.client.create_producer(
            topic=output_queue,
            schema=JsonSchema(output_schema),
        )

    def send(self, msg, properties=None):
        """Publish msg with optional message properties; bumps output count."""
        # None default avoids a shared mutable default argument
        if properties is None:
            properties = {}
        self.producer.send(msg, properties)
        __class__.output_metric.inc()

    @staticmethod
    def add_args(
        parser, default_input_queue, default_subscriber,
        default_output_queue,
    ):
        """Add producer command-line arguments to the parser.

        default_input_queue and default_subscriber are unused here but kept
        for signature compatibility with the other processor base classes.
        """

        BaseProcessor.add_args(parser)

        parser.add_argument(
            '-o', '--output-queue',
            default=default_output_queue,
            help=f'Output queue (default: {default_output_queue})'
        )
|
||||
|
|
@ -8,12 +8,15 @@ from langchain_text_splitters import RecursiveCharacterTextSplitter
|
|||
|
||||
|
||||
from ... schema import TextDocument, Chunk, Source
|
||||
from ... schema import text_ingest_queue, chunk_ingest_queue
|
||||
from ... log_level import LogLevel
|
||||
from ... base import ConsumerProducer
|
||||
|
||||
default_input_queue = 'text-doc-load'
|
||||
default_output_queue = 'chunk-load'
|
||||
default_subscriber = 'chunker-recursive'
|
||||
module = ".".join(__name__.split(".")[1:-1])
|
||||
|
||||
default_input_queue = text_ingest_queue
|
||||
default_output_queue = chunk_ingest_queue
|
||||
default_subscriber = module
|
||||
|
||||
class Processor(ConsumerProducer):
|
||||
|
||||
|
|
@ -92,5 +95,5 @@ class Processor(ConsumerProducer):
|
|||
|
||||
def run():
|
||||
|
||||
Processor.start('chunker', __doc__)
|
||||
Processor.start(module, __doc__)
|
||||
|
||||
|
|
@ -9,12 +9,15 @@ import base64
|
|||
from langchain_community.document_loaders import PyPDFLoader
|
||||
|
||||
from ... schema import Document, TextDocument, Source
|
||||
from ... schema import document_ingest_queue, text_ingest_queue
|
||||
from ... log_level import LogLevel
|
||||
from ... base import ConsumerProducer
|
||||
|
||||
default_input_queue = 'document-load'
|
||||
default_output_queue = 'text-doc-load'
|
||||
default_subscriber = 'pdf-decoder'
|
||||
module = ".".join(__name__.split(".")[1:-1])
|
||||
|
||||
default_input_queue = document_ingest_queue
|
||||
default_output_queue = text_ingest_queue
|
||||
default_subscriber = module
|
||||
|
||||
class Processor(ConsumerProducer):
|
||||
|
||||
|
|
@ -80,5 +83,5 @@ class Processor(ConsumerProducer):
|
|||
|
||||
def run():
|
||||
|
||||
Processor.start("pdf-decoder", __doc__)
|
||||
Processor.start(module, __doc__)
|
||||
|
||||
|
|
@ -7,12 +7,15 @@ Input is text, output is embeddings vector.
|
|||
from langchain_huggingface import HuggingFaceEmbeddings
|
||||
|
||||
from ... schema import EmbeddingsRequest, EmbeddingsResponse
|
||||
from ... schema import embeddings_request_queue, embeddings_response_queue
|
||||
from ... log_level import LogLevel
|
||||
from ... base import ConsumerProducer
|
||||
|
||||
default_input_queue = 'embeddings'
|
||||
default_output_queue = 'embeddings-response'
|
||||
default_subscriber = 'embeddings-hf'
|
||||
module = ".".join(__name__.split(".")[1:-1])
|
||||
|
||||
default_input_queue = embeddings_request_queue
|
||||
default_output_queue = embeddings_response_queue
|
||||
default_subscriber = module
|
||||
default_model="all-MiniLM-L6-v2"
|
||||
|
||||
class Processor(ConsumerProducer):
|
||||
|
|
@ -70,5 +73,5 @@ class Processor(ConsumerProducer):
|
|||
|
||||
def run():
|
||||
|
||||
Processor.start("embeddings-hf", __doc__)
|
||||
Processor.start(module, __doc__)
|
||||
|
||||
|
|
|
|||
|
|
@ -6,12 +6,15 @@ Input is text, output is embeddings vector.
|
|||
from langchain_community.embeddings import OllamaEmbeddings
|
||||
|
||||
from ... schema import EmbeddingsRequest, EmbeddingsResponse
|
||||
from ... schema import embeddings_request_queue, embeddings_response_queue
|
||||
from ... log_level import LogLevel
|
||||
from ... base import ConsumerProducer
|
||||
|
||||
default_input_queue = 'embeddings'
|
||||
default_output_queue = 'embeddings-response'
|
||||
default_subscriber = 'embeddings-ollama'
|
||||
module = ".".join(__name__.split(".")[1:-1])
|
||||
|
||||
default_input_queue = embeddings_request_queue
|
||||
default_output_queue = embeddings_response_queue
|
||||
default_subscriber = module
|
||||
default_model="mxbai-embed-large"
|
||||
default_ollama = 'http://localhost:11434'
|
||||
|
||||
|
|
@ -77,5 +80,5 @@ class Processor(ConsumerProducer):
|
|||
|
||||
def run():
|
||||
|
||||
Processor.start('embeddings-ollama', __doc__)
|
||||
Processor.start(module, __doc__)
|
||||
|
||||
|
|
|
|||
|
|
@ -4,14 +4,17 @@ Vectorizer, calls the embeddings service to get embeddings for a chunk.
|
|||
Input is text chunk, output is chunk and vectors.
|
||||
"""
|
||||
|
||||
from ... schema import Chunk, VectorsChunk
|
||||
from ... schema import Chunk, ChunkEmbeddings
|
||||
from ... schema import chunk_ingest_queue, chunk_embeddings_ingest_queue
|
||||
from ... embeddings_client import EmbeddingsClient
|
||||
from ... log_level import LogLevel
|
||||
from ... base import ConsumerProducer
|
||||
|
||||
default_input_queue = 'chunk-load'
|
||||
default_output_queue = 'vectors-chunk-load'
|
||||
default_subscriber = 'embeddings-vectorizer'
|
||||
module = ".".join(__name__.split(".")[1:-1])
|
||||
|
||||
default_input_queue = chunk_ingest_queue
|
||||
default_output_queue = chunk_embeddings_ingest_queue
|
||||
default_subscriber = module
|
||||
|
||||
class Processor(ConsumerProducer):
|
||||
|
||||
|
|
@ -27,7 +30,7 @@ class Processor(ConsumerProducer):
|
|||
"output_queue": output_queue,
|
||||
"subscriber": subscriber,
|
||||
"input_schema": Chunk,
|
||||
"output_schema": VectorsChunk,
|
||||
"output_schema": ChunkEmbeddings,
|
||||
}
|
||||
)
|
||||
|
||||
|
|
@ -35,7 +38,7 @@ class Processor(ConsumerProducer):
|
|||
|
||||
def emit(self, source, chunk, vectors):
|
||||
|
||||
r = VectorsChunk(source=source, chunk=chunk, vectors=vectors)
|
||||
r = ChunkEmbeddings(source=source, chunk=chunk, vectors=vectors)
|
||||
self.producer.send(r)
|
||||
|
||||
def handle(self, msg):
|
||||
|
|
@ -70,5 +73,5 @@ class Processor(ConsumerProducer):
|
|||
|
||||
def run():
|
||||
|
||||
Processor.start("embeddings-vectorize", __doc__)
|
||||
Processor.start(module, __doc__)
|
||||
|
||||
|
|
|
|||
|
|
@ -3,7 +3,8 @@
|
|||
import pulsar
|
||||
import _pulsar
|
||||
from pulsar.schema import JsonSchema
|
||||
from trustgraph.schema import EmbeddingsRequest, EmbeddingsResponse
|
||||
from . schema import EmbeddingsRequest, EmbeddingsResponse
|
||||
from . schema import embeddings_request_queue, embeddings_response_queue
|
||||
import hashlib
|
||||
import uuid
|
||||
|
||||
|
|
@ -31,13 +32,13 @@ class EmbeddingsClient:
|
|||
)
|
||||
|
||||
self.producer = self.client.create_producer(
|
||||
topic='embeddings',
|
||||
topic=embeddings_request_queue,
|
||||
schema=JsonSchema(EmbeddingsRequest),
|
||||
chunking_enabled=True,
|
||||
)
|
||||
|
||||
self.consumer = self.client.subscribe(
|
||||
'embeddings-response', client_id,
|
||||
embeddings_response_queue, client_id,
|
||||
schema=JsonSchema(EmbeddingsResponse),
|
||||
)
|
||||
|
||||
|
|
|
|||
|
|
@ -3,7 +3,9 @@
|
|||
import pulsar
|
||||
import _pulsar
|
||||
from pulsar.schema import JsonSchema
|
||||
from trustgraph.schema import GraphRagQuery, GraphRagResponse
|
||||
from . schema import GraphRagQuery, GraphRagResponse
|
||||
from . schema import graph_rag_request_queue, graph_rag_response_queue
|
||||
|
||||
import hashlib
|
||||
import uuid
|
||||
|
||||
|
|
@ -29,13 +31,13 @@ class GraphRagClient:
|
|||
)
|
||||
|
||||
self.producer = self.client.create_producer(
|
||||
topic='graph-rag-query',
|
||||
topic=graph_rag_request_queue,
|
||||
schema=JsonSchema(GraphRagQuery),
|
||||
chunking_enabled=True,
|
||||
)
|
||||
|
||||
self.consumer = self.client.subscribe(
|
||||
'graph-rag-response', client_id,
|
||||
graph_rag_response_queue, client_id,
|
||||
schema=JsonSchema(GraphRagResponse),
|
||||
)
|
||||
|
||||
|
|
|
|||
|
|
@ -1,13 +1,14 @@
|
|||
|
||||
"""
|
||||
Simple decoder, accepts vector+text chunks input, applies entity analysis to
|
||||
Simple decoder, accepts embeddings+text chunks input, applies entity analysis to
|
||||
get entity definitions which are output as graph edges.
|
||||
"""
|
||||
|
||||
import urllib.parse
|
||||
import json
|
||||
|
||||
from ... schema import VectorsChunk, Triple, Source, Value
|
||||
from ... schema import ChunkEmbeddings, Triple, Source, Value
|
||||
from ... schema import chunk_embeddings_ingest_queue, triples_store_queue
|
||||
from ... log_level import LogLevel
|
||||
from ... llm_client import LlmClient
|
||||
from ... prompts import to_definitions
|
||||
|
|
@ -16,9 +17,11 @@ from ... base import ConsumerProducer
|
|||
|
||||
DEFINITION_VALUE = Value(value=DEFINITION, is_uri=True)
|
||||
|
||||
default_input_queue = 'vectors-chunk-load'
|
||||
default_output_queue = 'graph-load'
|
||||
default_subscriber = 'kg-extract-definitions'
|
||||
module = ".".join(__name__.split(".")[1:-1])
|
||||
|
||||
default_input_queue = chunk_embeddings_ingest_queue
|
||||
default_output_queue = triples_store_queue
|
||||
default_subscriber = module
|
||||
|
||||
class Processor(ConsumerProducer):
|
||||
|
||||
|
|
@ -33,7 +36,7 @@ class Processor(ConsumerProducer):
|
|||
"input_queue": input_queue,
|
||||
"output_queue": output_queue,
|
||||
"subscriber": subscriber,
|
||||
"input_schema": VectorsChunk,
|
||||
"input_schema": ChunkEmbeddings,
|
||||
"output_schema": Triple,
|
||||
}
|
||||
)
|
||||
|
|
@ -101,5 +104,5 @@ class Processor(ConsumerProducer):
|
|||
|
||||
def run():
|
||||
|
||||
Processor.start("kg-extract-definitions", __doc__)
|
||||
Processor.start(module, __doc__)
|
||||
|
||||
|
|
|
|||
|
|
@ -10,7 +10,8 @@ import json
|
|||
import os
|
||||
from pulsar.schema import JsonSchema
|
||||
|
||||
from ... schema import VectorsChunk, Triple, VectorsAssociation, Source, Value
|
||||
from ... schema import ChunkEmbeddings, Triple, GraphEmbeddings, Source, Value
|
||||
from ... schema import chunk_embeddings_ingest_queue, triples_store_queue, graph_embeddings_store_queue
|
||||
from ... log_level import LogLevel
|
||||
from ... llm_client import LlmClient
|
||||
from ... prompts import to_relationships
|
||||
|
|
@ -19,10 +20,12 @@ from ... base import ConsumerProducer
|
|||
|
||||
RDF_LABEL_VALUE = Value(value=RDF_LABEL, is_uri=True)
|
||||
|
||||
default_input_queue = 'vectors-chunk-load'
|
||||
default_output_queue = 'graph-load'
|
||||
default_subscriber = 'kg-extract-relationships'
|
||||
default_vector_queue='vectors-load'
|
||||
module = ".".join(__name__.split(".")[1:-1])
|
||||
|
||||
default_input_queue = chunk_embeddings_ingest_queue
|
||||
default_output_queue = triples_store_queue
|
||||
default_vector_queue = graph_embeddings_store_queue
|
||||
default_subscriber = module
|
||||
|
||||
class Processor(ConsumerProducer):
|
||||
|
||||
|
|
@ -38,14 +41,14 @@ class Processor(ConsumerProducer):
|
|||
"input_queue": input_queue,
|
||||
"output_queue": output_queue,
|
||||
"subscriber": subscriber,
|
||||
"input_schema": VectorsChunk,
|
||||
"input_schema": ChunkEmbeddings,
|
||||
"output_schema": Triple,
|
||||
}
|
||||
)
|
||||
|
||||
self.vec_prod = self.client.create_producer(
|
||||
topic=vector_queue,
|
||||
schema=JsonSchema(VectorsAssociation),
|
||||
schema=JsonSchema(GraphEmbeddings),
|
||||
)
|
||||
|
||||
__class__.pubsub_metric.info({
|
||||
|
|
@ -53,9 +56,9 @@ class Processor(ConsumerProducer):
|
|||
"output_queue": output_queue,
|
||||
"vector_queue": vector_queue,
|
||||
"subscriber": subscriber,
|
||||
"input_schema": VectorsChunk.__name__,
|
||||
"input_schema": ChunkEmbeddings.__name__,
|
||||
"output_schema": Triple.__name__,
|
||||
"vector_schema": VectorsAssociation.__name__,
|
||||
"vector_schema": GraphEmbeddings.__name__,
|
||||
})
|
||||
|
||||
self.llm = LlmClient(pulsar_host=self.pulsar_host)
|
||||
|
|
@ -84,7 +87,7 @@ class Processor(ConsumerProducer):
|
|||
|
||||
def emit_vec(self, ent, vec):
|
||||
|
||||
r = VectorsAssociation(entity=ent, vectors=vec)
|
||||
r = GraphEmbeddings(entity=ent, vectors=vec)
|
||||
self.vec_prod.send(r)
|
||||
|
||||
def handle(self, msg):
|
||||
|
|
@ -171,5 +174,5 @@ class Processor(ConsumerProducer):
|
|||
|
||||
def run():
|
||||
|
||||
Processor.start("kg-extract-relationships", __doc__)
|
||||
Processor.start(module, __doc__)
|
||||
|
||||
|
|
|
|||
|
|
@ -3,10 +3,13 @@
|
|||
import pulsar
|
||||
import _pulsar
|
||||
from pulsar.schema import JsonSchema
|
||||
from trustgraph.schema import TextCompletionRequest, TextCompletionResponse
|
||||
import hashlib
|
||||
import uuid
|
||||
|
||||
from . schema import TextCompletionRequest, TextCompletionResponse
|
||||
from . schema import text_completion_request_queue
|
||||
from . schema import text_completion_response_queue
|
||||
|
||||
# Ugly
|
||||
ERROR=_pulsar.LoggerLevel.Error
|
||||
WARN=_pulsar.LoggerLevel.Warn
|
||||
|
|
@ -29,13 +32,13 @@ class LlmClient:
|
|||
)
|
||||
|
||||
self.producer = self.client.create_producer(
|
||||
topic='llm-complete-text',
|
||||
topic=text_completion_request_queue,
|
||||
schema=JsonSchema(TextCompletionRequest),
|
||||
chunking_enabled=True,
|
||||
)
|
||||
|
||||
self.consumer = self.client.subscribe(
|
||||
'llm-complete-text-response', client_id,
|
||||
text_completion_response_queue, client_id,
|
||||
schema=JsonSchema(TextCompletionResponse),
|
||||
)
|
||||
|
||||
|
|
|
|||
|
|
@ -7,13 +7,17 @@ serverless endpoint service. Input is prompt, output is response.
|
|||
import requests
|
||||
import json
|
||||
|
||||
from ... schema import TextCompletionRequest, TextCompletionResponse
|
||||
from ... log_level import LogLevel
|
||||
from ... base import ConsumerProducer
|
||||
from .... schema import TextCompletionRequest, TextCompletionResponse
|
||||
from .... schema import text_completion_request_queue
|
||||
from .... schema import text_completion_response_queue
|
||||
from .... log_level import LogLevel
|
||||
from .... base import ConsumerProducer
|
||||
|
||||
default_input_queue = 'llm-complete-text'
|
||||
default_output_queue = 'llm-complete-text-response'
|
||||
default_subscriber = 'llm-azure-text'
|
||||
module = ".".join(__name__.split(".")[1:-1])
|
||||
|
||||
default_input_queue = text_completion_request_queue
|
||||
default_output_queue = text_completion_response_queue
|
||||
default_subscriber = module
|
||||
|
||||
class Processor(ConsumerProducer):
|
||||
|
||||
|
|
@ -121,4 +125,4 @@ class Processor(ConsumerProducer):
|
|||
|
||||
def run():
|
||||
|
||||
Processor.start("llm-azure-text", __doc__)
|
||||
Processor.start(module, __doc__)
|
||||
|
|
@ -6,13 +6,17 @@ Input is prompt, output is response.
|
|||
|
||||
import anthropic
|
||||
|
||||
from ... schema import TextCompletionRequest, TextCompletionResponse
|
||||
from ... log_level import LogLevel
|
||||
from ... base import ConsumerProducer
|
||||
from .... schema import TextCompletionRequest, TextCompletionResponse
|
||||
from .... schema import text_completion_request_queue
|
||||
from .... schema import text_completion_response_queue
|
||||
from .... log_level import LogLevel
|
||||
from .... base import ConsumerProducer
|
||||
|
||||
default_input_queue = 'llm-complete-text'
|
||||
default_output_queue = 'llm-complete-text-response'
|
||||
default_subscriber = 'llm-claude-text'
|
||||
module = ".".join(__name__.split(".")[1:-1])
|
||||
|
||||
default_input_queue = text_completion_request_queue
|
||||
default_output_queue = text_completion_response_queue
|
||||
default_subscriber = module
|
||||
default_model = 'claude-3-5-sonnet-20240620'
|
||||
|
||||
class Processor(ConsumerProducer):
|
||||
|
|
@ -101,6 +105,6 @@ class Processor(ConsumerProducer):
|
|||
|
||||
def run():
|
||||
|
||||
Processor.start("llm-claude-text", __doc__)
|
||||
Processor.start(module, __doc__)
|
||||
|
||||
|
||||
|
|
@ -7,13 +7,17 @@ Input is prompt, output is response.
|
|||
from langchain_community.llms import Ollama
|
||||
from prometheus_client import Histogram, Info, Counter
|
||||
|
||||
from ... schema import TextCompletionRequest, TextCompletionResponse
|
||||
from ... log_level import LogLevel
|
||||
from ... base import ConsumerProducer
|
||||
from .... schema import TextCompletionRequest, TextCompletionResponse
|
||||
from .... schema import text_completion_request_queue
|
||||
from .... schema import text_completion_response_queue
|
||||
from .... log_level import LogLevel
|
||||
from .... base import ConsumerProducer
|
||||
|
||||
default_input_queue = 'llm-complete-text'
|
||||
default_output_queue = 'llm-complete-text-response'
|
||||
default_subscriber = 'llm-ollama-text'
|
||||
module = ".".join(__name__.split(".")[1:-1])
|
||||
|
||||
default_input_queue = text_completion_request_queue
|
||||
default_output_queue = text_completion_response_queue
|
||||
default_subscriber = module
|
||||
default_model = 'gemma2'
|
||||
default_ollama = 'http://localhost:11434'
|
||||
|
||||
|
|
@ -93,6 +97,6 @@ class Processor(ConsumerProducer):
|
|||
|
||||
def run():
|
||||
|
||||
Processor.start("llm-ollama-text", __doc__)
|
||||
Processor.start(module, __doc__)
|
||||
|
||||
|
||||
|
|
@ -21,13 +21,17 @@ from vertexai.preview.generative_models import (
|
|||
Tool,
|
||||
)
|
||||
|
||||
from ... schema import TextCompletionRequest, TextCompletionResponse
|
||||
from ... log_level import LogLevel
|
||||
from ... base import ConsumerProducer
|
||||
from .... schema import TextCompletionRequest, TextCompletionResponse
|
||||
from .... schema import text_completion_request_queue
|
||||
from .... schema import text_completion_response_queue
|
||||
from .... log_level import LogLevel
|
||||
from .... base import ConsumerProducer
|
||||
|
||||
default_input_queue = 'llm-complete-text'
|
||||
default_output_queue = 'llm-complete-text-response'
|
||||
default_subscriber = 'llm-vertexai-text'
|
||||
module = ".".join(__name__.split(".")[1:-1])
|
||||
|
||||
default_input_queue = text_completion_request_queue
|
||||
default_output_queue = text_completion_response_queue
|
||||
default_subscriber = module
|
||||
|
||||
class Processor(ConsumerProducer):
|
||||
|
||||
|
|
@ -169,5 +173,5 @@ class Processor(ConsumerProducer):
|
|||
|
||||
def run():
|
||||
|
||||
Processor.start("llm-vertexai-text", __doc__)
|
||||
Processor.start(module, __doc__)
|
||||
|
||||
|
|
@ -5,13 +5,16 @@ Input is query, output is response.
|
|||
"""
|
||||
|
||||
from ... schema import GraphRagQuery, GraphRagResponse
|
||||
from ... schema import graph_rag_request_queue, graph_rag_response_queue
|
||||
from ... log_level import LogLevel
|
||||
from ... graph_rag import GraphRag
|
||||
from ... base import ConsumerProducer
|
||||
|
||||
default_input_queue = 'graph-rag-query'
|
||||
default_output_queue = 'graph-rag-response'
|
||||
default_subscriber = 'graph-rag'
|
||||
module = ".".join(__name__.split(".")[1:-1])
|
||||
|
||||
default_input_queue = graph_rag_request_queue
|
||||
default_output_queue = graph_rag_response_queue
|
||||
default_subscriber = module
|
||||
default_graph_hosts = 'localhost'
|
||||
default_vector_store = 'http://localhost:19530'
|
||||
|
||||
|
|
@ -112,5 +115,5 @@ class Processor(ConsumerProducer):
|
|||
|
||||
def run():
|
||||
|
||||
Processor.start('graph-rag', __doc__)
|
||||
Processor.start(module, __doc__)
|
||||
|
||||
|
|
@ -3,11 +3,7 @@ from pulsar.schema import Record, Bytes, String, Boolean, Integer, Array, Double
|
|||
|
||||
from enum import Enum
|
||||
|
||||
#class Command(Enum):
|
||||
# reindex = 1
|
||||
|
||||
#class IndexCommand(Record):
|
||||
# command = Command
|
||||
############################################################################
|
||||
|
||||
class Value(Record):
|
||||
value = String()
|
||||
|
|
@ -19,49 +15,111 @@ class Source(Record):
|
|||
id = String()
|
||||
title = String()
|
||||
|
||||
############################################################################
|
||||
|
||||
# PDF docs etc.
|
||||
class Document(Record):
|
||||
source = Source()
|
||||
data = Bytes()
|
||||
|
||||
document_ingest_queue = 'document-load'
|
||||
|
||||
############################################################################
|
||||
|
||||
# Text documents / text from PDF
|
||||
|
||||
class TextDocument(Record):
|
||||
source = Source()
|
||||
text = Bytes()
|
||||
|
||||
text_ingest_queue = 'text-document-load'
|
||||
|
||||
############################################################################
|
||||
|
||||
# Chunks of text
|
||||
|
||||
class Chunk(Record):
|
||||
source = Source()
|
||||
chunk = Bytes()
|
||||
|
||||
class VectorsChunk(Record):
|
||||
chunk_ingest_queue = 'chunk-load'
|
||||
|
||||
############################################################################
|
||||
|
||||
# Chunk embeddings are an embeddings associated with a text chunk
|
||||
|
||||
class ChunkEmbeddings(Record):
|
||||
source = Source()
|
||||
vectors = Array(Array(Double()))
|
||||
chunk = Bytes()
|
||||
|
||||
class VectorsAssociation(Record):
|
||||
chunk_embeddings_ingest_queue = 'chunk-embeddings-load'
|
||||
|
||||
############################################################################
|
||||
|
||||
# Graph embeddings are embeddings associated with a graph entity
|
||||
|
||||
class GraphEmbeddings(Record):
|
||||
source = Source()
|
||||
vectors = Array(Array(Double()))
|
||||
entity = Value()
|
||||
|
||||
graph_embeddings_store_queue = 'graph-embeddings-store'
|
||||
|
||||
############################################################################
|
||||
|
||||
# Graph triples
|
||||
|
||||
class Triple(Record):
|
||||
source = Source()
|
||||
s = Value()
|
||||
p = Value()
|
||||
o = Value()
|
||||
|
||||
triples_store_queue = 'triples-store'
|
||||
|
||||
############################################################################
|
||||
|
||||
# chunk_embeddings_store_queue = 'chunk-embeddings-store'
|
||||
|
||||
############################################################################
|
||||
|
||||
# LLM text completion
|
||||
|
||||
class TextCompletionRequest(Record):
|
||||
prompt = String()
|
||||
|
||||
class TextCompletionResponse(Record):
|
||||
response = String()
|
||||
|
||||
text_completion_request_queue = 'text-completion'
|
||||
text_completion_response_queue = 'text-completion-response'
|
||||
|
||||
############################################################################
|
||||
|
||||
# Embeddings
|
||||
|
||||
class EmbeddingsRequest(Record):
|
||||
text = String()
|
||||
|
||||
class EmbeddingsResponse(Record):
|
||||
vectors = Array(Array(Double()))
|
||||
|
||||
embeddings_request_queue = 'embeddings'
|
||||
embeddings_response_queue = 'embeddings-response'
|
||||
|
||||
############################################################################
|
||||
|
||||
# Graph RAG text retrieval
|
||||
|
||||
class GraphRagQuery(Record):
|
||||
query = String()
|
||||
|
||||
class GraphRagResponse(Record):
|
||||
response = String()
|
||||
|
||||
graph_rag_request_queue = 'graph-rag'
|
||||
graph_rag_response_queue = 'graph-rag-response'
|
||||
|
||||
############################################################################
|
||||
|
||||
|
|
|
|||
0
trustgraph/storage/graph_embeddings/__init__.py
Normal file
0
trustgraph/storage/graph_embeddings/__init__.py
Normal file
|
|
@ -3,13 +3,16 @@
|
|||
Accepts entity/vector pairs and writes them to a Milvus store.
|
||||
"""
|
||||
|
||||
from ... schema import VectorsAssociation
|
||||
from ... log_level import LogLevel
|
||||
from ... triple_vectors import TripleVectors
|
||||
from ... base import Consumer
|
||||
from .... schema import GraphEmbeddings
|
||||
from .... schema import graph_embeddings_store_queue
|
||||
from .... log_level import LogLevel
|
||||
from .... triple_vectors import TripleVectors
|
||||
from .... base import Consumer
|
||||
|
||||
default_input_queue = 'vectors-load'
|
||||
default_subscriber = 'vector-write-milvus'
|
||||
module = ".".join(__name__.split(".")[1:-1])
|
||||
|
||||
default_input_queue = graph_embeddings_store_queue
|
||||
default_subscriber = module
|
||||
default_store_uri = 'http://localhost:19530'
|
||||
|
||||
class Processor(Consumer):
|
||||
|
|
@ -24,7 +27,7 @@ class Processor(Consumer):
|
|||
**params | {
|
||||
"input_queue": input_queue,
|
||||
"subscriber": subscriber,
|
||||
"input_schema": VectorsAssociation,
|
||||
"input_schema": GraphEmbeddings,
|
||||
"store_uri": store_uri,
|
||||
}
|
||||
)
|
||||
|
|
@ -54,6 +57,5 @@ class Processor(Consumer):
|
|||
|
||||
def run():
|
||||
|
||||
Processor.start("vector-write-milvus", __doc__)
|
||||
Processor.start(module, __doc__)
|
||||
|
||||
|
||||
0
trustgraph/storage/triples/__init__.py
Normal file
0
trustgraph/storage/triples/__init__.py
Normal file
|
|
@ -9,13 +9,16 @@ import os
|
|||
import argparse
|
||||
import time
|
||||
|
||||
from ... trustgraph import TrustGraph
|
||||
from ... schema import Triple
|
||||
from ... log_level import LogLevel
|
||||
from ... base import Consumer
|
||||
from .... trustgraph import TrustGraph
|
||||
from .... schema import Triple
|
||||
from .... schema import triples_store_queue
|
||||
from .... log_level import LogLevel
|
||||
from .... base import Consumer
|
||||
|
||||
default_input_queue = 'graph-load'
|
||||
default_subscriber = 'graph-write-cassandra'
|
||||
module = ".".join(__name__.split(".")[1:-1])
|
||||
|
||||
default_input_queue = triples_store_queue
|
||||
default_subscriber = module
|
||||
default_graph_host='localhost'
|
||||
|
||||
class Processor(Consumer):
|
||||
|
|
@ -61,5 +64,5 @@ class Processor(Consumer):
|
|||
|
||||
def run():
|
||||
|
||||
Processor.start("graph-write-cassandra", __doc__)
|
||||
Processor.start(module, __doc__)
|
||||
|
||||
Loading…
Add table
Add a link
Reference in a new issue