Break out store queries (#8)

- Break out store queries, so that the system is not locked into a Milvus/Cassandra backend
- Break out prompting into a separate module, so that prompts can be tailored to other LLMs
- Use Jsonnet templates to generate the docker-compose YAML files
- Bump version to 0.6.0
This commit is contained in:
cybermaggedon 2024-08-13 17:30:59 +01:00 committed by GitHub
parent a9a0e28f49
commit a3ea1301d6
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
70 changed files with 4286 additions and 2394 deletions

View file

@ -1,6 +1,6 @@
# VERSION=$(shell git describe | sed 's/^v//')
VERSION=0.5.9
VERSION=0.6.0
DOCKER=podman
@ -30,4 +30,14 @@ clean:
rm -rf wheels/
set-version:
sed -i 's/trustgraph-flow:[0-9]*\.[0-9]*\.[0-9]*/trustgraph-flow:'${VERSION}'/' docker-compose*.yaml
# sed -i 's/trustgraph-flow:[0-9]*\.[0-9]*\.[0-9]*/trustgraph-flow:'${VERSION}'/' docker-compose*.yaml
echo '"${VERSION}"' > templates/version.jsonnet
TEMPLATES=azure bedrock claude cohere mix ollama openai vertexai
DCS=$(foreach template,${TEMPLATES},${template:%=docker-compose-%.yaml})
update-templates: set-version ${DCS}
docker-compose-%.yaml: templates/docker-compose-%.jsonnet templates/version.jsonnet
jsonnet -S ${@:docker-compose-%.yaml=templates/docker-compose-%.jsonnet} > $@

View file

@ -1,241 +1,256 @@
volumes:
cassandra:
pulsar-conf:
pulsar-data:
etcd:
minio-data:
milvus:
prometheus-data:
grafana-storage:
services:
cassandra:
image: docker.io/cassandra:4.1.5
ports:
- "9042:9042"
volumes:
- "cassandra:/var/lib/cassandra"
restart: on-failure:100
pulsar:
image: docker.io/apachepulsar/pulsar:3.3.0
command: bin/pulsar standalone
ports:
- "6650:6650"
- "8080:8080"
volumes:
- "pulsar-conf:/pulsar/conf"
- "pulsar-data:/pulsar/data"
restart: on-failure:100
init-pulsar:
image: docker.io/apachepulsar/pulsar:3.3.0
command:
- "sh"
- "-c"
- "pulsar-admin --admin-url http://pulsar:8080 tenants create tg && pulsar-admin --admin-url http://pulsar:8080 namespaces create tg/flow && pulsar-admin --admin-url http://pulsar:8080 namespaces create tg/request && pulsar-admin --admin-url http://pulsar:8080 namespaces create tg/response && pulsar-admin --admin-url http://pulsar:8080 namespaces set-retention --size -1 --time 3m tg/response"
depends_on:
pulsar:
condition: service_started
restart: on-failure:100
pulsar-manager:
image: docker.io/apachepulsar/pulsar-manager:v0.3.0
ports:
- "9527:9527"
- "7750:7750"
environment:
SPRING_CONFIGURATION_FILE: /pulsar-manager/pulsar-manager/application.properties
restart: on-failure:100
etcd:
image: quay.io/coreos/etcd:v3.5.5
command:
- "etcd"
- "-advertise-client-urls=http://127.0.0.1:2379"
- "-listen-client-urls"
- "http://0.0.0.0:2379"
- "--data-dir"
- "/etcd"
environment:
ETCD_AUTO_COMPACTION_MODE: revision
ETCD_AUTO_COMPACTION_RETENTION: "1000"
ETCD_QUOTA_BACKEND_BYTES: "4294967296"
ETCD_SNAPSHOT_COUNT: "50000"
ports:
- "2379:2379"
volumes:
- "etcd:/etcd"
restart: on-failure:100
minio:
image: docker.io/minio/minio:RELEASE.2024-07-04T14-25-45Z
command:
- "minio"
- "server"
- "/minio_data"
- "--console-address"
- ":9001"
environment:
MINIO_ROOT_USER: minioadmin
MINIO_ROOT_PASSWORD: minioadmin
ports:
- "9001:9001"
volumes:
- "minio-data:/minio_data"
restart: on-failure:100
milvus:
image: docker.io/milvusdb/milvus:v2.4.5
command:
- "milvus"
- "run"
- "standalone"
environment:
ETCD_ENDPOINTS: etcd:2379
MINIO_ADDRESS: minio:9000
ports:
- "9091:9091"
- "19530:19530"
volumes:
- "milvus:/var/lib/milvus"
restart: on-failure:100
prometheus:
image: docker.io/prom/prometheus:v2.53.1
ports:
- "9090:9090"
volumes:
- "./prometheus:/etc/prometheus"
- "prometheus-data:/prometheus"
restart: on-failure:100
grafana:
image: docker.io/grafana/grafana:10.0.0
ports:
- "3000:3000"
volumes:
- "grafana-storage:/var/lib/grafana"
- "./grafana/dashboard.yml:/etc/grafana/provisioning/dashboards/dashboard.yml"
- "./grafana/datasource.yml:/etc/grafana/provisioning/datasources/datasource.yml"
- "./grafana/dashboard.json:/var/lib/grafana/dashboards/dashboard.json"
environment:
# GF_AUTH_ANONYMOUS_ORG_ROLE: Admin
# GF_AUTH_ANONYMOUS_ENABLED: true
# GF_ORG_ROLE: Admin
GF_ORG_NAME: trustgraph.ai
# GF_SERVER_ROOT_URL: https://example.com
restart: on-failure:100
pdf-decoder:
image: docker.io/trustgraph/trustgraph-flow:0.5.9
command:
- "pdf-decoder"
- "-p"
- "pulsar://pulsar:6650"
restart: on-failure:100
chunker:
image: docker.io/trustgraph/trustgraph-flow:0.5.9
command:
- "chunker-recursive"
- "-p"
- "pulsar://pulsar:6650"
restart: on-failure:100
vectorize:
image: docker.io/trustgraph/trustgraph-flow:0.5.9
command:
- "embeddings-vectorize"
- "-p"
- "pulsar://pulsar:6650"
restart: on-failure:100
embeddings:
image: docker.io/trustgraph/trustgraph-flow:0.5.9
command:
- "embeddings-hf"
- "-p"
- "pulsar://pulsar:6650"
# - "-m"
# - "mixedbread-ai/mxbai-embed-large-v1"
restart: on-failure:100
kg-extract-definitions:
image: docker.io/trustgraph/trustgraph-flow:0.5.9
command:
- "kg-extract-definitions"
- "-p"
- "pulsar://pulsar:6650"
restart: on-failure:100
kg-extract-relationships:
image: docker.io/trustgraph/trustgraph-flow:0.5.9
command:
- "kg-extract-relationships"
- "-p"
- "pulsar://pulsar:6650"
restart: on-failure:100
store-graph-embeddings:
image: docker.io/trustgraph/trustgraph-flow:0.5.9
command:
- "ge-write-milvus"
- "-p"
- "pulsar://pulsar:6650"
- "-t"
- "http://milvus:19530"
restart: on-failure:100
store-triples:
image: docker.io/trustgraph/trustgraph-flow:0.5.9
command:
- "triples-write-cassandra"
- "-p"
- "pulsar://pulsar:6650"
- "-g"
- "cassandra"
restart: on-failure:100
text-completion:
image: docker.io/trustgraph/trustgraph-flow:0.5.9
command:
- "text-completion-azure"
- "-p"
- "pulsar://pulsar:6650"
- "-k"
- ${AZURE_TOKEN}
- "-e"
- ${AZURE_ENDPOINT}
restart: on-failure:100
text-completion-rag:
image: docker.io/trustgraph/trustgraph-flow:0.5.9
command:
- "text-completion-azure"
- "-p"
- "pulsar://pulsar:6650"
- "-k"
- ${AZURE_TOKEN}
- "-e"
- ${AZURE_ENDPOINT}
- "-i"
- "non-persistent://tg/request/text-completion-rag"
- "-o"
- "non-persistent://tg/response/text-completion-rag-response"
restart: on-failure:100
graph-rag:
image: docker.io/trustgraph/trustgraph-flow:0.5.9
command:
- "graph-rag"
- "-p"
- "pulsar://pulsar:6650"
- "--text-completion-request-queue"
- "non-persistent://tg/request/text-completion-rag"
- "--text-completion-response-queue"
- "non-persistent://tg/response/text-completion-rag-response"
restart: on-failure:100
"services":
"cassandra":
"image": "docker.io/cassandra:4.1.5"
"ports":
- "9042:9042"
"restart": "on-failure:100"
"volumes":
- "cassandra:/var/lib/cassandra"
"chunker":
"command":
- "chunker-recursive"
- "-p"
- "pulsar://pulsar:6650"
"image": "docker.io/trustgraph/trustgraph-flow:0.6.0"
"restart": "on-failure:100"
"embeddings":
"command":
- "embeddings-hf"
- "-p"
- "pulsar://pulsar:6650"
"image": "docker.io/trustgraph/trustgraph-flow:0.6.0"
"restart": "on-failure:100"
"etcd":
"command":
- "etcd"
- "-advertise-client-urls=http://127.0.0.1:2379"
- "-listen-client-urls"
- "http://0.0.0.0:2379"
- "--data-dir"
- "/etcd"
"environment":
"ETCD_AUTO_COMPACTION_MODE": "revision"
"ETCD_AUTO_COMPACTION_RETENTION": "1000"
"ETCD_QUOTA_BACKEND_BYTES": "4294967296"
"ETCD_SNAPSHOT_COUNT": "50000"
"image": "quay.io/coreos/etcd:v3.5.5"
"ports":
- "2379:2379"
"restart": "on-failure:100"
"volumes":
- "etcd:/etcd"
"grafana":
"environment":
"GF_ORG_NAME": "trustgraph.ai"
"image": "docker.io/grafana/grafana:10.0.0"
"ports":
- "3000:3000"
"restart": "on-failure:100"
"volumes":
- "grafana-storage:/var/lib/grafana"
- "./grafana/dashboard.yml:/etc/grafana/provisioning/dashboards/dashboard.yml"
- "./grafana/datasource.yml:/etc/grafana/provisioning/datasources/datasource.yml"
- "./grafana/dashboard.json:/var/lib/grafana/dashboards/dashboard.json"
"graph-rag":
"command":
- "graph-rag"
- "-p"
- "pulsar://pulsar:6650"
- "--prompt-request-queue"
- "non-persistent://tg/request/prompt-rag"
- "--prompt-response-queue"
- "non-persistent://tg/response/prompt-rag-response"
"image": "docker.io/trustgraph/trustgraph-flow:0.6.0"
"restart": "on-failure:100"
"init-pulsar":
"command":
- "sh"
- "-c"
- "pulsar-admin --admin-url http://pulsar:8080 tenants create tg && pulsar-admin --admin-url http://pulsar:8080 namespaces create tg/flow && pulsar-admin --admin-url http://pulsar:8080 namespaces create tg/request && pulsar-admin --admin-url http://pulsar:8080 namespaces create tg/response && pulsar-admin --admin-url http://pulsar:8080 namespaces set-retention --size -1 --time 3m tg/response"
"depends_on":
"pulsar":
"condition": "service_started"
"image": "docker.io/apachepulsar/pulsar:3.3.0"
"restart": "on-failure:100"
"kg-extract-definitions":
"command":
- "kg-extract-definitions"
- "-p"
- "pulsar://pulsar:6650"
"image": "docker.io/trustgraph/trustgraph-flow:0.6.0"
"restart": "on-failure:100"
"kg-extract-relationships":
"command":
- "kg-extract-relationships"
- "-p"
- "pulsar://pulsar:6650"
"image": "docker.io/trustgraph/trustgraph-flow:0.6.0"
"restart": "on-failure:100"
"milvus":
"command":
- "milvus"
- "run"
- "standalone"
"environment":
"ETCD_ENDPOINTS": "etcd:2379"
"MINIO_ADDRESS": "minio:9000"
"image": "docker.io/milvusdb/milvus:v2.4.5"
"ports":
- "9091:9091"
- "19530:19530"
"restart": "on-failure:100"
"volumes":
- "milvus:/var/lib/milvus"
"minio":
"command":
- "minio"
- "server"
- "/minio_data"
- "--console-address"
- ":9001"
"environment":
"MINIO_ROOT_PASSWORD": "minioadmin"
"MINIO_ROOT_USER": "minioadmin"
"image": "docker.io/minio/minio:RELEASE.2024-07-04T14-25-45Z"
"ports":
- "9001:9001"
"restart": "on-failure:100"
"volumes":
- "minio-data:/minio_data"
"pdf-decoder":
"command":
- "pdf-decoder"
- "-p"
- "pulsar://pulsar:6650"
"image": "docker.io/trustgraph/trustgraph-flow:0.6.0"
"restart": "on-failure:100"
"prometheus":
"image": "docker.io/prom/prometheus:v2.53.1"
"ports":
- "9090:9090"
"restart": "on-failure:100"
"volumes":
- "./prometheus:/etc/prometheus"
- "prometheus-data:/prometheus"
"prompt":
"command":
- "prompt-generic"
- "-p"
- "pulsar://pulsar:6650"
- "--text-completion-request-queue"
- "non-persistent://tg/request/text-completion"
- "--text-completion-response-queue"
- "non-persistent://tg/response/text-completion-response"
"image": "docker.io/trustgraph/trustgraph-flow:0.6.0"
"restart": "on-failure:100"
"prompt-rag":
"command":
- "prompt-generic"
- "-p"
- "pulsar://pulsar:6650"
- "-i"
- "non-persistent://tg/request/prompt-rag"
- "-o"
- "non-persistent://tg/response/prompt-rag-response"
- "--text-completion-request-queue"
- "non-persistent://tg/request/text-completion-rag"
- "--text-completion-response-queue"
- "non-persistent://tg/response/text-completion-rag-response"
"image": "docker.io/trustgraph/trustgraph-flow:0.6.0"
"restart": "on-failure:100"
"pulsar":
"command": "bin/pulsar standalone"
"image": "docker.io/apachepulsar/pulsar:3.3.0"
"ports":
- "6650:6650"
- "8080:8080"
"restart": "on-failure:100"
"volumes":
- "pulsar-conf:/pulsar/conf"
- "pulsar-data:/pulsar/data"
"pulsar-manager":
"environment":
"SPRING_CONFIGURATION_FILE": "/pulsar-manager/pulsar-manager/application.properties"
"image": "docker.io/apachepulsar/pulsar-manager:v0.3.0"
"ports":
- "9527:9527"
- "7750:7750"
"restart": "on-failure:100"
"query-graph-embeddings":
"command":
- "ge-query-milvus"
- "-p"
- "pulsar://pulsar:6650"
- "-t"
- "http://milvus:19530"
"image": "docker.io/trustgraph/trustgraph-flow:0.6.0"
"restart": "on-failure:100"
"query-triples":
"command":
- "triples-query-cassandra"
- "-p"
- "pulsar://pulsar:6650"
- "-g"
- "cassandra"
"image": "docker.io/trustgraph/trustgraph-flow:0.6.0"
"restart": "on-failure:100"
"store-graph-embeddings":
"command":
- "ge-write-milvus"
- "-p"
- "pulsar://pulsar:6650"
- "-t"
- "http://milvus:19530"
"image": "docker.io/trustgraph/trustgraph-flow:0.6.0"
"restart": "on-failure:100"
"store-triples":
"command":
- "triples-write-cassandra"
- "-p"
- "pulsar://pulsar:6650"
- "-g"
- "cassandra"
"image": "docker.io/trustgraph/trustgraph-flow:0.6.0"
"restart": "on-failure:100"
"text-completion":
"command":
- "text-completion-azure"
- "-p"
- "pulsar://pulsar:6650"
- "-k"
- "${AZURE_TOKEN}"
- "-e"
- "${AZURE_ENDPOINT}"
"image": "docker.io/trustgraph/trustgraph-flow:0.6.0"
"restart": "on-failure:100"
"text-completion-rag":
"command":
- "text-completion-azure"
- "-p"
- "pulsar://pulsar:6650"
- "-k"
- "${AZURE_TOKEN}"
- "-e"
- "${AZURE_ENDPOINT}"
- "-i"
- "non-persistent://tg/request/text-completion-rag"
- "-o"
- "non-persistent://tg/response/text-completion-rag-response"
"image": "docker.io/trustgraph/trustgraph-flow:0.6.0"
"restart": "on-failure:100"
"vectorize":
"command":
- "embeddings-vectorize"
- "-p"
- "pulsar://pulsar:6650"
"image": "docker.io/trustgraph/trustgraph-flow:0.6.0"
"restart": "on-failure:100"
"volumes":
"cassandra": {}
"etcd": {}
"grafana-storage": {}
"milvus": {}
"minio-data": {}
"prometheus-data": {}
"pulsar-conf": {}
"pulsar-data": {}

View file

@ -1,267 +1,264 @@
volumes:
cassandra:
pulsar-conf:
pulsar-data:
etcd:
minio-data:
milvus:
prometheus-data:
grafana-storage:
services:
cassandra:
image: docker.io/cassandra:4.1.5
ports:
- "9042:9042"
volumes:
- "cassandra:/var/lib/cassandra"
restart: on-failure:100
pulsar:
image: docker.io/apachepulsar/pulsar:3.3.0
command: bin/pulsar standalone
ports:
- "6650:6650"
- "8080:8080"
volumes:
- "pulsar-conf:/pulsar/conf"
- "pulsar-data:/pulsar/data"
restart: on-failure:100
init-pulsar:
image: docker.io/apachepulsar/pulsar:3.3.0
command:
- "sh"
- "-c"
- "pulsar-admin --admin-url http://pulsar:8080 tenants create tg && pulsar-admin --admin-url http://pulsar:8080 namespaces create tg/flow && pulsar-admin --admin-url http://pulsar:8080 namespaces create tg/request && pulsar-admin --admin-url http://pulsar:8080 namespaces create tg/response && pulsar-admin --admin-url http://pulsar:8080 namespaces set-retention --size -1 --time 3m tg/response"
depends_on:
pulsar:
condition: service_started
restart: on-failure:100
pulsar-manager:
image: docker.io/apachepulsar/pulsar-manager:v0.3.0
ports:
- "9527:9527"
- "7750:7750"
environment:
SPRING_CONFIGURATION_FILE: /pulsar-manager/pulsar-manager/application.properties
restart: on-failure:100
etcd:
image: quay.io/coreos/etcd:v3.5.5
command:
- "etcd"
- "-advertise-client-urls=http://127.0.0.1:2379"
- "-listen-client-urls"
- "http://0.0.0.0:2379"
- "--data-dir"
- "/etcd"
environment:
ETCD_AUTO_COMPACTION_MODE: revision
ETCD_AUTO_COMPACTION_RETENTION: "1000"
ETCD_QUOTA_BACKEND_BYTES: "4294967296"
ETCD_SNAPSHOT_COUNT: "50000"
ports:
- "2379:2379"
volumes:
- "etcd:/etcd"
restart: on-failure:100
minio:
image: docker.io/minio/minio:RELEASE.2024-07-04T14-25-45Z
command:
- "minio"
- "server"
- "/minio_data"
- "--console-address"
- ":9001"
environment:
MINIO_ROOT_USER: minioadmin
MINIO_ROOT_PASSWORD: minioadmin
ports:
- "9001:9001"
volumes:
- "minio-data:/minio_data"
restart: on-failure:100
milvus:
image: docker.io/milvusdb/milvus:v2.4.5
command:
- "milvus"
- "run"
- "standalone"
environment:
ETCD_ENDPOINTS: etcd:2379
MINIO_ADDRESS: minio:9000
ports:
- "9091:9091"
- "19530:19530"
volumes:
- "milvus:/var/lib/milvus"
restart: on-failure:100
prometheus:
image: docker.io/prom/prometheus:v2.53.1
ports:
- "9090:9090"
volumes:
- "./prometheus:/etc/prometheus"
- "prometheus-data:/prometheus"
restart: on-failure:100
grafana:
image: docker.io/grafana/grafana:10.0.0
ports:
- "3000:3000"
volumes:
- "grafana-storage:/var/lib/grafana"
- "./grafana/dashboard.yml:/etc/grafana/provisioning/dashboards/dashboard.yml"
- "./grafana/datasource.yml:/etc/grafana/provisioning/datasources/datasource.yml"
- "./grafana/dashboard.json:/var/lib/grafana/dashboards/dashboard.json"
environment:
# GF_AUTH_ANONYMOUS_ORG_ROLE: Admin
# GF_AUTH_ANONYMOUS_ENABLED: true
# GF_ORG_ROLE: Admin
GF_ORG_NAME: trustgraph.ai
# GF_SERVER_ROOT_URL: https://example.com
restart: on-failure:100
pdf-decoder:
image: docker.io/trustgraph/trustgraph-flow:0.5.9
command:
- "pdf-decoder"
- "-p"
- "pulsar://pulsar:6650"
restart: on-failure:100
chunker:
image: docker.io/trustgraph/trustgraph-flow:0.5.9
command:
- "chunker-recursive"
- "-p"
- "pulsar://pulsar:6650"
- "--chunk-size"
- "2000"
- "--chunk-overlap"
- "100"
restart: on-failure:100
vectorize:
image: docker.io/trustgraph/trustgraph-flow:0.5.9
command:
- "embeddings-vectorize"
- "-p"
- "pulsar://pulsar:6650"
restart: on-failure:100
embeddings:
image: docker.io/trustgraph/trustgraph-flow:0.5.9
command:
- "embeddings-hf"
- "-p"
- "pulsar://pulsar:6650"
# - "-m"
# - "mixedbread-ai/mxbai-embed-large-v1"
restart: on-failure:100
kg-extract-definitions:
image: docker.io/trustgraph/trustgraph-flow:0.5.9
command:
- "kg-extract-definitions"
- "-p"
- "pulsar://pulsar:6650"
restart: on-failure:100
kg-extract-relationships:
image: docker.io/trustgraph/trustgraph-flow:0.5.9
command:
- "kg-extract-relationships"
- "-p"
- "pulsar://pulsar:6650"
restart: on-failure:100
store-graph-embeddings:
image: docker.io/trustgraph/trustgraph-flow:0.5.9
command:
- "ge-write-milvus"
- "-p"
- "pulsar://pulsar:6650"
- "-t"
- "http://milvus:19530"
restart: on-failure:100
store-triples:
image: docker.io/trustgraph/trustgraph-flow:0.5.9
command:
- "triples-write-cassandra"
- "-p"
- "pulsar://pulsar:6650"
- "-g"
- "cassandra"
restart: on-failure:100
text-completion:
image: docker.io/trustgraph/trustgraph-flow:0.5.9
command:
- "text-completion-bedrock"
- "-p"
- "pulsar://pulsar:6650"
- "-z"
- "${AWS_ID_KEY}"
- "-k"
- "${AWS_SECRET_KEY}"
- "-r"
- "us-west-2"
restart: on-failure:100
text-completion-rag:
image: docker.io/trustgraph/trustgraph-flow:0.5.9
command:
- "text-completion-bedrock"
- "-p"
- "pulsar://pulsar:6650"
# - "-m"
# - "mistral.mistral-large-2407-v1:0"
- "-z"
- "${AWS_ID_KEY}"
- "-k"
- "${AWS_SECRET_KEY}"
- "-r"
- "us-west-2"
- "-i"
- "non-persistent://tg/request/text-completion-rag"
- "-o"
- "non-persistent://tg/response/text-completion-rag-response"
restart: on-failure:100
#text-completion-rag:
# image: docker.io/trustgraph/trustgraph-flow:0.5.9
# command:
# - "text-completion-ollama"
# - "-p"
# - "pulsar://pulsar:6650"
# - "-r"
# - "http://${OLLAMA_HOST}:11434/"
# - "-i"
# - "non-persistent://tg/request/text-completion-rag"
# - "-o"
# - "non-persistent://tg/response/text-completion-rag-response"
# - "-m"
# - "gemma2:2b"
# restart: on-failure:100
graph-rag:
image: docker.io/trustgraph/trustgraph-flow:0.5.9
command:
- "graph-rag"
- "-p"
- "pulsar://pulsar:6650"
- "--text-completion-request-queue"
- "non-persistent://tg/request/text-completion-rag"
- "--text-completion-response-queue"
- "non-persistent://tg/response/text-completion-rag-response"
restart: on-failure:100
"services":
"cassandra":
"image": "docker.io/cassandra:4.1.5"
"ports":
- "9042:9042"
"restart": "on-failure:100"
"volumes":
- "cassandra:/var/lib/cassandra"
"chunker":
"command":
- "chunker-recursive"
- "-p"
- "pulsar://pulsar:6650"
- "--chunk-size"
- "2000"
- "--chunk-overlap"
- "100"
"image": "docker.io/trustgraph/trustgraph-flow:0.6.0"
"restart": "on-failure:100"
"embeddings":
"command":
- "embeddings-hf"
- "-p"
- "pulsar://pulsar:6650"
"image": "docker.io/trustgraph/trustgraph-flow:0.6.0"
"restart": "on-failure:100"
"etcd":
"command":
- "etcd"
- "-advertise-client-urls=http://127.0.0.1:2379"
- "-listen-client-urls"
- "http://0.0.0.0:2379"
- "--data-dir"
- "/etcd"
"environment":
"ETCD_AUTO_COMPACTION_MODE": "revision"
"ETCD_AUTO_COMPACTION_RETENTION": "1000"
"ETCD_QUOTA_BACKEND_BYTES": "4294967296"
"ETCD_SNAPSHOT_COUNT": "50000"
"image": "quay.io/coreos/etcd:v3.5.5"
"ports":
- "2379:2379"
"restart": "on-failure:100"
"volumes":
- "etcd:/etcd"
"grafana":
"environment":
"GF_ORG_NAME": "trustgraph.ai"
"image": "docker.io/grafana/grafana:10.0.0"
"ports":
- "3000:3000"
"restart": "on-failure:100"
"volumes":
- "grafana-storage:/var/lib/grafana"
- "./grafana/dashboard.yml:/etc/grafana/provisioning/dashboards/dashboard.yml"
- "./grafana/datasource.yml:/etc/grafana/provisioning/datasources/datasource.yml"
- "./grafana/dashboard.json:/var/lib/grafana/dashboards/dashboard.json"
"graph-rag":
"command":
- "graph-rag"
- "-p"
- "pulsar://pulsar:6650"
- "--prompt-request-queue"
- "non-persistent://tg/request/prompt-rag"
- "--prompt-response-queue"
- "non-persistent://tg/response/prompt-rag-response"
"image": "docker.io/trustgraph/trustgraph-flow:0.6.0"
"restart": "on-failure:100"
"init-pulsar":
"command":
- "sh"
- "-c"
- "pulsar-admin --admin-url http://pulsar:8080 tenants create tg && pulsar-admin --admin-url http://pulsar:8080 namespaces create tg/flow && pulsar-admin --admin-url http://pulsar:8080 namespaces create tg/request && pulsar-admin --admin-url http://pulsar:8080 namespaces create tg/response && pulsar-admin --admin-url http://pulsar:8080 namespaces set-retention --size -1 --time 3m tg/response"
"depends_on":
"pulsar":
"condition": "service_started"
"image": "docker.io/apachepulsar/pulsar:3.3.0"
"restart": "on-failure:100"
"kg-extract-definitions":
"command":
- "kg-extract-definitions"
- "-p"
- "pulsar://pulsar:6650"
"image": "docker.io/trustgraph/trustgraph-flow:0.6.0"
"restart": "on-failure:100"
"kg-extract-relationships":
"command":
- "kg-extract-relationships"
- "-p"
- "pulsar://pulsar:6650"
"image": "docker.io/trustgraph/trustgraph-flow:0.6.0"
"restart": "on-failure:100"
"milvus":
"command":
- "milvus"
- "run"
- "standalone"
"environment":
"ETCD_ENDPOINTS": "etcd:2379"
"MINIO_ADDRESS": "minio:9000"
"image": "docker.io/milvusdb/milvus:v2.4.5"
"ports":
- "9091:9091"
- "19530:19530"
"restart": "on-failure:100"
"volumes":
- "milvus:/var/lib/milvus"
"minio":
"command":
- "minio"
- "server"
- "/minio_data"
- "--console-address"
- ":9001"
"environment":
"MINIO_ROOT_PASSWORD": "minioadmin"
"MINIO_ROOT_USER": "minioadmin"
"image": "docker.io/minio/minio:RELEASE.2024-07-04T14-25-45Z"
"ports":
- "9001:9001"
"restart": "on-failure:100"
"volumes":
- "minio-data:/minio_data"
"pdf-decoder":
"command":
- "pdf-decoder"
- "-p"
- "pulsar://pulsar:6650"
"image": "docker.io/trustgraph/trustgraph-flow:0.6.0"
"restart": "on-failure:100"
"prometheus":
"image": "docker.io/prom/prometheus:v2.53.1"
"ports":
- "9090:9090"
"restart": "on-failure:100"
"volumes":
- "./prometheus:/etc/prometheus"
- "prometheus-data:/prometheus"
"prompt":
"command":
- "prompt-generic"
- "-p"
- "pulsar://pulsar:6650"
- "--text-completion-request-queue"
- "non-persistent://tg/request/text-completion"
- "--text-completion-response-queue"
- "non-persistent://tg/response/text-completion-response"
"image": "docker.io/trustgraph/trustgraph-flow:0.6.0"
"restart": "on-failure:100"
"prompt-rag":
"command":
- "prompt-generic"
- "-p"
- "pulsar://pulsar:6650"
- "-i"
- "non-persistent://tg/request/prompt-rag"
- "-o"
- "non-persistent://tg/response/prompt-rag-response"
- "--text-completion-request-queue"
- "non-persistent://tg/request/text-completion-rag"
- "--text-completion-response-queue"
- "non-persistent://tg/response/text-completion-rag-response"
"image": "docker.io/trustgraph/trustgraph-flow:0.6.0"
"restart": "on-failure:100"
"pulsar":
"command": "bin/pulsar standalone"
"image": "docker.io/apachepulsar/pulsar:3.3.0"
"ports":
- "6650:6650"
- "8080:8080"
"restart": "on-failure:100"
"volumes":
- "pulsar-conf:/pulsar/conf"
- "pulsar-data:/pulsar/data"
"pulsar-manager":
"environment":
"SPRING_CONFIGURATION_FILE": "/pulsar-manager/pulsar-manager/application.properties"
"image": "docker.io/apachepulsar/pulsar-manager:v0.3.0"
"ports":
- "9527:9527"
- "7750:7750"
"restart": "on-failure:100"
"query-graph-embeddings":
"command":
- "ge-query-milvus"
- "-p"
- "pulsar://pulsar:6650"
- "-t"
- "http://milvus:19530"
"image": "docker.io/trustgraph/trustgraph-flow:0.6.0"
"restart": "on-failure:100"
"query-triples":
"command":
- "triples-query-cassandra"
- "-p"
- "pulsar://pulsar:6650"
- "-g"
- "cassandra"
"image": "docker.io/trustgraph/trustgraph-flow:0.6.0"
"restart": "on-failure:100"
"store-graph-embeddings":
"command":
- "ge-write-milvus"
- "-p"
- "pulsar://pulsar:6650"
- "-t"
- "http://milvus:19530"
"image": "docker.io/trustgraph/trustgraph-flow:0.6.0"
"restart": "on-failure:100"
"store-triples":
"command":
- "triples-write-cassandra"
- "-p"
- "pulsar://pulsar:6650"
- "-g"
- "cassandra"
"image": "docker.io/trustgraph/trustgraph-flow:0.6.0"
"restart": "on-failure:100"
"text-completion":
"command":
- "text-completion-bedrock"
- "-p"
- "pulsar://pulsar:6650"
- "-z"
- "${AWS_ID_KEY}"
- "-k"
- "${AWS_SECRET_KEY}"
- "-r"
- "us-west-2"
"image": "docker.io/trustgraph/trustgraph-flow:0.6.0"
"restart": "on-failure:100"
"text-completion-rag":
"command":
- "text-completion-bedrock"
- "-p"
- "pulsar://pulsar:6650"
- "-z"
- "${AWS_ID_KEY}"
- "-k"
- "${AWS_SECRET_KEY}"
- "-r"
- "us-west-2"
- "-i"
- "non-persistent://tg/request/text-completion-rag"
- "-o"
- "non-persistent://tg/response/text-completion-rag-response"
"image": "docker.io/trustgraph/trustgraph-flow:0.6.0"
"restart": "on-failure:100"
"vectorize":
"command":
- "embeddings-vectorize"
- "-p"
- "pulsar://pulsar:6650"
"image": "docker.io/trustgraph/trustgraph-flow:0.6.0"
"restart": "on-failure:100"
"volumes":
"cassandra": {}
"etcd": {}
"grafana-storage": {}
"milvus": {}
"minio-data": {}
"prometheus-data": {}
"pulsar-conf": {}
"pulsar-data": {}

View file

@ -1,236 +1,252 @@
volumes:
cassandra:
pulsar-conf:
pulsar-data:
etcd:
minio-data:
milvus:
prometheus-data:
grafana-storage:
services:
cassandra:
image: docker.io/cassandra:4.1.5
ports:
- "9042:9042"
volumes:
- "cassandra:/var/lib/cassandra"
restart: on-failure:100
pulsar:
image: docker.io/apachepulsar/pulsar:3.3.0
command: bin/pulsar standalone
ports:
- "6650:6650"
- "8080:8080"
volumes:
- "pulsar-conf:/pulsar/conf"
- "pulsar-data:/pulsar/data"
restart: on-failure:100
init-pulsar:
image: docker.io/apachepulsar/pulsar:3.3.0
command:
- "sh"
- "-c"
- "pulsar-admin --admin-url http://pulsar:8080 tenants create tg && pulsar-admin --admin-url http://pulsar:8080 namespaces create tg/flow && pulsar-admin --admin-url http://pulsar:8080 namespaces create tg/request && pulsar-admin --admin-url http://pulsar:8080 namespaces create tg/response && pulsar-admin --admin-url http://pulsar:8080 namespaces set-retention --size -1 --time 3m tg/response"
depends_on:
pulsar:
condition: service_started
restart: on-failure:100
pulsar-manager:
image: docker.io/apachepulsar/pulsar-manager:v0.3.0
ports:
- "9527:9527"
- "7750:7750"
environment:
SPRING_CONFIGURATION_FILE: /pulsar-manager/pulsar-manager/application.properties
restart: on-failure:100
etcd:
image: quay.io/coreos/etcd:v3.5.5
command:
- "etcd"
- "-advertise-client-urls=http://127.0.0.1:2379"
- "-listen-client-urls"
- "http://0.0.0.0:2379"
- "--data-dir"
- "/etcd"
environment:
ETCD_AUTO_COMPACTION_MODE: revision
ETCD_AUTO_COMPACTION_RETENTION: "1000"
ETCD_QUOTA_BACKEND_BYTES: "4294967296"
ETCD_SNAPSHOT_COUNT: "50000"
ports:
- "2379:2379"
volumes:
- "etcd:/etcd"
restart: on-failure:100
minio:
image: docker.io/minio/minio:RELEASE.2024-07-04T14-25-45Z
command:
- "minio"
- "server"
- "/minio_data"
- "--console-address"
- ":9001"
environment:
MINIO_ROOT_USER: minioadmin
MINIO_ROOT_PASSWORD: minioadmin
ports:
- "9001:9001"
volumes:
- "minio-data:/minio_data"
restart: on-failure:100
milvus:
image: docker.io/milvusdb/milvus:v2.4.5
command:
- "milvus"
- "run"
- "standalone"
environment:
ETCD_ENDPOINTS: etcd:2379
MINIO_ADDRESS: minio:9000
ports:
- "9091:9091"
- "19530:19530"
volumes:
- "milvus:/var/lib/milvus"
restart: on-failure:100
prometheus:
image: docker.io/prom/prometheus:v2.53.1
ports:
- "9090:9090"
volumes:
- "./prometheus:/etc/prometheus"
- "prometheus-data:/prometheus"
restart: on-failure:100
grafana:
image: docker.io/grafana/grafana:10.0.0
ports:
- "3000:3000"
volumes:
- "grafana-storage:/var/lib/grafana"
- "./grafana/dashboard.yml:/etc/grafana/provisioning/dashboards/dashboard.yml"
- "./grafana/datasource.yml:/etc/grafana/provisioning/datasources/datasource.yml"
- "./grafana/dashboard.json:/var/lib/grafana/dashboards/dashboard.json"
environment:
# GF_AUTH_ANONYMOUS_ORG_ROLE: Admin
# GF_AUTH_ANONYMOUS_ENABLED: true
# GF_ORG_ROLE: Admin
GF_ORG_NAME: trustgraph.ai
# GF_SERVER_ROOT_URL: https://example.com
restart: on-failure:100
pdf-decoder:
image: docker.io/trustgraph/trustgraph-flow:0.5.9
command:
- "pdf-decoder"
- "-p"
- "pulsar://pulsar:6650"
restart: on-failure:100
chunker:
image: docker.io/trustgraph/trustgraph-flow:0.5.9
command:
- "chunker-recursive"
- "-p"
- "pulsar://pulsar:6650"
restart: on-failure:100
vectorize:
image: docker.io/trustgraph/trustgraph-flow:0.5.9
command:
- "embeddings-vectorize"
- "-p"
- "pulsar://pulsar:6650"
restart: on-failure:100
embeddings:
image: docker.io/trustgraph/trustgraph-flow:0.5.9
command:
- "embeddings-hf"
- "-p"
- "pulsar://pulsar:6650"
# - "-m"
# - "mixedbread-ai/mxbai-embed-large-v1"
restart: on-failure:100
kg-extract-definitions:
image: docker.io/trustgraph/trustgraph-flow:0.5.9
command:
- "kg-extract-definitions"
- "-p"
- "pulsar://pulsar:6650"
restart: on-failure:100
kg-extract-relationships:
image: docker.io/trustgraph/trustgraph-flow:0.5.9
command:
- "kg-extract-relationships"
- "-p"
- "pulsar://pulsar:6650"
restart: on-failure:100
store-graph-embeddings:
image: docker.io/trustgraph/trustgraph-flow:0.5.9
command:
- "ge-write-milvus"
- "-p"
- "pulsar://pulsar:6650"
- "-t"
- "http://milvus:19530"
restart: on-failure:100
store-triples:
image: docker.io/trustgraph/trustgraph-flow:0.5.9
command:
- "triples-write-cassandra"
- "-p"
- "pulsar://pulsar:6650"
- "-g"
- "cassandra"
restart: on-failure:100
text-completion:
image: docker.io/trustgraph/trustgraph-flow:0.5.9
command:
- "text-completion-claude"
- "-p"
- "pulsar://pulsar:6650"
- "-k"
- ${CLAUDE_KEY}
restart: on-failure:100
text-completion-rag:
image: docker.io/trustgraph/trustgraph-flow:0.5.9
command:
- "text-completion-ollama"
- "-p"
- "pulsar://pulsar:6650"
- "-k"
- ${CLAUDE_KEY}
- "-i"
- "non-persistent://tg/request/text-completion-rag"
- "-o"
- "non-persistent://tg/response/text-completion-rag-response"
restart: on-failure:100
graph-rag:
image: docker.io/trustgraph/trustgraph-flow:0.5.9
command:
- "graph-rag"
- "-p"
- "pulsar://pulsar:6650"
- "--text-completion-request-queue"
- "non-persistent://tg/request/text-completion-rag"
- "--text-completion-response-queue"
- "non-persistent://tg/response/text-completion-rag-response"
restart: on-failure:100
"services":
"cassandra":
"image": "docker.io/cassandra:4.1.5"
"ports":
- "9042:9042"
"restart": "on-failure:100"
"volumes":
- "cassandra:/var/lib/cassandra"
"chunker":
"command":
- "chunker-recursive"
- "-p"
- "pulsar://pulsar:6650"
"image": "docker.io/trustgraph/trustgraph-flow:0.6.0"
"restart": "on-failure:100"
"embeddings":
"command":
- "embeddings-hf"
- "-p"
- "pulsar://pulsar:6650"
"image": "docker.io/trustgraph/trustgraph-flow:0.6.0"
"restart": "on-failure:100"
"etcd":
"command":
- "etcd"
- "-advertise-client-urls=http://127.0.0.1:2379"
- "-listen-client-urls"
- "http://0.0.0.0:2379"
- "--data-dir"
- "/etcd"
"environment":
"ETCD_AUTO_COMPACTION_MODE": "revision"
"ETCD_AUTO_COMPACTION_RETENTION": "1000"
"ETCD_QUOTA_BACKEND_BYTES": "4294967296"
"ETCD_SNAPSHOT_COUNT": "50000"
"image": "quay.io/coreos/etcd:v3.5.5"
"ports":
- "2379:2379"
"restart": "on-failure:100"
"volumes":
- "etcd:/etcd"
"grafana":
"environment":
"GF_ORG_NAME": "trustgraph.ai"
"image": "docker.io/grafana/grafana:10.0.0"
"ports":
- "3000:3000"
"restart": "on-failure:100"
"volumes":
- "grafana-storage:/var/lib/grafana"
- "./grafana/dashboard.yml:/etc/grafana/provisioning/dashboards/dashboard.yml"
- "./grafana/datasource.yml:/etc/grafana/provisioning/datasources/datasource.yml"
- "./grafana/dashboard.json:/var/lib/grafana/dashboards/dashboard.json"
"graph-rag":
"command":
- "graph-rag"
- "-p"
- "pulsar://pulsar:6650"
- "--prompt-request-queue"
- "non-persistent://tg/request/prompt-rag"
- "--prompt-response-queue"
- "non-persistent://tg/response/prompt-rag-response"
"image": "docker.io/trustgraph/trustgraph-flow:0.6.0"
"restart": "on-failure:100"
"init-pulsar":
"command":
- "sh"
- "-c"
- "pulsar-admin --admin-url http://pulsar:8080 tenants create tg && pulsar-admin --admin-url http://pulsar:8080 namespaces create tg/flow && pulsar-admin --admin-url http://pulsar:8080 namespaces create tg/request && pulsar-admin --admin-url http://pulsar:8080 namespaces create tg/response && pulsar-admin --admin-url http://pulsar:8080 namespaces set-retention --size -1 --time 3m tg/response"
"depends_on":
"pulsar":
"condition": "service_started"
"image": "docker.io/apachepulsar/pulsar:3.3.0"
"restart": "on-failure:100"
"kg-extract-definitions":
"command":
- "kg-extract-definitions"
- "-p"
- "pulsar://pulsar:6650"
"image": "docker.io/trustgraph/trustgraph-flow:0.6.0"
"restart": "on-failure:100"
"kg-extract-relationships":
"command":
- "kg-extract-relationships"
- "-p"
- "pulsar://pulsar:6650"
"image": "docker.io/trustgraph/trustgraph-flow:0.6.0"
"restart": "on-failure:100"
"milvus":
"command":
- "milvus"
- "run"
- "standalone"
"environment":
"ETCD_ENDPOINTS": "etcd:2379"
"MINIO_ADDRESS": "minio:9000"
"image": "docker.io/milvusdb/milvus:v2.4.5"
"ports":
- "9091:9091"
- "19530:19530"
"restart": "on-failure:100"
"volumes":
- "milvus:/var/lib/milvus"
"minio":
"command":
- "minio"
- "server"
- "/minio_data"
- "--console-address"
- ":9001"
"environment":
"MINIO_ROOT_PASSWORD": "minioadmin"
"MINIO_ROOT_USER": "minioadmin"
"image": "docker.io/minio/minio:RELEASE.2024-07-04T14-25-45Z"
"ports":
- "9001:9001"
"restart": "on-failure:100"
"volumes":
- "minio-data:/minio_data"
"pdf-decoder":
"command":
- "pdf-decoder"
- "-p"
- "pulsar://pulsar:6650"
"image": "docker.io/trustgraph/trustgraph-flow:0.6.0"
"restart": "on-failure:100"
"prometheus":
"image": "docker.io/prom/prometheus:v2.53.1"
"ports":
- "9090:9090"
"restart": "on-failure:100"
"volumes":
- "./prometheus:/etc/prometheus"
- "prometheus-data:/prometheus"
"prompt":
"command":
- "prompt-generic"
- "-p"
- "pulsar://pulsar:6650"
- "--text-completion-request-queue"
- "non-persistent://tg/request/text-completion"
- "--text-completion-response-queue"
- "non-persistent://tg/response/text-completion-response"
"image": "docker.io/trustgraph/trustgraph-flow:0.6.0"
"restart": "on-failure:100"
"prompt-rag":
"command":
- "prompt-generic"
- "-p"
- "pulsar://pulsar:6650"
- "-i"
- "non-persistent://tg/request/prompt-rag"
- "-o"
- "non-persistent://tg/response/prompt-rag-response"
- "--text-completion-request-queue"
- "non-persistent://tg/request/text-completion-rag"
- "--text-completion-response-queue"
- "non-persistent://tg/response/text-completion-rag-response"
"image": "docker.io/trustgraph/trustgraph-flow:0.6.0"
"restart": "on-failure:100"
"pulsar":
"command": "bin/pulsar standalone"
"image": "docker.io/apachepulsar/pulsar:3.3.0"
"ports":
- "6650:6650"
- "8080:8080"
"restart": "on-failure:100"
"volumes":
- "pulsar-conf:/pulsar/conf"
- "pulsar-data:/pulsar/data"
"pulsar-manager":
"environment":
"SPRING_CONFIGURATION_FILE": "/pulsar-manager/pulsar-manager/application.properties"
"image": "docker.io/apachepulsar/pulsar-manager:v0.3.0"
"ports":
- "9527:9527"
- "7750:7750"
"restart": "on-failure:100"
"query-graph-embeddings":
"command":
- "ge-query-milvus"
- "-p"
- "pulsar://pulsar:6650"
- "-t"
- "http://milvus:19530"
"image": "docker.io/trustgraph/trustgraph-flow:0.6.0"
"restart": "on-failure:100"
"query-triples":
"command":
- "triples-query-cassandra"
- "-p"
- "pulsar://pulsar:6650"
- "-g"
- "cassandra"
"image": "docker.io/trustgraph/trustgraph-flow:0.6.0"
"restart": "on-failure:100"
"store-graph-embeddings":
"command":
- "ge-write-milvus"
- "-p"
- "pulsar://pulsar:6650"
- "-t"
- "http://milvus:19530"
"image": "docker.io/trustgraph/trustgraph-flow:0.6.0"
"restart": "on-failure:100"
"store-triples":
"command":
- "triples-write-cassandra"
- "-p"
- "pulsar://pulsar:6650"
- "-g"
- "cassandra"
"image": "docker.io/trustgraph/trustgraph-flow:0.6.0"
"restart": "on-failure:100"
"text-completion":
"command":
- "text-completion-claude"
- "-p"
- "pulsar://pulsar:6650"
- "-k"
- "${CLAUDE_KEY}"
"image": "docker.io/trustgraph/trustgraph-flow:0.6.0"
"restart": "on-failure:100"
"text-completion-rag":
"command":
- "text-completion-claude"
- "-p"
- "pulsar://pulsar:6650"
- "-k"
- "${CLAUDE_KEY}"
- "-i"
- "non-persistent://tg/request/text-completion-rag"
- "-o"
- "non-persistent://tg/response/text-completion-rag-response"
"image": "docker.io/trustgraph/trustgraph-flow:0.6.0"
"restart": "on-failure:100"
"vectorize":
"command":
- "embeddings-vectorize"
- "-p"
- "pulsar://pulsar:6650"
"image": "docker.io/trustgraph/trustgraph-flow:0.6.0"
"restart": "on-failure:100"
"volumes":
"cassandra": {}
"etcd": {}
"grafana-storage": {}
"milvus": {}
"minio-data": {}
"prometheus-data": {}
"pulsar-conf": {}
"pulsar-data": {}

View file

@ -1,223 +1,256 @@
volumes:
cassandra:
pulsar-conf:
pulsar-data:
etcd:
minio-data:
milvus:
prometheus-data:
grafana-storage:
services:
cassandra:
image: docker.io/cassandra:4.1.5
ports:
- "9042:9042"
volumes:
- "cassandra:/var/lib/cassandra"
restart: on-failure:100
pulsar:
image: docker.io/apachepulsar/pulsar:3.3.0
command: bin/pulsar standalone
ports:
- "6650:6650"
- "8080:8080"
volumes:
- "pulsar-conf:/pulsar/conf"
- "pulsar-data:/pulsar/data"
restart: on-failure:100
init-pulsar:
image: docker.io/apachepulsar/pulsar:3.3.0
command:
- "sh"
- "-c"
- "pulsar-admin --admin-url http://pulsar:8080 tenants create tg && pulsar-admin --admin-url http://pulsar:8080 namespaces create tg/flow && pulsar-admin --admin-url http://pulsar:8080 namespaces create tg/request && pulsar-admin --admin-url http://pulsar:8080 namespaces create tg/response && pulsar-admin --admin-url http://pulsar:8080 namespaces set-retention --size -1 --time 3m tg/response"
depends_on:
pulsar:
condition: service_started
restart: on-failure:100
pulsar-manager:
image: docker.io/apachepulsar/pulsar-manager:v0.3.0
ports:
- "9527:9527"
- "7750:7750"
environment:
SPRING_CONFIGURATION_FILE: /pulsar-manager/pulsar-manager/application.properties
restart: on-failure:100
etcd:
image: quay.io/coreos/etcd:v3.5.5
command:
- "etcd"
- "-advertise-client-urls=http://127.0.0.1:2379"
- "-listen-client-urls"
- "http://0.0.0.0:2379"
- "--data-dir"
- "/etcd"
environment:
ETCD_AUTO_COMPACTION_MODE: revision
ETCD_AUTO_COMPACTION_RETENTION: "1000"
ETCD_QUOTA_BACKEND_BYTES: "4294967296"
ETCD_SNAPSHOT_COUNT: "50000"
ports:
- "2379:2379"
volumes:
- "etcd:/etcd"
restart: on-failure:100
minio:
image: docker.io/minio/minio:RELEASE.2024-07-04T14-25-45Z
command:
- "minio"
- "server"
- "/minio_data"
- "--console-address"
- ":9001"
environment:
MINIO_ROOT_USER: minioadmin
MINIO_ROOT_PASSWORD: minioadmin
ports:
- "9001:9001"
volumes:
- "minio-data:/minio_data"
restart: on-failure:100
milvus:
image: docker.io/milvusdb/milvus:v2.4.5
command:
- "milvus"
- "run"
- "standalone"
environment:
ETCD_ENDPOINTS: etcd:2379
MINIO_ADDRESS: minio:9000
ports:
- "9091:9091"
- "19530:19530"
volumes:
- "milvus:/var/lib/milvus"
restart: on-failure:100
prometheus:
image: docker.io/prom/prometheus:v2.53.1
ports:
- "9090:9090"
volumes:
- "./prometheus:/etc/prometheus"
- "prometheus-data:/prometheus"
restart: on-failure:100
grafana:
image: docker.io/grafana/grafana:10.0.0
ports:
- "3000:3000"
volumes:
- "grafana-storage:/var/lib/grafana"
- "./grafana/dashboard.yml:/etc/grafana/provisioning/dashboards/dashboard.yml"
- "./grafana/datasource.yml:/etc/grafana/provisioning/datasources/datasource.yml"
- "./grafana/dashboard.json:/var/lib/grafana/dashboards/dashboard.json"
environment:
# GF_AUTH_ANONYMOUS_ORG_ROLE: Admin
# GF_AUTH_ANONYMOUS_ENABLED: true
# GF_ORG_ROLE: Admin
GF_ORG_NAME: trustgraph.ai
# GF_SERVER_ROOT_URL: https://example.com
restart: on-failure:100
pdf-decoder:
image: docker.io/trustgraph/trustgraph-flow:0.5.9
command:
- "pdf-decoder"
- "-p"
- "pulsar://pulsar:6650"
restart: on-failure:100
chunker:
image: docker.io/trustgraph/trustgraph-flow:0.5.9
command:
- "chunker-recursive"
- "-p"
- "pulsar://pulsar:6650"
- "--chunk-size"
- "1000"
- "--chunk-overlap"
- "50"
restart: on-failure:100
vectorize:
image: docker.io/trustgraph/trustgraph-flow:0.5.9
command:
- "embeddings-vectorize"
- "-p"
- "pulsar://pulsar:6650"
restart: on-failure:100
embeddings:
image: docker.io/trustgraph/trustgraph-flow:0.5.9
command:
- "embeddings-hf"
- "-p"
- "pulsar://pulsar:6650"
# - "-m"
# - "mixedbread-ai/mxbai-embed-large-v1"
restart: on-failure:100
kg-extract-definitions:
image: docker.io/trustgraph/trustgraph-flow:0.5.9
command:
- "kg-extract-definitions"
- "-p"
- "pulsar://pulsar:6650"
restart: on-failure:100
kg-extract-relationships:
image: docker.io/trustgraph/trustgraph-flow:0.5.9
command:
- "kg-extract-relationships"
- "-p"
- "pulsar://pulsar:6650"
restart: on-failure:100
store-graph-embeddings:
image: docker.io/trustgraph/trustgraph-flow:0.5.9
command:
- "ge-write-milvus"
- "-p"
- "pulsar://pulsar:6650"
- "-t"
- "http://milvus:19530"
restart: on-failure:100
store-triples:
image: docker.io/trustgraph/trustgraph-flow:0.5.9
command:
- "triples-write-cassandra"
- "-p"
- "pulsar://pulsar:6650"
- "-g"
- "cassandra"
restart: on-failure:100
text-completion:
image: docker.io/trustgraph/trustgraph-flow:0.5.9
command:
- "text-completion-cohere"
- "-p"
- "pulsar://pulsar:6650"
- "-k"
- ${COHERE_KEY}
restart: on-failure:100
graph-rag:
image: docker.io/trustgraph/trustgraph-flow:0.5.9
command:
- "graph-rag"
- "-p"
- "pulsar://pulsar:6650"
restart: on-failure:100
"services":
"cassandra":
"image": "docker.io/cassandra:4.1.5"
"ports":
- "9042:9042"
"restart": "on-failure:100"
"volumes":
- "cassandra:/var/lib/cassandra"
"chunker":
"command":
- "chunker-recursive"
- "-p"
- "pulsar://pulsar:6650"
- "--chunk-size"
- "1000"
- "--chunk-overlap"
- "50"
"image": "docker.io/trustgraph/trustgraph-flow:0.6.0"
"restart": "on-failure:100"
"embeddings":
"command":
- "embeddings-hf"
- "-p"
- "pulsar://pulsar:6650"
"image": "docker.io/trustgraph/trustgraph-flow:0.6.0"
"restart": "on-failure:100"
"etcd":
"command":
- "etcd"
- "-advertise-client-urls=http://127.0.0.1:2379"
- "-listen-client-urls"
- "http://0.0.0.0:2379"
- "--data-dir"
- "/etcd"
"environment":
"ETCD_AUTO_COMPACTION_MODE": "revision"
"ETCD_AUTO_COMPACTION_RETENTION": "1000"
"ETCD_QUOTA_BACKEND_BYTES": "4294967296"
"ETCD_SNAPSHOT_COUNT": "50000"
"image": "quay.io/coreos/etcd:v3.5.5"
"ports":
- "2379:2379"
"restart": "on-failure:100"
"volumes":
- "etcd:/etcd"
"grafana":
"environment":
"GF_ORG_NAME": "trustgraph.ai"
"image": "docker.io/grafana/grafana:10.0.0"
"ports":
- "3000:3000"
"restart": "on-failure:100"
"volumes":
- "grafana-storage:/var/lib/grafana"
- "./grafana/dashboard.yml:/etc/grafana/provisioning/dashboards/dashboard.yml"
- "./grafana/datasource.yml:/etc/grafana/provisioning/datasources/datasource.yml"
- "./grafana/dashboard.json:/var/lib/grafana/dashboards/dashboard.json"
"graph-rag":
"command":
- "graph-rag"
- "-p"
- "pulsar://pulsar:6650"
- "--prompt-request-queue"
- "non-persistent://tg/request/prompt-rag"
- "--prompt-response-queue"
- "non-persistent://tg/response/prompt-rag-response"
"image": "docker.io/trustgraph/trustgraph-flow:0.6.0"
"restart": "on-failure:100"
"init-pulsar":
"command":
- "sh"
- "-c"
- "pulsar-admin --admin-url http://pulsar:8080 tenants create tg && pulsar-admin --admin-url http://pulsar:8080 namespaces create tg/flow && pulsar-admin --admin-url http://pulsar:8080 namespaces create tg/request && pulsar-admin --admin-url http://pulsar:8080 namespaces create tg/response && pulsar-admin --admin-url http://pulsar:8080 namespaces set-retention --size -1 --time 3m tg/response"
"depends_on":
"pulsar":
"condition": "service_started"
"image": "docker.io/apachepulsar/pulsar:3.3.0"
"restart": "on-failure:100"
"kg-extract-definitions":
"command":
- "kg-extract-definitions"
- "-p"
- "pulsar://pulsar:6650"
"image": "docker.io/trustgraph/trustgraph-flow:0.6.0"
"restart": "on-failure:100"
"kg-extract-relationships":
"command":
- "kg-extract-relationships"
- "-p"
- "pulsar://pulsar:6650"
"image": "docker.io/trustgraph/trustgraph-flow:0.6.0"
"restart": "on-failure:100"
"milvus":
"command":
- "milvus"
- "run"
- "standalone"
"environment":
"ETCD_ENDPOINTS": "etcd:2379"
"MINIO_ADDRESS": "minio:9000"
"image": "docker.io/milvusdb/milvus:v2.4.5"
"ports":
- "9091:9091"
- "19530:19530"
"restart": "on-failure:100"
"volumes":
- "milvus:/var/lib/milvus"
"minio":
"command":
- "minio"
- "server"
- "/minio_data"
- "--console-address"
- ":9001"
"environment":
"MINIO_ROOT_PASSWORD": "minioadmin"
"MINIO_ROOT_USER": "minioadmin"
"image": "docker.io/minio/minio:RELEASE.2024-07-04T14-25-45Z"
"ports":
- "9001:9001"
"restart": "on-failure:100"
"volumes":
- "minio-data:/minio_data"
"pdf-decoder":
"command":
- "pdf-decoder"
- "-p"
- "pulsar://pulsar:6650"
"image": "docker.io/trustgraph/trustgraph-flow:0.6.0"
"restart": "on-failure:100"
"prometheus":
"image": "docker.io/prom/prometheus:v2.53.1"
"ports":
- "9090:9090"
"restart": "on-failure:100"
"volumes":
- "./prometheus:/etc/prometheus"
- "prometheus-data:/prometheus"
"prompt":
"command":
- "prompt-generic"
- "-p"
- "pulsar://pulsar:6650"
- "--text-completion-request-queue"
- "non-persistent://tg/request/text-completion"
- "--text-completion-response-queue"
- "non-persistent://tg/response/text-completion-response"
"image": "docker.io/trustgraph/trustgraph-flow:0.6.0"
"restart": "on-failure:100"
"prompt-rag":
"command":
- "prompt-generic"
- "-p"
- "pulsar://pulsar:6650"
- "-i"
- "non-persistent://tg/request/prompt-rag"
- "-o"
- "non-persistent://tg/response/prompt-rag-response"
- "--text-completion-request-queue"
- "non-persistent://tg/request/text-completion-rag"
- "--text-completion-response-queue"
- "non-persistent://tg/response/text-completion-rag-response"
"image": "docker.io/trustgraph/trustgraph-flow:0.6.0"
"restart": "on-failure:100"
"pulsar":
"command": "bin/pulsar standalone"
"image": "docker.io/apachepulsar/pulsar:3.3.0"
"ports":
- "6650:6650"
- "8080:8080"
"restart": "on-failure:100"
"volumes":
- "pulsar-conf:/pulsar/conf"
- "pulsar-data:/pulsar/data"
"pulsar-manager":
"environment":
"SPRING_CONFIGURATION_FILE": "/pulsar-manager/pulsar-manager/application.properties"
"image": "docker.io/apachepulsar/pulsar-manager:v0.3.0"
"ports":
- "9527:9527"
- "7750:7750"
"restart": "on-failure:100"
"query-graph-embeddings":
"command":
- "ge-query-milvus"
- "-p"
- "pulsar://pulsar:6650"
- "-t"
- "http://milvus:19530"
"image": "docker.io/trustgraph/trustgraph-flow:0.6.0"
"restart": "on-failure:100"
"query-triples":
"command":
- "triples-query-cassandra"
- "-p"
- "pulsar://pulsar:6650"
- "-g"
- "cassandra"
"image": "docker.io/trustgraph/trustgraph-flow:0.6.0"
"restart": "on-failure:100"
"store-graph-embeddings":
"command":
- "ge-write-milvus"
- "-p"
- "pulsar://pulsar:6650"
- "-t"
- "http://milvus:19530"
"image": "docker.io/trustgraph/trustgraph-flow:0.6.0"
"restart": "on-failure:100"
"store-triples":
"command":
- "triples-write-cassandra"
- "-p"
- "pulsar://pulsar:6650"
- "-g"
- "cassandra"
"image": "docker.io/trustgraph/trustgraph-flow:0.6.0"
"restart": "on-failure:100"
"text-completion":
"command":
- "text-completion-cohere"
- "-p"
- "pulsar://pulsar:6650"
- "-k"
- "${COHERE_KEY}"
"image": "docker.io/trustgraph/trustgraph-flow:0.6.0"
"restart": "on-failure:100"
"text-completion-rag":
"command":
- "text-completion-cohere"
- "-p"
- "pulsar://pulsar:6650"
- "-k"
- "${COHERE_KEY}"
- "-i"
- "non-persistent://tg/request/text-completion-rag"
- "-o"
- "non-persistent://tg/response/text-completion-rag-response"
"image": "docker.io/trustgraph/trustgraph-flow:0.6.0"
"restart": "on-failure:100"
"vectorize":
"command":
- "embeddings-vectorize"
- "-p"
- "pulsar://pulsar:6650"
"image": "docker.io/trustgraph/trustgraph-flow:0.6.0"
"restart": "on-failure:100"
"volumes":
"cassandra": {}
"etcd": {}
"grafana-storage": {}
"milvus": {}
"minio-data": {}
"prometheus-data": {}
"pulsar-conf": {}
"pulsar-data": {}

View file

@ -1,261 +1,260 @@
volumes:
cassandra:
pulsar-conf:
pulsar-data:
etcd:
minio-data:
milvus:
prometheus-data:
grafana-storage:
services:
cassandra:
image: docker.io/cassandra:4.1.5
ports:
- "9042:9042"
volumes:
- "cassandra:/var/lib/cassandra"
restart: on-failure:100
pulsar:
image: docker.io/apachepulsar/pulsar:3.3.0
command: bin/pulsar standalone
ports:
- "6650:6650"
- "8080:8080"
volumes:
- "pulsar-conf:/pulsar/conf"
- "pulsar-data:/pulsar/data"
restart: on-failure:100
init-pulsar:
image: docker.io/apachepulsar/pulsar:3.3.0
command:
- "sh"
- "-c"
- "pulsar-admin --admin-url http://pulsar:8080 tenants create tg && pulsar-admin --admin-url http://pulsar:8080 namespaces create tg/flow && pulsar-admin --admin-url http://pulsar:8080 namespaces create tg/request && pulsar-admin --admin-url http://pulsar:8080 namespaces create tg/response && pulsar-admin --admin-url http://pulsar:8080 namespaces set-retention --size -1 --time 3m tg/response"
depends_on:
pulsar:
condition: service_started
restart: on-failure:100
pulsar-manager:
image: docker.io/apachepulsar/pulsar-manager:v0.3.0
ports:
- "9527:9527"
- "7750:7750"
environment:
SPRING_CONFIGURATION_FILE: /pulsar-manager/pulsar-manager/application.properties
restart: on-failure:100
etcd:
image: quay.io/coreos/etcd:v3.5.5
command:
- "etcd"
- "-advertise-client-urls=http://127.0.0.1:2379"
- "-listen-client-urls"
- "http://0.0.0.0:2379"
- "--data-dir"
- "/etcd"
environment:
ETCD_AUTO_COMPACTION_MODE: revision
ETCD_AUTO_COMPACTION_RETENTION: "1000"
ETCD_QUOTA_BACKEND_BYTES: "4294967296"
ETCD_SNAPSHOT_COUNT: "50000"
ports:
- "2379:2379"
volumes:
- "etcd:/etcd"
restart: on-failure:100
minio:
image: docker.io/minio/minio:RELEASE.2024-07-04T14-25-45Z
command:
- "minio"
- "server"
- "/minio_data"
- "--console-address"
- ":9001"
environment:
MINIO_ROOT_USER: minioadmin
MINIO_ROOT_PASSWORD: minioadmin
ports:
- "9001:9001"
volumes:
- "minio-data:/minio_data"
restart: on-failure:100
milvus:
image: docker.io/milvusdb/milvus:v2.4.5
command:
- "milvus"
- "run"
- "standalone"
environment:
ETCD_ENDPOINTS: etcd:2379
MINIO_ADDRESS: minio:9000
ports:
- "9091:9091"
- "19530:19530"
volumes:
- "milvus:/var/lib/milvus"
restart: on-failure:100
prometheus:
image: docker.io/prom/prometheus:v2.53.1
ports:
- "9090:9090"
volumes:
- "./prometheus:/etc/prometheus"
- "prometheus-data:/prometheus"
restart: on-failure:100
grafana:
image: docker.io/grafana/grafana:10.0.0
ports:
- "3000:3000"
volumes:
- "grafana-storage:/var/lib/grafana"
- "./grafana/dashboard.yml:/etc/grafana/provisioning/dashboards/dashboard.yml"
- "./grafana/datasource.yml:/etc/grafana/provisioning/datasources/datasource.yml"
- "./grafana/dashboard.json:/var/lib/grafana/dashboards/dashboard.json"
environment:
# GF_AUTH_ANONYMOUS_ORG_ROLE: Admin
# GF_AUTH_ANONYMOUS_ENABLED: true
# GF_ORG_ROLE: Admin
GF_ORG_NAME: trustgraph.ai
# GF_SERVER_ROOT_URL: https://example.com
restart: on-failure:100
pdf-decoder:
image: docker.io/trustgraph/trustgraph-flow:0.5.9
command:
- "pdf-decoder"
- "-p"
- "pulsar://pulsar:6650"
restart: on-failure:100
chunker:
image: docker.io/trustgraph/trustgraph-flow:0.5.9
command:
- "chunker-recursive"
- "-p"
- "pulsar://pulsar:6650"
- "--chunk-size"
- "4000"
- "--chunk-overlap"
- "120"
restart: on-failure:100
vectorize:
image: docker.io/trustgraph/trustgraph-flow:0.5.9
command:
- "embeddings-vectorize"
- "-p"
- "pulsar://pulsar:6650"
restart: on-failure:100
embeddings:
image: docker.io/trustgraph/trustgraph-flow:0.5.9
command:
- "embeddings-hf"
- "-p"
- "pulsar://pulsar:6650"
# - "-m"
# - "mixedbread-ai/mxbai-embed-large-v1"
restart: on-failure:100
kg-extract-definitions:
image: docker.io/trustgraph/trustgraph-flow:0.5.9
command:
- "kg-extract-definitions"
- "-p"
- "pulsar://pulsar:6650"
restart: on-failure:100
kg-extract-relationships:
image: docker.io/trustgraph/trustgraph-flow:0.5.9
command:
- "kg-extract-relationships"
- "-p"
- "pulsar://pulsar:6650"
restart: on-failure:100
store-graph-embeddings:
image: docker.io/trustgraph/trustgraph-flow:0.5.9
command:
- "ge-write-milvus"
- "-p"
- "pulsar://pulsar:6650"
- "-t"
- "http://milvus:19530"
restart: on-failure:100
store-triples:
image: docker.io/trustgraph/trustgraph-flow:0.5.9
command:
- "triples-write-cassandra"
- "-p"
- "pulsar://pulsar:6650"
- "-g"
- "cassandra"
restart: on-failure:100
text-completion:
image: docker.io/trustgraph/trustgraph-flow:0.5.9
command:
- "text-completion-cohere"
- "-p"
- "pulsar://pulsar:6650"
- "-k"
- ${COHERE_KEY}
- "-m"
- "c4ai-aya-23-35b"
restart: on-failure:100
text-completion-rag:
image: docker.io/trustgraph/trustgraph-flow:0.5.9
command:
- "text-completion-cohere"
- "-p"
- "pulsar://pulsar:6650"
- "-k"
- ${COHERE_KEY}
- "-m"
- "c4ai-aya-23-8b"
- "-i"
- "non-persistent://tg/request/text-completion-rag"
- "-o"
- "non-persistent://tg/response/text-completion-rag-response"
restart: on-failure:100
#text-completion-rag:
# image: docker.io/trustgraph/trustgraph-flow:0.5.9
# command:
# - "text-completion-ollama"
# - "-p"
# - "pulsar://pulsar:6650"
# - "-r"
# - "http://${OLLAMA_HOST}:11434/"
# - "-i"
# - "non-persistent://tg/request/text-completion-rag"
# - "-o"
# - "non-persistent://tg/response/text-completion-rag-response"
# - "-m"
# - "gemma2:2b"
# restart: on-failure:100
graph-rag:
image: docker.io/trustgraph/trustgraph-flow:0.5.9
command:
- "graph-rag"
- "-p"
- "pulsar://pulsar:6650"
- "--text-completion-request-queue"
- "non-persistent://tg/request/text-completion-rag"
- "--text-completion-response-queue"
- "non-persistent://tg/response/text-completion-rag-response"
restart: on-failure:100
"services":
"cassandra":
"image": "docker.io/cassandra:4.1.5"
"ports":
- "9042:9042"
"restart": "on-failure:100"
"volumes":
- "cassandra:/var/lib/cassandra"
"chunker":
"command":
- "chunker-recursive"
- "-p"
- "pulsar://pulsar:6650"
- "--chunk-size"
- "4000"
- "--chunk-overlap"
- "120"
"image": "docker.io/trustgraph/trustgraph-flow:0.6.0"
"restart": "on-failure:100"
"embeddings":
"command":
- "embeddings-hf"
- "-p"
- "pulsar://pulsar:6650"
"image": "docker.io/trustgraph/trustgraph-flow:0.6.0"
"restart": "on-failure:100"
"etcd":
"command":
- "etcd"
- "-advertise-client-urls=http://127.0.0.1:2379"
- "-listen-client-urls"
- "http://0.0.0.0:2379"
- "--data-dir"
- "/etcd"
"environment":
"ETCD_AUTO_COMPACTION_MODE": "revision"
"ETCD_AUTO_COMPACTION_RETENTION": "1000"
"ETCD_QUOTA_BACKEND_BYTES": "4294967296"
"ETCD_SNAPSHOT_COUNT": "50000"
"image": "quay.io/coreos/etcd:v3.5.5"
"ports":
- "2379:2379"
"restart": "on-failure:100"
"volumes":
- "etcd:/etcd"
"grafana":
"environment":
"GF_ORG_NAME": "trustgraph.ai"
"image": "docker.io/grafana/grafana:10.0.0"
"ports":
- "3000:3000"
"restart": "on-failure:100"
"volumes":
- "grafana-storage:/var/lib/grafana"
- "./grafana/dashboard.yml:/etc/grafana/provisioning/dashboards/dashboard.yml"
- "./grafana/datasource.yml:/etc/grafana/provisioning/datasources/datasource.yml"
- "./grafana/dashboard.json:/var/lib/grafana/dashboards/dashboard.json"
"graph-rag":
"command":
- "graph-rag"
- "-p"
- "pulsar://pulsar:6650"
- "--prompt-request-queue"
- "non-persistent://tg/request/prompt-rag"
- "--prompt-response-queue"
- "non-persistent://tg/response/prompt-rag-response"
"image": "docker.io/trustgraph/trustgraph-flow:0.6.0"
"restart": "on-failure:100"
"init-pulsar":
"command":
- "sh"
- "-c"
- "pulsar-admin --admin-url http://pulsar:8080 tenants create tg && pulsar-admin --admin-url http://pulsar:8080 namespaces create tg/flow && pulsar-admin --admin-url http://pulsar:8080 namespaces create tg/request && pulsar-admin --admin-url http://pulsar:8080 namespaces create tg/response && pulsar-admin --admin-url http://pulsar:8080 namespaces set-retention --size -1 --time 3m tg/response"
"depends_on":
"pulsar":
"condition": "service_started"
"image": "docker.io/apachepulsar/pulsar:3.3.0"
"restart": "on-failure:100"
"kg-extract-definitions":
"command":
- "kg-extract-definitions"
- "-p"
- "pulsar://pulsar:6650"
"image": "docker.io/trustgraph/trustgraph-flow:0.6.0"
"restart": "on-failure:100"
"kg-extract-relationships":
"command":
- "kg-extract-relationships"
- "-p"
- "pulsar://pulsar:6650"
"image": "docker.io/trustgraph/trustgraph-flow:0.6.0"
"restart": "on-failure:100"
"milvus":
"command":
- "milvus"
- "run"
- "standalone"
"environment":
"ETCD_ENDPOINTS": "etcd:2379"
"MINIO_ADDRESS": "minio:9000"
"image": "docker.io/milvusdb/milvus:v2.4.5"
"ports":
- "9091:9091"
- "19530:19530"
"restart": "on-failure:100"
"volumes":
- "milvus:/var/lib/milvus"
"minio":
"command":
- "minio"
- "server"
- "/minio_data"
- "--console-address"
- ":9001"
"environment":
"MINIO_ROOT_PASSWORD": "minioadmin"
"MINIO_ROOT_USER": "minioadmin"
"image": "docker.io/minio/minio:RELEASE.2024-07-04T14-25-45Z"
"ports":
- "9001:9001"
"restart": "on-failure:100"
"volumes":
- "minio-data:/minio_data"
"pdf-decoder":
"command":
- "pdf-decoder"
- "-p"
- "pulsar://pulsar:6650"
"image": "docker.io/trustgraph/trustgraph-flow:0.6.0"
"restart": "on-failure:100"
"prometheus":
"image": "docker.io/prom/prometheus:v2.53.1"
"ports":
- "9090:9090"
"restart": "on-failure:100"
"volumes":
- "./prometheus:/etc/prometheus"
- "prometheus-data:/prometheus"
"prompt":
"command":
- "prompt-generic"
- "-p"
- "pulsar://pulsar:6650"
- "--text-completion-request-queue"
- "non-persistent://tg/request/text-completion"
- "--text-completion-response-queue"
- "non-persistent://tg/response/text-completion-response"
"image": "docker.io/trustgraph/trustgraph-flow:0.6.0"
"restart": "on-failure:100"
"prompt-rag":
"command":
- "prompt-generic"
- "-p"
- "pulsar://pulsar:6650"
- "-i"
- "non-persistent://tg/request/prompt-rag"
- "-o"
- "non-persistent://tg/response/prompt-rag-response"
- "--text-completion-request-queue"
- "non-persistent://tg/request/text-completion-rag"
- "--text-completion-response-queue"
- "non-persistent://tg/response/text-completion-rag-response"
"image": "docker.io/trustgraph/trustgraph-flow:0.6.0"
"restart": "on-failure:100"
"pulsar":
"command": "bin/pulsar standalone"
"image": "docker.io/apachepulsar/pulsar:3.3.0"
"ports":
- "6650:6650"
- "8080:8080"
"restart": "on-failure:100"
"volumes":
- "pulsar-conf:/pulsar/conf"
- "pulsar-data:/pulsar/data"
"pulsar-manager":
"environment":
"SPRING_CONFIGURATION_FILE": "/pulsar-manager/pulsar-manager/application.properties"
"image": "docker.io/apachepulsar/pulsar-manager:v0.3.0"
"ports":
- "9527:9527"
- "7750:7750"
"restart": "on-failure:100"
"query-graph-embeddings":
"command":
- "ge-query-milvus"
- "-p"
- "pulsar://pulsar:6650"
- "-t"
- "http://milvus:19530"
"image": "docker.io/trustgraph/trustgraph-flow:0.6.0"
"restart": "on-failure:100"
"query-triples":
"command":
- "triples-query-cassandra"
- "-p"
- "pulsar://pulsar:6650"
- "-g"
- "cassandra"
"image": "docker.io/trustgraph/trustgraph-flow:0.6.0"
"restart": "on-failure:100"
"store-graph-embeddings":
"command":
- "ge-write-milvus"
- "-p"
- "pulsar://pulsar:6650"
- "-t"
- "http://milvus:19530"
"image": "docker.io/trustgraph/trustgraph-flow:0.6.0"
"restart": "on-failure:100"
"store-triples":
"command":
- "triples-write-cassandra"
- "-p"
- "pulsar://pulsar:6650"
- "-g"
- "cassandra"
"image": "docker.io/trustgraph/trustgraph-flow:0.6.0"
"restart": "on-failure:100"
"text-completion":
"command":
- "text-completion-cohere"
- "-p"
- "pulsar://pulsar:6650"
- "-k"
- "${COHERE_KEY}"
- "-m"
- "c4ai-aya-23-35b"
"image": "docker.io/trustgraph/trustgraph-flow:0.6.0"
"restart": "on-failure:100"
"text-completion-rag":
"command":
- "text-completion-cohere"
- "-p"
- "pulsar://pulsar:6650"
- "-k"
- "${COHERE_KEY}"
- "-i"
- "non-persistent://tg/request/text-completion-rag"
- "-o"
- "non-persistent://tg/response/text-completion-rag-response"
- "-m"
- "c4ai-aya-23-8b"
"image": "docker.io/trustgraph/trustgraph-flow:0.6.0"
"restart": "on-failure:100"
"vectorize":
"command":
- "embeddings-vectorize"
- "-p"
- "pulsar://pulsar:6650"
"image": "docker.io/trustgraph/trustgraph-flow:0.6.0"
"restart": "on-failure:100"
"volumes":
"cassandra": {}
"etcd": {}
"grafana-storage": {}
"milvus": {}
"minio-data": {}
"prometheus-data": {}
"pulsar-conf": {}
"pulsar-data": {}

View file

@ -1,241 +1,252 @@
# Docker Compose stack: Cassandra, Pulsar, Milvus (plus the etcd and MinIO
# instances it is pointed at), Prometheus, Grafana and the trustgraph-flow
# 0.5.9 processors. Text completion is served by text-completion-ollama.

# Named volumes mounted by the services below.
volumes:
  cassandra:
  pulsar-conf:
  pulsar-data:
  etcd:
  minio-data:
  milvus:
  prometheus-data:
  grafana-storage:

services:

  cassandra:
    image: docker.io/cassandra:4.1.5
    ports:
      - "9042:9042"
    volumes:
      - "cassandra:/var/lib/cassandra"
    restart: on-failure:100

  pulsar:
    image: docker.io/apachepulsar/pulsar:3.3.0
    command: bin/pulsar standalone
    ports:
      - "6650:6650"
      - "8080:8080"
    volumes:
      - "pulsar-conf:/pulsar/conf"
      - "pulsar-data:/pulsar/data"
    restart: on-failure:100

  # Creates tenant `tg` and namespaces tg/flow, tg/request and tg/response,
  # then sets retention on tg/response.
  # NOTE(review): service_started does not wait for the Pulsar admin API;
  # presumably the restart policy is what retries until it answers - confirm.
  init-pulsar:
    image: docker.io/apachepulsar/pulsar:3.3.0
    command:
      - "sh"
      - "-c"
      - "pulsar-admin --admin-url http://pulsar:8080 tenants create tg && pulsar-admin --admin-url http://pulsar:8080 namespaces create tg/flow && pulsar-admin --admin-url http://pulsar:8080 namespaces create tg/request && pulsar-admin --admin-url http://pulsar:8080 namespaces create tg/response && pulsar-admin --admin-url http://pulsar:8080 namespaces set-retention --size -1 --time 3m tg/response"
    depends_on:
      pulsar:
        condition: service_started
    restart: on-failure:100

  pulsar-manager:
    image: docker.io/apachepulsar/pulsar-manager:v0.3.0
    ports:
      - "9527:9527"
      - "7750:7750"
    environment:
      SPRING_CONFIGURATION_FILE: /pulsar-manager/pulsar-manager/application.properties
    restart: on-failure:100

  # Milvus is pointed at this instance through ETCD_ENDPOINTS (etcd:2379).
  etcd:
    image: quay.io/coreos/etcd:v3.5.5
    command:
      - "etcd"
      - "-advertise-client-urls=http://127.0.0.1:2379"
      - "-listen-client-urls"
      - "http://0.0.0.0:2379"
      - "--data-dir"
      - "/etcd"
    environment:
      ETCD_AUTO_COMPACTION_MODE: revision
      ETCD_AUTO_COMPACTION_RETENTION: "1000"
      ETCD_QUOTA_BACKEND_BYTES: "4294967296"
      ETCD_SNAPSHOT_COUNT: "50000"
    ports:
      - "2379:2379"
    volumes:
      - "etcd:/etcd"
    restart: on-failure:100

  # Milvus is pointed at this instance through MINIO_ADDRESS (minio:9000);
  # only 9001, the --console-address port, is published to the host.
  # NOTE(review): default minioadmin credentials - override outside local use.
  minio:
    image: docker.io/minio/minio:RELEASE.2024-07-04T14-25-45Z
    command:
      - "minio"
      - "server"
      - "/minio_data"
      - "--console-address"
      - ":9001"
    environment:
      MINIO_ROOT_USER: minioadmin
      MINIO_ROOT_PASSWORD: minioadmin
    ports:
      - "9001:9001"
    volumes:
      - "minio-data:/minio_data"
    restart: on-failure:100

  milvus:
    image: docker.io/milvusdb/milvus:v2.4.5
    command:
      - "milvus"
      - "run"
      - "standalone"
    environment:
      ETCD_ENDPOINTS: etcd:2379
      MINIO_ADDRESS: minio:9000
    ports:
      - "9091:9091"
      - "19530:19530"
    volumes:
      - "milvus:/var/lib/milvus"
    restart: on-failure:100

  prometheus:
    image: docker.io/prom/prometheus:v2.53.1
    ports:
      - "9090:9090"
    volumes:
      - "./prometheus:/etc/prometheus"
      - "prometheus-data:/prometheus"
    restart: on-failure:100

  grafana:
    image: docker.io/grafana/grafana:10.0.0
    ports:
      - "3000:3000"
    volumes:
      - "grafana-storage:/var/lib/grafana"
      - "./grafana/dashboard.yml:/etc/grafana/provisioning/dashboards/dashboard.yml"
      - "./grafana/datasource.yml:/etc/grafana/provisioning/datasources/datasource.yml"
      - "./grafana/dashboard.json:/var/lib/grafana/dashboards/dashboard.json"
    environment:
      # GF_AUTH_ANONYMOUS_ORG_ROLE: Admin
      # GF_AUTH_ANONYMOUS_ENABLED: true
      # GF_ORG_ROLE: Admin
      GF_ORG_NAME: trustgraph.ai
      # GF_SERVER_ROOT_URL: https://example.com
    restart: on-failure:100

  # trustgraph-flow processors: each runs one command from the shared image
  # and is given the Pulsar broker URL with -p.
  pdf-decoder:
    image: docker.io/trustgraph/trustgraph-flow:0.5.9
    command:
      - "pdf-decoder"
      - "-p"
      - "pulsar://pulsar:6650"
    restart: on-failure:100

  chunker:
    image: docker.io/trustgraph/trustgraph-flow:0.5.9
    command:
      - "chunker-recursive"
      - "-p"
      - "pulsar://pulsar:6650"
    restart: on-failure:100

  vectorize:
    image: docker.io/trustgraph/trustgraph-flow:0.5.9
    command:
      - "embeddings-vectorize"
      - "-p"
      - "pulsar://pulsar:6650"
    restart: on-failure:100

  embeddings:
    image: docker.io/trustgraph/trustgraph-flow:0.5.9
    command:
      - "embeddings-hf"
      - "-p"
      - "pulsar://pulsar:6650"
      # - "-m"
      # - "mixedbread-ai/mxbai-embed-large-v1"
    restart: on-failure:100

  kg-extract-definitions:
    image: docker.io/trustgraph/trustgraph-flow:0.5.9
    command:
      - "kg-extract-definitions"
      - "-p"
      - "pulsar://pulsar:6650"
    restart: on-failure:100

  kg-extract-relationships:
    image: docker.io/trustgraph/trustgraph-flow:0.5.9
    command:
      - "kg-extract-relationships"
      - "-p"
      - "pulsar://pulsar:6650"
    restart: on-failure:100

  store-graph-embeddings:
    image: docker.io/trustgraph/trustgraph-flow:0.5.9
    command:
      - "ge-write-milvus"
      - "-p"
      - "pulsar://pulsar:6650"
      - "-t"
      - "http://milvus:19530"
    restart: on-failure:100

  store-triples:
    image: docker.io/trustgraph/trustgraph-flow:0.5.9
    command:
      - "triples-write-cassandra"
      - "-p"
      - "pulsar://pulsar:6650"
      - "-g"
      - "cassandra"
    restart: on-failure:100

  # OLLAMA_HOST is interpolated into the URL passed with -r.
  text-completion:
    image: docker.io/trustgraph/trustgraph-flow:0.5.9
    command:
      - "text-completion-ollama"
      - "-p"
      - "pulsar://pulsar:6650"
      - "-r"
      - "http://${OLLAMA_HOST}:11434/"
      # - "-m"
      # - "llama3.1:8b"
    restart: on-failure:100

  # Second completion instance on its own -i/-o queues; graph-rag below is
  # configured with the same two queue names.
  text-completion-rag:
    image: docker.io/trustgraph/trustgraph-flow:0.5.9
    command:
      - "text-completion-ollama"
      - "-p"
      - "pulsar://pulsar:6650"
      - "-r"
      - "http://${OLLAMA_HOST}:11434/"
      - "-i"
      - "non-persistent://tg/request/text-completion-rag"
      - "-o"
      - "non-persistent://tg/response/text-completion-rag-response"
      - "-m"
      - "llama3.1:8b"
    restart: on-failure:100

  graph-rag:
    image: docker.io/trustgraph/trustgraph-flow:0.5.9
    command:
      - "graph-rag"
      - "-p"
      - "pulsar://pulsar:6650"
      - "--text-completion-request-queue"
      - "non-persistent://tg/request/text-completion-rag"
      - "--text-completion-response-queue"
      - "non-persistent://tg/response/text-completion-rag-response"
    restart: on-failure:100
# Docker Compose stack: Cassandra, Pulsar, Milvus (plus the etcd and MinIO
# instances it is pointed at), Prometheus, Grafana and the trustgraph-flow
# 0.6.0 processors. Text completion is served by text-completion-ollama.
# Services are listed alphabetically.
"services":
  "cassandra":
    "image": "docker.io/cassandra:4.1.5"
    "ports":
      - "9042:9042"
    "restart": "on-failure:100"
    "volumes":
      - "cassandra:/var/lib/cassandra"
  # trustgraph-flow processors (this and the other services using the
  # trustgraph-flow image) are given the Pulsar broker URL with -p.
  "chunker":
    "command":
      - "chunker-recursive"
      - "-p"
      - "pulsar://pulsar:6650"
    "image": "docker.io/trustgraph/trustgraph-flow:0.6.0"
    "restart": "on-failure:100"
  "embeddings":
    "command":
      - "embeddings-hf"
      - "-p"
      - "pulsar://pulsar:6650"
    "image": "docker.io/trustgraph/trustgraph-flow:0.6.0"
    "restart": "on-failure:100"
  # Milvus is pointed at this instance through ETCD_ENDPOINTS (etcd:2379).
  "etcd":
    "command":
      - "etcd"
      - "-advertise-client-urls=http://127.0.0.1:2379"
      - "-listen-client-urls"
      - "http://0.0.0.0:2379"
      - "--data-dir"
      - "/etcd"
    "environment":
      "ETCD_AUTO_COMPACTION_MODE": "revision"
      "ETCD_AUTO_COMPACTION_RETENTION": "1000"
      "ETCD_QUOTA_BACKEND_BYTES": "4294967296"
      "ETCD_SNAPSHOT_COUNT": "50000"
    "image": "quay.io/coreos/etcd:v3.5.5"
    "ports":
      - "2379:2379"
    "restart": "on-failure:100"
    "volumes":
      - "etcd:/etcd"
  "grafana":
    "environment":
      "GF_ORG_NAME": "trustgraph.ai"
    "image": "docker.io/grafana/grafana:10.0.0"
    "ports":
      - "3000:3000"
    "restart": "on-failure:100"
    "volumes":
      - "grafana-storage:/var/lib/grafana"
      - "./grafana/dashboard.yml:/etc/grafana/provisioning/dashboards/dashboard.yml"
      - "./grafana/datasource.yml:/etc/grafana/provisioning/datasources/datasource.yml"
      - "./grafana/dashboard.json:/var/lib/grafana/dashboards/dashboard.json"
  # Configured with the prompt-rag queue pair, i.e. the -i/-o queues of the
  # prompt-rag service below.
  "graph-rag":
    "command":
      - "graph-rag"
      - "-p"
      - "pulsar://pulsar:6650"
      - "--prompt-request-queue"
      - "non-persistent://tg/request/prompt-rag"
      - "--prompt-response-queue"
      - "non-persistent://tg/response/prompt-rag-response"
    "image": "docker.io/trustgraph/trustgraph-flow:0.6.0"
    "restart": "on-failure:100"
  # Creates tenant `tg` and namespaces tg/flow, tg/request and tg/response,
  # then sets retention on tg/response.
  # NOTE(review): service_started does not wait for the Pulsar admin API;
  # presumably the restart policy is what retries until it answers - confirm.
  "init-pulsar":
    "command":
      - "sh"
      - "-c"
      - "pulsar-admin --admin-url http://pulsar:8080 tenants create tg && pulsar-admin --admin-url http://pulsar:8080 namespaces create tg/flow && pulsar-admin --admin-url http://pulsar:8080 namespaces create tg/request && pulsar-admin --admin-url http://pulsar:8080 namespaces create tg/response && pulsar-admin --admin-url http://pulsar:8080 namespaces set-retention --size -1 --time 3m tg/response"
    "depends_on":
      "pulsar":
        "condition": "service_started"
    "image": "docker.io/apachepulsar/pulsar:3.3.0"
    "restart": "on-failure:100"
  "kg-extract-definitions":
    "command":
      - "kg-extract-definitions"
      - "-p"
      - "pulsar://pulsar:6650"
    "image": "docker.io/trustgraph/trustgraph-flow:0.6.0"
    "restart": "on-failure:100"
  "kg-extract-relationships":
    "command":
      - "kg-extract-relationships"
      - "-p"
      - "pulsar://pulsar:6650"
    "image": "docker.io/trustgraph/trustgraph-flow:0.6.0"
    "restart": "on-failure:100"
  "milvus":
    "command":
      - "milvus"
      - "run"
      - "standalone"
    "environment":
      "ETCD_ENDPOINTS": "etcd:2379"
      "MINIO_ADDRESS": "minio:9000"
    "image": "docker.io/milvusdb/milvus:v2.4.5"
    "ports":
      - "9091:9091"
      - "19530:19530"
    "restart": "on-failure:100"
    "volumes":
      - "milvus:/var/lib/milvus"
  # Milvus is pointed at this instance through MINIO_ADDRESS (minio:9000);
  # only 9001, the --console-address port, is published to the host.
  # NOTE(review): default minioadmin credentials - override outside local use.
  "minio":
    "command":
      - "minio"
      - "server"
      - "/minio_data"
      - "--console-address"
      - ":9001"
    "environment":
      "MINIO_ROOT_PASSWORD": "minioadmin"
      "MINIO_ROOT_USER": "minioadmin"
    "image": "docker.io/minio/minio:RELEASE.2024-07-04T14-25-45Z"
    "ports":
      - "9001:9001"
    "restart": "on-failure:100"
    "volumes":
      - "minio-data:/minio_data"
  "pdf-decoder":
    "command":
      - "pdf-decoder"
      - "-p"
      - "pulsar://pulsar:6650"
    "image": "docker.io/trustgraph/trustgraph-flow:0.6.0"
    "restart": "on-failure:100"
  "prometheus":
    "image": "docker.io/prom/prometheus:v2.53.1"
    "ports":
      - "9090:9090"
    "restart": "on-failure:100"
    "volumes":
      - "./prometheus:/etc/prometheus"
      - "prometheus-data:/prometheus"
  # Prompt service configured with the tg/request/text-completion and
  # tg/response/text-completion-response queues.
  "prompt":
    "command":
      - "prompt-generic"
      - "-p"
      - "pulsar://pulsar:6650"
      - "--text-completion-request-queue"
      - "non-persistent://tg/request/text-completion"
      - "--text-completion-response-queue"
      - "non-persistent://tg/response/text-completion-response"
    "image": "docker.io/trustgraph/trustgraph-flow:0.6.0"
    "restart": "on-failure:100"
  # Second prompt instance: its -i/-o queues are the ones graph-rag is
  # configured with, and its text-completion queues are the -i/-o queues of
  # text-completion-rag.
  "prompt-rag":
    "command":
      - "prompt-generic"
      - "-p"
      - "pulsar://pulsar:6650"
      - "-i"
      - "non-persistent://tg/request/prompt-rag"
      - "-o"
      - "non-persistent://tg/response/prompt-rag-response"
      - "--text-completion-request-queue"
      - "non-persistent://tg/request/text-completion-rag"
      - "--text-completion-response-queue"
      - "non-persistent://tg/response/text-completion-rag-response"
    "image": "docker.io/trustgraph/trustgraph-flow:0.6.0"
    "restart": "on-failure:100"
  "pulsar":
    "command": "bin/pulsar standalone"
    "image": "docker.io/apachepulsar/pulsar:3.3.0"
    "ports":
      - "6650:6650"
      - "8080:8080"
    "restart": "on-failure:100"
    "volumes":
      - "pulsar-conf:/pulsar/conf"
      - "pulsar-data:/pulsar/data"
  "pulsar-manager":
    "environment":
      "SPRING_CONFIGURATION_FILE": "/pulsar-manager/pulsar-manager/application.properties"
    "image": "docker.io/apachepulsar/pulsar-manager:v0.3.0"
    "ports":
      - "9527:9527"
      - "7750:7750"
    "restart": "on-failure:100"
  "query-graph-embeddings":
    "command":
      - "ge-query-milvus"
      - "-p"
      - "pulsar://pulsar:6650"
      - "-t"
      - "http://milvus:19530"
    "image": "docker.io/trustgraph/trustgraph-flow:0.6.0"
    "restart": "on-failure:100"
  "query-triples":
    "command":
      - "triples-query-cassandra"
      - "-p"
      - "pulsar://pulsar:6650"
      - "-g"
      - "cassandra"
    "image": "docker.io/trustgraph/trustgraph-flow:0.6.0"
    "restart": "on-failure:100"
  "store-graph-embeddings":
    "command":
      - "ge-write-milvus"
      - "-p"
      - "pulsar://pulsar:6650"
      - "-t"
      - "http://milvus:19530"
    "image": "docker.io/trustgraph/trustgraph-flow:0.6.0"
    "restart": "on-failure:100"
  "store-triples":
    "command":
      - "triples-write-cassandra"
      - "-p"
      - "pulsar://pulsar:6650"
      - "-g"
      - "cassandra"
    "image": "docker.io/trustgraph/trustgraph-flow:0.6.0"
    "restart": "on-failure:100"
  # NOTE(review): OLLAMA_HOST is passed to -r verbatim, with no scheme or
  # port added here - presumably it must hold the full service URL; confirm.
  "text-completion":
    "command":
      - "text-completion-ollama"
      - "-p"
      - "pulsar://pulsar:6650"
      - "-r"
      - "${OLLAMA_HOST}"
    "image": "docker.io/trustgraph/trustgraph-flow:0.6.0"
    "restart": "on-failure:100"
  # Second completion instance on its own -i/-o queues (the ones prompt-rag
  # is configured with). OLLAMA_HOST is passed to -r verbatim here too.
  "text-completion-rag":
    "command":
      - "text-completion-ollama"
      - "-p"
      - "pulsar://pulsar:6650"
      - "-r"
      - "${OLLAMA_HOST}"
      - "-i"
      - "non-persistent://tg/request/text-completion-rag"
      - "-o"
      - "non-persistent://tg/response/text-completion-rag-response"
    "image": "docker.io/trustgraph/trustgraph-flow:0.6.0"
    "restart": "on-failure:100"
  "vectorize":
    "command":
      - "embeddings-vectorize"
      - "-p"
      - "pulsar://pulsar:6650"
    "image": "docker.io/trustgraph/trustgraph-flow:0.6.0"
    "restart": "on-failure:100"
# Named volumes mounted by the services above.
"volumes":
  "cassandra": {}
  "etcd": {}
  "grafana-storage": {}
  "milvus": {}
  "minio-data": {}
  "prometheus-data": {}
  "pulsar-conf": {}
  "pulsar-data": {}

View file

@ -1,236 +1,252 @@
# Docker Compose stack: Cassandra, Pulsar, Milvus (plus the etcd and MinIO
# instances it is pointed at), Prometheus, Grafana and the trustgraph-flow
# 0.5.9 processors. Text completion is served by text-completion-openai.

# Named volumes mounted by the services below.
volumes:
  cassandra:
  pulsar-conf:
  pulsar-data:
  etcd:
  minio-data:
  milvus:
  prometheus-data:
  grafana-storage:

services:

  cassandra:
    image: docker.io/cassandra:4.1.5
    ports:
      - "9042:9042"
    volumes:
      - "cassandra:/var/lib/cassandra"
    restart: on-failure:100

  pulsar:
    image: docker.io/apachepulsar/pulsar:3.3.0
    command: bin/pulsar standalone
    ports:
      - "6650:6650"
      - "8080:8080"
    volumes:
      - "pulsar-conf:/pulsar/conf"
      - "pulsar-data:/pulsar/data"
    restart: on-failure:100

  # Creates tenant `tg` and namespaces tg/flow, tg/request and tg/response,
  # then sets retention on tg/response.
  # NOTE(review): service_started does not wait for the Pulsar admin API;
  # presumably the restart policy is what retries until it answers - confirm.
  init-pulsar:
    image: docker.io/apachepulsar/pulsar:3.3.0
    command:
      - "sh"
      - "-c"
      - "pulsar-admin --admin-url http://pulsar:8080 tenants create tg && pulsar-admin --admin-url http://pulsar:8080 namespaces create tg/flow && pulsar-admin --admin-url http://pulsar:8080 namespaces create tg/request && pulsar-admin --admin-url http://pulsar:8080 namespaces create tg/response && pulsar-admin --admin-url http://pulsar:8080 namespaces set-retention --size -1 --time 3m tg/response"
    depends_on:
      pulsar:
        condition: service_started
    restart: on-failure:100

  pulsar-manager:
    image: docker.io/apachepulsar/pulsar-manager:v0.3.0
    ports:
      - "9527:9527"
      - "7750:7750"
    environment:
      SPRING_CONFIGURATION_FILE: /pulsar-manager/pulsar-manager/application.properties
    restart: on-failure:100

  # Milvus is pointed at this instance through ETCD_ENDPOINTS (etcd:2379).
  etcd:
    image: quay.io/coreos/etcd:v3.5.5
    command:
      - "etcd"
      - "-advertise-client-urls=http://127.0.0.1:2379"
      - "-listen-client-urls"
      - "http://0.0.0.0:2379"
      - "--data-dir"
      - "/etcd"
    environment:
      ETCD_AUTO_COMPACTION_MODE: revision
      ETCD_AUTO_COMPACTION_RETENTION: "1000"
      ETCD_QUOTA_BACKEND_BYTES: "4294967296"
      ETCD_SNAPSHOT_COUNT: "50000"
    ports:
      - "2379:2379"
    volumes:
      - "etcd:/etcd"
    restart: on-failure:100

  # Milvus is pointed at this instance through MINIO_ADDRESS (minio:9000);
  # only 9001, the --console-address port, is published to the host.
  # NOTE(review): default minioadmin credentials - override outside local use.
  minio:
    image: docker.io/minio/minio:RELEASE.2024-07-04T14-25-45Z
    command:
      - "minio"
      - "server"
      - "/minio_data"
      - "--console-address"
      - ":9001"
    environment:
      MINIO_ROOT_USER: minioadmin
      MINIO_ROOT_PASSWORD: minioadmin
    ports:
      - "9001:9001"
    volumes:
      - "minio-data:/minio_data"
    restart: on-failure:100

  milvus:
    image: docker.io/milvusdb/milvus:v2.4.5
    command:
      - "milvus"
      - "run"
      - "standalone"
    environment:
      ETCD_ENDPOINTS: etcd:2379
      MINIO_ADDRESS: minio:9000
    ports:
      - "9091:9091"
      - "19530:19530"
    volumes:
      - "milvus:/var/lib/milvus"
    restart: on-failure:100

  prometheus:
    image: docker.io/prom/prometheus:v2.53.1
    ports:
      - "9090:9090"
    volumes:
      - "./prometheus:/etc/prometheus"
      - "prometheus-data:/prometheus"
    restart: on-failure:100

  grafana:
    image: docker.io/grafana/grafana:10.0.0
    ports:
      - "3000:3000"
    volumes:
      - "grafana-storage:/var/lib/grafana"
      - "./grafana/dashboard.yml:/etc/grafana/provisioning/dashboards/dashboard.yml"
      - "./grafana/datasource.yml:/etc/grafana/provisioning/datasources/datasource.yml"
      - "./grafana/dashboard.json:/var/lib/grafana/dashboards/dashboard.json"
    environment:
      # GF_AUTH_ANONYMOUS_ORG_ROLE: Admin
      # GF_AUTH_ANONYMOUS_ENABLED: true
      # GF_ORG_ROLE: Admin
      GF_ORG_NAME: trustgraph.ai
      # GF_SERVER_ROOT_URL: https://example.com
    restart: on-failure:100

  # trustgraph-flow processors: each runs one command from the shared image
  # and is given the Pulsar broker URL with -p.
  pdf-decoder:
    image: docker.io/trustgraph/trustgraph-flow:0.5.9
    command:
      - "pdf-decoder"
      - "-p"
      - "pulsar://pulsar:6650"
    restart: on-failure:100

  chunker:
    image: docker.io/trustgraph/trustgraph-flow:0.5.9
    command:
      - "chunker-recursive"
      - "-p"
      - "pulsar://pulsar:6650"
    restart: on-failure:100

  vectorize:
    image: docker.io/trustgraph/trustgraph-flow:0.5.9
    command:
      - "embeddings-vectorize"
      - "-p"
      - "pulsar://pulsar:6650"
    restart: on-failure:100

  embeddings:
    image: docker.io/trustgraph/trustgraph-flow:0.5.9
    command:
      - "embeddings-hf"
      - "-p"
      - "pulsar://pulsar:6650"
      # - "-m"
      # - "mixedbread-ai/mxbai-embed-large-v1"
    restart: on-failure:100

  kg-extract-definitions:
    image: docker.io/trustgraph/trustgraph-flow:0.5.9
    command:
      - "kg-extract-definitions"
      - "-p"
      - "pulsar://pulsar:6650"
    restart: on-failure:100

  kg-extract-relationships:
    image: docker.io/trustgraph/trustgraph-flow:0.5.9
    command:
      - "kg-extract-relationships"
      - "-p"
      - "pulsar://pulsar:6650"
    restart: on-failure:100

  store-graph-embeddings:
    image: docker.io/trustgraph/trustgraph-flow:0.5.9
    command:
      - "ge-write-milvus"
      - "-p"
      - "pulsar://pulsar:6650"
      - "-t"
      - "http://milvus:19530"
    restart: on-failure:100

  store-triples:
    image: docker.io/trustgraph/trustgraph-flow:0.5.9
    command:
      - "triples-write-cassandra"
      - "-p"
      - "pulsar://pulsar:6650"
      - "-g"
      - "cassandra"
    restart: on-failure:100

  # OPENAI_KEY is interpolated from the environment and passed with -k;
  # quoted like every other list item so the value always stays a string.
  text-completion:
    image: docker.io/trustgraph/trustgraph-flow:0.5.9
    command:
      - "text-completion-openai"
      - "-p"
      - "pulsar://pulsar:6650"
      - "-k"
      - "${OPENAI_KEY}"
    restart: on-failure:100

  # Second completion instance on its own -i/-o queues; graph-rag below is
  # configured with the same two queue names.
  text-completion-rag:
    image: docker.io/trustgraph/trustgraph-flow:0.5.9
    command:
      - "text-completion-openai"
      - "-p"
      - "pulsar://pulsar:6650"
      - "-k"
      - "${OPENAI_KEY}"
      - "-i"
      - "non-persistent://tg/request/text-completion-rag"
      - "-o"
      - "non-persistent://tg/response/text-completion-rag-response"
    restart: on-failure:100

  graph-rag:
    image: docker.io/trustgraph/trustgraph-flow:0.5.9
    command:
      - "graph-rag"
      - "-p"
      - "pulsar://pulsar:6650"
      - "--text-completion-request-queue"
      - "non-persistent://tg/request/text-completion-rag"
      - "--text-completion-response-queue"
      - "non-persistent://tg/response/text-completion-rag-response"
    restart: on-failure:100
# Docker Compose stack: Cassandra, Pulsar, Milvus (plus the etcd and MinIO
# instances it is pointed at), Prometheus, Grafana and the trustgraph-flow
# 0.6.0 processors. Text completion is served by text-completion-openai.
# Services are listed alphabetically.
"services":
  "cassandra":
    "image": "docker.io/cassandra:4.1.5"
    "ports":
      - "9042:9042"
    "restart": "on-failure:100"
    "volumes":
      - "cassandra:/var/lib/cassandra"
  # trustgraph-flow processors (this and the other services using the
  # trustgraph-flow image) are given the Pulsar broker URL with -p.
  "chunker":
    "command":
      - "chunker-recursive"
      - "-p"
      - "pulsar://pulsar:6650"
    "image": "docker.io/trustgraph/trustgraph-flow:0.6.0"
    "restart": "on-failure:100"
  "embeddings":
    "command":
      - "embeddings-hf"
      - "-p"
      - "pulsar://pulsar:6650"
    "image": "docker.io/trustgraph/trustgraph-flow:0.6.0"
    "restart": "on-failure:100"
  # Milvus is pointed at this instance through ETCD_ENDPOINTS (etcd:2379).
  "etcd":
    "command":
      - "etcd"
      - "-advertise-client-urls=http://127.0.0.1:2379"
      - "-listen-client-urls"
      - "http://0.0.0.0:2379"
      - "--data-dir"
      - "/etcd"
    "environment":
      "ETCD_AUTO_COMPACTION_MODE": "revision"
      "ETCD_AUTO_COMPACTION_RETENTION": "1000"
      "ETCD_QUOTA_BACKEND_BYTES": "4294967296"
      "ETCD_SNAPSHOT_COUNT": "50000"
    "image": "quay.io/coreos/etcd:v3.5.5"
    "ports":
      - "2379:2379"
    "restart": "on-failure:100"
    "volumes":
      - "etcd:/etcd"
  "grafana":
    "environment":
      "GF_ORG_NAME": "trustgraph.ai"
    "image": "docker.io/grafana/grafana:10.0.0"
    "ports":
      - "3000:3000"
    "restart": "on-failure:100"
    "volumes":
      - "grafana-storage:/var/lib/grafana"
      - "./grafana/dashboard.yml:/etc/grafana/provisioning/dashboards/dashboard.yml"
      - "./grafana/datasource.yml:/etc/grafana/provisioning/datasources/datasource.yml"
      - "./grafana/dashboard.json:/var/lib/grafana/dashboards/dashboard.json"
  # Configured with the prompt-rag queue pair, i.e. the -i/-o queues of the
  # prompt-rag service below.
  "graph-rag":
    "command":
      - "graph-rag"
      - "-p"
      - "pulsar://pulsar:6650"
      - "--prompt-request-queue"
      - "non-persistent://tg/request/prompt-rag"
      - "--prompt-response-queue"
      - "non-persistent://tg/response/prompt-rag-response"
    "image": "docker.io/trustgraph/trustgraph-flow:0.6.0"
    "restart": "on-failure:100"
  # Creates tenant `tg` and namespaces tg/flow, tg/request and tg/response,
  # then sets retention on tg/response.
  # NOTE(review): service_started does not wait for the Pulsar admin API;
  # presumably the restart policy is what retries until it answers - confirm.
  "init-pulsar":
    "command":
      - "sh"
      - "-c"
      - "pulsar-admin --admin-url http://pulsar:8080 tenants create tg && pulsar-admin --admin-url http://pulsar:8080 namespaces create tg/flow && pulsar-admin --admin-url http://pulsar:8080 namespaces create tg/request && pulsar-admin --admin-url http://pulsar:8080 namespaces create tg/response && pulsar-admin --admin-url http://pulsar:8080 namespaces set-retention --size -1 --time 3m tg/response"
    "depends_on":
      "pulsar":
        "condition": "service_started"
    "image": "docker.io/apachepulsar/pulsar:3.3.0"
    "restart": "on-failure:100"
  "kg-extract-definitions":
    "command":
      - "kg-extract-definitions"
      - "-p"
      - "pulsar://pulsar:6650"
    "image": "docker.io/trustgraph/trustgraph-flow:0.6.0"
    "restart": "on-failure:100"
  "kg-extract-relationships":
    "command":
      - "kg-extract-relationships"
      - "-p"
      - "pulsar://pulsar:6650"
    "image": "docker.io/trustgraph/trustgraph-flow:0.6.0"
    "restart": "on-failure:100"
  "milvus":
    "command":
      - "milvus"
      - "run"
      - "standalone"
    "environment":
      "ETCD_ENDPOINTS": "etcd:2379"
      "MINIO_ADDRESS": "minio:9000"
    "image": "docker.io/milvusdb/milvus:v2.4.5"
    "ports":
      - "9091:9091"
      - "19530:19530"
    "restart": "on-failure:100"
    "volumes":
      - "milvus:/var/lib/milvus"
  # Milvus is pointed at this instance through MINIO_ADDRESS (minio:9000);
  # only 9001, the --console-address port, is published to the host.
  # NOTE(review): default minioadmin credentials - override outside local use.
  "minio":
    "command":
      - "minio"
      - "server"
      - "/minio_data"
      - "--console-address"
      - ":9001"
    "environment":
      "MINIO_ROOT_PASSWORD": "minioadmin"
      "MINIO_ROOT_USER": "minioadmin"
    "image": "docker.io/minio/minio:RELEASE.2024-07-04T14-25-45Z"
    "ports":
      - "9001:9001"
    "restart": "on-failure:100"
    "volumes":
      - "minio-data:/minio_data"
  "pdf-decoder":
    "command":
      - "pdf-decoder"
      - "-p"
      - "pulsar://pulsar:6650"
    "image": "docker.io/trustgraph/trustgraph-flow:0.6.0"
    "restart": "on-failure:100"
  "prometheus":
    "image": "docker.io/prom/prometheus:v2.53.1"
    "ports":
      - "9090:9090"
    "restart": "on-failure:100"
    "volumes":
      - "./prometheus:/etc/prometheus"
      - "prometheus-data:/prometheus"
  # Prompt service configured with the tg/request/text-completion and
  # tg/response/text-completion-response queues.
  "prompt":
    "command":
      - "prompt-generic"
      - "-p"
      - "pulsar://pulsar:6650"
      - "--text-completion-request-queue"
      - "non-persistent://tg/request/text-completion"
      - "--text-completion-response-queue"
      - "non-persistent://tg/response/text-completion-response"
    "image": "docker.io/trustgraph/trustgraph-flow:0.6.0"
    "restart": "on-failure:100"
  # Second prompt instance: its -i/-o queues are the ones graph-rag is
  # configured with, and its text-completion queues are the -i/-o queues of
  # text-completion-rag.
  "prompt-rag":
    "command":
      - "prompt-generic"
      - "-p"
      - "pulsar://pulsar:6650"
      - "-i"
      - "non-persistent://tg/request/prompt-rag"
      - "-o"
      - "non-persistent://tg/response/prompt-rag-response"
      - "--text-completion-request-queue"
      - "non-persistent://tg/request/text-completion-rag"
      - "--text-completion-response-queue"
      - "non-persistent://tg/response/text-completion-rag-response"
    "image": "docker.io/trustgraph/trustgraph-flow:0.6.0"
    "restart": "on-failure:100"
  "pulsar":
    "command": "bin/pulsar standalone"
    "image": "docker.io/apachepulsar/pulsar:3.3.0"
    "ports":
      - "6650:6650"
      - "8080:8080"
    "restart": "on-failure:100"
    "volumes":
      - "pulsar-conf:/pulsar/conf"
      - "pulsar-data:/pulsar/data"
  "pulsar-manager":
    "environment":
      "SPRING_CONFIGURATION_FILE": "/pulsar-manager/pulsar-manager/application.properties"
    "image": "docker.io/apachepulsar/pulsar-manager:v0.3.0"
    "ports":
      - "9527:9527"
      - "7750:7750"
    "restart": "on-failure:100"
  "query-graph-embeddings":
    "command":
      - "ge-query-milvus"
      - "-p"
      - "pulsar://pulsar:6650"
      - "-t"
      - "http://milvus:19530"
    "image": "docker.io/trustgraph/trustgraph-flow:0.6.0"
    "restart": "on-failure:100"
  "query-triples":
    "command":
      - "triples-query-cassandra"
      - "-p"
      - "pulsar://pulsar:6650"
      - "-g"
      - "cassandra"
    "image": "docker.io/trustgraph/trustgraph-flow:0.6.0"
    "restart": "on-failure:100"
  "store-graph-embeddings":
    "command":
      - "ge-write-milvus"
      - "-p"
      - "pulsar://pulsar:6650"
      - "-t"
      - "http://milvus:19530"
    "image": "docker.io/trustgraph/trustgraph-flow:0.6.0"
    "restart": "on-failure:100"
  "store-triples":
    "command":
      - "triples-write-cassandra"
      - "-p"
      - "pulsar://pulsar:6650"
      - "-g"
      - "cassandra"
    "image": "docker.io/trustgraph/trustgraph-flow:0.6.0"
    "restart": "on-failure:100"
  # OPENAI_KEY is interpolated from the environment and passed with -k.
  "text-completion":
    "command":
      - "text-completion-openai"
      - "-p"
      - "pulsar://pulsar:6650"
      - "-k"
      - "${OPENAI_KEY}"
    "image": "docker.io/trustgraph/trustgraph-flow:0.6.0"
    "restart": "on-failure:100"
  # Second completion instance on its own -i/-o queues (the ones prompt-rag
  # is configured with).
  "text-completion-rag":
    "command":
      - "text-completion-openai"
      - "-p"
      - "pulsar://pulsar:6650"
      - "-k"
      - "${OPENAI_KEY}"
      - "-i"
      - "non-persistent://tg/request/text-completion-rag"
      - "-o"
      - "non-persistent://tg/response/text-completion-rag-response"
    "image": "docker.io/trustgraph/trustgraph-flow:0.6.0"
    "restart": "on-failure:100"
  "vectorize":
    "command":
      - "embeddings-vectorize"
      - "-p"
      - "pulsar://pulsar:6650"
    "image": "docker.io/trustgraph/trustgraph-flow:0.6.0"
    "restart": "on-failure:100"
# Named volumes mounted by the services above.
"volumes":
  "cassandra": {}
  "etcd": {}
  "grafana-storage": {}
  "milvus": {}
  "minio-data": {}
  "prometheus-data": {}
  "pulsar-conf": {}
  "pulsar-data": {}

View file

@ -1,92 +0,0 @@
# Docker Compose stack with the backing services only: Cassandra, Pulsar
# (with pulsar-manager) and Milvus plus the etcd and MinIO instances it is
# pointed at. No trustgraph-flow processors are defined here.

# Named volumes mounted by the services below.
volumes:
  cassandra:
  pulsar-conf:
  pulsar-data:
  etcd:
  minio-data:
  milvus:

services:

  cassandra:
    image: docker.io/cassandra:4.1.5
    ports:
      - "9042:9042"
    volumes:
      - "cassandra:/var/lib/cassandra"
    restart: on-failure:100

  pulsar:
    image: docker.io/apachepulsar/pulsar:3.3.0
    command: bin/pulsar standalone
    ports:
      - "6650:6650"
      - "8080:8080"
    volumes:
      - "pulsar-conf:/pulsar/conf"
      - "pulsar-data:/pulsar/data"
    restart: on-failure:100

  pulsar-manager:
    image: docker.io/apachepulsar/pulsar-manager:v0.3.0
    ports:
      - "9527:9527"
      - "7750:7750"
    environment:
      SPRING_CONFIGURATION_FILE: /pulsar-manager/pulsar-manager/application.properties
    restart: on-failure:100

  # Milvus is pointed at this instance through ETCD_ENDPOINTS (etcd:2379).
  etcd:
    image: quay.io/coreos/etcd:v3.5.5
    command:
      - "etcd"
      - "-advertise-client-urls=http://127.0.0.1:2379"
      - "-listen-client-urls"
      - "http://0.0.0.0:2379"
      - "--data-dir"
      - "/etcd"
    environment:
      ETCD_AUTO_COMPACTION_MODE: revision
      ETCD_AUTO_COMPACTION_RETENTION: "1000"
      ETCD_QUOTA_BACKEND_BYTES: "4294967296"
      ETCD_SNAPSHOT_COUNT: "50000"
    ports:
      - "2379:2379"
    volumes:
      - "etcd:/etcd"
    restart: on-failure:100

  # Milvus is pointed at this instance through MINIO_ADDRESS (minio:9000);
  # only 9001, the --console-address port, is published to the host.
  # NOTE(review): default minioadmin credentials - override outside local use.
  minio:
    image: docker.io/minio/minio:RELEASE.2024-07-04T14-25-45Z
    command:
      - "minio"
      - "server"
      - "/minio_data"
      - "--console-address"
      - ":9001"
    environment:
      MINIO_ROOT_USER: minioadmin
      MINIO_ROOT_PASSWORD: minioadmin
    ports:
      - "9001:9001"
    volumes:
      - "minio-data:/minio_data"
    restart: on-failure:100

  milvus:
    image: docker.io/milvusdb/milvus:v2.4.5
    command:
      - "milvus"
      - "run"
      - "standalone"
    environment:
      ETCD_ENDPOINTS: etcd:2379
      MINIO_ADDRESS: minio:9000
    ports:
      - "9091:9091"
      - "19530:19530"
    volumes:
      - "milvus:/var/lib/milvus"
    restart: on-failure:100

View file

@ -1,243 +1,260 @@
# Docker Compose stack: Cassandra, Pulsar, Milvus (plus the etcd and MinIO
# instances it is pointed at), Prometheus, Grafana and the trustgraph-flow
# 0.5.9 processors. Text completion is served by text-completion-vertexai.

# Named volumes mounted by the services below.
volumes:
  cassandra:
  pulsar-conf:
  pulsar-data:
  etcd:
  minio-data:
  milvus:
  prometheus-data:
  grafana-storage:

services:

  cassandra:
    image: docker.io/cassandra:4.1.5
    ports:
      - "9042:9042"
    volumes:
      - "cassandra:/var/lib/cassandra"
    restart: on-failure:100

  pulsar:
    image: docker.io/apachepulsar/pulsar:3.3.0
    command: bin/pulsar standalone
    ports:
      - "6650:6650"
      - "8080:8080"
    volumes:
      - "pulsar-conf:/pulsar/conf"
      - "pulsar-data:/pulsar/data"
    restart: on-failure:100

  # Creates tenant `tg` and namespaces tg/flow, tg/request and tg/response,
  # then sets retention on tg/response.
  # NOTE(review): service_started does not wait for the Pulsar admin API;
  # presumably the restart policy is what retries until it answers - confirm.
  init-pulsar:
    image: docker.io/apachepulsar/pulsar:3.3.0
    command:
      - "sh"
      - "-c"
      - "pulsar-admin --admin-url http://pulsar:8080 tenants create tg && pulsar-admin --admin-url http://pulsar:8080 namespaces create tg/flow && pulsar-admin --admin-url http://pulsar:8080 namespaces create tg/request && pulsar-admin --admin-url http://pulsar:8080 namespaces create tg/response && pulsar-admin --admin-url http://pulsar:8080 namespaces set-retention --size -1 --time 3m tg/response"
    depends_on:
      pulsar:
        condition: service_started
    restart: on-failure:100

  pulsar-manager:
    image: docker.io/apachepulsar/pulsar-manager:v0.3.0
    ports:
      - "9527:9527"
      - "7750:7750"
    environment:
      SPRING_CONFIGURATION_FILE: /pulsar-manager/pulsar-manager/application.properties
    restart: on-failure:100

  # Milvus is pointed at this instance through ETCD_ENDPOINTS (etcd:2379).
  etcd:
    image: quay.io/coreos/etcd:v3.5.5
    command:
      - "etcd"
      - "-advertise-client-urls=http://127.0.0.1:2379"
      - "-listen-client-urls"
      - "http://0.0.0.0:2379"
      - "--data-dir"
      - "/etcd"
    environment:
      ETCD_AUTO_COMPACTION_MODE: revision
      ETCD_AUTO_COMPACTION_RETENTION: "1000"
      ETCD_QUOTA_BACKEND_BYTES: "4294967296"
      ETCD_SNAPSHOT_COUNT: "50000"
    ports:
      - "2379:2379"
    volumes:
      - "etcd:/etcd"
    restart: on-failure:100

  # Milvus is pointed at this instance through MINIO_ADDRESS (minio:9000);
  # only 9001, the --console-address port, is published to the host.
  # NOTE(review): default minioadmin credentials - override outside local use.
  minio:
    image: docker.io/minio/minio:RELEASE.2024-07-04T14-25-45Z
    command:
      - "minio"
      - "server"
      - "/minio_data"
      - "--console-address"
      - ":9001"
    environment:
      MINIO_ROOT_USER: minioadmin
      MINIO_ROOT_PASSWORD: minioadmin
    ports:
      - "9001:9001"
    volumes:
      - "minio-data:/minio_data"
    restart: on-failure:100

  milvus:
    image: docker.io/milvusdb/milvus:v2.4.5
    command:
      - "milvus"
      - "run"
      - "standalone"
    environment:
      ETCD_ENDPOINTS: etcd:2379
      MINIO_ADDRESS: minio:9000
    ports:
      - "9091:9091"
      - "19530:19530"
    volumes:
      - "milvus:/var/lib/milvus"
    restart: on-failure:100

  prometheus:
    image: docker.io/prom/prometheus:v2.53.1
    ports:
      - "9090:9090"
    volumes:
      - "./prometheus:/etc/prometheus"
      - "prometheus-data:/prometheus"
    restart: on-failure:100

  grafana:
    image: docker.io/grafana/grafana:10.0.0
    ports:
      - "3000:3000"
    volumes:
      - "grafana-storage:/var/lib/grafana"
      - "./grafana/dashboard.yml:/etc/grafana/provisioning/dashboards/dashboard.yml"
      - "./grafana/datasource.yml:/etc/grafana/provisioning/datasources/datasource.yml"
      - "./grafana/dashboard.json:/var/lib/grafana/dashboards/dashboard.json"
    environment:
      # GF_AUTH_ANONYMOUS_ORG_ROLE: Admin
      # GF_AUTH_ANONYMOUS_ENABLED: true
      # GF_ORG_ROLE: Admin
      GF_ORG_NAME: trustgraph.ai
      # GF_SERVER_ROOT_URL: https://example.com
    restart: on-failure:100

  # trustgraph-flow processors: each runs one command from the shared image
  # and is given the Pulsar broker URL with -p.
  pdf-decoder:
    image: docker.io/trustgraph/trustgraph-flow:0.5.9
    command:
      - "pdf-decoder"
      - "-p"
      - "pulsar://pulsar:6650"
    restart: on-failure:100

  chunker:
    image: docker.io/trustgraph/trustgraph-flow:0.5.9
    command:
      - "chunker-recursive"
      - "-p"
      - "pulsar://pulsar:6650"
    restart: on-failure:100

  vectorize:
    image: docker.io/trustgraph/trustgraph-flow:0.5.9
    command:
      - "embeddings-vectorize"
      - "-p"
      - "pulsar://pulsar:6650"
    restart: on-failure:100

  embeddings:
    image: docker.io/trustgraph/trustgraph-flow:0.5.9
    command:
      - "embeddings-hf"
      - "-p"
      - "pulsar://pulsar:6650"
      # - "-m"
      # - "mixedbread-ai/mxbai-embed-large-v1"
    restart: on-failure:100

  kg-extract-definitions:
    image: docker.io/trustgraph/trustgraph-flow:0.5.9
    command:
      - "kg-extract-definitions"
      - "-p"
      - "pulsar://pulsar:6650"
    restart: on-failure:100

  kg-extract-relationships:
    image: docker.io/trustgraph/trustgraph-flow:0.5.9
    command:
      - "kg-extract-relationships"
      - "-p"
      - "pulsar://pulsar:6650"
    restart: on-failure:100

  store-graph-embeddings:
    image: docker.io/trustgraph/trustgraph-flow:0.5.9
    command:
      - "ge-write-milvus"
      - "-p"
      - "pulsar://pulsar:6650"
      - "-t"
      - "http://milvus:19530"
    restart: on-failure:100

  store-triples:
    image: docker.io/trustgraph/trustgraph-flow:0.5.9
    command:
      - "triples-write-cassandra"
      - "-p"
      - "pulsar://pulsar:6650"
      - "-g"
      - "cassandra"
    restart: on-failure:100

  # The key file passed with -k is read from the ./vertexai bind mount.
  text-completion:
    image: docker.io/trustgraph/trustgraph-flow:0.5.9
    command:
      - "text-completion-vertexai"
      - "-p"
      - "pulsar://pulsar:6650"
      - "-k"
      - "/vertexai/private.json"
      - "-r"
      - "us-west1"
    volumes:
      - "./vertexai:/vertexai"
    restart: on-failure:100

  # Second completion instance on its own -i/-o queues; graph-rag below is
  # configured with the same two queue names.
  text-completion-rag:
    image: docker.io/trustgraph/trustgraph-flow:0.5.9
    command:
      - "text-completion-vertexai"
      - "-p"
      - "pulsar://pulsar:6650"
      - "-k"
      - "/vertexai/private.json"
      - "-r"
      - "us-west1"
      - "-i"
      - "non-persistent://tg/request/text-completion-rag"
      - "-o"
      - "non-persistent://tg/response/text-completion-rag-response"
    # Same bind mount as text-completion: without it the path given to -k
    # does not exist inside this container.
    volumes:
      - "./vertexai:/vertexai"
    restart: on-failure:100

  graph-rag:
    image: docker.io/trustgraph/trustgraph-flow:0.5.9
    command:
      - "graph-rag"
      - "-p"
      - "pulsar://pulsar:6650"
      - "--text-completion-request-queue"
      - "non-persistent://tg/request/text-completion-rag"
      - "--text-completion-response-queue"
      - "non-persistent://tg/response/text-completion-rag-response"
    restart: on-failure:100
"services":
"cassandra":
"image": "docker.io/cassandra:4.1.5"
"ports":
- "9042:9042"
"restart": "on-failure:100"
"volumes":
- "cassandra:/var/lib/cassandra"
"chunker":
"command":
- "chunker-recursive"
- "-p"
- "pulsar://pulsar:6650"
"image": "docker.io/trustgraph/trustgraph-flow:0.6.0"
"restart": "on-failure:100"
"embeddings":
"command":
- "embeddings-hf"
- "-p"
- "pulsar://pulsar:6650"
"image": "docker.io/trustgraph/trustgraph-flow:0.6.0"
"restart": "on-failure:100"
"etcd":
"command":
- "etcd"
- "-advertise-client-urls=http://127.0.0.1:2379"
- "-listen-client-urls"
- "http://0.0.0.0:2379"
- "--data-dir"
- "/etcd"
"environment":
"ETCD_AUTO_COMPACTION_MODE": "revision"
"ETCD_AUTO_COMPACTION_RETENTION": "1000"
"ETCD_QUOTA_BACKEND_BYTES": "4294967296"
"ETCD_SNAPSHOT_COUNT": "50000"
"image": "quay.io/coreos/etcd:v3.5.5"
"ports":
- "2379:2379"
"restart": "on-failure:100"
"volumes":
- "etcd:/etcd"
"grafana":
"environment":
"GF_ORG_NAME": "trustgraph.ai"
"image": "docker.io/grafana/grafana:10.0.0"
"ports":
- "3000:3000"
"restart": "on-failure:100"
"volumes":
- "grafana-storage:/var/lib/grafana"
- "./grafana/dashboard.yml:/etc/grafana/provisioning/dashboards/dashboard.yml"
- "./grafana/datasource.yml:/etc/grafana/provisioning/datasources/datasource.yml"
- "./grafana/dashboard.json:/var/lib/grafana/dashboards/dashboard.json"
"graph-rag":
"command":
- "graph-rag"
- "-p"
- "pulsar://pulsar:6650"
- "--prompt-request-queue"
- "non-persistent://tg/request/prompt-rag"
- "--prompt-response-queue"
- "non-persistent://tg/response/prompt-rag-response"
"image": "docker.io/trustgraph/trustgraph-flow:0.6.0"
"restart": "on-failure:100"
"init-pulsar":
"command":
- "sh"
- "-c"
- "pulsar-admin --admin-url http://pulsar:8080 tenants create tg && pulsar-admin --admin-url http://pulsar:8080 namespaces create tg/flow && pulsar-admin --admin-url http://pulsar:8080 namespaces create tg/request && pulsar-admin --admin-url http://pulsar:8080 namespaces create tg/response && pulsar-admin --admin-url http://pulsar:8080 namespaces set-retention --size -1 --time 3m tg/response"
"depends_on":
"pulsar":
"condition": "service_started"
"image": "docker.io/apachepulsar/pulsar:3.3.0"
"restart": "on-failure:100"
"kg-extract-definitions":
"command":
- "kg-extract-definitions"
- "-p"
- "pulsar://pulsar:6650"
"image": "docker.io/trustgraph/trustgraph-flow:0.6.0"
"restart": "on-failure:100"
"kg-extract-relationships":
"command":
- "kg-extract-relationships"
- "-p"
- "pulsar://pulsar:6650"
"image": "docker.io/trustgraph/trustgraph-flow:0.6.0"
"restart": "on-failure:100"
"milvus":
"command":
- "milvus"
- "run"
- "standalone"
"environment":
"ETCD_ENDPOINTS": "etcd:2379"
"MINIO_ADDRESS": "minio:9000"
"image": "docker.io/milvusdb/milvus:v2.4.5"
"ports":
- "9091:9091"
- "19530:19530"
"restart": "on-failure:100"
"volumes":
- "milvus:/var/lib/milvus"
"minio":
"command":
- "minio"
- "server"
- "/minio_data"
- "--console-address"
- ":9001"
"environment":
"MINIO_ROOT_PASSWORD": "minioadmin"
"MINIO_ROOT_USER": "minioadmin"
"image": "docker.io/minio/minio:RELEASE.2024-07-04T14-25-45Z"
"ports":
- "9001:9001"
"restart": "on-failure:100"
"volumes":
- "minio-data:/minio_data"
"pdf-decoder":
"command":
- "pdf-decoder"
- "-p"
- "pulsar://pulsar:6650"
"image": "docker.io/trustgraph/trustgraph-flow:0.6.0"
"restart": "on-failure:100"
"prometheus":
"image": "docker.io/prom/prometheus:v2.53.1"
"ports":
- "9090:9090"
"restart": "on-failure:100"
"volumes":
- "./prometheus:/etc/prometheus"
- "prometheus-data:/prometheus"
"prompt":
"command":
- "prompt-generic"
- "-p"
- "pulsar://pulsar:6650"
- "--text-completion-request-queue"
- "non-persistent://tg/request/text-completion"
- "--text-completion-response-queue"
- "non-persistent://tg/response/text-completion-response"
"image": "docker.io/trustgraph/trustgraph-flow:0.6.0"
"restart": "on-failure:100"
"prompt-rag":
"command":
- "prompt-generic"
- "-p"
- "pulsar://pulsar:6650"
- "-i"
- "non-persistent://tg/request/prompt-rag"
- "-o"
- "non-persistent://tg/response/prompt-rag-response"
- "--text-completion-request-queue"
- "non-persistent://tg/request/text-completion-rag"
- "--text-completion-response-queue"
- "non-persistent://tg/response/text-completion-rag-response"
"image": "docker.io/trustgraph/trustgraph-flow:0.6.0"
"restart": "on-failure:100"
"pulsar":
"command": "bin/pulsar standalone"
"image": "docker.io/apachepulsar/pulsar:3.3.0"
"ports":
- "6650:6650"
- "8080:8080"
"restart": "on-failure:100"
"volumes":
- "pulsar-conf:/pulsar/conf"
- "pulsar-data:/pulsar/data"
"pulsar-manager":
"environment":
"SPRING_CONFIGURATION_FILE": "/pulsar-manager/pulsar-manager/application.properties"
"image": "docker.io/apachepulsar/pulsar-manager:v0.3.0"
"ports":
- "9527:9527"
- "7750:7750"
"restart": "on-failure:100"
"query-graph-embeddings":
"command":
- "ge-query-milvus"
- "-p"
- "pulsar://pulsar:6650"
- "-t"
- "http://milvus:19530"
"image": "docker.io/trustgraph/trustgraph-flow:0.6.0"
"restart": "on-failure:100"
"query-triples":
"command":
- "triples-query-cassandra"
- "-p"
- "pulsar://pulsar:6650"
- "-g"
- "cassandra"
"image": "docker.io/trustgraph/trustgraph-flow:0.6.0"
"restart": "on-failure:100"
"store-graph-embeddings":
"command":
- "ge-write-milvus"
- "-p"
- "pulsar://pulsar:6650"
- "-t"
- "http://milvus:19530"
"image": "docker.io/trustgraph/trustgraph-flow:0.6.0"
"restart": "on-failure:100"
"store-triples":
"command":
- "triples-write-cassandra"
- "-p"
- "pulsar://pulsar:6650"
- "-g"
- "cassandra"
"image": "docker.io/trustgraph/trustgraph-flow:0.6.0"
"restart": "on-failure:100"
"text-completion":
"command":
- "text-completion-vertexai"
- "-p"
- "pulsar://pulsar:6650"
- "-k"
- "/vertexai/private.json"
- "-r"
- "us-west1"
"image": "docker.io/trustgraph/trustgraph-flow:0.6.0"
"restart": "on-failure:100"
"volumes":
- "./vertexai:/vertexai"
"text-completion-rag":
"command":
- "text-completion-vertexai"
- "-p"
- "pulsar://pulsar:6650"
- "-k"
- "/vertexai/private.json"
- "-r"
- "us-west1"
- "-i"
- "non-persistent://tg/request/text-completion-rag"
- "-o"
- "non-persistent://tg/response/text-completion-rag-response"
"image": "docker.io/trustgraph/trustgraph-flow:0.6.0"
"restart": "on-failure:100"
"volumes":
- "./vertexai:/vertexai"
"vectorize":
"command":
- "embeddings-vectorize"
- "-p"
- "pulsar://pulsar:6650"
"image": "docker.io/trustgraph/trustgraph-flow:0.6.0"
"restart": "on-failure:100"
"volumes":
"cassandra": {}
"etcd": {}
"grafana-storage": {}
"milvus": {}
"minio-data": {}
"prometheus-data": {}
"pulsar-conf": {}
"pulsar-data": {}

6
scripts/ge-query-milvus Executable file
View file

@ -0,0 +1,6 @@
#!/usr/bin/env python3

"""Launcher script: calls run() from trustgraph.query.graph_embeddings.milvus."""

from trustgraph.query.graph_embeddings.milvus import run as main

main()

View file

@ -7,7 +7,7 @@ Connects to the trustgraph graph hosts and dumps all graph edges.
import argparse
import time
from trustgraph.trustgraph import TrustGraph
from trustgraph.direct.cassandra import TrustGraph
def show_graph(graph_hosts):

6
scripts/prompt-generic Executable file
View file

@ -0,0 +1,6 @@
#!/usr/bin/env python3

"""Launcher script: calls run() from trustgraph.model.prompt.generic."""

from trustgraph.model.prompt.generic import run as main

main()

View file

@ -7,9 +7,9 @@ query = " ".join(sys.argv[1:])
gr = GraphRag(
verbose=True,
vector_store="http://localhost:19530",
pulsar_host="pulsar://localhost:6650",
graph_hosts=["localhost"],
pr_request_queue="non-persistent://tg/request/prompt",
pr_response_queue="non-persistent://tg/response/prompt-response",
)
if query == "":

View file

@ -0,0 +1,6 @@
#!/usr/bin/env python3

"""Launcher script: calls run() from trustgraph.query.triples.cassandra."""

from trustgraph.query.triples.cassandra import run as main

main()

View file

@ -4,7 +4,7 @@ import os
with open("README.md", "r") as fh:
long_description = fh.read()
version = "0.5.9"
version = "0.6.0"
setuptools.setup(
name="trustgraph",
@ -57,6 +57,7 @@ setuptools.setup(
"scripts/embeddings-ollama",
"scripts/embeddings-vectorize",
"scripts/ge-dump-parquet",
"scripts/ge-query-milvus",
"scripts/ge-write-milvus",
"scripts/graph-rag",
"scripts/graph-show",
@ -68,16 +69,18 @@ setuptools.setup(
"scripts/load-triples",
"scripts/loader",
"scripts/pdf-decoder",
"scripts/prompt-generic",
"scripts/query",
"scripts/run-processing",
"scripts/text-completion-azure",
"scripts/text-completion-bedrock",
"scripts/text-completion-claude",
"scripts/text-completion-cohere",
"scripts/text-completion-ollama",
"scripts/text-completion-openai",
"scripts/text-completion-vertexai",
"scripts/text-completion-cohere",
"scripts/triples-dump-parquet",
"scripts/triples-query-cassandra",
"scripts/triples-write-cassandra",
]
)

3
templates/base.jsonnet Normal file
View file

@ -0,0 +1,3 @@
// Fields common to every service definition; the other templates build
// services as `base + { ... }`.
{
    "restart": "on-failure:100",
}

View file

@ -0,0 +1,18 @@
local base = import "base.jsonnet";
local images = import "images.jsonnet";
// Cassandra service definition: image from images.jsonnet, CQL port 9042
// published, data kept on the `cassandra` named volume.
local service = base + {
    image: images.cassandra,
    ports: ["9042:9042"],
    volumes: ["cassandra:/var/lib/cassandra"],
};

// Compose fragment; `+:` lets it be merged with the other fragments
// without clobbering their volumes/services.
{
    volumes +: { cassandra: {} },
    services +: { cassandra: service },
}

View file

@ -0,0 +1,49 @@
local base = import "base.jsonnet";
local url = import "url.jsonnet";
local images = import "images.jsonnet";
local cassandra = import "cassandra.jsonnet";
local pulsar = import "pulsar.jsonnet";
local milvus = import "milvus.jsonnet";
local grafana = import "grafana.jsonnet";
local trustgraph = import "trustgraph.jsonnet";
// Extra arguments pointing a text-completion processor at the RAG queues.
local rag_queues = [
    "-i", "non-persistent://tg/request/text-completion-rag",
    "-o", "non-persistent://tg/response/text-completion-rag-response",
];

// Azure text-completion service; `extra` is appended to the command line.
// ${AZURE_TOKEN} / ${AZURE_ENDPOINT} are emitted literally into the YAML
// (presumably substituted from the environment at deploy time).
local azure(extra=[]) = base + {
    image: images.trustgraph,
    command: [
        "text-completion-azure",
        "-p", url.pulsar,
        "-k", "${AZURE_TOKEN}",
        "-e", "${AZURE_ENDPOINT}",
    ] + extra,
};

// Common stack plus the two Azure-backed text-completion services.
local config = cassandra + pulsar + milvus + grafana + trustgraph + {
    services +: {
        "text-completion": azure(),
        "text-completion-rag": azure(rag_queues),
    },
};

std.manifestYamlDoc(config)

View file

@ -0,0 +1,68 @@
local base = import "base.jsonnet";
local url = import "url.jsonnet";
local images = import "images.jsonnet";
local cassandra = import "cassandra.jsonnet";
local pulsar = import "pulsar.jsonnet";
local milvus = import "milvus.jsonnet";
local grafana = import "grafana.jsonnet";
local trustgraph = import "trustgraph.jsonnet";
// Extra arguments pointing a text-completion processor at the RAG queues.
local rag_queues = [
    "-i", "non-persistent://tg/request/text-completion-rag",
    "-o", "non-persistent://tg/response/text-completion-rag-response",
];

// Bedrock text-completion service; `extra` is appended to the command line.
local bedrock(extra=[]) = base + {
    image: images.trustgraph,
    command: [
        "text-completion-bedrock",
        "-p", url.pulsar,
        // "-m", "mistral.mistral-large-2407-v1:0",
        "-z", "${AWS_ID_KEY}",
        "-k", "${AWS_SECRET_KEY}",
        "-r", "us-west-2",
    ] + extra,
};

// Replaces the default chunker with explicit chunk size and overlap.
local chunker = base + {
    image: images.trustgraph,
    command: [
        "chunker-recursive",
        "-p", url.pulsar,
        "--chunk-size", "2000",
        "--chunk-overlap", "100",
    ],
};

// Common stack plus the Bedrock-specific services.
local config = cassandra + pulsar + milvus + grafana + trustgraph + {
    services +: {
        chunker: chunker,
        "text-completion": bedrock(),
        "text-completion-rag": bedrock(rag_queues),
    },
};

std.manifestYamlDoc(config)

View file

@ -0,0 +1,45 @@
local base = import "base.jsonnet";
local url = import "url.jsonnet";
local images = import "images.jsonnet";
local cassandra = import "cassandra.jsonnet";
local pulsar = import "pulsar.jsonnet";
local milvus = import "milvus.jsonnet";
local grafana = import "grafana.jsonnet";
local trustgraph = import "trustgraph.jsonnet";
// Extra arguments pointing a text-completion processor at the RAG queues.
local rag_queues = [
    "-i", "non-persistent://tg/request/text-completion-rag",
    "-o", "non-persistent://tg/response/text-completion-rag-response",
];

// Claude text-completion service; `extra` is appended to the command line.
local claude(extra=[]) = base + {
    image: images.trustgraph,
    command: [
        "text-completion-claude",
        "-p", url.pulsar,
        "-k", "${CLAUDE_KEY}",
    ] + extra,
};

// Common stack plus the two Claude-backed text-completion services.
local config = cassandra + pulsar + milvus + grafana + trustgraph + {
    services +: {
        "text-completion": claude(),
        "text-completion-rag": claude(rag_queues),
    },
};

std.manifestYamlDoc(config)

View file

@ -0,0 +1,58 @@
local base = import "base.jsonnet";
local url = import "url.jsonnet";
local images = import "images.jsonnet";
local cassandra = import "cassandra.jsonnet";
local pulsar = import "pulsar.jsonnet";
local milvus = import "milvus.jsonnet";
local grafana = import "grafana.jsonnet";
local trustgraph = import "trustgraph.jsonnet";
// Extra arguments pointing a text-completion processor at the RAG queues.
local rag_queues = [
    "-i", "non-persistent://tg/request/text-completion-rag",
    "-o", "non-persistent://tg/response/text-completion-rag-response",
];

// Cohere text-completion service; `extra` is appended to the command line.
local cohere(extra=[]) = base + {
    image: images.trustgraph,
    command: [
        "text-completion-cohere",
        "-p", url.pulsar,
        "-k", "${COHERE_KEY}",
    ] + extra,
};

// Replaces the default chunker with explicit chunk size and overlap.
local chunker = base + {
    image: images.trustgraph,
    command: [
        "chunker-recursive",
        "-p", url.pulsar,
        "--chunk-size", "1000",
        "--chunk-overlap", "50",
    ],
};

// Common stack plus the Cohere-specific services.
local config = cassandra + pulsar + milvus + grafana + trustgraph + {
    services +: {
        chunker: chunker,
        "text-completion": cohere(),
        "text-completion-rag": cohere(rag_queues),
    },
};

std.manifestYamlDoc(config)

View file

@ -0,0 +1,62 @@
local base = import "base.jsonnet";
local url = import "url.jsonnet";
local images = import "images.jsonnet";
local cassandra = import "cassandra.jsonnet";
local pulsar = import "pulsar.jsonnet";
local milvus = import "milvus.jsonnet";
local grafana = import "grafana.jsonnet";
local trustgraph = import "trustgraph.jsonnet";
// Extra arguments pointing a text-completion processor at the RAG queues.
local rag_queues = [
    "-i", "non-persistent://tg/request/text-completion-rag",
    "-o", "non-persistent://tg/response/text-completion-rag-response",
];

// Cohere text-completion service; `extra` (queue overrides and/or model
// selection) is appended after the shared arguments.
local cohere(extra) = base + {
    image: images.trustgraph,
    command: [
        "text-completion-cohere",
        "-p", url.pulsar,
        "-k", "${COHERE_KEY}",
    ] + extra,
};

// Replaces the default chunker with explicit chunk size and overlap.
local chunker = base + {
    image: images.trustgraph,
    command: [
        "chunker-recursive",
        "-p", url.pulsar,
        "--chunk-size", "4000",
        "--chunk-overlap", "120",
    ],
};

// Common stack; the two text-completion services use different models.
local config = cassandra + pulsar + milvus + grafana + trustgraph + {
    services +: {
        chunker: chunker,
        "text-completion": cohere(["-m", "c4ai-aya-23-35b"]),
        "text-completion-rag": cohere(rag_queues + ["-m", "c4ai-aya-23-8b"]),
    },
};

std.manifestYamlDoc(config)

View file

@ -0,0 +1,49 @@
local base = import "base.jsonnet";
local url = import "url.jsonnet";
local images = import "images.jsonnet";
local cassandra = import "cassandra.jsonnet";
local pulsar = import "pulsar.jsonnet";
local milvus = import "milvus.jsonnet";
local grafana = import "grafana.jsonnet";
local trustgraph = import "trustgraph.jsonnet";
// Extra arguments pointing a text-completion processor at the RAG queues.
local rag_queues = [
    "-i", "non-persistent://tg/request/text-completion-rag",
    "-o", "non-persistent://tg/response/text-completion-rag-response",
];

// Ollama text-completion service; `extra` is appended to the command line.
local ollama(extra=[]) = base + {
    image: images.trustgraph,
    command: [
        "text-completion-ollama",
        "-p", url.pulsar,
        // "-m", "llama3.1:8b",
        "-r", "${OLLAMA_HOST}",
    ] + extra,
};

// Common stack plus the two Ollama-backed text-completion services.
local config = cassandra + pulsar + milvus + grafana + trustgraph + {
    services +: {
        "text-completion": ollama(),
        "text-completion-rag": ollama(rag_queues),
    },
};

std.manifestYamlDoc(config)

View file

@ -0,0 +1,45 @@
local base = import "base.jsonnet";
local url = import "url.jsonnet";
local images = import "images.jsonnet";
local cassandra = import "cassandra.jsonnet";
local pulsar = import "pulsar.jsonnet";
local milvus = import "milvus.jsonnet";
local grafana = import "grafana.jsonnet";
local trustgraph = import "trustgraph.jsonnet";
// Extra arguments pointing a text-completion processor at the RAG queues.
local rag_queues = [
    "-i", "non-persistent://tg/request/text-completion-rag",
    "-o", "non-persistent://tg/response/text-completion-rag-response",
];

// OpenAI text-completion service; `extra` is appended to the command line.
local openai(extra=[]) = base + {
    image: images.trustgraph,
    command: [
        "text-completion-openai",
        "-p", url.pulsar,
        "-k", "${OPENAI_KEY}",
    ] + extra,
};

// Common stack plus the two OpenAI-backed text-completion services.
local config = cassandra + pulsar + milvus + grafana + trustgraph + {
    services +: {
        "text-completion": openai(),
        "text-completion-rag": openai(rag_queues),
    },
};

std.manifestYamlDoc(config)

View file

@ -0,0 +1,55 @@
local base = import "base.jsonnet";
local url = import "url.jsonnet";
local images = import "images.jsonnet";
local cassandra = import "cassandra.jsonnet";
local pulsar = import "pulsar.jsonnet";
local milvus = import "milvus.jsonnet";
local grafana = import "grafana.jsonnet";
local trustgraph = import "trustgraph.jsonnet";
// Extra arguments pointing a text-completion processor at the RAG queues.
local rag_queues = [
    "-i", "non-persistent://tg/request/text-completion-rag",
    "-o", "non-persistent://tg/response/text-completion-rag-response",
];

// VertexAI text-completion service; `extra` is appended to the command
// line.  The key file passed with -k lives under the ./vertexai bind mount.
local vertexai(extra=[]) = base + {
    image: images.trustgraph,
    command: [
        "text-completion-vertexai",
        "-p", url.pulsar,
        "-k", "/vertexai/private.json",
        "-r", "us-west1",
    ] + extra,
    volumes: ["./vertexai:/vertexai"],
};

// Common stack plus the two VertexAI-backed text-completion services.
local config = cassandra + pulsar + milvus + grafana + trustgraph + {
    services +: {
        "text-completion": vertexai(),
        "text-completion-rag": vertexai(rag_queues),
    },
};

std.manifestYamlDoc(config)

View file

@ -0,0 +1,11 @@
local cassandra = import "cassandra.jsonnet";
local pulsar = import "pulsar.jsonnet";
local milvus = import "milvus.jsonnet";
local grafana = import "grafana.jsonnet";
local trustgraph = import "trustgraph.jsonnet";
// Render the merged fragments as a single YAML document, with no
// text-completion services added on top.
std.manifestYamlDoc(
    cassandra + pulsar + milvus + grafana + trustgraph
)

41
templates/grafana.jsonnet Normal file
View file

@ -0,0 +1,41 @@
local base = import "base.jsonnet";
local images = import "images.jsonnet";
// Prometheus: configuration read from the ./prometheus bind mount, data
// kept on the `prometheus-data` named volume.
local prometheus = base + {
    image: images.prometheus,
    ports: ["9090:9090"],
    volumes: [
        "./prometheus:/etc/prometheus",
        "prometheus-data:/prometheus",
    ],
};

// Grafana: dashboard and datasource files are bind-mounted from ./grafana.
local grafana = base + {
    image: images.grafana,
    ports: ["3000:3000"],
    volumes: [
        "grafana-storage:/var/lib/grafana",
        "./grafana/dashboard.yml:/etc/grafana/provisioning/dashboards/dashboard.yml",
        "./grafana/datasource.yml:/etc/grafana/provisioning/datasources/datasource.yml",
        "./grafana/dashboard.json:/var/lib/grafana/dashboards/dashboard.json",
    ],
    environment: {
        // GF_AUTH_ANONYMOUS_ORG_ROLE: "Admin",
        // GF_AUTH_ANONYMOUS_ENABLED: "true",
        // GF_ORG_ROLE: "Admin",
        GF_ORG_NAME: "trustgraph.ai",
        // GF_SERVER_ROOT_URL: "https://example.com",
    },
};

// Compose fragment contributing the monitoring services and their volumes.
{
    volumes +: {
        "prometheus-data": {},
        "grafana-storage": {},
    },
    services +: {
        prometheus: prometheus,
        grafana: grafana,
    },
}

12
templates/images.jsonnet Normal file
View file

@ -0,0 +1,12 @@
local version = import "version.jsonnet";
// Container image references used by the service templates.
{
    // Third-party images, pinned to fixed tags.
    cassandra: "docker.io/cassandra:4.1.5",
    etcd: "quay.io/coreos/etcd:v3.5.5",
    grafana: "docker.io/grafana/grafana:10.0.0",
    milvus: "docker.io/milvusdb/milvus:v2.4.5",
    minio: "docker.io/minio/minio:RELEASE.2024-07-04T14-25-45Z",
    prometheus: "docker.io/prom/prometheus:v2.53.1",
    pulsar: "docker.io/apachepulsar/pulsar:3.3.0",
    pulsar_manager: "docker.io/apachepulsar/pulsar-manager:v0.3.0",

    // TrustGraph image; the tag is the string held in version.jsonnet.
    trustgraph: "docker.io/trustgraph/trustgraph-flow:%s" % version,
}

71
templates/milvus.jsonnet Normal file
View file

@ -0,0 +1,71 @@
local base = import "base.jsonnet";
local images = import "images.jsonnet";
// etcd instance; milvus is pointed at it through ETCD_ENDPOINTS below.
local etcd = base + {
    image: images.etcd,
    command: [
        "etcd",
        "-advertise-client-urls=http://127.0.0.1:2379",
        "-listen-client-urls", "http://0.0.0.0:2379",
        "--data-dir", "/etcd",
    ],
    environment: {
        ETCD_AUTO_COMPACTION_MODE: "revision",
        ETCD_AUTO_COMPACTION_RETENTION: "1000",
        ETCD_QUOTA_BACKEND_BYTES: "4294967296",
        ETCD_SNAPSHOT_COUNT: "50000",
    },
    ports: ["2379:2379"],
    volumes: ["etcd:/etcd"],
};

// MinIO instance; milvus is pointed at it through MINIO_ADDRESS below.
// NOTE(review): root user/password are the literal "minioadmin" --
// confirm this is only intended for local deployments.
local minio = base + {
    image: images.minio,
    command: [
        "minio",
        "server",
        "/minio_data",
        "--console-address", ":9001",
    ],
    environment: {
        MINIO_ROOT_USER: "minioadmin",
        MINIO_ROOT_PASSWORD: "minioadmin",
    },
    ports: ["9001:9001"],
    volumes: ["minio-data:/minio_data"],
};

// Milvus in standalone mode, wired to the etcd and minio services above.
local milvus = base + {
    image: images.milvus,
    command: ["milvus", "run", "standalone"],
    environment: {
        ETCD_ENDPOINTS: "etcd:2379",
        MINIO_ADDRESS: "minio:9000",
    },
    ports: [
        "9091:9091",
        "19530:19530",
    ],
    volumes: ["milvus:/var/lib/milvus"],
};

// Compose fragment contributing the vector store and its dependencies.
{
    volumes +: {
        etcd: {},
        "minio-data": {},
        milvus: {},
    },
    services +: {
        etcd: etcd,
        minio: minio,
        milvus: milvus,
    },
}

45
templates/pulsar.jsonnet Normal file
View file

@ -0,0 +1,45 @@
local base = import "base.jsonnet";
local images = import "images.jsonnet";
// Shell command run by init-pulsar: creates tenant `tg`, the namespaces
// tg/flow, tg/request and tg/response, then sets retention on tg/response.
local init_script = "pulsar-admin --admin-url http://pulsar:8080 tenants create tg && pulsar-admin --admin-url http://pulsar:8080 namespaces create tg/flow && pulsar-admin --admin-url http://pulsar:8080 namespaces create tg/request && pulsar-admin --admin-url http://pulsar:8080 namespaces create tg/response && pulsar-admin --admin-url http://pulsar:8080 namespaces set-retention --size -1 --time 3m tg/response";

// Pulsar broker in standalone mode; 6650 is the broker URL port used by the
// processors, 8080 the admin URL port used by init-pulsar.
local broker = base + {
    image: images.pulsar,
    command: "bin/pulsar standalone",
    ports: [
        "6650:6650",
        "8080:8080",
    ],
    volumes: [
        "pulsar-conf:/pulsar/conf",
        "pulsar-data:/pulsar/data",
    ],
};

// Initialisation job; only started once the pulsar container has started.
local init = base + {
    image: images.pulsar,
    command: ["sh", "-c", init_script],
    depends_on: {
        pulsar: { condition: "service_started" },
    },
};

// Pulsar Manager.
local manager = base + {
    image: images.pulsar_manager,
    ports: [
        "9527:9527",
        "7750:7750",
    ],
    environment: {
        SPRING_CONFIGURATION_FILE: "/pulsar-manager/pulsar-manager/application.properties",
    },
};

// Compose fragment contributing the messaging services and their volumes.
{
    volumes +: {
        "pulsar-conf": {},
        "pulsar-data": {},
    },
    services +: {
        pulsar: broker,
        "init-pulsar": init,
        "pulsar-manager": manager,
    },
}

View file

@ -0,0 +1,159 @@
local base = import "base.jsonnet";
local images = import "images.jsonnet";
local url = import "url.jsonnet";
// Host name handed to the Cassandra-backed triple store processors via -g.
local cassandra_hosts = "cassandra";

// Builds one processor service from the trustgraph image.  Every command
// starts with the executable name followed by `-p <pulsar url>`; `args`
// carries whatever comes after that.
local processor(executable, args=[]) = base + {
    image: images.trustgraph,
    command: [executable, "-p", url.pulsar] + args,
};

// Compose fragment contributing the core processing pipeline.
{
    services +: {

        "pdf-decoder": processor("pdf-decoder"),

        chunker: processor("chunker-recursive"),

        vectorize: processor("embeddings-vectorize"),

        embeddings: processor("embeddings-hf", [
            // "-m",
            // "mixedbread-ai/mxbai-embed-large-v1",
        ]),

        "kg-extract-definitions": processor("kg-extract-definitions"),

        "kg-extract-relationships": processor("kg-extract-relationships"),

        // Graph embeddings: writer and query side, both given the Milvus URL.
        "store-graph-embeddings": processor(
            "ge-write-milvus", ["-t", url.milvus]
        ),
        "query-graph-embeddings": processor(
            "ge-query-milvus", ["-t", url.milvus]
        ),

        // Triples: writer and query side, both given the Cassandra host.
        "store-triples": processor(
            "triples-write-cassandra", ["-g", cassandra_hosts]
        ),
        "query-triples": processor(
            "triples-query-cassandra", ["-g", cassandra_hosts]
        ),

        // graph-rag sends its prompt requests to the prompt-rag queues,
        // which the prompt-rag service below listens on.
        "graph-rag": processor("graph-rag", [
            "--prompt-request-queue",
            "non-persistent://tg/request/prompt-rag",
            "--prompt-response-queue",
            "non-persistent://tg/response/prompt-rag-response",
        ]),

        "prompt": processor("prompt-generic", [
            "--text-completion-request-queue",
            "non-persistent://tg/request/text-completion",
            "--text-completion-response-queue",
            "non-persistent://tg/response/text-completion-response",
        ]),

        // Second prompt-generic instance: listens on the prompt-rag queues
        // and forwards to the text-completion-rag queues.
        "prompt-rag": processor("prompt-generic", [
            "-i",
            "non-persistent://tg/request/prompt-rag",
            "-o",
            "non-persistent://tg/response/prompt-rag-response",
            "--text-completion-request-queue",
            "non-persistent://tg/request/text-completion-rag",
            "--text-completion-response-queue",
            "non-persistent://tg/response/text-completion-rag-response",
        ]),

    },
}

4
templates/url.jsonnet Normal file
View file

@ -0,0 +1,4 @@
// Endpoint URLs handed to the processors; the host parts match the
// `milvus` and `pulsar` service names defined in the other templates.
{
    milvus: "http://milvus:19530",
    pulsar: "pulsar://pulsar:6650",
}

22
tests/test-graph-embeddings Executable file
View file

@ -0,0 +1,22 @@
#!/usr/bin/env python3
import pulsar
from trustgraph.graph_embeddings_client import GraphEmbeddingsClient
from trustgraph.embeddings_client import EmbeddingsClient
# Manual check of the graph-embeddings query path: embed a question, then
# ask the graph embeddings client for up to `limit` results for those
# vectors and print each result's value.
PULSAR_HOST = "pulsar://localhost:6650"
QUESTION = "What caused the space shuttle to explode?"
limit = 10

embeddings = EmbeddingsClient(pulsar_host=PULSAR_HOST)
vectors = embeddings.request(QUESTION)
print(vectors)

graph_embeddings = GraphEmbeddingsClient(pulsar_host=PULSAR_HOST)
entities = graph_embeddings.request(vectors, limit)

print("Response...")
for entity in entities:
    print(entity.value)

View file

@ -6,8 +6,8 @@ from trustgraph.graph_rag_client import GraphRagClient
rag = GraphRagClient(pulsar_host="pulsar://localhost:6650")
query="""
Identify any facts which provide an explanation of the explosion of the
space shuttle rocket boosters"""
This knowledge graph describes the Space Shuttle disaster.
Present 20 facts which are present in the knowledge graph."""
resp = rag.request(query)

18
tests/test-lang-definition Executable file
View file

@ -0,0 +1,18 @@
#!/usr/bin/env python3
import pulsar
from trustgraph.prompt_client import PromptClient
# Manual check of definition extraction: send a short text chunk through
# the prompt client and print each returned name with its definition.
text = """I noticed a cat in my garden. It is a four-legged animal
which is a mammal and can be tame or wild. I wonder if it will be friends
with me. I think the cat's name is Fred and it has 4 legs"""

prompt_client = PromptClient(pulsar_host="pulsar://localhost:6650")
definitions = prompt_client.request_definitions(chunk=text)

for item in definitions:
    print(item.name, ":", item.definition)

72
tests/test-lang-kg-prompt Executable file
View file

@ -0,0 +1,72 @@
#!/usr/bin/env python3
import pulsar
from trustgraph.prompt_client import PromptClient
# Manual check of the knowledge-graph prompt: sends a fixed list of facts
# together with a question through the prompt client and prints the reply.
p = PromptClient(pulsar_host="pulsar://localhost:6650")
# Fixture passed as `kg`: 3-tuples of strings, apparently
# (subject, predicate, object) -- TODO confirm against PromptClient.
facts = [
("accident", "evoked", "a wide range of deeply felt public responses"),
("Space Shuttle concept", "had", "genesis"),
("Commission", "had", "a mandate to develop recommendations for corrective or other action based upon the Commission's findings and determinations"),
("Commission", "established", "teams of persons"),
("Space Shuttle Challenger", "http://www.w3.org/2004/02/skos/core#definition", "A space shuttle that was destroyed in an accident during mission 51-L."),
("The mid fuselage", "contains", "the payload bay"),
("Volume I", "contains", "Chapter IX"),
("accident", "resulted in", "firm national resolve that those men and women be forever enshrined in the annals of American heroes"),
("Volume I", "contains", "Chapter IV"),
("Volume I", "contains", "Appendix A"),
("Volume I", "contains", "Appendix B"),
("Volume I", "contains", "The Staff"),
("Commission", "required", "detailed investigation"),
("Commission", "focused", "safety aspects of future flights"),
("Commission", "http://www.w3.org/2004/02/skos/core#definition", "An independent group appointed to investigate the Space Shuttle Challenger accident."),
("Commission", "moved forward with", "its investigation"),
("President", "appointed", "an independent Commission"),
("accident", "interrupted", "one of the most productive engineering, scientific and exploratory programs in history"),
("Volume I", "contains", "Preface"),
("Commission", "believes", "investigation"),
("Volume I", "contains", "Chapter I"),
("President", "was moved and troubled", "by this accident in a very personal way"),
("PRESIDENTIAL COMMISSION", "Report to", "President"),
("Volume I", "contains", "Chapter VI"),
("Commission", "held", "public hearings dealing with the facts leading up to the accident"),
("Volume I", "http://www.w3.org/2004/02/skos/core#definition", "The first volume of a multi-volume publication."),
("Space Shuttle Challenger", "was involved in", "an accident"),
("Volume I", "contains", "Chapter VII"),
("Volume I", "contains", "Chapter II"),
("Volume I", "contains", "Chapter V"),
("Commission", "believes", "its investigation and report have been responsive to the request of the President and hopes that they will serve the best interests of the nation in restoring the United States space program to its preeminent position in the world"),
("Commission", "supported", "panels"),
("Volume I", "contains", "Chapter VIII"),
("NASA", "cooperated", "Commission"),
("liquid oxygen tank", "contains", "oxidizer"),
("President", "http://www.w3.org/2004/02/skos/core#definition", "The head of state of the United States."),
("Volume I", "contains", "Chapter III"),
("Apollo lunar landing spacecraft", "had", "not yet flown"),
("Commission", "construe", "mandate"),
("accident", "became", "a milestone on the way to achieving the full potential that space offers to mankind"),
("Volume I", "contains", "The Commission"),
("Commission", "focused", "attention"),
("Commission", "learned", "lessons"),
("Commission", "required", "interfere with or supersede Congress"),
("Commission", "was made up of", "persons not connected with the mission"),
("Commission", "required", "review budgetary matters"),
("Space Shuttle", "became", "focus of NASA's near-term future"),
("Volume I", "contains", "Appendix C"),
("accident", "caused", "grief and sadness for the loss of seven brave members of the crew"),
("Commission", "http://www.w3.org/2004/02/skos/core#definition", "A group established to investigate the space shuttle accident"),
("Volume I", "contains", "Appendix D"),
("Commission", "had", "a mandate to review the circumstances surrounding the accident to establish the probable cause or causes of the accident"),
("Volume I", "contains", "Recommendations")
]
# Question sent alongside the facts above.
query="Present 20 facts which are present in the knowledge graph."
resp = p.request_kg_prompt(
query=query,
kg=facts,
)
print(resp)

21
tests/test-lang-relationships Executable file
View file

@ -0,0 +1,21 @@
#!/usr/bin/env python3
import pulsar
from trustgraph.prompt_client import PromptClient
# Manual check of relationship extraction: send a short text chunk through
# the prompt client and print the s / p / o / o_entity fields of every
# relationship returned.
text = """I noticed a cat in my garden. It is a four-legged animal
which is a mammal and can be tame or wild. I wonder if it will be friends
with me. I think the cat's name is Fred and it has 4 legs"""

prompt_client = PromptClient(pulsar_host="pulsar://localhost:6650")
relationships = prompt_client.request_relationships(chunk=text)

for rel in relationships:
    print(rel.s)
    for field in (rel.p, rel.o, rel.o_entity):
        print(" ", field)

View file

@ -2,7 +2,7 @@
from langchain_huggingface import HuggingFaceEmbeddings
from trustgraph.triple_vectors import TripleVectors
from trustgraph.direct.milvus import TripleVectors
client = TripleVectors()

70
tests/test-triples Executable file
View file

@ -0,0 +1,70 @@
#!/usr/bin/env python3
import pulsar
from trustgraph.triples_query_client import TriplesQueryClient
# Manual check of the triples query client: runs one query for each
# combination of bound subject / predicate / object and prints the result.
tq = TriplesQueryClient(pulsar_host="pulsar://localhost:6650")

SHUTTLE = "http://trustgraph.ai/e/shuttle"
LANDED = "http://trustgraph.ai/e/landed"
RUNWAY = "on the concrete runway at Kennedy Space Center"

# Maximum number of triples requested per query.
limit = 3


def dump(resp):
    """Print every triple in `resp` as 'subject predicate object'."""
    print("Response...")
    for t in resp:
        print(t.s.value, t.p.value, t.o.value)


# (label, s, p, o); None leaves that position unbound.  The label names the
# positions that are bound.  Previously the object-only case was labelled
# "p" a second time and the "po" case sent (s, p, None), which just repeated
# the "sp" query, so predicate+object lookup was never exercised.
queries = [
    ("*", None, None, None),
    ("s", SHUTTLE, None, None),
    ("p", None, LANDED, None),
    ("o", None, None, "President"),
    ("sp", SHUTTLE, LANDED, None),
    ("so", SHUTTLE, None, "the tower"),
    ("po", None, LANDED, RUNWAY),
    ("spo", SHUTTLE, LANDED, RUNWAY),
]

for label, s, p, o in queries:
    print(f"-- {label} ---------------------------")
    resp = tq.request(s, p, o, limit)
    dump(resp)

View file

View file

@ -67,7 +67,7 @@ class TrustGraph:
def get_s(self, s, limit=10):
return self.session.execute(
f"select p, o from triples where s = %s",
f"select p, o from triples where s = %s limit {limit}",
(s,)
)
@ -97,7 +97,7 @@ class TrustGraph:
def get_os(self, o, s, limit=10):
return self.session.execute(
f"select s from triples where o = %s and s = %s limit {limit}",
f"select p from triples where o = %s and s = %s limit {limit}",
(o, s)
)

View file

@ -9,10 +9,8 @@ import os
import argparse
import time
from .... trustgraph import TrustGraph
from .... schema import GraphEmbeddings
from .... schema import graph_embeddings_store_queue
from .... log_level import LogLevel
from .... base import Consumer
from . writer import ParquetWriter

View file

@ -9,10 +9,8 @@ import os
import argparse
import time
from .... trustgraph import TrustGraph
from .... schema import Triple
from .... schema import triples_store_queue
from .... log_level import LogLevel
from .... base import Consumer
from . writer import ParquetWriter

View file

@ -0,0 +1,89 @@
#!/usr/bin/env python3
import pulsar
import _pulsar
from pulsar.schema import JsonSchema
import hashlib
import uuid
from . schema import GraphEmbeddingsRequest, GraphEmbeddingsResponse
from . schema import graph_embeddings_request_queue
from . schema import graph_embeddings_response_queue
# Ugly
ERROR=_pulsar.LoggerLevel.Error
WARN=_pulsar.LoggerLevel.Warn
INFO=_pulsar.LoggerLevel.Info
DEBUG=_pulsar.LoggerLevel.Debug
class GraphEmbeddingsClient:
    """Request/response client for graph-embeddings queries over Pulsar.

    Sends GraphEmbeddingsRequest messages on the input queue and waits on
    the output queue for the GraphEmbeddingsResponse carrying the same
    "id" message property.
    """

    def __init__(
            self, log_level=ERROR,
            subscriber=None,
            input_queue=None,
            output_queue=None,
            pulsar_host="pulsar://pulsar:6650",
    ):
        """Connect to Pulsar and create the producer and consumer.

        log_level: level passed to pulsar.ConsoleLogger.
        subscriber: subscription name; a random UUID when None.
        input_queue: topic requests are sent to; defaults to
            graph_embeddings_request_queue.
        output_queue: topic responses are read from; defaults to
            graph_embeddings_response_queue.
        pulsar_host: Pulsar service URL.
        """

        if input_queue is None:
            input_queue = graph_embeddings_request_queue

        if output_queue is None:
            output_queue = graph_embeddings_response_queue

        if subscriber is None:
            subscriber = str(uuid.uuid4())

        self.client = pulsar.Client(
            pulsar_host,
            logger=pulsar.ConsoleLogger(log_level),
        )

        self.producer = self.client.create_producer(
            topic=input_queue,
            schema=JsonSchema(GraphEmbeddingsRequest),
            chunking_enabled=True,
        )

        self.consumer = self.client.subscribe(
            output_queue, subscriber,
            schema=JsonSchema(GraphEmbeddingsResponse),
        )

    def request(self, vectors, limit=10, timeout=500):
        """Send one query and return the `entities` field of its response.

        vectors: value placed in the request's `vectors` field.
        limit: value placed in the request's `limit` field.
        timeout: seconds to wait in each receive call (converted to
            milliseconds).  The wait restarts after every unrelated
            message, so the total time spent here can exceed `timeout`.

        Errors raised by the consumer's receive call (e.g. on timeout)
        propagate to the caller.
        """

        request_id = str(uuid.uuid4())

        r = GraphEmbeddingsRequest(
            vectors=vectors,
            limit=limit,
        )
        self.producer.send(r, properties={ "id": request_id })

        while True:

            msg = self.consumer.receive(timeout_millis=timeout * 1000)

            # .get(): a message without an "id" property is treated as
            # unrelated rather than raising KeyError and being left
            # unacknowledged.
            if msg.properties().get("id") == request_id:
                resp = msg.value().entities
                self.consumer.acknowledge(msg)
                return resp

            # Ignore messages with wrong ID
            self.consumer.acknowledge(msg)

    def __del__(self):
        """Release Pulsar resources.

        Each attribute is checked first because __del__ also runs when
        __init__ raised part-way through and some were never assigned.
        """

        if hasattr(self, "consumer"):
            self.consumer.close()

        if hasattr(self, "producer"):
            self.producer.flush()
            self.producer.close()

        if hasattr(self, "client"):
            self.client.close()

View file

@ -1,11 +1,19 @@
from trustgraph.trustgraph import TrustGraph
from trustgraph.triple_vectors import TripleVectors
from trustgraph.trustgraph import TrustGraph
from trustgraph.llm_client import LlmClient
from trustgraph.embeddings_client import EmbeddingsClient
from . schema import text_completion_request_queue
from . schema import text_completion_response_queue
from . graph_embeddings_client import GraphEmbeddingsClient
from . triples_query_client import TriplesQueryClient
from . embeddings_client import EmbeddingsClient
from . prompt_client import PromptClient
from . schema import GraphEmbeddingsRequest, GraphEmbeddingsResponse
from . schema import TriplesQueryRequest, TriplesQueryResponse
from . schema import prompt_request_queue
from . schema import prompt_response_queue
from . schema import embeddings_request_queue
from . schema import embeddings_response_queue
from . schema import graph_embeddings_request_queue
from . schema import graph_embeddings_response_queue
from . schema import triples_request_queue
from . schema import triples_response_queue
LABEL="http://www.w3.org/2000/01/rdf-schema#label"
DEFINITION="http://www.w3.org/2004/02/skos/core#definition"
@ -14,13 +22,15 @@ class GraphRag:
def __init__(
self,
graph_hosts=None,
pulsar_host="pulsar://pulsar:6650",
vector_store="http://milvus:19530",
completion_request_queue=None,
completion_response_queue=None,
pr_request_queue=None,
pr_response_queue=None,
emb_request_queue=None,
emb_response_queue=None,
ge_request_queue=None,
ge_response_queue=None,
tpl_request_queue=None,
tpl_response_queue=None,
verbose=False,
entity_limit=50,
triple_limit=30,
@ -30,25 +40,46 @@ class GraphRag:
self.verbose=verbose
if completion_request_queue == None:
completion_request_queue = text_completion_request_queue
if pr_request_queue is None:
pr_request_queue = prompt_request_queue
if completion_response_queue == None:
completion_response_queue = text_completion_response_queue
if pr_response_queue is None:
pr_response_queue = prompt_response_queue
if emb_request_queue == None:
if emb_request_queue is None:
emb_request_queue = embeddings_request_queue
if emb_response_queue == None:
if emb_response_queue is None:
emb_response_queue = embeddings_response_queue
if graph_hosts == None:
graph_hosts = ["cassandra"]
if ge_request_queue is None:
ge_request_queue = graph_embeddings_request_queue
if ge_response_queue is None:
ge_response_queue = graph_embeddings_response_queue
if tpl_request_queue is None:
tpl_request_queue = triples_request_queue
if tpl_response_queue is None:
tpl_response_queue = triples_response_queue
if self.verbose:
print("Initialising...", flush=True)
self.graph = TrustGraph(graph_hosts)
self.ge_client = GraphEmbeddingsClient(
pulsar_host=pulsar_host,
subscriber=module + "-ge",
input_queue=ge_request_queue,
output_queue=ge_response_queue,
)
self.triples_client = TriplesQueryClient(
pulsar_host=pulsar_host,
subscriber=module + "-tpl",
input_queue=tpl_request_queue,
output_queue=tpl_response_queue
)
self.embeddings = EmbeddingsClient(
pulsar_host=pulsar_host,
@ -57,19 +88,17 @@ class GraphRag:
subscriber=module + "-emb",
)
self.vecstore = TripleVectors(vector_store)
self.entity_limit=entity_limit
self.query_limit=triple_limit
self.max_subgraph_size=max_subgraph_size
self.label_cache = {}
self.llm = LlmClient(
self.lang = PromptClient(
pulsar_host=pulsar_host,
input_queue=completion_request_queue,
output_queue=completion_response_queue,
subscriber=module + "-llm",
# Pass the queue names given to (or defaulted by) __init__ rather than
# the module-level defaults, so caller overrides actually take effect.
input_queue=pr_request_queue,
output_queue=pr_response_queue,
subscriber=module + "-prompt",
)
if self.verbose:
@ -89,70 +118,43 @@ class GraphRag:
def get_entities(self, query):
everything = []
vectors = self.get_vector(query)
if self.verbose:
print("Get entities...", flush=True)
for vector in vectors:
entities = self.ge_client.request(
vectors, self.entity_limit
)
res = self.vecstore.search(
vector,
limit=self.entity_limit
)
print("Obtained", len(res), "entities")
entities = set([
item["entity"]["entity"]
for item in res
])
everything.extend(entities)
entities = [
e.value
for e in entities
]
if self.verbose:
print("Entities:", flush=True)
for ent in everything:
for ent in entities:
print(" ", ent, flush=True)
return everything
return entities
def maybe_label(self, e):
if e in self.label_cache:
return self.label_cache[e]
res = self.graph.get_sp(e, LABEL)
res = list(res)
res = self.triples_client.request(
e, LABEL, None, limit=1
)
if len(res) == 0:
self.label_cache[e] = e
return e
self.label_cache[e] = res[0][0]
self.label_cache[e] = res[0].o.value
return self.label_cache[e]
def get_nodes(self, query):
ents = self.get_entities(query)
if self.verbose:
print("Get labels...", flush=True)
nodes = [
self.maybe_label(e)
for e in ents
]
if self.verbose:
print("Nodes:", flush=True)
for node in nodes:
print(" ", node, flush=True)
return nodes
def get_subgraph(self, query):
entities = self.get_entities(query)
@ -164,17 +166,35 @@ class GraphRag:
for e in entities:
res = self.graph.get_s(e, limit=self.query_limit)
for p, o in res:
subgraph.add((e, p, o))
res = self.triples_client.request(
e, None, None,
limit=self.query_limit
)
res = self.graph.get_p(e, limit=self.query_limit)
for s, o in res:
subgraph.add((s, e, o))
for triple in res:
subgraph.add(
(triple.s.value, triple.p.value, triple.o.value)
)
res = self.graph.get_o(e, limit=self.query_limit)
for s, p in res:
subgraph.add((s, p, e))
res = self.triples_client.request(
None, e, None,
limit=self.query_limit
)
for triple in res:
subgraph.add(
(triple.s.value, triple.p.value, triple.o.value)
)
res = self.triples_client.request(
None, None, e,
limit=self.query_limit
)
for triple in res:
subgraph.add(
(triple.s.value, triple.p.value, triple.o.value)
)
subgraph = list(subgraph)
@ -209,47 +229,19 @@ class GraphRag:
return sg2
def get_cypher(self, query):
sg = self.get_labelgraph(query)
sg2 = []
for s, p, o in sg:
sg2.append(f"({s})-[{p}]->({o})")
kg = "\n".join(sg2)
kg = kg.replace("\\", "-")
return kg
def get_graph_prompt(self, query):
kg = self.get_cypher(query)
prompt=f"""Study the following set of knowledge statements. The statements are written in Cypher format that has been extracted from a knowledge graph. Use only the provided set of knowledge statements in your response. Do not speculate if the answer is not found in the provided set of knowledge statements.
Here's the knowledge statements:
{kg}
Use only the provided knowledge statements to respond to the following:
{query}
"""
return prompt
def query(self, query):
if self.verbose:
print("Construct prompt...", flush=True)
prompt = self.get_graph_prompt(query)
kg = self.get_labelgraph(query)
if self.verbose:
print("Invoke LLM...", flush=True)
print(kg)
print(query)
resp = self.llm.request(prompt)
resp = self.lang.request_kg_prompt(query, kg)
if self.verbose:
print("Done", flush=True)

View file

@ -9,11 +9,10 @@ import json
from ... schema import ChunkEmbeddings, Triple, Source, Value
from ... schema import chunk_embeddings_ingest_queue, triples_store_queue
from ... schema import text_completion_request_queue
from ... schema import text_completion_response_queue
from ... schema import prompt_request_queue
from ... schema import prompt_response_queue
from ... log_level import LogLevel
from ... llm_client import LlmClient
from ... prompts import to_definitions
from ... prompt_client import PromptClient
from ... rdf import TRUSTGRAPH_ENTITIES, DEFINITION
from ... base import ConsumerProducer
@ -32,11 +31,11 @@ class Processor(ConsumerProducer):
input_queue = params.get("input_queue", default_input_queue)
output_queue = params.get("output_queue", default_output_queue)
subscriber = params.get("subscriber", default_subscriber)
tc_request_queue = params.get(
"text_completion_request_queue", text_completion_request_queue
pr_request_queue = params.get(
"prompt_request_queue", prompt_request_queue
)
tc_response_queue = params.get(
"text_completion_response_queue", text_completion_response_queue
pr_response_queue = params.get(
"prompt_response_queue", prompt_response_queue
)
super(Processor, self).__init__(
@ -46,16 +45,16 @@ class Processor(ConsumerProducer):
"subscriber": subscriber,
"input_schema": ChunkEmbeddings,
"output_schema": Triple,
"text_completion_request_queue": tc_request_queue,
"text_completion_response_queue": tc_response_queue,
"prompt_request_queue": pr_request_queue,
"prompt_response_queue": pr_response_queue,
}
)
self.llm = LlmClient(
self.prompt = PromptClient(
pulsar_host=self.pulsar_host,
input_queue=tc_request_queue,
output_queue=tc_response_queue,
subscriber = module + "-llm",
input_queue=pr_request_queue,
output_queue=pr_response_queue,
subscriber = module + "-prompt",
)
def to_uri(self, text):
@ -68,12 +67,7 @@ class Processor(ConsumerProducer):
def get_definitions(self, chunk):
prompt = to_definitions(chunk)
resp = self.llm.request(prompt)
defs = json.loads(resp)
return defs
return self.prompt.request_definitions(chunk)
def emit_edge(self, s, p, o):
@ -90,14 +84,13 @@ class Processor(ConsumerProducer):
try:
defs = self.get_definitions(chunk)
print(json.dumps(defs, indent=4), flush=True)
for defn in defs:
s = defn["entity"]
s = defn.name
s_uri = self.to_uri(s)
o = defn["definition"]
o = defn.definition
if s == "": continue
if o == "": continue
@ -121,15 +114,15 @@ class Processor(ConsumerProducer):
)
parser.add_argument(
'--text-completion-request-queue',
default=text_completion_request_queue,
help=f'Text completion request queue (default: {text_completion_request_queue})',
'--prompt-request-queue',
default=prompt_request_queue,
help=f'Prompt request queue (default: {prompt_request_queue})',
)
parser.add_argument(
'--text-completion-response-queue',
default=text_completion_response_queue,
help=f'Text completion response queue (default: {text_completion_response_queue})',
# The flag must be --prompt-response-queue so that argparse stores it
# under "prompt_response_queue", the key __init__ reads from params.
'--prompt-response-queue',
default=prompt_response_queue,
help=f'Prompt response queue (default: {prompt_response_queue})',
)
def run():

View file

@ -6,18 +6,16 @@ graph edges.
"""
import urllib.parse
import json
import os
from pulsar.schema import JsonSchema
from ... schema import ChunkEmbeddings, Triple, GraphEmbeddings, Source, Value
from ... schema import chunk_embeddings_ingest_queue, triples_store_queue
from ... schema import graph_embeddings_store_queue
from ... schema import text_completion_request_queue
from ... schema import text_completion_response_queue
from ... schema import prompt_request_queue
from ... schema import prompt_response_queue
from ... log_level import LogLevel
from ... llm_client import LlmClient
from ... prompts import to_relationships
from ... prompt_client import PromptClient
from ... rdf import RDF_LABEL, TRUSTGRAPH_ENTITIES
from ... base import ConsumerProducer
@ -38,11 +36,11 @@ class Processor(ConsumerProducer):
output_queue = params.get("output_queue", default_output_queue)
vector_queue = params.get("vector_queue", default_vector_queue)
subscriber = params.get("subscriber", default_subscriber)
tc_request_queue = params.get(
"text_completion_request_queue", text_completion_request_queue
pr_request_queue = params.get(
"prompt_request_queue", prompt_request_queue
)
tc_response_queue = params.get(
"text_completion_response_queue", text_completion_response_queue
pr_response_queue = params.get(
"prompt_response_queue", prompt_response_queue
)
super(Processor, self).__init__(
@ -52,8 +50,8 @@ class Processor(ConsumerProducer):
"subscriber": subscriber,
"input_schema": ChunkEmbeddings,
"output_schema": Triple,
"text_completion_request_queue": tc_request_queue,
"text_completion_response_queue": tc_response_queue,
"prompt_request_queue": pr_request_queue,
"prompt_response_queue": pr_response_queue,
}
)
@ -66,19 +64,19 @@ class Processor(ConsumerProducer):
"input_queue": input_queue,
"output_queue": output_queue,
"vector_queue": vector_queue,
"text_completion_request_queue": tc_request_queue,
"text_completion_response_queue": tc_response_queue,
"prompt_request_queue": pr_request_queue,
"prompt_response_queue": pr_response_queue,
"subscriber": subscriber,
"input_schema": ChunkEmbeddings.__name__,
"output_schema": Triple.__name__,
"vector_schema": GraphEmbeddings.__name__,
})
self.llm = LlmClient(
pulsar_host = self.pulsar_host,
input_queue=tc_request_queue,
output_queue=tc_response_queue,
subscriber = module + "-llm",
self.prompt = PromptClient(
pulsar_host=self.pulsar_host,
input_queue=pr_request_queue,
output_queue=pr_response_queue,
subscriber = module + "-prompt",
)
def to_uri(self, text):
@ -91,12 +89,7 @@ class Processor(ConsumerProducer):
def get_relationships(self, chunk):
prompt = to_relationships(chunk)
resp = self.llm.request(prompt)
rels = json.loads(resp)
return rels
return self.prompt.request_relationships(chunk)
def emit_edge(self, s, p, o):
@ -118,13 +111,12 @@ class Processor(ConsumerProducer):
try:
rels = self.get_relationships(chunk)
print(json.dumps(rels, indent=4), flush=True)
for rel in rels:
s = rel["subject"]
p = rel["predicate"]
o = rel["object"]
s = rel.s
p = rel.p
o = rel.o
if s == "": continue
if p == "": continue
@ -136,7 +128,7 @@ class Processor(ConsumerProducer):
p_uri = self.to_uri(p)
p_value = Value(value=str(p_uri), is_uri=True)
if rel["object-entity"]:
if rel.o_entity:
o_uri = self.to_uri(o)
o_value = Value(value=str(o_uri), is_uri=True)
else:
@ -162,7 +154,7 @@ class Processor(ConsumerProducer):
Value(value=str(p), is_uri=False)
)
if rel["object-entity"]:
if rel.o_entity:
# Label for o
self.emit_edge(
o_value,
@ -172,7 +164,7 @@ class Processor(ConsumerProducer):
self.emit_vec(s_value, v.vectors)
self.emit_vec(p_value, v.vectors)
if rel["object-entity"]:
if rel.o_entity:
self.emit_vec(o_value, v.vectors)
except Exception as e:
@ -195,15 +187,15 @@ class Processor(ConsumerProducer):
)
parser.add_argument(
'--text-completion-request-queue',
default=text_completion_request_queue,
help=f'Text completion request queue (default: {text_completion_request_queue})',
'--prompt-request-queue',
default=prompt_request_queue,
help=f'Prompt request queue (default: {prompt_request_queue})',
)
parser.add_argument(
'--text-completion-response-queue',
default=text_completion_response_queue,
help=f'Text completion response queue (default: {text_completion_response_queue})',
'--prompt-response-queue',
default=prompt_response_queue,
help=f'Prompt response queue (default: {prompt_response_queue})',
)
def run():

View file

View file

@ -0,0 +1,3 @@
from . service import *

View file

@ -0,0 +1,7 @@
#!/usr/bin/env python3
# Script entry point for this package: calls run() from the sibling
# service module when executed as __main__.
from . service import run
if __name__ == '__main__':
run()

View file

@ -0,0 +1,81 @@
def to_relationships(text):
    """
    Build the LLM prompt that asks for the relationships found in `text`.

    The prompt instructs the model to answer with raw JSON: an array of
    objects with the fields subject, predicate, object and object-entity.

    text: the text to analyse; it is embedded verbatim between <text> tags.
    Returns the prompt string.
    """

    return f"""<instructions>
Study the following text and derive entity relationships. For each
relationship, derive the subject, predicate and object of the relationship.
Output relationships in JSON format as an array of objects with fields:
- subject: the subject of the relationship
- predicate: the predicate
- object: the object of the relationship
- object-entity: false if the object is a simple data type: name, value or date. true if it is an entity.
</instructions>
<text>
{text}
</text>
<requirements>
You will respond only with raw JSON format data. Do not provide
explanations. Do not use special characters in the abstract text. The
abstract must be written as plain text. Do not add markdown formatting
or headers or prefixes.
</requirements>"""
def to_definitions(text):
    """
    Build the LLM prompt that asks for definitions of entities in `text`.

    The prompt instructs the model to answer with raw JSON: an array of
    objects with the fields entity and definition.

    text: the text to analyse; it is embedded verbatim between <text> tags.
    Returns the prompt string.
    """

    return f"""<instructions>
Study the following text and derive definitions for any discovered entities.
Do not provide definitions for entities whose definitions are incomplete
or unknown.
Output definitions in JSON format as an array of objects with fields:
- entity: the name of the entity
- definition: English text which defines the entity
</instructions>
<text>
{text}
</text>
<requirements>
You will respond only with raw JSON format data. Do not provide
explanations. Do not use special characters in the abstract text. The
abstract will be written as plain text. Do not add markdown formatting
or headers or prefixes. Do not include null or unknown definitions.
</requirements>"""
def get_cypher(kg):
    """
    Render knowledge-graph facts as Cypher-style statements.

    kg: iterable of objects with `s`, `p` and `o` attributes.
    Returns one "(s)-[p]->(o)" statement per fact, newline-joined, with
    every backslash replaced by "-".  An empty `kg` gives "".
    """

    # No per-fact debug printing here: the caller already logs the whole
    # prompt, which contains this text.
    statements = "\n".join(
        f"({f.s})-[{f.p}]->({f.o})"
        for f in kg
    )

    return statements.replace("\\", "-")
def to_kg_query(query, kg):
    """
    Build the question-answering prompt for a knowledge-graph query.

    query: the question to put to the model.
    kg: facts rendered into the prompt through get_cypher().
    Returns the prompt string.
    """

    statements = get_cypher(kg)

    return f"""Study the following set of knowledge statements. The statements are written in Cypher format that has been extracted from a knowledge graph. Use only the provided set of knowledge statements in your response. Do not speculate if the answer is not found in the provided set of knowledge statements.
Here's the knowledge statements:
{statements}
Use only the provided knowledge statements to respond to the following:
{query}
"""

View file

@ -0,0 +1,195 @@
"""
Language service abstracts prompt engineering from LLM.
"""
import json
from .... schema import Definition, Relationship, Triple
from .... schema import PromptRequest, PromptResponse
from .... schema import TextCompletionRequest, TextCompletionResponse
from .... schema import text_completion_request_queue
from .... schema import text_completion_response_queue
from .... schema import prompt_request_queue, prompt_response_queue
from .... base import ConsumerProducer
from .... llm_client import LlmClient
from . prompts import to_definitions, to_relationships, to_kg_query
# Service name: __name__ with its first and last dotted components
# dropped.  Used as the default subscriber and passed to Processor.start().
module = ".".join(__name__.split(".")[1:-1])
# Defaults used when the corresponding params are not supplied.
default_input_queue = prompt_request_queue
default_output_queue = prompt_response_queue
default_subscriber = module
class Processor(ConsumerProducer):
    """
    Prompt service.

    Consumes PromptRequest messages, builds the prompt for the requested
    kind, runs it through the text-completion service with LlmClient and
    publishes a PromptResponse tagged with the sender's "id" property.
    """

    def __init__(self, **params):
        """
        Optional params keys read here: input_queue, output_queue,
        subscriber, text_completion_request_queue and
        text_completion_response_queue; each falls back to its default.
        """

        input_queue = params.get("input_queue", default_input_queue)
        output_queue = params.get("output_queue", default_output_queue)
        subscriber = params.get("subscriber", default_subscriber)
        tc_request_queue = params.get(
            "text_completion_request_queue", text_completion_request_queue
        )
        tc_response_queue = params.get(
            "text_completion_response_queue", text_completion_response_queue
        )

        super(Processor, self).__init__(
            **params | {
                "input_queue": input_queue,
                "output_queue": output_queue,
                "subscriber": subscriber,
                "input_schema": PromptRequest,
                "output_schema": PromptResponse,
                "text_completion_request_queue": tc_request_queue,
                "text_completion_response_queue": tc_response_queue,
            }
        )

        # self.pulsar_host is not set in this class; presumably the base
        # class constructor provides it -- TODO confirm.
        self.llm = LlmClient(
            subscriber=subscriber,
            input_queue=tc_request_queue,
            output_queue=tc_response_queue,
            pulsar_host = self.pulsar_host
        )

    def handle(self, msg):
        """
        Dispatch one request on its `kind` field.  An unknown kind is
        logged and no response is sent.
        """

        v = msg.value()

        # Sender-produced ID
        id = msg.properties()["id"]

        kind = v.kind

        print(f"Handling kind {kind}...", flush=True)

        if kind == "extract-definitions":
            self.handle_extract_definitions(id, v)
        elif kind == "extract-relationships":
            self.handle_extract_relationships(id, v)
        elif kind == "kg-prompt":
            self.handle_kg_prompt(id, v)
        else:
            print("Invalid kind.", flush=True)

    def handle_extract_definitions(self, id, v):
        """
        Ask the LLM for entity definitions in v.chunk and reply with a
        PromptResponse whose `definitions` holds the usable entries.

        The LLM answer is parsed with json.loads; a parse failure is not
        caught here and propagates to the caller.
        """

        prompt = to_definitions(v.chunk)

        print(prompt)

        ans = self.llm.request(prompt)

        print(ans)

        defs = json.loads(ans)

        output = []

        for defn in defs:

            try:
                e = defn["entity"]
                d = defn["definition"]

                output.append(
                    Definition(
                        name=e, definition=d
                    )
                )

            except Exception as ex:
                # Best effort: drop a malformed entry, but say so instead
                # of swallowing it silently, and let KeyboardInterrupt /
                # SystemExit through (a bare except would catch them).
                print("Skipping malformed definition:", ex, flush=True)

        print("Send response...", flush=True)
        r = PromptResponse(definitions=output)
        self.producer.send(r, properties={"id": id})

        print("Done.", flush=True)

    def handle_extract_relationships(self, id, v):
        """
        Ask the LLM for relationships in v.chunk and reply with a
        PromptResponse whose `relationships` holds the usable entries.

        The LLM answer is parsed with json.loads; a parse failure is not
        caught here and propagates to the caller.
        """

        prompt = to_relationships(v.chunk)

        ans = self.llm.request(prompt)

        defs = json.loads(ans)

        output = []

        for defn in defs:

            try:
                output.append(
                    Relationship(
                        s = defn["subject"],
                        p = defn["predicate"],
                        o = defn["object"],
                        o_entity = defn["object-entity"],
                    )
                )

            except Exception as ex:
                # Best effort: drop a malformed entry and log why.
                print("Skipping malformed relationship:", ex, flush=True)

        print("Send response...", flush=True)
        r = PromptResponse(relationships=output)
        self.producer.send(r, properties={"id": id})

        print("Done.", flush=True)

    def handle_kg_prompt(self, id, v):
        """
        Answer v.query against the facts in v.kg and reply with a
        PromptResponse whose `answer` is the raw LLM output.
        """

        prompt = to_kg_query(v.query, v.kg)

        print(prompt)

        ans = self.llm.request(prompt)

        print(ans)

        print("Send response...", flush=True)
        r = PromptResponse(answer=ans)
        self.producer.send(r, properties={"id": id})

        print("Done.", flush=True)

    @staticmethod
    def add_args(parser):
        """
        Register the common consumer/producer options plus the two
        text-completion queue options on `parser`.
        """

        ConsumerProducer.add_args(
            parser, default_input_queue, default_subscriber,
            default_output_queue,
        )

        parser.add_argument(
            '--text-completion-request-queue',
            default=text_completion_request_queue,
            help=f'Text completion request queue (default: {text_completion_request_queue})',
        )

        parser.add_argument(
            '--text-completion-response-queue',
            default=text_completion_response_queue,
            help=f'Text completion response queue (default: {text_completion_response_queue})',
        )
def run():
    """Start the prompt service, using the module docstring as its description."""

    Processor.start(module, __doc__)

143
trustgraph/prompt_client.py Normal file
View file

@ -0,0 +1,143 @@
#!/usr/bin/env python3
import pulsar
import _pulsar
from pulsar.schema import JsonSchema
import hashlib
import uuid
from . schema import PromptRequest, PromptResponse, Fact
from . schema import prompt_request_queue
from . schema import prompt_response_queue
# Module-level aliases for the Pulsar logger levels.  They reach into the
# private _pulsar extension module (hence "ugly"); ERROR is the default
# log_level of PromptClient.
ERROR=_pulsar.LoggerLevel.Error
WARN=_pulsar.LoggerLevel.Warn
INFO=_pulsar.LoggerLevel.Info
DEBUG=_pulsar.LoggerLevel.Debug
class PromptClient:
    """
    Request/response client for the prompt service.

    Each request_* method publishes a PromptRequest of a particular kind
    and then reads the response queue until it sees the message whose "id"
    property matches the one it sent.
    """

    def __init__(
            self, log_level=ERROR,
            subscriber=None,
            input_queue=None,
            output_queue=None,
            pulsar_host="pulsar://pulsar:6650",
    ):
        """
        Connect to Pulsar and create the producer/consumer pair.

        log_level: Pulsar logger level handed to pulsar.ConsoleLogger.
        subscriber: subscription name; a random UUID is used when None.
        input_queue: topic requests are sent to; defaults to
            prompt_request_queue.
        output_queue: topic responses are read from; defaults to
            prompt_response_queue.
        pulsar_host: Pulsar service URL.
        """

        if input_queue is None:
            input_queue = prompt_request_queue

        if output_queue is None:
            output_queue = prompt_response_queue

        if subscriber is None:
            subscriber = str(uuid.uuid4())

        self.client = pulsar.Client(
            pulsar_host,
            logger=pulsar.ConsoleLogger(log_level),
        )

        self.producer = self.client.create_producer(
            topic=input_queue,
            schema=JsonSchema(PromptRequest),
            chunking_enabled=True,
        )

        self.consumer = self.client.subscribe(
            output_queue, subscriber,
            schema=JsonSchema(PromptResponse),
        )

    def _request(self, r, field, timeout):
        """
        Send request `r` and return attribute `field` of the response
        whose "id" property matches.  Shared by all request_* methods.

        timeout: seconds to wait on each individual receive() call (it is
            multiplied by 1000 for timeout_millis); it is not an overall
            deadline.
        """

        id = str(uuid.uuid4())

        self.producer.send(r, properties={ "id": id })

        while True:

            msg = self.consumer.receive(timeout_millis=timeout * 1000)

            # .get() rather than ["id"]: a response published without an
            # "id" property must be skipped and acknowledged like any other
            # foreign message, not abort this request with a KeyError.
            mid = msg.properties().get("id")

            if mid == id:
                resp = getattr(msg.value(), field)
                self.consumer.acknowledge(msg)
                return resp

            # Ignore messages with wrong ID
            self.consumer.acknowledge(msg)

    def request_definitions(self, chunk, timeout=500):
        """
        Request definition extraction for `chunk`; returns the response's
        `definitions` field.
        """

        r = PromptRequest(
            kind="extract-definitions",
            chunk=chunk,
        )

        return self._request(r, "definitions", timeout)

    def request_relationships(self, chunk, timeout=500):
        """
        Request relationship extraction for `chunk`; returns the response's
        `relationships` field.
        """

        r = PromptRequest(
            kind="extract-relationships",
            chunk=chunk,
        )

        return self._request(r, "relationships", timeout)

    def request_kg_prompt(self, query, kg, timeout=500):
        """
        Ask `query` against knowledge-graph facts; returns the response's
        `answer` field.

        kg: iterable of indexable triples; elements 0, 1 and 2 become the
            s, p and o of a Fact.
        """

        r = PromptRequest(
            kind="kg-prompt",
            query=query,
            kg=[
                Fact(s=v[0], p=v[1], o=v[2])
                for v in kg
            ],
        )

        return self._request(r, "answer", timeout)

    def __del__(self):
        """
        Release Pulsar resources.  Every attribute is checked first because
        __del__ also runs on an instance whose __init__ failed part-way.
        """

        if hasattr(self, "consumer"):
            self.consumer.close()

        if hasattr(self, "producer"):
            self.producer.flush()
            self.producer.close()

        if hasattr(self, "client"):
            self.client.close()

View file

@ -1,138 +0,0 @@
# Build the LLM prompt asking for knowledge in `text` to be extracted as
# Turtle RDF.  `text` is embedded verbatim between <text> tags; the prompt
# string is returned.
def turtle_extract(text):
prompt = f"""<instructions>
Study the following text and extract knowledge as
information in Turtle RDF format.
When declaring any new URIs, use <https://trustgraph.ai/e#> prefix,
and declare appropriate namespace tags.
</instructions>
<text>
{text}
</text>
<requirements>
Do not use placeholders for information you do not know.
You will respond only with raw Turtle RDF data. Do not provide
explanations. Do not use special characters in the abstract text. The
abstract must be written as plain text. Do not add markdown formatting.
</requirements>"""
return prompt
# Build the LLM prompt asking for a scholarly abstract of `text`, answered
# as a JSON object shaped like `jsonexample` (title, abstract, keywords,
# people).  Returns the prompt string.
def scholar(text):
# Build the prompt for Article style extraction
# The example is a plain (non-f) string, so its braces need no escaping;
# it is substituted into the f-string below.
jsonexample = """{
"title": "Article title here",
"abstract": "Abstract text here",
"keywords": ["keyword1", "keyword2", "keyword3"],
"people": ["person1", "person2", "person3"]
}"""
promptscholar = f"""Your task is to read the provided text and write a scholarly abstract to fully explain all of the concepts described in the provided text. The abstract must include all conceptual details.
<text>
{text}
</text>
<instructions>
- Structure: For the provided text, write a title, abstract, keywords,
and people for the concepts found in the provided text. Ignore
document formatting in the provided text such as table of contents,
headers, footers, section metadata, and URLs.
- Focus on Concepts The abstract must focus on concepts found in the
provided text. The abstract must be factually accurate. Do not
write any concepts not found in the provided text. Do not
speculate. Do not omit any conceptual details.
- Completeness: The abstract must capture all topics the reader will
need to understand the concepts found in the provided text. Describe
all terms, definitions, entities, people, events, concepts,
conceptual relationships, and any other topics necessary for the
reader to understand the concepts of the provided text.
- Format: Respond in the form of a valid JSON object.
</instructions>
<example>
{jsonexample}
</example>
<requirements>
You will respond only with the JSON object. Do not provide
explanations. Do not use special characters in the abstract text. The
abstract must be written as plain text.
</requirements>"""
return promptscholar
# Build the LLM prompt asking for the facts in `text` to be output as
# JSON-LD using schema.org vocabulary.  `text` is embedded verbatim
# between <text> tags; the prompt string is returned.
def to_json_ld(text):
prompt = f"""<instructions>
Study the following text and output any facts you discover in
well-structured JSON-LD format.
Use any schema you understand from schema.org to describe the facts.
</instructions>
<text>
{text}
</text>
<requirements>
You will respond only with raw JSON-LD data in JSON format. Do not provide
explanations. Do not use special characters in the abstract text. The
abstract must be written as plain text. Do not add markdown formatting
or headers or prefixes. Do not use information which is not present in
the input text.
</requirements>"""
return prompt
# Build the LLM prompt asking for subject/predicate/object relationships
# in `text`, answered as raw JSON with the fields subject, predicate,
# object and object-entity.  Returns the prompt string.
# NOTE(review): the prompt text contains the typo "arary" (for "array").
def to_relationships(text):
prompt = f"""<instructions>
Study the following text and derive entity relationships. For each
relationship, derive the subject, predicate and object of the relationship.
Output relationships in JSON format as an arary of objects with fields:
- subject: the subject of the relationship
- predicate: the predicate
- object: the object of the relationship
- object-entity: false if the object is a simple data type: name, value or date. true if it is an entity.
</instructions>
<text>
{text}
</text>
<requirements>
You will respond only with raw JSON format data. Do not provide
explanations. Do not use special characters in the abstract text. The
abstract must be written as plain text. Do not add markdown formatting
or headers or prefixes.
</requirements>"""
return prompt
# Build the LLM prompt asking for definitions of the entities found in
# `text`, answered as raw JSON with the fields entity and definition.
# Returns the prompt string.
# NOTE(review): the prompt text says "Output relationships" although it
# asks for definitions, and contains the typo "arary" (for "array").
def to_definitions(text):
prompt = f"""<instructions>
Study the following text and derive definitions for any discovered entities.
Do not provide definitions for entities whose definitions are incomplete
or unknown.
Output relationships in JSON format as an arary of objects with fields:
- entity: the name of the entity
- definition: English text which defines the entity
</instructions>
<text>
{text}
</text>
<requirements>
You will respond only with raw JSON format data. Do not provide
explanations. Do not use special characters in the abstract text. The
abstract will be written as plain text. Do not add markdown formatting
or headers or prefixes. Do not include null or unknown definitions.
</requirements>"""
return prompt

View file

View file

@ -0,0 +1,3 @@
from . service import *

View file

@ -0,0 +1,7 @@
#!/usr/bin/env python3

# Script entry point for this package.  run() is defined in the sibling
# service module (the module the package __init__ imports from), so it is
# imported from there rather than from a non-existent "hf" module.
from . service import run

if __name__ == '__main__':
    run()

View file

@ -0,0 +1,100 @@
"""
Graph embeddings query service. Input is vector, output is list of
entities
"""
from .... direct.milvus import TripleVectors
from .... schema import GraphEmbeddingsRequest, GraphEmbeddingsResponse, Value
from .... schema import graph_embeddings_request_queue
from .... schema import graph_embeddings_response_queue
from .... base import ConsumerProducer
# Service name: __name__ with its first and last dotted components
# dropped.  Used as the default subscriber and passed to Processor.start().
module = ".".join(__name__.split(".")[1:-1])
# Defaults used when the corresponding params are not supplied.
default_input_queue = graph_embeddings_request_queue
default_output_queue = graph_embeddings_response_queue
default_subscriber = module
# URI handed to TripleVectors when no store_uri is given.
default_store_uri = 'http://localhost:19530'
class Processor(ConsumerProducer):
    """
    Graph embeddings query service.

    Consumes GraphEmbeddingsRequest messages, searches the vector store
    once per request vector and publishes a GraphEmbeddingsResponse that
    lists the distinct entities found, tagged with the sender's "id".
    """

    def __init__(self, **params):
        """
        Optional params keys read here: input_queue, output_queue,
        subscriber and store_uri; each falls back to its default.
        """

        input_queue = params.get("input_queue", default_input_queue)
        output_queue = params.get("output_queue", default_output_queue)
        subscriber = params.get("subscriber", default_subscriber)
        store_uri = params.get("store_uri", default_store_uri)

        super(Processor, self).__init__(
            **params | {
                "input_queue": input_queue,
                "output_queue": output_queue,
                "subscriber": subscriber,
                "input_schema": GraphEmbeddingsRequest,
                "output_schema": GraphEmbeddingsResponse,
                "store_uri": store_uri,
            }
        )

        self.vecstore = TripleVectors(store_uri)

    def create_value(self, ent):
        """
        Wrap an entity string in a Value, flagged as a URI when it starts
        with "http://" or "https://".
        """

        is_uri = ent.startswith(("http://", "https://"))
        return Value(value=ent, is_uri=is_uri)

    def handle(self, msg):
        """
        Answer one query: search with every vector in the request and
        reply with the distinct entities in first-seen order.
        """

        v = msg.value()

        # Sender-produced ID
        id = msg.properties()["id"]

        print(f"Handling input {id}...", flush=True)

        # A dict is used as an ordered set: duplicates are dropped but the
        # order in which the store returned the hits is kept, so the reply
        # is deterministic (a plain set would iterate in arbitrary order).
        seen = {}

        for vec in v.vectors:

            resp = self.vecstore.search(vec, limit=v.limit)

            for r in resp:
                seen.setdefault(r["entity"]["entity"], None)

        entities = [
            self.create_value(ent)
            for ent in seen
        ]

        print("Send response...", flush=True)
        r = GraphEmbeddingsResponse(entities=entities)
        self.producer.send(r, properties={"id": id})

        print("Done.", flush=True)

    @staticmethod
    def add_args(parser):
        """
        Register the common consumer/producer options plus the store URI
        option on `parser`.
        """

        ConsumerProducer.add_args(
            parser, default_input_queue, default_subscriber,
            default_output_queue,
        )

        parser.add_argument(
            '-t', '--store-uri',
            default=default_store_uri,
            help=f'Milvus store URI (default: {default_store_uri})'
        )
def run():
    """Start the graph embeddings query service, using the module docstring as its description."""

    Processor.start(module, __doc__)

View file

View file

@ -0,0 +1,3 @@
from . service import *

View file

@ -0,0 +1,7 @@
#!/usr/bin/env python3

# Script entry point for this package.  run() is defined in the sibling
# service module (the module the package __init__ imports from), so it is
# imported from there rather than from a non-existent "hf" module.
from . service import run

if __name__ == '__main__':
    run()

View file

@ -0,0 +1,153 @@
"""
Triples query service. Input is a (s, p, o) triple, some values may be
null. Output is a list of triples.
"""
from .... direct.cassandra import TrustGraph
from .... schema import TriplesQueryRequest, TriplesQueryResponse
from .... schema import Value, Triple
from .... schema import triples_request_queue
from .... schema import triples_response_queue
from .... base import ConsumerProducer
# Service name: __name__ with its first and last dotted components
# dropped.  Used as the default subscriber and passed to Processor.start().
module = ".".join(__name__.split(".")[1:-1])
# Defaults used when the corresponding params are not supplied.
default_input_queue = triples_request_queue
default_output_queue = triples_response_queue
default_subscriber = module
# Host handed to TrustGraph when no graph_host is given.
default_graph_host='localhost'
class Processor(ConsumerProducer):
def __init__(self, **params):
input_queue = params.get("input_queue", default_input_queue)
output_queue = params.get("output_queue", default_output_queue)
subscriber = params.get("subscriber", default_subscriber)
graph_host = params.get("graph_host", default_graph_host)
super(Processor, self).__init__(
**params | {
"input_queue": input_queue,
"output_queue": output_queue,
"subscriber": subscriber,
"input_schema": TriplesQueryRequest,
"output_schema": TriplesQueryResponse,
"graph_host": graph_host,
}
)
self.tg = TrustGraph([graph_host])
def create_value(self, ent):
if ent.startswith("http://") or ent.startswith("https://"):
return Value(value=ent, is_uri=True)
else:
return Value(value=ent, is_uri=False)
def handle(self, msg):
v = msg.value()
# Sender-produced ID
id = msg.properties()["id"]
print(f"Handling input {id}...", flush=True)
triples = []
if v.s is not None:
if v.p is not None:
if v.o is not None:
resp = self.tg.get_spo(
v.s.value, v.p.value, v.o.value,
limit=v.limit
)
triples.append((v.s.value, v.p.value, v.o.value))
else:
resp = self.tg.get_sp(
v.s.value, v.p.value,
limit=v.limit
)
for t in resp:
triples.append((v.s.value, v.p.value, t.o))
else:
if v.o is not None:
resp = self.tg.get_os(
v.o.value, v.s.value,
limit=v.limit
)
for t in resp:
triples.append((v.s.value, t.p, v.o.value))
else:
resp = self.tg.get_s(
v.s.value,
limit=v.limit
)
for t in resp:
triples.append((v.s.value, t.p, t.o))
else:
if v.p is not None:
if v.o is not None:
resp = self.tg.get_po(
v.p.value, v.o.value,
limit=v.limit
)
for t in resp:
triples.append((t.s, v.p.value, v.o.value))
else:
resp = self.tg.get_p(
v.p.value,
limit=v.limit
)
for t in resp:
triples.append((t.s, v.p.value, t.o))
else:
if v.o is not None:
resp = self.tg.get_o(
v.o.value,
limit=v.limit
)
for t in resp:
triples.append((t.s, t.p, v.o.value))
else:
resp = self.tg.get_all(
limit=v.limit
)
for t in resp:
triples.append((t.s, t.p, t.o))
triples = [
Triple(
s=self.create_value(t[0]),
p=self.create_value(t[1]),
o=self.create_value(t[2])
)
for t in triples
]
print("Send response...", flush=True)
r = TriplesQueryResponse(triples=triples)
self.producer.send(r, properties={"id": id})
print("Done.", flush=True)
@staticmethod
def add_args(parser):
    """Register this processor's command-line arguments on ``parser``.

    Adds the common consumer/producer arguments via
    ``ConsumerProducer.add_args`` and then the graph host option.
    """

    ConsumerProducer.add_args(
        parser, default_input_queue, default_subscriber,
        default_output_queue,
    )

    # Use the same module-level default that __init__ falls back to, so
    # the CLI default and the programmatic default cannot drift apart.
    parser.add_argument(
        '-g', '--graph-host',
        default=default_graph_host,
        help=f'Graph host (default: {default_graph_host})'
    )
# Entry point: starts the processor through Processor.start, handing it the
# module-level `module` value and this module's docstring.
def run():
Processor.start(module, __doc__)

View file

@ -6,10 +6,14 @@ Input is query, output is response.
from ... schema import GraphRagQuery, GraphRagResponse
from ... schema import graph_rag_request_queue, graph_rag_response_queue
from ... schema import text_completion_request_queue
from ... schema import text_completion_response_queue
from ... schema import prompt_request_queue
from ... schema import prompt_response_queue
from ... schema import embeddings_request_queue
from ... schema import embeddings_response_queue
from ... schema import graph_embeddings_request_queue
from ... schema import graph_embeddings_response_queue
from ... schema import triples_request_queue
from ... schema import triples_response_queue
from ... log_level import LogLevel
from ... graph_rag import GraphRag
from ... base import ConsumerProducer
@ -19,8 +23,6 @@ module = ".".join(__name__.split(".")[1:-1])
default_input_queue = graph_rag_request_queue
default_output_queue = graph_rag_response_queue
default_subscriber = module
default_graph_hosts = 'localhost'
default_vector_store = 'http://localhost:19530'
class Processor(ConsumerProducer):
@ -29,16 +31,14 @@ class Processor(ConsumerProducer):
input_queue = params.get("input_queue", default_input_queue)
output_queue = params.get("output_queue", default_output_queue)
subscriber = params.get("subscriber", default_subscriber)
graph_hosts = params.get("graph_hosts", default_graph_hosts)
vector_store = params.get("vector_store", default_vector_store)
entity_limit = params.get("entity_limit", 50)
triple_limit = params.get("triple_limit", 30)
max_subgraph_size = params.get("max_subgraph_size", 3000)
tc_request_queue = params.get(
"text_completion_request_queue", text_completion_request_queue
pr_request_queue = params.get(
"prompt_request_queue", prompt_request_queue
)
tc_response_queue = params.get(
"text_completion_response_queue", text_completion_response_queue
pr_response_queue = params.get(
"prompt_response_queue", prompt_response_queue
)
emb_request_queue = params.get(
"embeddings_request_queue", embeddings_request_queue
@ -46,6 +46,18 @@ class Processor(ConsumerProducer):
emb_response_queue = params.get(
"embeddings_response_queue", embeddings_response_queue
)
ge_request_queue = params.get(
"graph_embeddings_request_queue", graph_embeddings_request_queue
)
ge_response_queue = params.get(
"graph_embeddings_response_queue", graph_embeddings_response_queue
)
tpl_request_queue = params.get(
"triples_request_queue", triples_request_queue
)
tpl_response_queue = params.get(
"triples_response_queue", triples_response_queue
)
super(Processor, self).__init__(
**params | {
@ -57,21 +69,27 @@ class Processor(ConsumerProducer):
"entity_limit": entity_limit,
"triple_limit": triple_limit,
"max_subgraph_size": max_subgraph_size,
"text_completion_request_queue": tc_request_queue,
"text_completion_response_queue": tc_response_queue,
"prompt_request_queue": pr_request_queue,
"prompt_response_queue": pr_response_queue,
"embeddings_request_queue": emb_request_queue,
"embeddings_response_queue": emb_response_queue,
"graph_embeddings_request_queue": ge_request_queue,
"graph_embeddings_response_queue": ge_response_queue,
"triples_request_queue": triples_request_queue,
"triples_response_queue": triples_response_queue,
}
)
self.rag = GraphRag(
pulsar_host=self.pulsar_host,
graph_hosts=graph_hosts.split(","),
completion_request_queue=tc_request_queue,
completion_response_queue=tc_response_queue,
pr_request_queue=pr_request_queue,
pr_response_queue=pr_response_queue,
emb_request_queue=emb_request_queue,
emb_response_queue=emb_response_queue,
vector_store=vector_store,
ge_request_queue=ge_request_queue,
ge_response_queue=ge_response_queue,
# Hand GraphRag the resolved queue names so a configured override takes
# effect; the bare triples_*_queue names are the schema defaults.
tpl_request_queue=tpl_request_queue,
tpl_response_queue=tpl_response_queue,
verbose=True,
entity_limit=entity_limit,
triple_limit=triple_limit,
@ -139,15 +157,15 @@ class Processor(ConsumerProducer):
)
parser.add_argument(
'--text-completion-request-queue',
default=text_completion_request_queue,
help=f'Text completion request queue (default: {text_completion_request_queue})',
'--prompt-request-queue',
default=prompt_request_queue,
help=f'Prompt request queue (default: {prompt_request_queue})',
)
parser.add_argument(
'--text-completion-response-queue',
default=text_completion_response_queue,
help=f'Text completion response queue (default: {text_completion_response_queue})',
'--prompt-response-queue',
default=prompt_response_queue,
help=f'Prompt response queue (default: {prompt_response_queue})',
)
parser.add_argument(
@ -159,7 +177,31 @@ class Processor(ConsumerProducer):
parser.add_argument(
'--embeddings-response-queue',
default=embeddings_response_queue,
help=f'Embeddings request queue (default: {embeddings_response_queue})',
help=f'Embeddings response queue (default: {embeddings_response_queue})',
)
parser.add_argument(
'--graph-embeddings-request-queue',
default=graph_embeddings_request_queue,
help=f'Graph embeddings request queue (default: {graph_embeddings_request_queue})',
)
parser.add_argument(
    # Correctly hyphenated flag first; the original misspelling (with an
    # underscore) is kept as an alias so existing invocations still work.
    # argparse derives the same dest from either spelling.
    '--graph-embeddings-response-queue',
    '--graph_embeddings-response-queue',
    default=graph_embeddings_response_queue,
    help=f'Graph embeddings response queue (default: {graph_embeddings_response_queue})',
)
parser.add_argument(
'--triples-request-queue',
default=triples_request_queue,
help=f'Triples request queue (default: {triples_request_queue})',
)
parser.add_argument(
'--triples-response-queue',
default=triples_response_queue,
help=f'Triples response queue (default: {triples_response_queue})',
)
def run():

View file

@ -71,6 +71,24 @@ graph_embeddings_store_queue = topic('graph-embeddings-store')
############################################################################
# Graph embeddings query
# Request record for the graph embeddings query service.
class GraphEmbeddingsRequest(Record):
# One or more embedding vectors, each an array of doubles
vectors = Array(Array(Double()))
# Integer limit; presumably the maximum number of entities to return -
# TODO confirm against the query service
limit = Integer()
# Response record for the graph embeddings query service.
class GraphEmbeddingsResponse(Record):
# Entities returned by the query, as an array of Value records
entities = Array(Value())
# Non-persistent topic in the 'request' namespace for graph embeddings queries
graph_embeddings_request_queue = topic(
'graph-embeddings', kind='non-persistent', namespace='request'
)
# Non-persistent topic in the 'response' namespace for the matching replies
graph_embeddings_response_queue = topic(
'graph-embeddings-response', kind='non-persistent', namespace='response',
)
############################################################################
# Graph triples
class Triple(Record):
@ -83,6 +101,26 @@ triples_store_queue = topic('triples-store')
############################################################################
# Triples query
# Request record for the triples query service: a subject/predicate/object
# pattern plus a result limit.
class TriplesQueryRequest(Record):
# Subject, predicate and object terms of the pattern, each a Value record
s = Value()
p = Value()
o = Value()
# Integer limit; presumably the maximum number of triples to return -
# TODO confirm against the query service
limit = Integer()
# Response record for the triples query service.
class TriplesQueryResponse(Record):
# Matching triples, as an array of Triple records
triples = Array(Triple())
# Non-persistent topic in the 'request' namespace for triples queries
triples_request_queue = topic(
'triples', kind='non-persistent', namespace='request'
)
# Non-persistent topic in the 'response' namespace for the matching replies
triples_response_queue = topic(
'triples-response', kind='non-persistent', namespace='response',
)
############################################################################
# chunk_embeddings_store_queue = topic('chunk-embeddings-store')
############################################################################
@ -138,3 +176,47 @@ graph_rag_response_queue = topic(
############################################################################
# Prompt services, abstract the prompt generation
# A named definition; element type of PromptResponse.definitions.
class Definition(Record):
# Name of the thing being defined
name = String()
# Text of its definition
definition = String()
# A subject/predicate/object relationship with all three terms held as
# strings; element type of PromptResponse.relationships.
class Relationship(Record):
s = String()
p = String()
o = String()
# NOTE(review): presumably true when `o` names an entity rather than a
# literal value - confirm against the prompt service implementation
o_entity = Boolean()
# A subject/predicate/object fact with all three terms held as strings;
# element type of PromptRequest.kg.
class Fact(Record):
s = String()
p = String()
o = String()
# extract-definitions:
# chunk -> definitions
# extract-relationships:
# chunk -> relationships
# prompt-rag:
# query, triples -> answer
# Request record for the prompt service.  Per the notes above, `kind` selects
# the operation and determines which of the other fields are the inputs.
class PromptRequest(Record):
# Operation name: extract-definitions, extract-relationships or prompt-rag
kind = String()
# Input text for the two extract-* kinds
chunk = String()
# Input question for the prompt-rag kind
query = String()
# Facts supplied alongside the query for the prompt-rag kind
kg = Array(Fact())
# Response record for the prompt service.  Per the request-kind notes above,
# each kind produces one of these outputs.
class PromptResponse(Record):
# Output of prompt-rag
answer = String()
# Output of extract-definitions
definitions = Array(Definition())
# Output of extract-relationships
relationships = Array(Relationship())
# Non-persistent topic in the 'request' namespace for prompt requests
prompt_request_queue = topic(
'prompt', kind='non-persistent', namespace='request'
)
# Non-persistent topic in the 'response' namespace for the matching replies
prompt_response_queue = topic(
'prompt-response', kind='non-persistent', namespace='response'
)
############################################################################

View file

@ -6,7 +6,7 @@ Accepts entity/vector pairs and writes them to a Milvus store.
from .... schema import GraphEmbeddings
from .... schema import graph_embeddings_store_queue
from .... log_level import LogLevel
from .... triple_vectors import TripleVectors
from .... direct.milvus import TripleVectors
from .... base import Consumer
module = ".".join(__name__.split(".")[1:-1])
@ -51,8 +51,8 @@ class Processor(Consumer):
parser.add_argument(
'-t', '--store-uri',
default="http://milvus:19530",
help=f'Milvus store URI (default: http://milvus:19530)'
default=default_store_uri,
help=f'Milvus store URI (default: {default_store_uri})'
)
def run():

View file

@ -9,7 +9,7 @@ import os
import argparse
import time
from .... trustgraph import TrustGraph
from .... direct.cassandra import TrustGraph
from .... schema import Triple
from .... schema import triples_store_queue
from .... log_level import LogLevel
@ -34,6 +34,7 @@ class Processor(Consumer):
"input_queue": input_queue,
"subscriber": subscriber,
"input_schema": Triple,
"graph_host": graph_host,
}
)

View file

@ -0,0 +1,100 @@
#!/usr/bin/env python3
import pulsar
import _pulsar
from pulsar.schema import JsonSchema
import hashlib
import uuid
from . schema import TriplesQueryRequest, TriplesQueryResponse, Value
from . schema import triples_request_queue
from . schema import triples_response_queue
# Short aliases for the Pulsar logger levels.  Ugly: these reach into the
# underscore-prefixed _pulsar module to get at LoggerLevel.
ERROR=_pulsar.LoggerLevel.Error
WARN=_pulsar.LoggerLevel.Warn
INFO=_pulsar.LoggerLevel.Info
DEBUG=_pulsar.LoggerLevel.Debug
class TriplesQueryClient:
    """Synchronous Pulsar client for the triples query service.

    Publishes ``TriplesQueryRequest`` messages on the request queue and
    waits on the response queue for the ``TriplesQueryResponse`` that
    carries the same ``id`` message property.
    """

    def __init__(
            self, log_level=ERROR,
            subscriber=None,
            input_queue=None,
            output_queue=None,
            pulsar_host="pulsar://pulsar:6650",
    ):
        """Connect to Pulsar and set up the producer and consumer.

        :param log_level: Pulsar logger level for the console logger.
        :param subscriber: subscription name; a random UUID when omitted.
        :param input_queue: topic requests are sent to; defaults to
            ``triples_request_queue``.
        :param output_queue: topic responses are read from; defaults to
            ``triples_response_queue``.
        :param pulsar_host: Pulsar service URL.
        """

        if input_queue is None:
            input_queue = triples_request_queue

        if output_queue is None:
            output_queue = triples_response_queue

        if subscriber is None:
            subscriber = str(uuid.uuid4())

        self.client = pulsar.Client(
            pulsar_host,
            logger=pulsar.ConsoleLogger(log_level),
        )

        self.producer = self.client.create_producer(
            topic=input_queue,
            schema=JsonSchema(TriplesQueryRequest),
            chunking_enabled=True,
        )

        self.consumer = self.client.subscribe(
            output_queue, subscriber,
            schema=JsonSchema(TriplesQueryResponse),
        )

    def create_value(self, ent):
        """Wrap a term string in a schema ``Value``; ``None`` passes through.

        Strings starting with ``http://`` or ``https://`` are flagged as
        URIs, anything else as non-URI.
        """

        if ent is None:
            return None

        if ent.startswith("http://") or ent.startswith("https://"):
            return Value(value=ent, is_uri=True)

        return Value(value=ent, is_uri=False)

    def request(self, s, p, o, limit=10, timeout=500):
        """Send one triples query and return the triples from its response.

        :param s: subject string, or ``None`` to leave it unbound.
        :param p: predicate string, or ``None`` to leave it unbound.
        :param o: object string, or ``None`` to leave it unbound.
        :param limit: value for the request's ``limit`` field.
        :param timeout: seconds to wait in each receive call (converted to
            milliseconds for Pulsar).  The wait restarts after every
            message that belongs to a different request.
        :returns: the ``triples`` field of the matching response.

        Whatever the consumer raises when a receive times out propagates
        to the caller.
        """

        # Correlation ID (not called `id`, to avoid shadowing the builtin)
        request_id = str(uuid.uuid4())

        r = TriplesQueryRequest(
            s=self.create_value(s),
            p=self.create_value(p),
            o=self.create_value(o),
            limit=limit,
        )
        self.producer.send(r, properties={ "id": request_id })

        while True:

            msg = self.consumer.receive(timeout_millis=timeout * 1000)

            if msg.properties()["id"] == request_id:
                resp = msg.value().triples
                self.consumer.acknowledge(msg)
                return resp

            # Ignore messages with wrong ID
            self.consumer.acknowledge(msg)

    def __del__(self):
        """Release Pulsar resources.

        Every attribute is checked first because ``__init__`` may have
        failed part-way, leaving any of them unset, including ``client``.
        """

        if hasattr(self, "consumer"):
            self.consumer.close()

        if hasattr(self, "producer"):
            self.producer.flush()
            self.producer.close()

        if hasattr(self, "client"):
            self.client.close()