mirror of
https://github.com/trustgraph-ai/trustgraph.git
synced 2026-04-25 00:16:23 +02:00
Add a docker-compose for just the stores (#13)
* - Added docker-compose-storage.yaml, just the infrastructure bits - Tidied storage invocation * Util, sits on chunker output and reports histogram of chunk sizes
This commit is contained in:
parent
b0fdb4f314
commit
0e2db095e3
12 changed files with 391 additions and 121 deletions
6
Makefile
6
Makefile
|
|
@ -1,6 +1,6 @@
|
|||
|
||||
# VERSION=$(shell git describe | sed 's/^v//')
|
||||
VERSION=0.6.6
|
||||
VERSION=0.6.7
|
||||
|
||||
DOCKER=podman
|
||||
|
||||
|
|
@ -33,7 +33,9 @@ set-version:
|
|||
# sed -i 's/trustgraph-flow:[0-9]*\.[0-9]*\.[0-9]*/trustgraph-flow:'${VERSION}'/' docker-compose*.yaml
|
||||
echo '"${VERSION}"' > templates/components/version.jsonnet
|
||||
|
||||
TEMPLATES=azure bedrock claude cohere mix ollama openai vertexai openai-neo4j
|
||||
TEMPLATES=azure bedrock claude cohere mix ollama openai vertexai \
|
||||
openai-neo4j storage
|
||||
|
||||
DCS=$(foreach template,${TEMPLATES},${template:%=docker-compose-%.yaml})
|
||||
|
||||
update-templates: set-version ${DCS}
|
||||
|
|
|
|||
115
docker-compose-storage.yaml
Normal file
115
docker-compose-storage.yaml
Normal file
|
|
@ -0,0 +1,115 @@
|
|||
"services":
|
||||
"cassandra":
|
||||
"image": "docker.io/cassandra:4.1.5"
|
||||
"ports":
|
||||
- "9042:9042"
|
||||
"restart": "on-failure:100"
|
||||
"volumes":
|
||||
- "cassandra:/var/lib/cassandra"
|
||||
"etcd":
|
||||
"command":
|
||||
- "etcd"
|
||||
- "-advertise-client-urls=http://127.0.0.1:2379"
|
||||
- "-listen-client-urls"
|
||||
- "http://0.0.0.0:2379"
|
||||
- "--data-dir"
|
||||
- "/etcd"
|
||||
"environment":
|
||||
"ETCD_AUTO_COMPACTION_MODE": "revision"
|
||||
"ETCD_AUTO_COMPACTION_RETENTION": "1000"
|
||||
"ETCD_QUOTA_BACKEND_BYTES": "4294967296"
|
||||
"ETCD_SNAPSHOT_COUNT": "50000"
|
||||
"image": "quay.io/coreos/etcd:v3.5.5"
|
||||
"ports":
|
||||
- "2379:2379"
|
||||
"restart": "on-failure:100"
|
||||
"volumes":
|
||||
- "etcd:/etcd"
|
||||
"grafana":
|
||||
"environment":
|
||||
"GF_ORG_NAME": "trustgraph.ai"
|
||||
"image": "docker.io/grafana/grafana:10.0.0"
|
||||
"ports":
|
||||
- "3000:3000"
|
||||
"restart": "on-failure:100"
|
||||
"volumes":
|
||||
- "grafana-storage:/var/lib/grafana"
|
||||
- "./grafana/dashboard.yml:/etc/grafana/provisioning/dashboards/dashboard.yml"
|
||||
- "./grafana/datasource.yml:/etc/grafana/provisioning/datasources/datasource.yml"
|
||||
- "./grafana/dashboard.json:/var/lib/grafana/dashboards/dashboard.json"
|
||||
"init-pulsar":
|
||||
"command":
|
||||
- "sh"
|
||||
- "-c"
|
||||
- "pulsar-admin --admin-url http://pulsar:8080 tenants create tg && pulsar-admin --admin-url http://pulsar:8080 namespaces create tg/flow && pulsar-admin --admin-url http://pulsar:8080 namespaces create tg/request && pulsar-admin --admin-url http://pulsar:8080 namespaces create tg/response && pulsar-admin --admin-url http://pulsar:8080 namespaces set-retention --size -1 --time 3m tg/response"
|
||||
"depends_on":
|
||||
"pulsar":
|
||||
"condition": "service_started"
|
||||
"image": "docker.io/apachepulsar/pulsar:3.3.0"
|
||||
"restart": "on-failure:100"
|
||||
"milvus":
|
||||
"command":
|
||||
- "milvus"
|
||||
- "run"
|
||||
- "standalone"
|
||||
"environment":
|
||||
"ETCD_ENDPOINTS": "etcd:2379"
|
||||
"MINIO_ADDRESS": "minio:9000"
|
||||
"image": "docker.io/milvusdb/milvus:v2.4.5"
|
||||
"ports":
|
||||
- "9091:9091"
|
||||
- "19530:19530"
|
||||
"restart": "on-failure:100"
|
||||
"volumes":
|
||||
- "milvus:/var/lib/milvus"
|
||||
"minio":
|
||||
"command":
|
||||
- "minio"
|
||||
- "server"
|
||||
- "/minio_data"
|
||||
- "--console-address"
|
||||
- ":9001"
|
||||
"environment":
|
||||
"MINIO_ROOT_PASSWORD": "minioadmin"
|
||||
"MINIO_ROOT_USER": "minioadmin"
|
||||
"image": "docker.io/minio/minio:RELEASE.2024-07-04T14-25-45Z"
|
||||
"ports":
|
||||
- "9001:9001"
|
||||
"restart": "on-failure:100"
|
||||
"volumes":
|
||||
- "minio-data:/minio_data"
|
||||
"prometheus":
|
||||
"image": "docker.io/prom/prometheus:v2.53.1"
|
||||
"ports":
|
||||
- "9090:9090"
|
||||
"restart": "on-failure:100"
|
||||
"volumes":
|
||||
- "./prometheus:/etc/prometheus"
|
||||
- "prometheus-data:/prometheus"
|
||||
"pulsar":
|
||||
"command": "bin/pulsar standalone"
|
||||
"image": "docker.io/apachepulsar/pulsar:3.3.0"
|
||||
"ports":
|
||||
- "6650:6650"
|
||||
- "8080:8080"
|
||||
"restart": "on-failure:100"
|
||||
"volumes":
|
||||
- "pulsar-conf:/pulsar/conf"
|
||||
- "pulsar-data:/pulsar/data"
|
||||
"pulsar-manager":
|
||||
"environment":
|
||||
"SPRING_CONFIGURATION_FILE": "/pulsar-manager/pulsar-manager/application.properties"
|
||||
"image": "docker.io/apachepulsar/pulsar-manager:v0.3.0"
|
||||
"ports":
|
||||
- "9527:9527"
|
||||
- "7750:7750"
|
||||
"restart": "on-failure:100"
|
||||
"volumes":
|
||||
"cassandra": {}
|
||||
"etcd": {}
|
||||
"grafana-storage": {}
|
||||
"milvus": {}
|
||||
"minio-data": {}
|
||||
"prometheus-data": {}
|
||||
"pulsar-conf": {}
|
||||
"pulsar-data": {}
|
||||
2
setup.py
2
setup.py
|
|
@ -4,7 +4,7 @@ import os
|
|||
with open("README.md", "r") as fh:
|
||||
long_description = fh.read()
|
||||
|
||||
version = "0.6.6"
|
||||
version = "0.6.7"
|
||||
|
||||
setuptools.setup(
|
||||
name="trustgraph",
|
||||
|
|
|
|||
|
|
@ -2,21 +2,11 @@ local base = import "base.jsonnet";
|
|||
local images = import "images.jsonnet";
|
||||
local url = import "url.jsonnet";
|
||||
local cassandra_hosts = "cassandra";
|
||||
{
|
||||
volumes +: {
|
||||
cassandra: {},
|
||||
},
|
||||
services +: {
|
||||
local cassandra = import "stores/cassandra.jsonnet";
|
||||
|
||||
cassandra: base + {
|
||||
image: images.cassandra,
|
||||
ports: [
|
||||
"9042:9042"
|
||||
],
|
||||
volumes: [
|
||||
"cassandra:/var/lib/cassandra"
|
||||
],
|
||||
},
|
||||
cassandra + {
|
||||
|
||||
services +: {
|
||||
|
||||
"store-triples": base + {
|
||||
image: images.trustgraph,
|
||||
|
|
|
|||
|
|
@ -1,71 +1,36 @@
|
|||
local base = import "base.jsonnet";
|
||||
local images = import "images.jsonnet";
|
||||
{
|
||||
volumes +: {
|
||||
etcd: {},
|
||||
"minio-data": {},
|
||||
milvus: {},
|
||||
},
|
||||
local url = import "url.jsonnet";
|
||||
local milvus = import "stores/milvus.jsonnet";
|
||||
|
||||
milvus + {
|
||||
|
||||
services +: {
|
||||
etcd: base + {
|
||||
image: images.etcd,
|
||||
|
||||
"store-graph-embeddings": base + {
|
||||
image: images.trustgraph,
|
||||
command: [
|
||||
"etcd",
|
||||
"-advertise-client-urls=http://127.0.0.1:2379",
|
||||
"-listen-client-urls",
|
||||
"http://0.0.0.0:2379",
|
||||
"--data-dir",
|
||||
"/etcd",
|
||||
],
|
||||
environment: {
|
||||
ETCD_AUTO_COMPACTION_MODE: "revision",
|
||||
ETCD_AUTO_COMPACTION_RETENTION: "1000",
|
||||
ETCD_QUOTA_BACKEND_BYTES: "4294967296",
|
||||
ETCD_SNAPSHOT_COUNT: "50000"
|
||||
},
|
||||
ports: [
|
||||
"2379:2379",
|
||||
],
|
||||
volumes: [
|
||||
"etcd:/etcd"
|
||||
"ge-write-milvus",
|
||||
"-p",
|
||||
url.pulsar,
|
||||
"-t",
|
||||
url.milvus,
|
||||
],
|
||||
},
|
||||
minio: base + {
|
||||
image: images.minio,
|
||||
|
||||
"query-graph-embeddings": base + {
|
||||
image: images.trustgraph,
|
||||
command: [
|
||||
"minio",
|
||||
"server",
|
||||
"/minio_data",
|
||||
"--console-address",
|
||||
":9001",
|
||||
],
|
||||
environment: {
|
||||
MINIO_ROOT_USER: "minioadmin",
|
||||
MINIO_ROOT_PASSWORD: "minioadmin",
|
||||
},
|
||||
ports: [
|
||||
"9001:9001",
|
||||
],
|
||||
volumes: [
|
||||
"minio-data:/minio_data",
|
||||
],
|
||||
},
|
||||
milvus: base + {
|
||||
image: images.milvus,
|
||||
command: [
|
||||
"milvus", "run", "standalone"
|
||||
],
|
||||
environment: {
|
||||
ETCD_ENDPOINTS: "etcd:2379",
|
||||
MINIO_ADDRESS: "minio:9000",
|
||||
},
|
||||
ports: [
|
||||
"9091:9091",
|
||||
"19530:19530",
|
||||
],
|
||||
volumes: [
|
||||
"milvus:/var/lib/milvus"
|
||||
"ge-query-milvus",
|
||||
"-p",
|
||||
url.pulsar,
|
||||
"-t",
|
||||
url.milvus,
|
||||
],
|
||||
},
|
||||
},
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -1,31 +1,12 @@
|
|||
local base = import "base.jsonnet";
|
||||
local images = import "images.jsonnet";
|
||||
local url = import "url.jsonnet";
|
||||
{
|
||||
local neo4j = import "stores/neo4j.jsonnet";
|
||||
|
||||
volumes +: {
|
||||
neo4j: {},
|
||||
},
|
||||
neo4j + {
|
||||
|
||||
services +: {
|
||||
|
||||
neo4j: base + {
|
||||
image: images.neo4j,
|
||||
ports: [
|
||||
"7474:7474",
|
||||
"7687:7687",
|
||||
],
|
||||
environment: {
|
||||
NEO4J_AUTH: "neo4j/password",
|
||||
// NEO4J_server_bolt_listen__address: "0.0.0.0:7687",
|
||||
// NEO4J_server_default__listen__address: "0.0.0.0",
|
||||
// NEO4J_server_http_listen__address: "0.0.0.0:7474",
|
||||
},
|
||||
volumes: [
|
||||
"neo4j:/data"
|
||||
],
|
||||
},
|
||||
|
||||
"query-triples": base + {
|
||||
image: images.trustgraph,
|
||||
command: [
|
||||
|
|
|
|||
20
templates/components/stores/cassandra.jsonnet
Normal file
20
templates/components/stores/cassandra.jsonnet
Normal file
|
|
@ -0,0 +1,20 @@
|
|||
local base = import "../base.jsonnet";
|
||||
local images = import "../images.jsonnet";
|
||||
{
|
||||
volumes +: {
|
||||
cassandra: {},
|
||||
},
|
||||
services +: {
|
||||
|
||||
cassandra: base + {
|
||||
image: images.cassandra,
|
||||
ports: [
|
||||
"9042:9042"
|
||||
],
|
||||
volumes: [
|
||||
"cassandra:/var/lib/cassandra"
|
||||
],
|
||||
},
|
||||
|
||||
},
|
||||
}
|
||||
79
templates/components/stores/milvus.jsonnet
Normal file
79
templates/components/stores/milvus.jsonnet
Normal file
|
|
@ -0,0 +1,79 @@
|
|||
local base = import "../base.jsonnet";
|
||||
local images = import "../images.jsonnet";
|
||||
|
||||
{
|
||||
|
||||
volumes +: {
|
||||
etcd: {},
|
||||
"minio-data": {},
|
||||
milvus: {},
|
||||
},
|
||||
|
||||
services +: {
|
||||
|
||||
etcd: base + {
|
||||
image: images.etcd,
|
||||
command: [
|
||||
"etcd",
|
||||
"-advertise-client-urls=http://127.0.0.1:2379",
|
||||
"-listen-client-urls",
|
||||
"http://0.0.0.0:2379",
|
||||
"--data-dir",
|
||||
"/etcd",
|
||||
],
|
||||
environment: {
|
||||
ETCD_AUTO_COMPACTION_MODE: "revision",
|
||||
ETCD_AUTO_COMPACTION_RETENTION: "1000",
|
||||
ETCD_QUOTA_BACKEND_BYTES: "4294967296",
|
||||
ETCD_SNAPSHOT_COUNT: "50000"
|
||||
},
|
||||
ports: [
|
||||
"2379:2379",
|
||||
],
|
||||
volumes: [
|
||||
"etcd:/etcd"
|
||||
],
|
||||
},
|
||||
|
||||
minio: base + {
|
||||
image: images.minio,
|
||||
command: [
|
||||
"minio",
|
||||
"server",
|
||||
"/minio_data",
|
||||
"--console-address",
|
||||
":9001",
|
||||
],
|
||||
environment: {
|
||||
MINIO_ROOT_USER: "minioadmin",
|
||||
MINIO_ROOT_PASSWORD: "minioadmin",
|
||||
},
|
||||
ports: [
|
||||
"9001:9001",
|
||||
],
|
||||
volumes: [
|
||||
"minio-data:/minio_data",
|
||||
],
|
||||
},
|
||||
|
||||
milvus: base + {
|
||||
image: images.milvus,
|
||||
command: [
|
||||
"milvus", "run", "standalone"
|
||||
],
|
||||
environment: {
|
||||
ETCD_ENDPOINTS: "etcd:2379",
|
||||
MINIO_ADDRESS: "minio:9000",
|
||||
},
|
||||
ports: [
|
||||
"9091:9091",
|
||||
"19530:19530",
|
||||
],
|
||||
volumes: [
|
||||
"milvus:/var/lib/milvus"
|
||||
],
|
||||
},
|
||||
|
||||
},
|
||||
|
||||
}
|
||||
30
templates/components/stores/neo4j.jsonnet
Normal file
30
templates/components/stores/neo4j.jsonnet
Normal file
|
|
@ -0,0 +1,30 @@
|
|||
local base = import "../base.jsonnet";
|
||||
local images = import "../images.jsonnet";
|
||||
{
|
||||
|
||||
volumes +: {
|
||||
neo4j: {},
|
||||
},
|
||||
|
||||
services +: {
|
||||
|
||||
neo4j: base + {
|
||||
image: images.neo4j,
|
||||
ports: [
|
||||
"7474:7474",
|
||||
"7687:7687",
|
||||
],
|
||||
environment: {
|
||||
NEO4J_AUTH: "neo4j/password",
|
||||
// NEO4J_server_bolt_listen__address: "0.0.0.0:7687",
|
||||
// NEO4J_server_default__listen__address: "0.0.0.0",
|
||||
// NEO4J_server_http_listen__address: "0.0.0.0:7474",
|
||||
},
|
||||
volumes: [
|
||||
"neo4j:/data"
|
||||
],
|
||||
},
|
||||
|
||||
},
|
||||
|
||||
}
|
||||
|
|
@ -64,28 +64,6 @@ local url = import "url.jsonnet";
|
|||
],
|
||||
},
|
||||
|
||||
"store-graph-embeddings": base + {
|
||||
image: images.trustgraph,
|
||||
command: [
|
||||
"ge-write-milvus",
|
||||
"-p",
|
||||
url.pulsar,
|
||||
"-t",
|
||||
url.milvus,
|
||||
],
|
||||
},
|
||||
|
||||
"query-graph-embeddings": base + {
|
||||
image: images.trustgraph,
|
||||
command: [
|
||||
"ge-query-milvus",
|
||||
"-p",
|
||||
url.pulsar,
|
||||
"-t",
|
||||
url.milvus,
|
||||
],
|
||||
},
|
||||
|
||||
"graph-rag": base + {
|
||||
image: images.trustgraph,
|
||||
command: [
|
||||
|
|
|
|||
10
templates/docker-compose-storage.jsonnet
Normal file
10
templates/docker-compose-storage.jsonnet
Normal file
|
|
@ -0,0 +1,10 @@
|
|||
|
||||
local cassandra = import "components/stores/cassandra.jsonnet";
|
||||
local pulsar = import "components/pulsar.jsonnet";
|
||||
local milvus = import "components/stores/milvus.jsonnet";
|
||||
local grafana = import "components/grafana.jsonnet";
|
||||
|
||||
local config = cassandra + pulsar + milvus + grafana;
|
||||
|
||||
std.manifestYamlDoc(config)
|
||||
|
||||
100
tests/report-chunk-sizes
Executable file
100
tests/report-chunk-sizes
Executable file
|
|
@ -0,0 +1,100 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
"""
|
||||
Consumes chunks from the chunker output queue and reports a histogram of
chunk sizes.
|
||||
"""
|
||||
|
||||
from trustgraph.schema import Chunk
|
||||
from trustgraph.schema import chunk_ingest_queue
|
||||
from trustgraph.log_level import LogLevel
|
||||
from trustgraph.base import Consumer
|
||||
from threading import Thread, Lock
|
||||
import time
|
||||
|
||||
# Name of this tool; also used below as the default subscriber name and
# passed to Processor.start() by run().
module = "test-chunk-size"
|
||||
|
||||
# Queue read when no "input_queue" parameter is supplied.
default_input_queue = chunk_ingest_queue
|
||||
# Subscriber name used when no "subscriber" parameter is supplied.
default_subscriber = module
|
||||
# NOTE(review): not referenced anywhere else in this script; looks like a
# leftover from the Milvus writer this file was copied from -- confirm and
# remove.
default_store_uri = 'http://localhost:19530'
|
||||
|
||||
class Processor(Consumer):
    """
    Consumer that tallies the size of every incoming chunk into fixed-width
    buckets and prints the resulting histogram once a second.
    """

    def __init__(self, **params):
        """
        Configure the consumer and start the background reporting thread.

        Optional params: "input_queue" (defaults to default_input_queue),
        "subscriber" (defaults to default_subscriber) and "width", the
        histogram bucket width in characters (defaults to 200).  All params
        are forwarded to Consumer, with "input_schema" set to Chunk.

        Raises ValueError if width is not positive, since a zero width
        would otherwise only surface later as a ZeroDivisionError in
        handle().
        """

        input_queue = params.get("input_queue", default_input_queue)
        subscriber = params.get("subscriber", default_subscriber)
        width = params.get("width", 200)

        if width <= 0:
            raise ValueError(f"width must be a positive integer, got {width}")

        super().__init__(
            **params | {
                "input_queue": input_queue,
                "subscriber": subscriber,
                "input_schema": Chunk,
            }
        )

        # Maps (low, high) bucket bounds to the number of chunks seen whose
        # length falls in that bucket.
        self.sizes = {}
        self.width = width

        # Guards self.sizes, which is written by handle() and read by the
        # reporting thread.
        self.lock = Lock()

        # Daemon thread: the reporter loops forever, so it must not keep
        # the process alive once the main thread has finished.
        Thread(target=self.report, daemon=True).start()

    def report(self):
        """
        Print the histogram and the total chunk count once a second, forever.

        Every recorded bucket is printed in ascending order, so chunks of
        any size are included in the output and in the total.
        """

        while True:

            time.sleep(1)

            # Take a snapshot under the lock and print outside it, so that
            # terminal output never blocks handle().
            with self.lock:
                snapshot = sorted(self.sizes.items())

            print()

            total = 0
            for (low, high), count in snapshot:
                print(f"{low:5d} ..{high:5d}: {count}")
                total += count

            print(f"{'Total':13s}: {total}")

    def handle(self, msg):
        """
        Record the length of one chunk message in its histogram bucket.

        The message value's `chunk` field is decoded as UTF-8 and measured
        in characters (not bytes).
        """

        v = msg.value()
        length = len(v.chunk.decode("utf-8"))

        # Lower bound of the bucket: length rounded down to a multiple of
        # the bucket width.
        low = (length // self.width) * self.width
        key = (low, low + self.width)

        with self.lock:
            self.sizes[key] = self.sizes.get(key, 0) + 1

    @staticmethod
    def add_args(parser):
        """
        Register the standard Consumer arguments plus --width on parser.
        """

        Consumer.add_args(
            parser, default_input_queue, default_subscriber,
        )

        parser.add_argument(
            '--width',
            type=int,
            default=200,
            help='Histogram width (default: 200)',
        )
|
||||
|
||||
def run():
    """Start the chunk-size reporter, using the module docstring as its description."""

    Processor.start(module, __doc__)


# Only launch when executed as a script, so that loading this file as a
# module does not start a consumer as a side effect.
if __name__ == "__main__":
    run()
|
||||
|
||||
Loading…
Add table
Add a link
Reference in a new issue