This commit is contained in:
elpresidank 2026-04-05 22:44:45 -05:00
parent c386f68743
commit b6536eca38
100 changed files with 17680 additions and 377 deletions

10
ts/deploy/.env.example Normal file
View file

@ -0,0 +1,10 @@
# LLM API keys. The (currently commented-out) gateway and text-completion
# service definitions read these as ${OPENAI_TOKEN} and ${CLAUDE_KEY}.
OPENAI_TOKEN=
CLAUDE_KEY=
# Gateway. GATEWAY_SECRET is passed through to the gateway container;
# GATEWAY_PORT is the host port published for it (compose falls back to 8088
# when it is unset).
GATEWAY_SECRET=
GATEWAY_PORT=8088
# Grafana admin password. "admin" is also the fallback used by the compose
# file, so change it before exposing port 3000 beyond a local machine.
GF_SECURITY_ADMIN_PASSWORD=admin

View file

@ -0,0 +1,52 @@
# TrustGraph TypeScript — Dev Overrides
# Usage: docker compose -f docker-compose.yml -f docker-compose.dev.yml up -d
#
# Meant to be layered on top of the base compose file; each service below only
# lists the settings that differ for local development.
services:
  # Grafana: dashboards directory mounted writable, anonymous visitors get the
  # Admin role and the login form is switched off.
  grafana:
    volumes:
      - "./grafana/provisioning/datasources.yml:/etc/grafana/provisioning/datasources/datasources.yml:ro"
      - "./grafana/provisioning/dashboards.yml:/etc/grafana/provisioning/dashboards/dashboards.yml:ro"
      - "./grafana/dashboards:/var/lib/grafana/dashboards"
    environment:
      GF_SECURITY_ADMIN_PASSWORD: "admin"
      GF_AUTH_ANONYMOUS_ENABLED: "true"
      GF_AUTH_ANONYMOUS_ORG_ROLE: "Admin"
      GF_AUTH_DISABLE_LOGIN_FORM: "true"
      GF_USERS_DEFAULT_THEME: "dark"
      GF_EXPLORE_ENABLED: "true"
      GF_FEATURE_TOGGLES_ENABLE: "traceqlEditor tempoSearch tempoServiceGraph"

  # Prometheus: config file mounted without :ro.
  # NOTE(review): nothing here makes Prometheus re-read the file on its own —
  # the base command does not pass --web.enable-lifecycle, so a SIGHUP or a
  # container restart is presumably still needed after editing.
  prometheus:
    volumes:
      - "./prometheus/prometheus.yml:/etc/prometheus/prometheus.yml"
      - "prometheus-data:/prometheus"

  # Loki: config file mounted without :ro (restart the container to apply edits).
  loki:
    volumes:
      - "./loki/loki-config.yml:/etc/loki/local-config.yaml"
      - "loki-data:/tmp/loki"

  # NATS CLI toolbox for debugging. Only started with the `debug` profile, and
  # only once the nats service reports healthy.
  nats-cli:
    image: "natsio/nats-box:latest"
    profiles: [debug]
    depends_on:
      nats:
        condition: service_healthy
    networks: [trustgraph]
    environment:
      NATS_URL: "nats://nats:4222"
    # Print a usage hint, then idle so `docker compose exec` has a target.
    entrypoint:
      - "/bin/sh"
      - "-c"
      - "echo 'NATS Box ready. Use: docker compose exec nats-cli nats ...' && sleep infinity"

volumes:
  prometheus-data: {}
  loki-data: {}

networks:
  trustgraph:
    driver: bridge

View file

@ -0,0 +1,276 @@
# TrustGraph TypeScript — Full Stack
# Usage: docker compose up -d
# Observability UI: http://localhost:3000 (Grafana)

# Single bridge network shared by every service in this file.
networks:
  trustgraph:
    driver: bridge

# Named volumes, one per stateful service, all with default driver settings.
volumes:
  nats-data: {}
  falkordb-data: {}
  qdrant-data: {}
  ollama-models: {}
  prometheus-data: {}
  loki-data: {}
  tempo-data: {}
  grafana-data: {}
services:
  # ---------------------------------------------------------------------------
  # Infrastructure
  # ---------------------------------------------------------------------------

  # NATS with JetStream enabled; stream data is stored on the nats-data volume
  # and the HTTP monitoring endpoint listens on 8222.
  nats:
    image: nats:2.10-alpine
    command: ["--jetstream", "--http_port", "8222", "--store_dir", "/data"]
    ports:
      - "4222:4222"  # Client connections
      - "8222:8222"  # Monitoring / metrics
    volumes:
      - nats-data:/data
    networks:
      - trustgraph
    healthcheck:
      test: ["CMD", "wget", "--spider", "-q", "http://localhost:8222/healthz"]
      interval: 10s
      timeout: 5s
      retries: 3
      start_period: 5s
    restart: unless-stopped

  # FalkorDB graph store (speaks the Redis protocol, hence redis-cli below).
  # NOTE(review): `latest` is unpinned here and on several services below;
  # consider pinning tags for reproducible deployments.
  falkordb:
    image: falkordb/falkordb:latest
    ports:
      - "6379:6379"
    volumes:
      - falkordb-data:/data
    networks:
      - trustgraph
    healthcheck:
      test: ["CMD", "redis-cli", "ping"]
      interval: 10s
      timeout: 5s
      retries: 3
      start_period: 5s
    restart: unless-stopped

  # Qdrant vector store.
  qdrant:
    image: qdrant/qdrant:latest
    ports:
      - "6333:6333"  # REST API
      - "6334:6334"  # gRPC
    volumes:
      - qdrant-data:/qdrant/storage
    networks:
      - trustgraph
    healthcheck:
      # The qdrant image ships neither wget nor curl, so a wget-based probe
      # always fails and the service never turns healthy. Issue the /healthz
      # request through bash's /dev/tcp instead and require a 200 status line.
      test:
        - CMD
        - bash
        - "-c"
        - |
          exec 3<>/dev/tcp/127.0.0.1/6333 &&
          printf 'GET /healthz HTTP/1.1\r\nHost: localhost\r\nConnection: close\r\n\r\n' >&3 &&
          grep -q 'HTTP/1.1 200' <&3
      interval: 10s
      timeout: 5s
      retries: 3
      start_period: 5s
    restart: unless-stopped

  # Ollama model server; models are stored on the ollama-models volume.
  # No healthcheck is defined for this service.
  ollama:
    image: ollama/ollama:latest
    ports:
      - "11434:11434"
    volumes:
      - ollama-models:/root/.ollama
    networks:
      - trustgraph
    restart: unless-stopped

  # ---------------------------------------------------------------------------
  # Observability
  # ---------------------------------------------------------------------------

  # Prometheus with 7-day retention. The remote-write receiver is enabled so
  # Tempo's metrics generator can push to /api/v1/write; exemplar storage is
  # enabled as well.
  prometheus:
    image: prom/prometheus:latest
    ports:
      - "9090:9090"
    volumes:
      - ./prometheus/prometheus.yml:/etc/prometheus/prometheus.yml:ro
      - prometheus-data:/prometheus
    command:
      - "--config.file=/etc/prometheus/prometheus.yml"
      - "--storage.tsdb.path=/prometheus"
      - "--storage.tsdb.retention.time=7d"
      - "--web.enable-remote-write-receiver"
      - "--enable-feature=exemplar-storage"
    networks:
      - trustgraph
    healthcheck:
      test: ["CMD", "wget", "--spider", "-q", "http://localhost:9090/-/healthy"]
      interval: 10s
      timeout: 5s
      retries: 3
      start_period: 10s
    restart: unless-stopped

  # Loki log store, configured from ./loki/loki-config.yml.
  loki:
    image: grafana/loki:3.0.0
    ports:
      - "3100:3100"
    volumes:
      - ./loki/loki-config.yml:/etc/loki/local-config.yaml:ro
      - loki-data:/tmp/loki
    command: ["-config.file=/etc/loki/local-config.yaml"]
    networks:
      - trustgraph
    healthcheck:
      test: ["CMD", "wget", "--spider", "-q", "http://localhost:3100/ready"]
      interval: 10s
      timeout: 5s
      retries: 5
      start_period: 15s
    restart: unless-stopped

  # Tempo trace store. Only the query API port is published; OTLP ingestion
  # from the collector stays on the internal network.
  tempo:
    image: grafana/tempo:latest
    ports:
      - "3200:3200"  # Tempo API
    volumes:
      - ./tempo/tempo-config.yml:/etc/tempo/config.yml:ro
      - tempo-data:/tmp/tempo
    command: ["-config.file=/etc/tempo/config.yml"]
    networks:
      - trustgraph
    healthcheck:
      # NOTE(review): assumes wget exists in whatever image `latest` resolves
      # to — confirm, since otel-collector and grafana wait on this check.
      test: ["CMD", "wget", "--spider", "-q", "http://localhost:3200/ready"]
      interval: 10s
      timeout: 5s
      retries: 5
      start_period: 15s
    restart: unless-stopped

  # OpenTelemetry Collector: receives OTLP from the apps and fans out to Tempo
  # (traces) and a Prometheus scrape endpoint (metrics). Starts only after
  # Tempo is healthy.
  otel-collector:
    image: otel/opentelemetry-collector-contrib:latest
    ports:
      - "4317:4317"  # OTLP gRPC (apps send traces/metrics here)
      - "4318:4318"  # OTLP HTTP
      - "8889:8889"  # Prometheus exporter (scraped by Prometheus)
    volumes:
      - ./otel-collector/config.yml:/etc/otelcol-contrib/config.yaml:ro
    depends_on:
      tempo:
        condition: service_healthy
    networks:
      - trustgraph
    restart: unless-stopped

  # Grafana with provisioned datasources and dashboards. Anonymous visitors get
  # the Viewer role; the admin password comes from the environment and falls
  # back to "admin".
  grafana:
    image: grafana/grafana:latest
    ports:
      - "3000:3000"
    volumes:
      - ./grafana/provisioning/datasources.yml:/etc/grafana/provisioning/datasources/datasources.yml:ro
      - ./grafana/provisioning/dashboards.yml:/etc/grafana/provisioning/dashboards/dashboards.yml:ro
      - ./grafana/dashboards:/var/lib/grafana/dashboards:ro
      - grafana-data:/var/lib/grafana
    environment:
      - GF_SECURITY_ADMIN_PASSWORD=${GF_SECURITY_ADMIN_PASSWORD:-admin}
      - GF_AUTH_ANONYMOUS_ENABLED=true
      - GF_AUTH_ANONYMOUS_ORG_ROLE=Viewer
      - GF_AUTH_DISABLE_LOGIN_FORM=false
      - GF_USERS_DEFAULT_THEME=dark
      - GF_EXPLORE_ENABLED=true
      - GF_FEATURE_TOGGLES_ENABLE=traceqlEditor tempoSearch tempoServiceGraph
    depends_on:
      prometheus:
        condition: service_healthy
      loki:
        condition: service_healthy
      tempo:
        condition: service_healthy
    networks:
      - trustgraph
    healthcheck:
      test: ["CMD", "wget", "--spider", "-q", "http://localhost:3000/api/health"]
      interval: 10s
      timeout: 5s
      retries: 3
      start_period: 15s
    restart: unless-stopped

  # ---------------------------------------------------------------------------
  # TrustGraph Services (placeholders — will be filled in later)
  # ---------------------------------------------------------------------------
  #
  # gateway:
  #   build:
  #     context: ../
  #     dockerfile: packages/base/Dockerfile
  #     target: gateway
  #   ports:
  #     - "${GATEWAY_PORT:-8088}:8000"
  #   environment:
  #     - NATS_URL=nats://nats:4222
  #     - FALKORDB_URL=redis://falkordb:6379
  #     - QDRANT_URL=http://qdrant:6333
  #     - OPENAI_TOKEN=${OPENAI_TOKEN}
  #     - CLAUDE_KEY=${CLAUDE_KEY}
  #     - GATEWAY_SECRET=${GATEWAY_SECRET}
  #     - OTEL_EXPORTER_OTLP_ENDPOINT=http://otel-collector:4317
  #     - OTEL_SERVICE_NAME=gateway
  #   depends_on:
  #     nats:
  #       condition: service_healthy
  #     falkordb:
  #       condition: service_healthy
  #     qdrant:
  #       condition: service_healthy
  #   networks:
  #     - trustgraph
  #
  # text-completion:
  #   build:
  #     context: ../
  #     dockerfile: packages/base/Dockerfile
  #     target: text-completion
  #   environment:
  #     - NATS_URL=nats://nats:4222
  #     - OPENAI_TOKEN=${OPENAI_TOKEN}
  #     - CLAUDE_KEY=${CLAUDE_KEY}
  #     - OTEL_EXPORTER_OTLP_ENDPOINT=http://otel-collector:4317
  #     - OTEL_SERVICE_NAME=text-completion
  #   depends_on:
  #     nats:
  #       condition: service_healthy
  #   networks:
  #     - trustgraph
  #
  # graph-rag:
  #   build:
  #     context: ../
  #     dockerfile: packages/base/Dockerfile
  #     target: graph-rag
  #   environment:
  #     - NATS_URL=nats://nats:4222
  #     - FALKORDB_URL=redis://falkordb:6379
  #     - QDRANT_URL=http://qdrant:6333
  #     - OTEL_EXPORTER_OTLP_ENDPOINT=http://otel-collector:4317
  #     - OTEL_SERVICE_NAME=graph-rag
  #   depends_on:
  #     nats:
  #       condition: service_healthy
  #     falkordb:
  #       condition: service_healthy
  #     qdrant:
  #       condition: service_healthy
  #   networks:
  #     - trustgraph
  #
  # workbench:
  #   build:
  #     context: ../
  #     dockerfile: packages/workbench/Dockerfile
  #   ports:
  #     - "3001:3000"
  #   environment:
  #     - GATEWAY_URL=http://gateway:8000
  #   depends_on:
  #     - gateway
  #   networks:
  #     - trustgraph

View file

@ -0,0 +1,317 @@
{
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": {
"type": "grafana",
"uid": "-- Grafana --"
},
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"type": "dashboard"
}
]
},
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 1,
"id": null,
"links": [],
"panels": [
{
"title": "LLM Request Latency by Provider",
"type": "timeseries",
"datasource": {
"type": "prometheus",
"uid": "tg-prometheus"
},
"gridPos": { "h": 8, "w": 12, "x": 0, "y": 0 },
"id": 1,
"targets": [
{
"expr": "histogram_quantile(0.50, sum(rate(tg_consumer_request_duration_seconds_bucket{job=~\".*text-completion.*\"}[5m])) by (le, job))",
"legendFormat": "{{job}} p50",
"refId": "A"
},
{
"expr": "histogram_quantile(0.95, sum(rate(tg_consumer_request_duration_seconds_bucket{job=~\".*text-completion.*\"}[5m])) by (le, job))",
"legendFormat": "{{job}} p95",
"refId": "B"
},
{
"expr": "histogram_quantile(0.99, sum(rate(tg_consumer_request_duration_seconds_bucket{job=~\".*text-completion.*\"}[5m])) by (le, job))",
"legendFormat": "{{job}} p99",
"refId": "C"
}
],
"fieldConfig": {
"defaults": {
"color": { "mode": "palette-classic" },
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "latency",
"axisPlacement": "auto",
"drawStyle": "line",
"fillOpacity": 5,
"gradientMode": "scheme",
"lineInterpolation": "smooth",
"lineWidth": 2,
"pointSize": 5,
"scaleDistribution": { "type": "linear" },
"showPoints": "never",
"spanNulls": false,
"stacking": { "group": "A", "mode": "none" },
"thresholdsStyle": { "mode": "off" }
},
"unit": "s",
"thresholds": {
"mode": "absolute",
"steps": [
{ "color": "green", "value": null },
{ "color": "yellow", "value": 5 },
{ "color": "red", "value": 30 }
]
}
},
"overrides": []
},
"options": {
"legend": { "calcs": ["mean", "max"], "displayMode": "table", "placement": "bottom", "showLegend": true },
"tooltip": { "mode": "multi", "sort": "desc" }
}
},
{
"title": "Token Usage (Input vs Output)",
"type": "timeseries",
"datasource": {
"type": "prometheus",
"uid": "tg-prometheus"
},
"gridPos": { "h": 8, "w": 12, "x": 12, "y": 0 },
"id": 2,
"targets": [
{
"expr": "sum(rate(tg_llm_input_tokens_total[5m])) by (job)",
"legendFormat": "{{job}} input tokens/s",
"refId": "A"
},
{
"expr": "sum(rate(tg_llm_output_tokens_total[5m])) by (job)",
"legendFormat": "{{job}} output tokens/s",
"refId": "B"
}
],
"fieldConfig": {
"defaults": {
"color": { "mode": "palette-classic" },
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "tokens/s",
"axisPlacement": "auto",
"drawStyle": "line",
"fillOpacity": 20,
"gradientMode": "none",
"lineInterpolation": "smooth",
"lineWidth": 2,
"pointSize": 5,
"scaleDistribution": { "type": "linear" },
"showPoints": "never",
"spanNulls": false,
"stacking": { "group": "A", "mode": "normal" },
"thresholdsStyle": { "mode": "off" }
},
"unit": "short",
"thresholds": {
"mode": "absolute",
"steps": [
{ "color": "green", "value": null }
]
}
},
"overrides": [
{
"matcher": { "id": "byRegexp", "options": ".*input.*" },
"properties": [
{ "id": "color", "value": { "fixedColor": "blue", "mode": "fixed" } }
]
},
{
"matcher": { "id": "byRegexp", "options": ".*output.*" },
"properties": [
{ "id": "color", "value": { "fixedColor": "orange", "mode": "fixed" } }
]
}
]
},
"options": {
"legend": { "calcs": ["mean", "sum"], "displayMode": "table", "placement": "bottom", "showLegend": true },
"tooltip": { "mode": "multi", "sort": "desc" }
}
},
{
"title": "Rate Limit Events",
"type": "timeseries",
"datasource": {
"type": "prometheus",
"uid": "tg-prometheus"
},
"gridPos": { "h": 8, "w": 12, "x": 0, "y": 8 },
"id": 3,
"targets": [
{
"expr": "sum(rate(tg_consumer_rate_limit_total[5m])) by (job)",
"legendFormat": "{{job}} rate limits/s",
"refId": "A"
},
{
"expr": "sum(increase(tg_consumer_rate_limit_total[1h])) by (job)",
"legendFormat": "{{job}} total (1h)",
"refId": "B"
}
],
"fieldConfig": {
"defaults": {
"color": { "mode": "palette-classic" },
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"drawStyle": "bars",
"fillOpacity": 50,
"gradientMode": "none",
"lineInterpolation": "smooth",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": { "type": "linear" },
"showPoints": "never",
"spanNulls": false,
"stacking": { "group": "A", "mode": "none" },
"thresholdsStyle": { "mode": "line" }
},
"unit": "short",
"thresholds": {
"mode": "absolute",
"steps": [
{ "color": "green", "value": null },
{ "color": "red", "value": 1 }
]
}
},
"overrides": [
{
"matcher": { "id": "byRegexp", "options": ".*total.*" },
"properties": [
{ "id": "custom.drawStyle", "value": "line" },
{ "id": "custom.axisPlacement", "value": "right" },
{ "id": "custom.fillOpacity", "value": 0 },
{ "id": "custom.lineWidth", "value": 2 }
]
}
]
},
"options": {
"legend": { "calcs": ["sum", "max"], "displayMode": "table", "placement": "bottom", "showLegend": true },
"tooltip": { "mode": "multi", "sort": "desc" }
}
},
{
"title": "Streaming Chunk Latency",
"type": "timeseries",
"datasource": {
"type": "prometheus",
"uid": "tg-prometheus"
},
"gridPos": { "h": 8, "w": 12, "x": 12, "y": 8 },
"id": 4,
"targets": [
{
"expr": "histogram_quantile(0.50, sum(rate(tg_llm_stream_chunk_duration_seconds_bucket[5m])) by (le, job))",
"legendFormat": "{{job}} chunk p50",
"refId": "A"
},
{
"expr": "histogram_quantile(0.95, sum(rate(tg_llm_stream_chunk_duration_seconds_bucket[5m])) by (le, job))",
"legendFormat": "{{job}} chunk p95",
"refId": "B"
},
{
"expr": "histogram_quantile(0.50, sum(rate(tg_llm_time_to_first_token_seconds_bucket[5m])) by (le, job))",
"legendFormat": "{{job}} TTFT p50",
"refId": "C"
},
{
"expr": "histogram_quantile(0.95, sum(rate(tg_llm_time_to_first_token_seconds_bucket[5m])) by (le, job))",
"legendFormat": "{{job}} TTFT p95",
"refId": "D"
}
],
"fieldConfig": {
"defaults": {
"color": { "mode": "palette-classic" },
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "latency",
"axisPlacement": "auto",
"drawStyle": "line",
"fillOpacity": 5,
"gradientMode": "none",
"lineInterpolation": "smooth",
"lineWidth": 2,
"pointSize": 5,
"scaleDistribution": { "type": "linear" },
"showPoints": "never",
"spanNulls": false,
"stacking": { "group": "A", "mode": "none" },
"thresholdsStyle": { "mode": "off" }
},
"unit": "s",
"thresholds": {
"mode": "absolute",
"steps": [
{ "color": "green", "value": null },
{ "color": "yellow", "value": 0.5 },
{ "color": "red", "value": 2 }
]
}
},
"overrides": [
{
"matcher": { "id": "byRegexp", "options": ".*TTFT.*" },
"properties": [
{ "id": "custom.lineStyle", "value": { "fill": "dash", "dash": [10, 10] } }
]
}
]
},
"options": {
"legend": { "calcs": ["mean", "max"], "displayMode": "table", "placement": "bottom", "showLegend": true },
"tooltip": { "mode": "multi", "sort": "desc" }
}
}
],
"schemaVersion": 39,
"tags": ["trustgraph", "llm"],
"templating": {
"list": []
},
"time": {
"from": "now-1h",
"to": "now"
},
"timepicker": {},
"timezone": "browser",
"title": "TrustGraph - LLM Performance",
"uid": "tg-llm-metrics",
"version": 1
}

View file

@ -0,0 +1,275 @@
{
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": {
"type": "grafana",
"uid": "-- Grafana --"
},
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"type": "dashboard"
}
]
},
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 1,
"id": null,
"links": [],
"panels": [
{
"title": "Service Health",
"type": "stat",
"datasource": {
"type": "prometheus",
"uid": "tg-prometheus"
},
"gridPos": { "h": 6, "w": 24, "x": 0, "y": 0 },
"id": 1,
"targets": [
{
"expr": "up",
"legendFormat": "{{job}}",
"refId": "A"
}
],
"fieldConfig": {
"defaults": {
"mappings": [
{ "type": "value", "options": { "0": { "text": "DOWN", "color": "red" } } },
{ "type": "value", "options": { "1": { "text": "UP", "color": "green" } } }
],
"thresholds": {
"mode": "absolute",
"steps": [
{ "color": "red", "value": null },
{ "color": "green", "value": 1 }
]
},
"color": { "mode": "thresholds" }
},
"overrides": []
},
"options": {
"reduceOptions": {
"values": false,
"calcs": ["lastNotNull"],
"fields": ""
},
"orientation": "auto",
"textMode": "auto",
"colorMode": "background",
"graphMode": "none",
"justifyMode": "auto"
}
},
{
"title": "NATS Message Throughput",
"type": "timeseries",
"datasource": {
"type": "prometheus",
"uid": "tg-prometheus"
},
"gridPos": { "h": 8, "w": 12, "x": 0, "y": 6 },
"id": 2,
"targets": [
{
"expr": "rate(tg_producer_items_total[5m])",
"legendFormat": "{{job}} produced",
"refId": "A"
},
{
"expr": "rate(tg_consumer_processing_total[5m])",
"legendFormat": "{{job}} consumed ({{status}})",
"refId": "B"
}
],
"fieldConfig": {
"defaults": {
"color": { "mode": "palette-classic" },
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "msg/s",
"axisPlacement": "auto",
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "none",
"lineInterpolation": "smooth",
"lineWidth": 2,
"pointSize": 5,
"scaleDistribution": { "type": "linear" },
"showPoints": "never",
"spanNulls": false,
"stacking": { "group": "A", "mode": "none" },
"thresholdsStyle": { "mode": "off" }
},
"unit": "ops",
"thresholds": {
"mode": "absolute",
"steps": [
{ "color": "green", "value": null }
]
}
},
"overrides": []
},
"options": {
"legend": { "calcs": ["mean", "max"], "displayMode": "table", "placement": "bottom", "showLegend": true },
"tooltip": { "mode": "multi", "sort": "desc" }
}
},
{
"title": "Request Latency (p50 / p95 / p99)",
"type": "timeseries",
"datasource": {
"type": "prometheus",
"uid": "tg-prometheus"
},
"gridPos": { "h": 8, "w": 12, "x": 12, "y": 6 },
"id": 3,
"targets": [
{
"expr": "histogram_quantile(0.50, sum(rate(tg_consumer_request_duration_seconds_bucket[5m])) by (le))",
"legendFormat": "p50",
"refId": "A"
},
{
"expr": "histogram_quantile(0.95, sum(rate(tg_consumer_request_duration_seconds_bucket[5m])) by (le))",
"legendFormat": "p95",
"refId": "B"
},
{
"expr": "histogram_quantile(0.99, sum(rate(tg_consumer_request_duration_seconds_bucket[5m])) by (le))",
"legendFormat": "p99",
"refId": "C"
}
],
"fieldConfig": {
"defaults": {
"color": { "mode": "palette-classic" },
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "latency",
"axisPlacement": "auto",
"drawStyle": "line",
"fillOpacity": 5,
"gradientMode": "scheme",
"lineInterpolation": "smooth",
"lineWidth": 2,
"pointSize": 5,
"scaleDistribution": { "type": "linear" },
"showPoints": "never",
"spanNulls": false,
"stacking": { "group": "A", "mode": "none" },
"thresholdsStyle": { "mode": "off" }
},
"unit": "s",
"thresholds": {
"mode": "absolute",
"steps": [
{ "color": "green", "value": null },
{ "color": "yellow", "value": 1 },
{ "color": "red", "value": 5 }
]
}
},
"overrides": []
},
"options": {
"legend": { "calcs": ["mean", "max"], "displayMode": "table", "placement": "bottom", "showLegend": true },
"tooltip": { "mode": "multi", "sort": "desc" }
}
},
{
"title": "Error Rate",
"type": "timeseries",
"datasource": {
"type": "prometheus",
"uid": "tg-prometheus"
},
"gridPos": { "h": 8, "w": 24, "x": 0, "y": 14 },
"id": 4,
"targets": [
{
"expr": "sum(rate(tg_consumer_processing_total{status=\"error\"}[5m])) by (job)",
"legendFormat": "{{job}} errors/s",
"refId": "A"
},
{
"expr": "sum(rate(tg_consumer_processing_total{status=\"error\"}[5m])) / sum(rate(tg_consumer_processing_total[5m]))",
"legendFormat": "overall error ratio",
"refId": "B"
}
],
"fieldConfig": {
"defaults": {
"color": { "mode": "palette-classic" },
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"drawStyle": "line",
"fillOpacity": 15,
"gradientMode": "none",
"lineInterpolation": "smooth",
"lineWidth": 2,
"pointSize": 5,
"scaleDistribution": { "type": "linear" },
"showPoints": "never",
"spanNulls": false,
"stacking": { "group": "A", "mode": "none" },
"thresholdsStyle": { "mode": "line+area" }
},
"unit": "ops",
"thresholds": {
"mode": "absolute",
"steps": [
{ "color": "transparent", "value": null },
{ "color": "red", "value": 0.05 }
]
}
},
"overrides": [
{
"matcher": { "id": "byName", "options": "overall error ratio" },
"properties": [
{ "id": "unit", "value": "percentunit" },
{ "id": "custom.axisPlacement", "value": "right" },
{ "id": "custom.drawStyle", "value": "line" },
{ "id": "custom.lineWidth", "value": 3 },
{ "id": "color", "value": { "fixedColor": "red", "mode": "fixed" } }
]
}
]
},
"options": {
"legend": { "calcs": ["mean", "max"], "displayMode": "table", "placement": "bottom", "showLegend": true },
"tooltip": { "mode": "multi", "sort": "desc" }
}
}
],
"schemaVersion": 39,
"tags": ["trustgraph", "overview"],
"templating": {
"list": []
},
"time": {
"from": "now-1h",
"to": "now"
},
"timepicker": {},
"timezone": "browser",
"title": "TrustGraph - Service Health",
"uid": "tg-overview",
"version": 1
}

View file

@ -0,0 +1,404 @@
{
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": {
"type": "grafana",
"uid": "-- Grafana --"
},
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"type": "dashboard"
}
]
},
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 1,
"id": null,
"links": [],
"panels": [
{
"title": "End-to-End RAG Query Latency",
"type": "timeseries",
"datasource": {
"type": "prometheus",
"uid": "tg-prometheus"
},
"gridPos": { "h": 8, "w": 24, "x": 0, "y": 0 },
"id": 1,
"targets": [
{
"expr": "histogram_quantile(0.50, sum(rate(tg_consumer_request_duration_seconds_bucket{job=~\"graph-rag|document-rag\"}[5m])) by (le, job))",
"legendFormat": "{{job}} p50",
"refId": "A"
},
{
"expr": "histogram_quantile(0.95, sum(rate(tg_consumer_request_duration_seconds_bucket{job=~\"graph-rag|document-rag\"}[5m])) by (le, job))",
"legendFormat": "{{job}} p95",
"refId": "B"
},
{
"expr": "histogram_quantile(0.99, sum(rate(tg_consumer_request_duration_seconds_bucket{job=~\"graph-rag|document-rag\"}[5m])) by (le, job))",
"legendFormat": "{{job}} p99",
"refId": "C"
}
],
"fieldConfig": {
"defaults": {
"color": { "mode": "palette-classic" },
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "latency",
"axisPlacement": "auto",
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "scheme",
"lineInterpolation": "smooth",
"lineWidth": 2,
"pointSize": 5,
"scaleDistribution": { "type": "linear" },
"showPoints": "never",
"spanNulls": false,
"stacking": { "group": "A", "mode": "none" },
"thresholdsStyle": { "mode": "line" }
},
"unit": "s",
"thresholds": {
"mode": "absolute",
"steps": [
{ "color": "green", "value": null },
{ "color": "yellow", "value": 5 },
{ "color": "red", "value": 15 }
]
}
},
"overrides": []
},
"options": {
"legend": { "calcs": ["mean", "max", "last"], "displayMode": "table", "placement": "bottom", "showLegend": true },
"tooltip": { "mode": "multi", "sort": "desc" }
}
},
{
"title": "Concept Extraction Time",
"type": "timeseries",
"datasource": {
"type": "prometheus",
"uid": "tg-prometheus"
},
"gridPos": { "h": 8, "w": 12, "x": 0, "y": 8 },
"id": 2,
"targets": [
{
"expr": "histogram_quantile(0.50, sum(rate(tg_consumer_request_duration_seconds_bucket{job=~\"kg-extract.*\"}[5m])) by (le, job))",
"legendFormat": "{{job}} p50",
"refId": "A"
},
{
"expr": "histogram_quantile(0.95, sum(rate(tg_consumer_request_duration_seconds_bucket{job=~\"kg-extract.*\"}[5m])) by (le, job))",
"legendFormat": "{{job}} p95",
"refId": "B"
}
],
"fieldConfig": {
"defaults": {
"color": { "mode": "palette-classic" },
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "latency",
"axisPlacement": "auto",
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "none",
"lineInterpolation": "smooth",
"lineWidth": 2,
"pointSize": 5,
"scaleDistribution": { "type": "linear" },
"showPoints": "never",
"spanNulls": false,
"stacking": { "group": "A", "mode": "none" },
"thresholdsStyle": { "mode": "off" }
},
"unit": "s",
"thresholds": {
"mode": "absolute",
"steps": [
{ "color": "green", "value": null },
{ "color": "yellow", "value": 3 },
{ "color": "red", "value": 10 }
]
}
},
"overrides": []
},
"options": {
"legend": { "calcs": ["mean", "max"], "displayMode": "table", "placement": "bottom", "showLegend": true },
"tooltip": { "mode": "multi", "sort": "desc" }
}
},
{
"title": "Embedding Generation Time",
"type": "timeseries",
"datasource": {
"type": "prometheus",
"uid": "tg-prometheus"
},
"gridPos": { "h": 8, "w": 12, "x": 12, "y": 8 },
"id": 3,
"targets": [
{
"expr": "histogram_quantile(0.50, sum(rate(tg_consumer_request_duration_seconds_bucket{job=~\"embeddings|document-embeddings|graph-embeddings\"}[5m])) by (le, job))",
"legendFormat": "{{job}} p50",
"refId": "A"
},
{
"expr": "histogram_quantile(0.95, sum(rate(tg_consumer_request_duration_seconds_bucket{job=~\"embeddings|document-embeddings|graph-embeddings\"}[5m])) by (le, job))",
"legendFormat": "{{job}} p95",
"refId": "B"
}
],
"fieldConfig": {
"defaults": {
"color": { "mode": "palette-classic" },
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "latency",
"axisPlacement": "auto",
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "none",
"lineInterpolation": "smooth",
"lineWidth": 2,
"pointSize": 5,
"scaleDistribution": { "type": "linear" },
"showPoints": "never",
"spanNulls": false,
"stacking": { "group": "A", "mode": "none" },
"thresholdsStyle": { "mode": "off" }
},
"unit": "s",
"thresholds": {
"mode": "absolute",
"steps": [
{ "color": "green", "value": null },
{ "color": "yellow", "value": 1 },
{ "color": "red", "value": 5 }
]
}
},
"overrides": []
},
"options": {
"legend": { "calcs": ["mean", "max"], "displayMode": "table", "placement": "bottom", "showLegend": true },
"tooltip": { "mode": "multi", "sort": "desc" }
}
},
{
"title": "Graph Traversal Time",
"type": "timeseries",
"datasource": {
"type": "prometheus",
"uid": "tg-prometheus"
},
"gridPos": { "h": 8, "w": 12, "x": 0, "y": 16 },
"id": 4,
"targets": [
{
"expr": "histogram_quantile(0.50, sum(rate(tg_consumer_request_duration_seconds_bucket{job=~\"query-triples|query-graph-embeddings|query-doc-embeddings\"}[5m])) by (le, job))",
"legendFormat": "{{job}} p50",
"refId": "A"
},
{
"expr": "histogram_quantile(0.95, sum(rate(tg_consumer_request_duration_seconds_bucket{job=~\"query-triples|query-graph-embeddings|query-doc-embeddings\"}[5m])) by (le, job))",
"legendFormat": "{{job}} p95",
"refId": "B"
}
],
"fieldConfig": {
"defaults": {
"color": { "mode": "palette-classic" },
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "latency",
"axisPlacement": "auto",
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "none",
"lineInterpolation": "smooth",
"lineWidth": 2,
"pointSize": 5,
"scaleDistribution": { "type": "linear" },
"showPoints": "never",
"spanNulls": false,
"stacking": { "group": "A", "mode": "none" },
"thresholdsStyle": { "mode": "off" }
},
"unit": "s",
"thresholds": {
"mode": "absolute",
"steps": [
{ "color": "green", "value": null },
{ "color": "yellow", "value": 0.5 },
{ "color": "red", "value": 2 }
]
}
},
"overrides": []
},
"options": {
"legend": { "calcs": ["mean", "max"], "displayMode": "table", "placement": "bottom", "showLegend": true },
"tooltip": { "mode": "multi", "sort": "desc" }
}
},
{
"title": "Synthesis Time (Text Completion / RAG)",
"type": "timeseries",
"datasource": {
"type": "prometheus",
"uid": "tg-prometheus"
},
"gridPos": { "h": 8, "w": 12, "x": 12, "y": 16 },
"id": 5,
"targets": [
{
"expr": "histogram_quantile(0.50, sum(rate(tg_consumer_request_duration_seconds_bucket{job=~\"text-completion|text-completion-rag|prompt-rag\"}[5m])) by (le, job))",
"legendFormat": "{{job}} p50",
"refId": "A"
},
{
"expr": "histogram_quantile(0.95, sum(rate(tg_consumer_request_duration_seconds_bucket{job=~\"text-completion|text-completion-rag|prompt-rag\"}[5m])) by (le, job))",
"legendFormat": "{{job}} p95",
"refId": "B"
}
],
"fieldConfig": {
"defaults": {
"color": { "mode": "palette-classic" },
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "latency",
"axisPlacement": "auto",
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "none",
"lineInterpolation": "smooth",
"lineWidth": 2,
"pointSize": 5,
"scaleDistribution": { "type": "linear" },
"showPoints": "never",
"spanNulls": false,
"stacking": { "group": "A", "mode": "none" },
"thresholdsStyle": { "mode": "off" }
},
"unit": "s",
"thresholds": {
"mode": "absolute",
"steps": [
{ "color": "green", "value": null },
{ "color": "yellow", "value": 5 },
{ "color": "red", "value": 20 }
]
}
},
"overrides": []
},
"options": {
"legend": { "calcs": ["mean", "max"], "displayMode": "table", "placement": "bottom", "showLegend": true },
"tooltip": { "mode": "multi", "sort": "desc" }
}
},
{
"title": "RAG Pipeline Throughput",
"type": "timeseries",
"datasource": {
"type": "prometheus",
"uid": "tg-prometheus"
},
"gridPos": { "h": 8, "w": 24, "x": 0, "y": 24 },
"id": 6,
"targets": [
{
"expr": "sum(rate(tg_consumer_processing_total{job=~\"graph-rag|document-rag\", status=\"success\"}[5m])) by (job)",
"legendFormat": "{{job}} success/s",
"refId": "A"
},
{
"expr": "sum(rate(tg_consumer_processing_total{job=~\"graph-rag|document-rag\", status=\"error\"}[5m])) by (job)",
"legendFormat": "{{job}} errors/s",
"refId": "B"
}
],
"fieldConfig": {
"defaults": {
"color": { "mode": "palette-classic" },
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "queries/s",
"axisPlacement": "auto",
"drawStyle": "line",
"fillOpacity": 15,
"gradientMode": "none",
"lineInterpolation": "smooth",
"lineWidth": 2,
"pointSize": 5,
"scaleDistribution": { "type": "linear" },
"showPoints": "never",
"spanNulls": false,
"stacking": { "group": "A", "mode": "none" },
"thresholdsStyle": { "mode": "off" }
},
"unit": "ops",
"thresholds": {
"mode": "absolute",
"steps": [
{ "color": "green", "value": null }
]
}
},
"overrides": [
{
"matcher": { "id": "byRegexp", "options": ".*errors.*" },
"properties": [
{ "id": "color", "value": { "fixedColor": "red", "mode": "fixed" } },
{ "id": "custom.fillOpacity", "value": 30 }
]
}
]
},
"options": {
"legend": { "calcs": ["mean", "max"], "displayMode": "table", "placement": "bottom", "showLegend": true },
"tooltip": { "mode": "multi", "sort": "desc" }
}
}
],
"schemaVersion": 39,
"tags": ["trustgraph", "rag", "pipeline"],
"templating": {
"list": []
},
"time": {
"from": "now-1h",
"to": "now"
},
"timepicker": {},
"timezone": "browser",
"title": "TrustGraph - RAG Pipeline",
"uid": "tg-rag-pipeline",
"version": 1
}

View file

@ -0,0 +1,14 @@
# Grafana dashboard provisioning: load every dashboard file found under
# /var/lib/grafana/dashboards into a single "TrustGraph" folder.
apiVersion: 1

providers:
  - name: 'TrustGraph'
    type: file
    orgId: 1
    folder: 'TrustGraph'
    folderUid: 'trustgraph-dashboards'
    # Re-scan the directory every 30 seconds.
    updateIntervalSeconds: 30
    # Dashboards may be edited from the UI and deleted.
    allowUiUpdates: true
    disableDeletion: false
    options:
      path: /var/lib/grafana/dashboards
      # Keep everything in the one folder above rather than mirroring
      # sub-directories.
      foldersFromFilesStructure: false

View file

@ -0,0 +1,49 @@
# Grafana datasource provisioning. The uids defined here are the ones the
# dashboards and the Tempo cross-links refer to.
apiVersion: 1
prune: true

datasources:
  # Metrics — the default datasource.
  - name: Prometheus
    uid: 'tg-prometheus'
    type: prometheus
    orgId: 1
    access: proxy
    url: http://prometheus:9090
    basicAuth: false
    isDefault: true
    editable: true

  # Logs.
  - name: Loki
    uid: 'tg-loki'
    type: loki
    orgId: 1
    access: proxy
    url: http://loki:3100
    basicAuth: false
    editable: true

  # Traces, linked to the Loki and Prometheus datasources above.
  - name: Tempo
    uid: 'tg-tempo'
    type: tempo
    orgId: 1
    access: proxy
    url: http://tempo:3200
    basicAuth: false
    editable: true
    jsonData:
      # Trace-to-logs: query Loki by trace ID over a window widened by one
      # hour on each side of the span.
      tracesToLogsV2:
        datasourceUid: 'tg-loki'
        spanStartTimeShift: '-1h'
        spanEndTimeShift: '1h'
        filterByTraceID: true
        filterBySpanID: false
      tracesToMetrics:
        datasourceUid: 'tg-prometheus'
      serviceMap:
        datasourceUid: 'tg-prometheus'
      nodeGraph:
        enabled: true
      search:
        hide: false
      lokiSearch:
        datasourceUid: 'tg-loki'

View file

@ -0,0 +1,52 @@
# Loki single-instance configuration: filesystem storage under /tmp/loki and
# an in-memory ring, with multi-tenancy authentication disabled.
auth_enabled: false

server:
  http_listen_port: 3100
  grpc_listen_port: 9096
  log_level: warn

common:
  instance_addr: 127.0.0.1
  path_prefix: /tmp/loki
  storage:
    filesystem:
      chunks_directory: /tmp/loki/chunks
      rules_directory: /tmp/loki/rules
  replication_factor: 1
  ring:
    kvstore:
      store: inmemory

query_range:
  results_cache:
    cache:
      embedded_cache:
        enabled: true
        max_size_mb: 100

schema_config:
  configs:
    - from: 2024-01-01
      store: tsdb
      object_store: filesystem
      schema: v13
      index:
        prefix: index_
        period: 24h

# NOTE(review): `limits_config.metric_aggregation_enabled` and
# `pattern_ingester.metric_aggregation.loki_address` were dropped from this
# file. They appear to come from a newer Loki sample config and are, as far as
# can be told, not recognised by the grafana/loki:3.0.0 image this stack pins —
# an unknown field makes Loki refuse to start. Re-add them if the image is
# upgraded to a release that documents those options.
pattern_ingester:
  enabled: true

ruler:
  alertmanager_url: http://localhost:9093

frontend:
  encoding: protobuf

# Do not send anonymous usage statistics.
analytics:
  reporting_enabled: false

View file

@ -0,0 +1,41 @@
# OpenTelemetry Collector pipeline definition.
#   traces:  OTLP in -> batch -> Tempo (OTLP gRPC)
#   metrics: OTLP in -> batch -> Prometheus scrape endpoint on :8889

receivers:
  # Accept OTLP over both gRPC and HTTP on all interfaces.
  otlp:
    protocols:
      grpc:
        endpoint: '0.0.0.0:4317'
      http:
        endpoint: '0.0.0.0:4318'

processors:
  # Flush every 5 seconds or once 1024 items have been collected.
  batch:
    timeout: 5s
    send_batch_size: 1024

exporters:
  # Plain-text gRPC to Tempo on the internal network.
  otlp/tempo:
    endpoint: 'tempo:4317'
    tls:
      insecure: true
  # Metrics are exposed with a "tg" namespace prefix, and resource attributes
  # are copied onto each series as labels.
  prometheus:
    endpoint: '0.0.0.0:8889'
    namespace: 'tg'
    resource_to_telemetry_conversion:
      enabled: true
  # Declared but not referenced by any pipeline below.
  debug:
    verbosity: basic

service:
  pipelines:
    traces:
      receivers:
        - otlp
      processors:
        - batch
      exporters:
        - otlp/tempo
    metrics:
      receivers:
        - otlp
      processors:
        - batch
      exporters:
        - prometheus
  telemetry:
    logs:
      level: warn

View file

@ -0,0 +1,36 @@
# Prometheus scrape configuration for the TrustGraph stack.
global:
  scrape_interval: 15s
  evaluation_interval: 15s
  external_labels:
    monitor: "trustgraph-ts"

scrape_configs:
  # Prometheus self-monitoring
  - job_name: "prometheus"
    scrape_interval: 15s
    static_configs:
      - targets:
          - "prometheus:9090"

  # NATS monitoring
  # NOTE(review): the NATS /varz endpoint serves JSON rather than the
  # Prometheus exposition format, so this target will most likely be reported
  # as down. Scraping NATS normally needs a prometheus-nats-exporter sidecar;
  # point this job at it once one is added to the stack.
  - job_name: "nats"
    scrape_interval: 15s
    metrics_path: "/varz"
    static_configs:
      - targets:
          - "nats:8222"

  # OpenTelemetry Collector (exposes Prometheus metrics from OTLP pipeline)
  # honor_labels keeps the `job`/`instance` labels the collector attaches to
  # each series (derived from the emitting service) instead of renaming them
  # to `exported_job`/`exported_instance`. The dashboards select on
  # job=~"graph-rag|text-completion|...", which only matches when the
  # per-service job label survives the scrape.
  - job_name: "otel-collector"
    honor_labels: true
    scrape_interval: 15s
    static_configs:
      - targets:
          - "otel-collector:8889"

  # TrustGraph gateway (enabled when gateway container is running)
  - job_name: "gateway"
    scrape_interval: 5s
    static_configs:
      - targets:
          - "gateway:8000"

View file

@ -0,0 +1,49 @@
# Tempo single-binary configuration: local-disk trace storage under /tmp/tempo
# with the metrics generator pushing derived metrics to Prometheus.
server:
  http_listen_port: 3200

# OTLP ingestion on all interfaces (the collector sends to tempo:4317).
distributor:
  receivers:
    otlp:
      protocols:
        grpc:
          endpoint: "0.0.0.0:4317"
        http:
          endpoint: "0.0.0.0:4318"

ingester:
  max_block_duration: 5m

# Keep trace blocks for 48 hours.
compactor:
  compaction:
    block_retention: 48h

# Derived metrics are remote-written to Prometheus, with exemplars.
metrics_generator:
  registry:
    external_labels:
      source: tempo
      cluster: trustgraph-dev
  storage:
    path: /tmp/tempo/generator/wal
    remote_write:
      - url: http://prometheus:9090/api/v1/write
        send_exemplars: true

storage:
  trace:
    backend: local
    wal:
      path: /tmp/tempo/wal
    local:
      path: /tmp/tempo/blocks

# Enable the service-graph and span-metrics processors for all tenants.
overrides:
  defaults:
    metrics_generator:
      processors:
        - service-graphs
        - span-metrics

# The former top-level `search_enabled: true` is intentionally gone: the option
# was removed in Tempo 2.x (search is always on) and Tempo rejects unknown
# fields at startup.

# Disable anonymous usage reporting. Tempo's key for this is `usage_report`;
# `analytics` is the Loki spelling and is not a valid Tempo field.
usage_report:
  reporting_enabled: false