fix: NATS pipeline bugs, add integration tests and service runners

Fix three critical bugs preventing the NATS message pipeline from working:

- FlowProcessor now subscribes to the config-push topic (the subscription
  was missing entirely), using DeliverPolicy.All so that config is replayed
  on service restart
- NATS streams use wildcard subjects (tg.flow.>) instead of per-topic
  narrow filters that caused 503 errors on publish
- Subscriber dispatch loop has exponential backoff on errors to prevent
  tight error loops

Add service runner scripts (gateway, config, LLM) and a 7-test
integration suite that verifies config CRUD, WebSocket round-trip,
and full LLM text-completion through the NATS pipeline.

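Fix Docker Compose infra: pin Tempo to v2.6.1 (was :latest), remove
deprecated Loki config fields, run Loki and Tempo as user "0" for volume
permissions, move Tempo storage from /tmp/tempo to /var/tempo, and remap
conflicting host ports (FalkorDB 6379 -> 6380, OTLP gRPC/HTTP 4317/4318 ->
4327/4328).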

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
elpresidank 2026-04-05 23:41:39 -05:00
parent 0042f9259c
commit 28747e1a92
15 changed files with 826 additions and 107 deletions

View file

@ -42,7 +42,7 @@ services:
falkordb:
image: falkordb/falkordb:latest
ports:
- "6379:6379"
- "6380:6379"
volumes:
- falkordb-data:/data
networks:
@ -111,6 +111,7 @@ services:
loki:
image: grafana/loki:3.0.0
user: "0"
ports:
- "3100:3100"
volumes:
@ -128,12 +129,13 @@ services:
restart: unless-stopped
tempo:
image: grafana/tempo:latest
image: grafana/tempo:2.6.1
user: "0"
ports:
- "3200:3200" # Tempo API
volumes:
- ./tempo/tempo-config.yml:/etc/tempo/config.yml:ro
- tempo-data:/tmp/tempo
- tempo-data:/var/tempo
command: ["-config.file=/etc/tempo/config.yml"]
networks:
- trustgraph
@ -148,8 +150,8 @@ services:
otel-collector:
image: otel/opentelemetry-collector-contrib:latest
ports:
- "4317:4317" # OTLP gRPC (apps send traces/metrics here)
- "4318:4318" # OTLP HTTP
- "4327:4317" # OTLP gRPC (apps send traces/metrics here)
- "4328:4318" # OTLP HTTP
- "8889:8889" # Prometheus exporter (scraped by Prometheus)
volumes:
- ./otel-collector/config.yml:/etc/otelcol-contrib/config.yaml:ro

View file

@ -24,9 +24,6 @@ query_range:
enabled: true
max_size_mb: 100
limits_config:
metric_aggregation_enabled: true
schema_config:
configs:
- from: 2024-01-01
@ -37,16 +34,8 @@ schema_config:
prefix: index_
period: 24h
pattern_ingester:
enabled: true
metric_aggregation:
loki_address: localhost:3100
ruler:
alertmanager_url: http://localhost:9093
frontend:
encoding: protobuf
analytics:
reporting_enabled: false

View file

@ -17,33 +17,10 @@ compactor:
compaction:
block_retention: 48h
metrics_generator:
registry:
external_labels:
source: tempo
cluster: trustgraph-dev
storage:
path: /tmp/tempo/generator/wal
remote_write:
- url: http://prometheus:9090/api/v1/write
send_exemplars: true
storage:
trace:
backend: local
wal:
path: /tmp/tempo/wal
path: /var/tempo/wal
local:
path: /tmp/tempo/blocks
overrides:
defaults:
metrics_generator:
processors:
- service-graphs
- span-metrics
search_enabled: true
analytics:
reporting_enabled: false
path: /var/tempo/blocks