fix: comprehensive QA — resolve 13 bugs, add UX improvements across all services

Client SDK: add .catch() to graphRagStreaming/documentRagStreaming (silent timeout),
null-guard JSON.parse in getPrompts/getSystemPrompt/getPrompt.

Backend: implement "getvalues" config operation for token costs, null-check
createTerm() in FalkorDB triples query, add knowledge-cores service entrypoint
and Docker entry, return proper HTTP 400/404 for gateway error responses.

Workbench: cancel button + elapsed timer for chat, clear agent spinner on error,
flow dialog inline validation, responsive header wrapping, knowledge cores
loading timeout, sidebar/page naming consistency, theme toggle indicator.

Infrastructure: enable Grafana Explore for viewers, add gateway Prometheus
scrape target, fix RAG pipeline dashboard layout (6 panels visible),
filter Service Health to configured targets only.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
elpresidank 2026-04-07 05:20:10 -05:00
parent 72870a7e2e
commit 3a80872482
22 changed files with 202 additions and 54 deletions

View file

@ -66,7 +66,7 @@ services:
networks:
- trustgraph
healthcheck:
test: ["CMD", "wget", "--spider", "-q", "http://localhost:6333/healthz"]
test: ["CMD-SHELL", "bash -c 'echo > /dev/tcp/localhost/6333'"]
interval: 10s
timeout: 5s
retries: 3
@ -179,6 +179,7 @@ services:
- GF_AUTH_DISABLE_LOGIN_FORM=false
- GF_USERS_DEFAULT_THEME=dark
- GF_EXPLORE_ENABLED=true
- GF_USERS_VIEWERS_CAN_EDIT=true
- GF_FEATURE_TOGGLES_ENABLE=traceqlEditor tempoSearch tempoServiceGraph
depends_on:
prometheus:
@ -323,6 +324,18 @@ services:
- trustgraph
restart: unless-stopped
# Knowledge-cores service: runs the entrypoints/cores.mjs Node entrypoint
# from the trustgraph-ts:local image.
knowledge-cores:
image: trustgraph-ts:local
command: ["node", "entrypoints/cores.mjs"]
environment:
# NATS server URL handed to the service via its environment.
- NATS_URL=nats://nats:4222
depends_on:
# Wait until the nats service's healthcheck reports healthy before starting.
nats:
condition: service_healthy
networks:
- trustgraph
# Restart automatically unless the container was explicitly stopped.
restart: unless-stopped
# ---------------------------------------------------------------------------
# Document Processing Pipeline
# ---------------------------------------------------------------------------

View file

@ -32,7 +32,7 @@
"id": 1,
"targets": [
{
"expr": "up",
"expr": "up{job=~\"prometheus|otel-collector|gateway\"}",
"legendFormat": "{{job}}",
"refId": "A"
}

View file

@ -28,7 +28,7 @@
"type": "prometheus",
"uid": "tg-prometheus"
},
"gridPos": { "h": 8, "w": 24, "x": 0, "y": 0 },
"gridPos": { "h": 8, "w": 12, "x": 0, "y": 0 },
"id": 1,
"targets": [
{
@ -92,7 +92,7 @@
"type": "prometheus",
"uid": "tg-prometheus"
},
"gridPos": { "h": 8, "w": 12, "x": 0, "y": 8 },
"gridPos": { "h": 8, "w": 12, "x": 12, "y": 0 },
"id": 2,
"targets": [
{
@ -151,7 +151,7 @@
"type": "prometheus",
"uid": "tg-prometheus"
},
"gridPos": { "h": 8, "w": 12, "x": 12, "y": 8 },
"gridPos": { "h": 8, "w": 12, "x": 0, "y": 8 },
"id": 3,
"targets": [
{
@ -210,7 +210,7 @@
"type": "prometheus",
"uid": "tg-prometheus"
},
"gridPos": { "h": 8, "w": 12, "x": 0, "y": 16 },
"gridPos": { "h": 8, "w": 12, "x": 12, "y": 8 },
"id": 4,
"targets": [
{
@ -269,7 +269,7 @@
"type": "prometheus",
"uid": "tg-prometheus"
},
"gridPos": { "h": 8, "w": 12, "x": 12, "y": 16 },
"gridPos": { "h": 8, "w": 12, "x": 0, "y": 16 },
"id": 5,
"targets": [
{
@ -328,7 +328,7 @@
"type": "prometheus",
"uid": "tg-prometheus"
},
"gridPos": { "h": 8, "w": 24, "x": 0, "y": 24 },
"gridPos": { "h": 8, "w": 12, "x": 12, "y": 16 },
"id": 6,
"targets": [
{

View file

@ -13,13 +13,10 @@ scrape_configs:
- targets:
- "prometheus:9090"
# NATS monitoring
- job_name: "nats"
scrape_interval: 15s
metrics_path: "/varz"
static_configs:
- targets:
- "nats:8222"
# NATS monitoring (not scraped directly by Prometheus)
# NATS exposes JSON at /varz, not the Prometheus exposition format.
# To get proper Prometheus metrics, deploy a prometheus-nats-exporter sidecar.
# For now, we rely on the NATS healthcheck and JetStream monitoring via /jsz.
# OpenTelemetry Collector (exposes Prometheus metrics from OTLP pipeline)
- job_name: "otel-collector"
@ -28,9 +25,10 @@ scrape_configs:
- targets:
- "otel-collector:8889"
# TrustGraph gateway (enabled when gateway container is running)
# TrustGraph gateway metrics (prom-client)
- job_name: "gateway"
scrape_interval: 5s
scrape_interval: 15s
metrics_path: "/api/v1/metrics"
static_configs:
- targets:
- "gateway:8000"
- "gateway:8088"