Better proc group logging and concurrency (#810)

- Silence pika, cassandra etc. logging at INFO (too much chatter) 
- Add per processor log tags so that logs can be understood in
  processor group.
- Deal with RabbitMQ lag weirdness
- Added more processor group examples
This commit is contained in:
cybermaggedon 2026-04-15 14:52:01 +01:00 committed by GitHub
parent ce3c8b421b
commit 2bf4af294e
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
20 changed files with 1021 additions and 647 deletions

View file

@ -0,0 +1,47 @@
# Control plane. Stateful "always on" services that every flow depends on.
# Cassandra-heavy, low traffic.
_defaults: &defaults
pubsub_backend: rabbitmq
rabbitmq_host: localhost
log_level: INFO
processors:
- class: trustgraph.config.service.Processor
params:
<<: *defaults
id: config-svc
cassandra_host: localhost
- class: trustgraph.librarian.Processor
params:
<<: *defaults
id: librarian
cassandra_host: localhost
object_store_endpoint: localhost:3900
object_store_access_key: GK000000000000000000000001
object_store_secret_key: b171f00be9be4c32c734f4c05fe64c527a8ab5eb823b376cfa8c2531f70fc427
object_store_region: garage
- class: trustgraph.cores.service.Processor
params:
<<: *defaults
id: knowledge
cassandra_host: localhost
- class: trustgraph.storage.knowledge.store.Processor
params:
<<: *defaults
id: kg-store
cassandra_host: localhost
- class: trustgraph.metering.Processor
params:
<<: *defaults
id: metering
- class: trustgraph.metering.Processor
params:
<<: *defaults
id: metering-rag

View file

@ -0,0 +1,45 @@
# Embeddings store. All Qdrant-backed vector query/write processors.
# One process owns the Qdrant driver pool for the whole group.
_defaults: &defaults
pubsub_backend: rabbitmq
rabbitmq_host: localhost
log_level: INFO
processors:
- class: trustgraph.query.doc_embeddings.qdrant.Processor
params:
<<: *defaults
id: doc-embeddings-query
store_uri: http://localhost:6333
- class: trustgraph.storage.doc_embeddings.qdrant.Processor
params:
<<: *defaults
id: doc-embeddings-write
store_uri: http://localhost:6333
- class: trustgraph.query.graph_embeddings.qdrant.Processor
params:
<<: *defaults
id: graph-embeddings-query
store_uri: http://localhost:6333
- class: trustgraph.storage.graph_embeddings.qdrant.Processor
params:
<<: *defaults
id: graph-embeddings-write
store_uri: http://localhost:6333
- class: trustgraph.query.row_embeddings.qdrant.Processor
params:
<<: *defaults
id: row-embeddings-query
store_uri: http://localhost:6333
- class: trustgraph.storage.row_embeddings.qdrant.Processor
params:
<<: *defaults
id: row-embeddings-write
store_uri: http://localhost:6333

View file

@ -0,0 +1,31 @@
# Embeddings. Memory-hungry — fastembed loads an ML model at startup.
# Keep isolated from other groups so its memory footprint and restart
# latency don't affect siblings.
_defaults: &defaults
pubsub_backend: rabbitmq
rabbitmq_host: localhost
log_level: INFO
processors:
- class: trustgraph.embeddings.fastembed.Processor
params:
<<: *defaults
id: embeddings
concurrency: 1
- class: trustgraph.embeddings.document_embeddings.Processor
params:
<<: *defaults
id: document-embeddings
- class: trustgraph.embeddings.graph_embeddings.Processor
params:
<<: *defaults
id: graph-embeddings
- class: trustgraph.embeddings.row_embeddings.Processor
params:
<<: *defaults
id: row-embeddings

View file

@ -0,0 +1,52 @@
# Ingest pipeline. Document-processing hot path. Bursty, correlated
# failures — if chunker dies the extractors have nothing to do anyway.
_defaults: &defaults
pubsub_backend: rabbitmq
rabbitmq_host: localhost
log_level: INFO
processors:
- class: trustgraph.chunking.recursive.Processor
params:
<<: *defaults
id: chunker
chunk_size: 2000
chunk_overlap: 50
- class: trustgraph.extract.kg.agent.Processor
params:
<<: *defaults
id: kg-extract-agent
concurrency: 1
- class: trustgraph.extract.kg.definitions.Processor
params:
<<: *defaults
id: kg-extract-definitions
concurrency: 1
- class: trustgraph.extract.kg.ontology.Processor
params:
<<: *defaults
id: kg-extract-ontology
concurrency: 1
- class: trustgraph.extract.kg.relationships.Processor
params:
<<: *defaults
id: kg-extract-relationships
concurrency: 1
- class: trustgraph.extract.kg.rows.Processor
params:
<<: *defaults
id: kg-extract-rows
concurrency: 1
- class: trustgraph.prompt.template.Processor
params:
<<: *defaults
id: prompt
concurrency: 1

View file

@ -0,0 +1,24 @@
# LLM. Outbound text-completion calls. Isolated because the upstream
# LLM API is often the bottleneck and the most likely thing to need
# restart (provider changes, model changes, API flakiness).
_defaults: &defaults
pubsub_backend: rabbitmq
rabbitmq_host: localhost
log_level: INFO
processors:
- class: trustgraph.model.text_completion.openai.Processor
params:
<<: *defaults
id: text-completion
max_output: 8192
temperature: 0.0
- class: trustgraph.model.text_completion.openai.Processor
params:
<<: *defaults
id: text-completion-rag
max_output: 8192
temperature: 0.0

View file

@ -0,0 +1,64 @@
# RAG / retrieval / agent. Query-time serving path. Drives outbound
# LLM calls via prompt-rag. sparql-query lives here because it's a
# read-side serving endpoint, not a backend writer.
_defaults: &defaults
pubsub_backend: rabbitmq
rabbitmq_host: localhost
log_level: INFO
processors:
- class: trustgraph.agent.orchestrator.Processor
params:
<<: *defaults
id: agent-manager
- class: trustgraph.retrieval.graph_rag.Processor
params:
<<: *defaults
id: graph-rag
concurrency: 1
entity_limit: 50
triple_limit: 30
edge_limit: 30
edge_score_limit: 10
max_subgraph_size: 100
max_path_length: 2
- class: trustgraph.retrieval.document_rag.Processor
params:
<<: *defaults
id: document-rag
doc_limit: 20
- class: trustgraph.retrieval.nlp_query.Processor
params:
<<: *defaults
id: nlp-query
- class: trustgraph.retrieval.structured_query.Processor
params:
<<: *defaults
id: structured-query
- class: trustgraph.retrieval.structured_diag.Processor
params:
<<: *defaults
id: structured-diag
- class: trustgraph.query.sparql.Processor
params:
<<: *defaults
id: sparql-query
- class: trustgraph.prompt.template.Processor
params:
<<: *defaults
id: prompt-rag
concurrency: 1
- class: trustgraph.agent.mcp_tool.Service
params:
<<: *defaults
id: mcp-tool

View file

@ -0,0 +1,20 @@
# Rows store. Cassandra-backed structured row query/write.
_defaults: &defaults
pubsub_backend: rabbitmq
rabbitmq_host: localhost
log_level: INFO
processors:
- class: trustgraph.query.rows.cassandra.Processor
params:
<<: *defaults
id: rows-query
cassandra_host: localhost
- class: trustgraph.storage.rows.cassandra.Processor
params:
<<: *defaults
id: rows-write
cassandra_host: localhost

View file

@ -0,0 +1,20 @@
# Triples store. Cassandra-backed RDF triple query/write.
_defaults: &defaults
pubsub_backend: rabbitmq
rabbitmq_host: localhost
log_level: INFO
processors:
- class: trustgraph.query.triples.cassandra.Processor
params:
<<: *defaults
id: triples-query
cassandra_host: localhost
- class: trustgraph.storage.triples.cassandra.Processor
params:
<<: *defaults
id: triples-write
cassandra_host: localhost