Better proc group logging and concurrency (#810)

- Silence INFO-level logging from pika, cassandra, etc. (too much chatter)
- Add per-processor log tags so that logs can be understood within a
  processor group.
- Deal with RabbitMQ lag weirdness
- Add more processor group examples
This commit is contained in:
cybermaggedon 2026-04-15 14:52:01 +01:00 committed by GitHub
parent ce3c8b421b
commit 2bf4af294e
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
20 changed files with 1021 additions and 647 deletions

View file

@ -0,0 +1,52 @@
# Ingest pipeline. Document-processing hot path. Bursty, correlated
# failures — if chunker dies the extractors have nothing to do anyway.

# Shared settings. Every processor below pulls these into its `params`
# with the merge key `<<: *defaults`; keys written explicitly in a
# processor's `params` take precedence over the merged-in ones.
# NOTE(review): presumably the loader ignores the `_defaults` key itself
# and it exists only to carry the anchor — confirm against the loader.
_defaults: &defaults
  pubsub_backend: rabbitmq
  rabbitmq_host: localhost
  log_level: INFO

# One entry per processor: `class` is the dotted path of the processor
# class, `params` holds the settings for that processor.
# NOTE(review): `id` looks like the per-processor name (and log tag) and
# `concurrency` the number of concurrent workers — confirm with the loader.
processors:

  # Chunker: the stage the extractors below depend on (see header note).
  # NOTE(review): units of chunk_size / chunk_overlap are not shown here
  # (characters vs. tokens) — confirm against the chunker implementation.
  - class: trustgraph.chunking.recursive.Processor
    params:
      <<: *defaults
      id: chunker
      chunk_size: 2000
      chunk_overlap: 50

  # Knowledge-graph extractors.
  - class: trustgraph.extract.kg.agent.Processor
    params:
      <<: *defaults
      id: kg-extract-agent
      concurrency: 1

  - class: trustgraph.extract.kg.definitions.Processor
    params:
      <<: *defaults
      id: kg-extract-definitions
      concurrency: 1

  - class: trustgraph.extract.kg.ontology.Processor
    params:
      <<: *defaults
      id: kg-extract-ontology
      concurrency: 1

  - class: trustgraph.extract.kg.relationships.Processor
    params:
      <<: *defaults
      id: kg-extract-relationships
      concurrency: 1

  - class: trustgraph.extract.kg.rows.Processor
    params:
      <<: *defaults
      id: kg-extract-rows
      concurrency: 1

  # Prompt template processor.
  - class: trustgraph.prompt.template.Processor
    params:
      <<: *defaults
      id: prompt
      concurrency: 1