mirror of
https://github.com/trustgraph-ai/trustgraph.git
synced 2026-05-02 20:03:19 +02:00
Processor group implementation: dev wrapper (#808)
Processor group implementation: A wrapper to launch multiple
processors in a single process
- trustgraph-base/trustgraph/base/processor_group.py — group runner
module. run_group(config) is the async body; run() is the
console-script entry point. Loads JSON or YAML config, validates that every entry
has a unique params.id, instantiates each class via importlib,
shares one TaskGroup, mirrors AsyncProcessor.launch's retry loop
and Prometheus startup.
- trustgraph-base/pyproject.toml — added [project.scripts] block
with processor-group = "trustgraph.base.processor_group:run".
Key behaviours:
- Unique id enforced up front — missing or duplicate params.id fails
fast with a clear error, preventing the Prometheus Info label
collision we flagged.
- No registry — dotted class path is the identifier; any
AsyncProcessor descendant importable at runtime is packable.
- YAML import is lazy — only pulled in if the config file ends in
.yaml/.yml, so JSON-only users don't need PyYAML installed.
- Single Prometheus server — start_http_server runs once at
startup, before the retry loop, matching launch()'s pattern.
- Retry loop — same shape as AsyncProcessor.launch: catches
ExceptionGroup from TaskGroup, logs, sleeps 4s,
retries. Fail-group semantics (one processor dying tears down the
group) — simple and surfaces bugs, as discussed.
Example config:
processors:
  - class: trustgraph.extract.kg.definitions.extract.Processor
    params:
      id: kg-extract-definitions
  - class: trustgraph.chunking.recursive.Processor
    params:
      id: chunker-recursive
Run with processor-group -c group.yaml.
This commit is contained in:
parent
8954fa3ad7
commit
f11c0ad0cb
6 changed files with 580 additions and 11 deletions
|
|
@ -9,7 +9,7 @@ from aiohttp import web
|
|||
import logging
|
||||
import os
|
||||
|
||||
from trustgraph.base.logging import setup_logging
|
||||
from trustgraph.base.logging import setup_logging, add_logging_args
|
||||
from trustgraph.base.pubsub import get_pubsub, add_pubsub_args
|
||||
|
||||
from . auth import Authenticator
|
||||
|
|
@ -195,12 +195,7 @@ def run():
|
|||
help=f'Secret API token (default: no auth)',
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
'-l', '--log-level',
|
||||
default='INFO',
|
||||
choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'],
|
||||
help=f'Log level (default: INFO)'
|
||||
)
|
||||
add_logging_args(parser)
|
||||
|
||||
parser.add_argument(
|
||||
'--metrics',
|
||||
|
|
|
|||
|
|
@ -102,10 +102,10 @@ class Processor(FlowProcessor):
|
|||
__class__.cost_metric.labels(model=modelname, direction="input").inc(cost_in)
|
||||
__class__.cost_metric.labels(model=modelname, direction="output").inc(cost_out)
|
||||
|
||||
logger.info(f"Model: {modelname}")
|
||||
logger.info(f"Input Tokens: {num_in}")
|
||||
logger.info(f"Output Tokens: {num_out}")
|
||||
logger.info(f"Cost for call: ${cost_per_call}")
|
||||
logger.debug(
|
||||
f"Model: {modelname}, in={num_in}, out={num_out}, "
|
||||
f"cost=${cost_per_call}"
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def add_args(parser):
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue