mirror of
https://github.com/trustgraph-ai/trustgraph.git
synced 2026-05-12 00:32:37 +02:00
Release/v2.3 -> master
This commit is contained in:
parent
59e269185d
commit
e8bc96ef7e
31 changed files with 1202 additions and 398 deletions
|
|
@ -61,6 +61,7 @@ api-gateway = "trustgraph.gateway:run"
|
|||
chunker-recursive = "trustgraph.chunking.recursive:run"
|
||||
chunker-token = "trustgraph.chunking.token:run"
|
||||
config-svc = "trustgraph.config.service:run"
|
||||
flow-svc = "trustgraph.flow.service:run"
|
||||
doc-embeddings-query-milvus = "trustgraph.query.doc_embeddings.milvus:run"
|
||||
doc-embeddings-query-pinecone = "trustgraph.query.doc_embeddings.pinecone:run"
|
||||
doc-embeddings-query-qdrant = "trustgraph.query.doc_embeddings.qdrant:run"
|
||||
|
|
|
|||
|
|
@ -11,14 +11,10 @@ from trustgraph.schema import ConfigRequest, ConfigResponse, ConfigPush
|
|||
from trustgraph.schema import config_request_queue, config_response_queue
|
||||
from trustgraph.schema import config_push_queue
|
||||
|
||||
from trustgraph.schema import FlowRequest, FlowResponse
|
||||
from trustgraph.schema import flow_request_queue, flow_response_queue
|
||||
|
||||
from trustgraph.base import AsyncProcessor, Consumer, Producer
|
||||
from trustgraph.base.cassandra_config import add_cassandra_args, resolve_cassandra_config
|
||||
|
||||
from . config import Configuration
|
||||
from . flow import FlowConfig
|
||||
|
||||
from ... base import ProcessorMetrics, ConsumerMetrics, ProducerMetrics
|
||||
from ... base import Consumer, Producer
|
||||
|
|
@ -32,9 +28,6 @@ default_config_request_queue = config_request_queue
|
|||
default_config_response_queue = config_response_queue
|
||||
default_config_push_queue = config_push_queue
|
||||
|
||||
default_flow_request_queue = flow_request_queue
|
||||
default_flow_response_queue = flow_response_queue
|
||||
|
||||
default_cassandra_host = "cassandra"
|
||||
|
||||
class Processor(AsyncProcessor):
|
||||
|
|
@ -51,13 +44,6 @@ class Processor(AsyncProcessor):
|
|||
"config_push_queue", default_config_push_queue
|
||||
)
|
||||
|
||||
flow_request_queue = params.get(
|
||||
"flow_request_queue", default_flow_request_queue
|
||||
)
|
||||
flow_response_queue = params.get(
|
||||
"flow_response_queue", default_flow_response_queue
|
||||
)
|
||||
|
||||
cassandra_host = params.get("cassandra_host")
|
||||
cassandra_username = params.get("cassandra_username")
|
||||
cassandra_password = params.get("cassandra_password")
|
||||
|
|
@ -77,16 +63,11 @@ class Processor(AsyncProcessor):
|
|||
|
||||
id = params.get("id")
|
||||
|
||||
flow_request_schema = FlowRequest
|
||||
flow_response_schema = FlowResponse
|
||||
|
||||
super(Processor, self).__init__(
|
||||
**params | {
|
||||
"config_request_schema": ConfigRequest.__name__,
|
||||
"config_response_schema": ConfigResponse.__name__,
|
||||
"config_push_schema": ConfigPush.__name__,
|
||||
"flow_request_schema": FlowRequest.__name__,
|
||||
"flow_response_schema": FlowResponse.__name__,
|
||||
"cassandra_host": self.cassandra_host,
|
||||
"cassandra_username": self.cassandra_username,
|
||||
"cassandra_password": self.cassandra_password,
|
||||
|
|
@ -103,12 +84,8 @@ class Processor(AsyncProcessor):
|
|||
processor = self.id, flow = None, name = "config-push"
|
||||
)
|
||||
|
||||
flow_request_metrics = ConsumerMetrics(
|
||||
processor = self.id, flow = None, name = "flow-request"
|
||||
)
|
||||
flow_response_metrics = ProducerMetrics(
|
||||
processor = self.id, flow = None, name = "flow-response"
|
||||
)
|
||||
self.config_request_topic = config_request_queue
|
||||
self.config_request_subscriber = id
|
||||
|
||||
self.config_request_consumer = Consumer(
|
||||
taskgroup = self.taskgroup,
|
||||
|
|
@ -135,24 +112,6 @@ class Processor(AsyncProcessor):
|
|||
metrics = config_push_metrics,
|
||||
)
|
||||
|
||||
self.flow_request_consumer = Consumer(
|
||||
taskgroup = self.taskgroup,
|
||||
backend = self.pubsub,
|
||||
flow = None,
|
||||
topic = flow_request_queue,
|
||||
subscriber = id,
|
||||
schema = FlowRequest,
|
||||
handler = self.on_flow_request,
|
||||
metrics = flow_request_metrics,
|
||||
)
|
||||
|
||||
self.flow_response_producer = Producer(
|
||||
backend = self.pubsub,
|
||||
topic = flow_response_queue,
|
||||
schema = FlowResponse,
|
||||
metrics = flow_response_metrics,
|
||||
)
|
||||
|
||||
self.config = Configuration(
|
||||
host = self.cassandra_host,
|
||||
username = self.cassandra_username,
|
||||
|
|
@ -161,15 +120,15 @@ class Processor(AsyncProcessor):
|
|||
push = self.push
|
||||
)
|
||||
|
||||
self.flow = FlowConfig(self.config)
|
||||
|
||||
logger.info("Config service initialized")
|
||||
|
||||
async def start(self):
|
||||
|
||||
await self.pubsub.ensure_queue(
|
||||
self.config_request_topic, self.config_request_subscriber
|
||||
)
|
||||
await self.push() # Startup poke: empty types = everything
|
||||
await self.config_request_consumer.start()
|
||||
await self.flow_request_consumer.start()
|
||||
|
||||
async def push(self, types=None):
|
||||
|
||||
|
|
@ -193,7 +152,7 @@ class Processor(AsyncProcessor):
|
|||
# Sender-produced ID
|
||||
id = msg.properties()["id"]
|
||||
|
||||
logger.info(f"Handling config request {id}...")
|
||||
logger.debug(f"Handling config request {id}...")
|
||||
|
||||
resp = await self.config.handle(v)
|
||||
|
||||
|
|
@ -214,36 +173,6 @@ class Processor(AsyncProcessor):
|
|||
resp, properties={"id": id}
|
||||
)
|
||||
|
||||
async def on_flow_request(self, msg, consumer, flow):
|
||||
|
||||
try:
|
||||
|
||||
v = msg.value()
|
||||
|
||||
# Sender-produced ID
|
||||
id = msg.properties()["id"]
|
||||
|
||||
logger.info(f"Handling flow request {id}...")
|
||||
|
||||
resp = await self.flow.handle(v)
|
||||
|
||||
await self.flow_response_producer.send(
|
||||
resp, properties={"id": id}
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
|
||||
resp = FlowResponse(
|
||||
error=Error(
|
||||
type = "flow-error",
|
||||
message = str(e),
|
||||
),
|
||||
)
|
||||
|
||||
await self.flow_response_producer.send(
|
||||
resp, properties={"id": id}
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def add_args(parser):
|
||||
|
||||
|
|
@ -263,18 +192,6 @@ class Processor(AsyncProcessor):
|
|||
|
||||
# Note: --config-push-queue is already added by AsyncProcessor.add_args()
|
||||
|
||||
parser.add_argument(
|
||||
'--flow-request-queue',
|
||||
default=default_flow_request_queue,
|
||||
help=f'Flow request queue (default: {default_flow_request_queue})'
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
'--flow-response-queue',
|
||||
default=default_flow_response_queue,
|
||||
help=f'Flow response queue {default_flow_response_queue}',
|
||||
)
|
||||
|
||||
add_cassandra_args(parser)
|
||||
|
||||
def run():
|
||||
|
|
|
|||
|
|
@ -192,8 +192,8 @@ class KnowledgeManager:
|
|||
if "graph-embeddings-store" not in flow["interfaces"]:
|
||||
raise RuntimeError("Flow has no graph-embeddings-store")
|
||||
|
||||
t_q = flow["interfaces"]["triples-store"]
|
||||
ge_q = flow["interfaces"]["graph-embeddings-store"]
|
||||
t_q = flow["interfaces"]["triples-store"]["flow"]
|
||||
ge_q = flow["interfaces"]["graph-embeddings-store"]["flow"]
|
||||
|
||||
# Got this far, it should all work
|
||||
await respond(
|
||||
|
|
|
|||
|
|
@ -82,6 +82,9 @@ class Processor(AsyncProcessor):
|
|||
processor = self.id, flow = None, name = "knowledge-response"
|
||||
)
|
||||
|
||||
self.knowledge_request_topic = knowledge_request_queue
|
||||
self.knowledge_request_subscriber = id
|
||||
|
||||
self.knowledge_request_consumer = Consumer(
|
||||
taskgroup = self.taskgroup,
|
||||
backend = self.pubsub,
|
||||
|
|
@ -116,6 +119,9 @@ class Processor(AsyncProcessor):
|
|||
|
||||
async def start(self):
|
||||
|
||||
await self.pubsub.ensure_queue(
|
||||
self.knowledge_request_topic, self.knowledge_request_subscriber
|
||||
)
|
||||
await super(Processor, self).start()
|
||||
await self.knowledge_request_consumer.start()
|
||||
await self.knowledge_response_producer.start()
|
||||
|
|
|
|||
2
trustgraph-flow/trustgraph/flow/__init__.py
Normal file
2
trustgraph-flow/trustgraph/flow/__init__.py
Normal file
|
|
@ -0,0 +1,2 @@
|
|||
|
||||
from . service import *
|
||||
2
trustgraph-flow/trustgraph/flow/service/__init__.py
Normal file
2
trustgraph-flow/trustgraph/flow/service/__init__.py
Normal file
|
|
@ -0,0 +1,2 @@
|
|||
|
||||
from . service import *
|
||||
6
trustgraph-flow/trustgraph/flow/service/__main__.py
Normal file
6
trustgraph-flow/trustgraph/flow/service/__main__.py
Normal file
|
|
@ -0,0 +1,6 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
from . service import run
|
||||
|
||||
if __name__ == '__main__':
|
||||
run()
|
||||
|
|
@ -1,15 +1,22 @@
|
|||
|
||||
from trustgraph.schema import FlowResponse, Error
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
|
||||
# Module logger
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Queue deletion retry settings
|
||||
DELETE_RETRIES = 5
|
||||
DELETE_RETRY_DELAY = 2 # seconds
|
||||
|
||||
|
||||
class FlowConfig:
|
||||
def __init__(self, config):
|
||||
def __init__(self, config, pubsub):
|
||||
|
||||
self.config = config
|
||||
self.pubsub = pubsub
|
||||
# Cache for parameter type definitions to avoid repeated lookups
|
||||
self.param_type_cache = {}
|
||||
|
||||
|
|
@ -22,9 +29,12 @@ class FlowConfig:
|
|||
user_params: User-provided parameters dict (may be None or empty)
|
||||
|
||||
Returns:
|
||||
Complete parameter dict with user values and defaults merged (all values as strings)
|
||||
Complete parameter dict with user values and defaults merged
|
||||
(all values as strings)
|
||||
"""
|
||||
|
||||
# If the flow blueprint has no parameters section, return user params as-is (stringified)
|
||||
|
||||
if "parameters" not in flow_blueprint:
|
||||
if not user_params:
|
||||
return {}
|
||||
|
|
@ -49,7 +59,9 @@ class FlowConfig:
|
|||
if param_type not in self.param_type_cache:
|
||||
try:
|
||||
# Fetch parameter type definition from config store
|
||||
type_def = await self.config.get("parameter-type").get(param_type)
|
||||
type_def = await self.config.get(
|
||||
"parameter-type", param_type
|
||||
)
|
||||
if type_def:
|
||||
self.param_type_cache[param_type] = json.loads(type_def)
|
||||
else:
|
||||
|
|
@ -102,32 +114,29 @@ class FlowConfig:
|
|||
|
||||
async def handle_list_blueprints(self, msg):
|
||||
|
||||
names = list(await self.config.get("flow-blueprint").keys())
|
||||
names = list(await self.config.keys("flow-blueprint"))
|
||||
|
||||
return FlowResponse(
|
||||
error = None,
|
||||
blueprint_names = names,
|
||||
)
|
||||
|
||||
|
||||
async def handle_get_blueprint(self, msg):
|
||||
|
||||
return FlowResponse(
|
||||
error = None,
|
||||
blueprint_definition = await self.config.get(
|
||||
"flow-blueprint"
|
||||
).get(msg.blueprint_name),
|
||||
"flow-blueprint", msg.blueprint_name
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
async def handle_put_blueprint(self, msg):
|
||||
|
||||
await self.config.get("flow-blueprint").put(
|
||||
await self.config.put(
|
||||
"flow-blueprint",
|
||||
msg.blueprint_name, msg.blueprint_definition
|
||||
)
|
||||
|
||||
await self.config.inc_version()
|
||||
|
||||
await self.config.push(types=["flow-blueprint"])
|
||||
|
||||
return FlowResponse(
|
||||
error = None,
|
||||
)
|
||||
|
|
@ -136,28 +145,24 @@ class FlowConfig:
|
|||
|
||||
logger.debug(f"Flow config message: {msg}")
|
||||
|
||||
await self.config.get("flow-blueprint").delete(msg.blueprint_name)
|
||||
|
||||
await self.config.inc_version()
|
||||
|
||||
await self.config.push(types=["flow-blueprint"])
|
||||
await self.config.delete("flow-blueprint", msg.blueprint_name)
|
||||
|
||||
return FlowResponse(
|
||||
error = None,
|
||||
)
|
||||
|
||||
|
||||
async def handle_list_flows(self, msg):
|
||||
|
||||
names = list(await self.config.get("flow").keys())
|
||||
names = list(await self.config.keys("flow"))
|
||||
|
||||
return FlowResponse(
|
||||
error = None,
|
||||
flow_ids = names,
|
||||
)
|
||||
|
||||
|
||||
async def handle_get_flow(self, msg):
|
||||
|
||||
flow_data = await self.config.get("flow").get(msg.flow_id)
|
||||
flow_data = await self.config.get("flow", msg.flow_id)
|
||||
flow = json.loads(flow_data)
|
||||
|
||||
return FlowResponse(
|
||||
|
|
@ -166,7 +171,7 @@ class FlowConfig:
|
|||
description = flow.get("description", ""),
|
||||
parameters = flow.get("parameters", {}),
|
||||
)
|
||||
|
||||
|
||||
async def handle_start_flow(self, msg):
|
||||
|
||||
if msg.blueprint_name is None:
|
||||
|
|
@ -175,17 +180,17 @@ class FlowConfig:
|
|||
if msg.flow_id is None:
|
||||
raise RuntimeError("No flow ID")
|
||||
|
||||
if msg.flow_id in await self.config.get("flow").keys():
|
||||
if msg.flow_id in await self.config.keys("flow"):
|
||||
raise RuntimeError("Flow already exists")
|
||||
|
||||
if msg.description is None:
|
||||
raise RuntimeError("No description")
|
||||
|
||||
if msg.blueprint_name not in await self.config.get("flow-blueprint").keys():
|
||||
if msg.blueprint_name not in await self.config.keys("flow-blueprint"):
|
||||
raise RuntimeError("Blueprint does not exist")
|
||||
|
||||
cls = json.loads(
|
||||
await self.config.get("flow-blueprint").get(msg.blueprint_name)
|
||||
await self.config.get("flow-blueprint", msg.blueprint_name)
|
||||
)
|
||||
|
||||
# Resolve parameters by merging user-provided values with defaults
|
||||
|
|
@ -210,6 +215,15 @@ class FlowConfig:
|
|||
|
||||
return result
|
||||
|
||||
# Pre-create flow-level queues so the data path is wired
|
||||
# before processors receive their config and start connecting.
|
||||
queues = self._collect_flow_queues(cls, repl_template_with_params)
|
||||
for topic, subscription in queues:
|
||||
await self.pubsub.create_queue(topic, subscription)
|
||||
|
||||
# Build all processor config updates, then write in a single batch.
|
||||
updates = []
|
||||
|
||||
for kind in ("blueprint", "flow"):
|
||||
|
||||
for k, v in cls[kind].items():
|
||||
|
|
@ -218,37 +232,34 @@ class FlowConfig:
|
|||
|
||||
variant = repl_template_with_params(variant)
|
||||
|
||||
v = {
|
||||
topics = {
|
||||
repl_template_with_params(k2): repl_template_with_params(v2)
|
||||
for k2, v2 in v.items()
|
||||
for k2, v2 in v.get("topics", {}).items()
|
||||
}
|
||||
|
||||
flac = await self.config.get("active-flow").get(processor)
|
||||
if flac is not None:
|
||||
target = json.loads(flac)
|
||||
else:
|
||||
target = {}
|
||||
params = {
|
||||
repl_template_with_params(k2): repl_template_with_params(v2)
|
||||
for k2, v2 in v.get("parameters", {}).items()
|
||||
}
|
||||
|
||||
# The condition if variant not in target: means it only adds
|
||||
# the configuration if the variant doesn't already exist.
|
||||
# If "everything" already exists in the target with old
|
||||
# values, they won't update.
|
||||
entry = {
|
||||
"topics": topics,
|
||||
"parameters": params,
|
||||
}
|
||||
|
||||
if variant not in target:
|
||||
target[variant] = v
|
||||
updates.append((
|
||||
f"processor:{processor}",
|
||||
variant,
|
||||
json.dumps(entry),
|
||||
))
|
||||
|
||||
await self.config.get("active-flow").put(
|
||||
processor, json.dumps(target)
|
||||
)
|
||||
await self.config.put_many(updates)
|
||||
|
||||
def repl_interface(i):
|
||||
if isinstance(i, str):
|
||||
return repl_template_with_params(i)
|
||||
else:
|
||||
return {
|
||||
k: repl_template_with_params(v)
|
||||
for k, v in i.items()
|
||||
}
|
||||
return {
|
||||
k: repl_template_with_params(v)
|
||||
for k, v in i.items()
|
||||
}
|
||||
|
||||
if "interfaces" in cls:
|
||||
interfaces = {
|
||||
|
|
@ -258,8 +269,8 @@ class FlowConfig:
|
|||
else:
|
||||
interfaces = {}
|
||||
|
||||
await self.config.get("flow").put(
|
||||
msg.flow_id,
|
||||
await self.config.put(
|
||||
"flow", msg.flow_id,
|
||||
json.dumps({
|
||||
"description": msg.description,
|
||||
"blueprint-name": msg.blueprint_name,
|
||||
|
|
@ -268,23 +279,131 @@ class FlowConfig:
|
|||
})
|
||||
)
|
||||
|
||||
await self.config.inc_version()
|
||||
|
||||
await self.config.push(types=["active-flow", "flow"])
|
||||
|
||||
return FlowResponse(
|
||||
error = None,
|
||||
)
|
||||
|
||||
|
||||
async def ensure_existing_flow_queues(self):
|
||||
"""Ensure queues exist for all already-running flows.
|
||||
|
||||
Called on startup to handle flows that were started before this
|
||||
version of the flow service was deployed, or before a restart.
|
||||
"""
|
||||
flow_ids = await self.config.keys("flow")
|
||||
|
||||
for flow_id in flow_ids:
|
||||
try:
|
||||
flow_data = await self.config.get("flow", flow_id)
|
||||
if flow_data is None:
|
||||
continue
|
||||
|
||||
flow = json.loads(flow_data)
|
||||
|
||||
blueprint_name = flow.get("blueprint-name")
|
||||
if blueprint_name is None:
|
||||
continue
|
||||
|
||||
# Skip flows that are mid-shutdown
|
||||
if flow.get("status") == "stopping":
|
||||
continue
|
||||
|
||||
parameters = flow.get("parameters", {})
|
||||
|
||||
blueprint_data = await self.config.get(
|
||||
"flow-blueprint", blueprint_name
|
||||
)
|
||||
if blueprint_data is None:
|
||||
logger.warning(
|
||||
f"Blueprint '{blueprint_name}' not found for "
|
||||
f"flow '{flow_id}', skipping queue creation"
|
||||
)
|
||||
continue
|
||||
|
||||
cls = json.loads(blueprint_data)
|
||||
|
||||
def repl_template(tmp):
|
||||
result = tmp.replace(
|
||||
"{blueprint}", blueprint_name
|
||||
).replace(
|
||||
"{id}", flow_id
|
||||
)
|
||||
for param_name, param_value in parameters.items():
|
||||
result = result.replace(
|
||||
f"{{{param_name}}}", str(param_value)
|
||||
)
|
||||
return result
|
||||
|
||||
queues = self._collect_flow_queues(cls, repl_template)
|
||||
for topic, subscription in queues:
|
||||
await self.pubsub.ensure_queue(topic, subscription)
|
||||
|
||||
logger.info(
|
||||
f"Ensured queues for existing flow '{flow_id}'"
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
f"Failed to ensure queues for flow '{flow_id}': {e}"
|
||||
)
|
||||
|
||||
def _collect_flow_queues(self, cls, repl_template):
|
||||
"""Collect (topic, subscription) pairs for all flow-level queues.
|
||||
|
||||
Iterates the blueprint's "flow" section and reads only the
|
||||
"topics" dict from each processor entry.
|
||||
"""
|
||||
queues = []
|
||||
|
||||
for k, v in cls["flow"].items():
|
||||
processor, variant = k.split(":", 1)
|
||||
variant = repl_template(variant)
|
||||
|
||||
for spec_name, topic_template in v.get("topics", {}).items():
|
||||
topic = repl_template(topic_template)
|
||||
subscription = f"{processor}--{variant}--{spec_name}"
|
||||
queues.append((topic, subscription))
|
||||
|
||||
return queues
|
||||
|
||||
async def _delete_queues(self, queues):
|
||||
"""Delete queues with retries. Best-effort — logs failures but
|
||||
does not raise."""
|
||||
for attempt in range(DELETE_RETRIES):
|
||||
remaining = []
|
||||
|
||||
for topic, subscription in queues:
|
||||
try:
|
||||
await self.pubsub.delete_queue(topic, subscription)
|
||||
except Exception as e:
|
||||
logger.warning(
|
||||
f"Queue delete failed (attempt {attempt + 1}/"
|
||||
f"{DELETE_RETRIES}): {topic}: {e}"
|
||||
)
|
||||
remaining.append((topic, subscription))
|
||||
|
||||
if not remaining:
|
||||
return
|
||||
|
||||
queues = remaining
|
||||
|
||||
if attempt < DELETE_RETRIES - 1:
|
||||
await asyncio.sleep(DELETE_RETRY_DELAY)
|
||||
|
||||
for topic, subscription in queues:
|
||||
logger.error(
|
||||
f"Failed to delete queue after {DELETE_RETRIES} "
|
||||
f"attempts: {topic}"
|
||||
)
|
||||
|
||||
async def handle_stop_flow(self, msg):
|
||||
|
||||
if msg.flow_id is None:
|
||||
raise RuntimeError("No flow ID")
|
||||
|
||||
if msg.flow_id not in await self.config.get("flow").keys():
|
||||
if msg.flow_id not in await self.config.keys("flow"):
|
||||
raise RuntimeError("Flow ID invalid")
|
||||
|
||||
flow = json.loads(await self.config.get("flow").get(msg.flow_id))
|
||||
flow = json.loads(await self.config.get("flow", msg.flow_id))
|
||||
|
||||
if "blueprint-name" not in flow:
|
||||
raise RuntimeError("Internal error: flow has no flow blueprint")
|
||||
|
|
@ -292,7 +411,9 @@ class FlowConfig:
|
|||
blueprint_name = flow["blueprint-name"]
|
||||
parameters = flow.get("parameters", {})
|
||||
|
||||
cls = json.loads(await self.config.get("flow-blueprint").get(blueprint_name))
|
||||
cls = json.loads(
|
||||
await self.config.get("flow-blueprint", blueprint_name)
|
||||
)
|
||||
|
||||
def repl_template(tmp):
|
||||
result = tmp.replace(
|
||||
|
|
@ -305,34 +426,33 @@ class FlowConfig:
|
|||
result = result.replace(f"{{{param_name}}}", str(param_value))
|
||||
return result
|
||||
|
||||
for kind in ("flow",):
|
||||
# Collect queue identifiers before removing config
|
||||
queues = self._collect_flow_queues(cls, repl_template)
|
||||
|
||||
for k, v in cls[kind].items():
|
||||
# Phase 1: Set status to "stopping" and remove processor config.
|
||||
# The config push tells processors to shut down their consumers.
|
||||
flow["status"] = "stopping"
|
||||
await self.config.put(
|
||||
"flow", msg.flow_id, json.dumps(flow)
|
||||
)
|
||||
|
||||
processor, variant = k.split(":", 1)
|
||||
# Delete all processor config entries for this flow.
|
||||
deletes = []
|
||||
|
||||
variant = repl_template(variant)
|
||||
for k, v in cls["flow"].items():
|
||||
|
||||
flac = await self.config.get("active-flow").get(processor)
|
||||
processor, variant = k.split(":", 1)
|
||||
variant = repl_template(variant)
|
||||
|
||||
if flac is not None:
|
||||
target = json.loads(flac)
|
||||
else:
|
||||
target = {}
|
||||
deletes.append((f"processor:{processor}", variant))
|
||||
|
||||
if variant in target:
|
||||
del target[variant]
|
||||
await self.config.delete_many(deletes)
|
||||
|
||||
await self.config.get("active-flow").put(
|
||||
processor, json.dumps(target)
|
||||
)
|
||||
# Phase 2: Delete queues with retries, then remove the flow record.
|
||||
await self._delete_queues(queues)
|
||||
|
||||
if msg.flow_id in await self.config.get("flow").keys():
|
||||
await self.config.get("flow").delete(msg.flow_id)
|
||||
|
||||
await self.config.inc_version()
|
||||
|
||||
await self.config.push(types=["active-flow", "flow"])
|
||||
if msg.flow_id in await self.config.keys("flow"):
|
||||
await self.config.delete("flow", msg.flow_id)
|
||||
|
||||
return FlowResponse(
|
||||
error = None,
|
||||
|
|
@ -368,4 +488,3 @@ class FlowConfig:
|
|||
)
|
||||
|
||||
return resp
|
||||
|
||||
162
trustgraph-flow/trustgraph/flow/service/service.py
Normal file
162
trustgraph-flow/trustgraph/flow/service/service.py
Normal file
|
|
@ -0,0 +1,162 @@
|
|||
|
||||
"""
|
||||
Flow service. Manages flow lifecycle — starting and stopping flows
|
||||
by coordinating with the config service via pub/sub.
|
||||
"""
|
||||
|
||||
import logging
|
||||
|
||||
from trustgraph.schema import Error
|
||||
|
||||
from trustgraph.schema import FlowRequest, FlowResponse
|
||||
from trustgraph.schema import flow_request_queue, flow_response_queue
|
||||
from trustgraph.schema import ConfigRequest, ConfigResponse
|
||||
from trustgraph.schema import config_request_queue, config_response_queue
|
||||
|
||||
from trustgraph.base import AsyncProcessor, Consumer, Producer
|
||||
from trustgraph.base import ConsumerMetrics, ProducerMetrics, SubscriberMetrics
|
||||
from trustgraph.base import ConfigClient
|
||||
|
||||
from . flow import FlowConfig
|
||||
|
||||
# Module logger
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
default_ident = "flow-svc"
|
||||
|
||||
default_flow_request_queue = flow_request_queue
|
||||
default_flow_response_queue = flow_response_queue
|
||||
|
||||
|
||||
class Processor(AsyncProcessor):
|
||||
|
||||
def __init__(self, **params):
|
||||
|
||||
flow_request_queue = params.get(
|
||||
"flow_request_queue", default_flow_request_queue
|
||||
)
|
||||
flow_response_queue = params.get(
|
||||
"flow_response_queue", default_flow_response_queue
|
||||
)
|
||||
|
||||
id = params.get("id")
|
||||
|
||||
super(Processor, self).__init__(
|
||||
**params | {
|
||||
"flow_request_schema": FlowRequest.__name__,
|
||||
"flow_response_schema": FlowResponse.__name__,
|
||||
}
|
||||
)
|
||||
|
||||
flow_request_metrics = ConsumerMetrics(
|
||||
processor = self.id, flow = None, name = "flow-request"
|
||||
)
|
||||
flow_response_metrics = ProducerMetrics(
|
||||
processor = self.id, flow = None, name = "flow-response"
|
||||
)
|
||||
|
||||
self.flow_request_topic = flow_request_queue
|
||||
self.flow_request_subscriber = id
|
||||
|
||||
self.flow_request_consumer = Consumer(
|
||||
taskgroup = self.taskgroup,
|
||||
backend = self.pubsub,
|
||||
flow = None,
|
||||
topic = flow_request_queue,
|
||||
subscriber = id,
|
||||
schema = FlowRequest,
|
||||
handler = self.on_flow_request,
|
||||
metrics = flow_request_metrics,
|
||||
)
|
||||
|
||||
self.flow_response_producer = Producer(
|
||||
backend = self.pubsub,
|
||||
topic = flow_response_queue,
|
||||
schema = FlowResponse,
|
||||
metrics = flow_response_metrics,
|
||||
)
|
||||
|
||||
config_req_metrics = ProducerMetrics(
|
||||
processor=self.id, flow=None, name="config-request",
|
||||
)
|
||||
config_resp_metrics = SubscriberMetrics(
|
||||
processor=self.id, flow=None, name="config-response",
|
||||
)
|
||||
|
||||
self.config_client = ConfigClient(
|
||||
backend=self.pubsub,
|
||||
subscription=f"{self.id}--config--{id}",
|
||||
consumer_name=self.id,
|
||||
request_topic=config_request_queue,
|
||||
request_schema=ConfigRequest,
|
||||
request_metrics=config_req_metrics,
|
||||
response_topic=config_response_queue,
|
||||
response_schema=ConfigResponse,
|
||||
response_metrics=config_resp_metrics,
|
||||
)
|
||||
|
||||
self.flow = FlowConfig(self.config_client, self.pubsub)
|
||||
|
||||
logger.info("Flow service initialized")
|
||||
|
||||
async def start(self):
|
||||
|
||||
await self.pubsub.ensure_queue(
|
||||
self.flow_request_topic, self.flow_request_subscriber
|
||||
)
|
||||
await self.config_client.start()
|
||||
await self.flow.ensure_existing_flow_queues()
|
||||
await self.flow_request_consumer.start()
|
||||
|
||||
async def on_flow_request(self, msg, consumer, flow):
|
||||
|
||||
try:
|
||||
|
||||
v = msg.value()
|
||||
|
||||
# Sender-produced ID
|
||||
id = msg.properties()["id"]
|
||||
|
||||
logger.debug(f"Handling flow request {id}...")
|
||||
|
||||
resp = await self.flow.handle(v)
|
||||
|
||||
await self.flow_response_producer.send(
|
||||
resp, properties={"id": id}
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
|
||||
logger.error(f"Flow request failed: {e}")
|
||||
|
||||
resp = FlowResponse(
|
||||
error=Error(
|
||||
type = "flow-error",
|
||||
message = str(e),
|
||||
),
|
||||
)
|
||||
|
||||
await self.flow_response_producer.send(
|
||||
resp, properties={"id": id}
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def add_args(parser):
|
||||
|
||||
AsyncProcessor.add_args(parser)
|
||||
|
||||
parser.add_argument(
|
||||
'--flow-request-queue',
|
||||
default=default_flow_request_queue,
|
||||
help=f'Flow request queue (default: {default_flow_request_queue})'
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
'--flow-response-queue',
|
||||
default=default_flow_response_queue,
|
||||
help=f'Flow response queue {default_flow_response_queue}',
|
||||
)
|
||||
|
||||
def run():
|
||||
|
||||
Processor.launch(default_ident, __doc__)
|
||||
|
|
@ -54,7 +54,7 @@ class ConfigReceiver:
|
|||
return
|
||||
|
||||
# Gateway cares about flow config
|
||||
if notify_types and "flow" not in notify_types and "active-flow" not in notify_types:
|
||||
if notify_types and "flow" not in notify_types:
|
||||
logger.debug(
|
||||
f"Ignoring config notify v{notify_version}, "
|
||||
f"no flow types in {notify_types}"
|
||||
|
|
|
|||
|
|
@ -226,7 +226,7 @@ class DispatcherManager:
|
|||
raise RuntimeError("This kind not supported by flow")
|
||||
|
||||
# FIXME: The -store bit, does it make sense?
|
||||
qconfig = intf_defs[int_kind]
|
||||
qconfig = intf_defs[int_kind]["flow"]
|
||||
|
||||
id = str(uuid.uuid4())
|
||||
dispatcher = import_dispatchers[kind](
|
||||
|
|
@ -264,7 +264,7 @@ class DispatcherManager:
|
|||
if int_kind not in intf_defs:
|
||||
raise RuntimeError("This kind not supported by flow")
|
||||
|
||||
qconfig = intf_defs[int_kind]
|
||||
qconfig = intf_defs[int_kind]["flow"]
|
||||
|
||||
id = str(uuid.uuid4())
|
||||
dispatcher = export_dispatchers[kind](
|
||||
|
|
@ -320,7 +320,7 @@ class DispatcherManager:
|
|||
elif kind in sender_dispatchers:
|
||||
dispatcher = sender_dispatchers[kind](
|
||||
backend = self.backend,
|
||||
queue = qconfig,
|
||||
queue = qconfig["flow"],
|
||||
)
|
||||
else:
|
||||
raise RuntimeError("Invalid kind")
|
||||
|
|
|
|||
|
|
@ -162,6 +162,9 @@ class Processor(AsyncProcessor):
|
|||
processor = self.id, flow = None, name = "storage-response"
|
||||
)
|
||||
|
||||
self.librarian_request_topic = librarian_request_queue
|
||||
self.librarian_request_subscriber = id
|
||||
|
||||
self.librarian_request_consumer = Consumer(
|
||||
taskgroup = self.taskgroup,
|
||||
backend = self.pubsub,
|
||||
|
|
@ -180,6 +183,9 @@ class Processor(AsyncProcessor):
|
|||
metrics = librarian_response_metrics,
|
||||
)
|
||||
|
||||
self.collection_request_topic = collection_request_queue
|
||||
self.collection_request_subscriber = id
|
||||
|
||||
self.collection_request_consumer = Consumer(
|
||||
taskgroup = self.taskgroup,
|
||||
backend = self.pubsub,
|
||||
|
|
@ -248,7 +254,7 @@ class Processor(AsyncProcessor):
|
|||
|
||||
self.register_config_handler(
|
||||
self.on_librarian_config,
|
||||
types=["flow", "active-flow"],
|
||||
types=["flow"],
|
||||
)
|
||||
|
||||
self.flows = {}
|
||||
|
|
@ -257,6 +263,12 @@ class Processor(AsyncProcessor):
|
|||
|
||||
async def start(self):
|
||||
|
||||
await self.pubsub.ensure_queue(
|
||||
self.librarian_request_topic, self.librarian_request_subscriber
|
||||
)
|
||||
await self.pubsub.ensure_queue(
|
||||
self.collection_request_topic, self.collection_request_subscriber
|
||||
)
|
||||
await super(Processor, self).start()
|
||||
await self.librarian_request_consumer.start()
|
||||
await self.librarian_response_producer.start()
|
||||
|
|
@ -365,12 +377,12 @@ class Processor(AsyncProcessor):
|
|||
else:
|
||||
kind = "document-load"
|
||||
|
||||
q = flow["interfaces"][kind]
|
||||
q = flow["interfaces"][kind]["flow"]
|
||||
|
||||
# Emit document provenance to knowledge graph
|
||||
if "triples-store" in flow["interfaces"]:
|
||||
await self.emit_document_provenance(
|
||||
document, processing, flow["interfaces"]["triples-store"]
|
||||
document, processing, flow["interfaces"]["triples-store"]["flow"]
|
||||
)
|
||||
|
||||
if kind == "text-load":
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue