mirror of
https://github.com/trustgraph-ai/trustgraph.git
synced 2026-05-01 03:16:23 +02:00
Fix flow lifetimes
This commit is contained in:
parent
d624247dbd
commit
0410e7dc9d
1 changed files with 54 additions and 51 deletions
|
|
@ -74,12 +74,12 @@ class Processor(FlowProcessor):
|
||||||
# Register config handler for ontology updates
|
# Register config handler for ontology updates
|
||||||
self.register_config_handler(self.on_ontology_config)
|
self.register_config_handler(self.on_ontology_config)
|
||||||
|
|
||||||
# Initialize components
|
# Shared components (not flow-specific)
|
||||||
self.ontology_loader = None
|
self.ontology_loader = OntologyLoader()
|
||||||
self.ontology_embedder = None
|
|
||||||
self.text_processor = TextProcessor()
|
self.text_processor = TextProcessor()
|
||||||
self.ontology_selector = None
|
|
||||||
self.initialized = False
|
# Per-flow components (each flow gets its own embedder/vector store/selector)
|
||||||
|
self.flow_components = {} # flow_id -> {embedder, vector_store, selector}
|
||||||
|
|
||||||
# Configuration
|
# Configuration
|
||||||
self.top_k = params.get("top_k", 10)
|
self.top_k = params.get("top_k", 10)
|
||||||
|
|
@ -88,17 +88,27 @@ class Processor(FlowProcessor):
|
||||||
# Track loaded ontology version
|
# Track loaded ontology version
|
||||||
self.current_ontology_version = None
|
self.current_ontology_version = None
|
||||||
self.loaded_ontology_ids = set()
|
self.loaded_ontology_ids = set()
|
||||||
self.pending_config = None # Store config until components initialized
|
|
||||||
|
|
||||||
async def initialize_components(self, flow):
|
async def initialize_flow_components(self, flow):
|
||||||
"""Initialize OntoRAG components."""
|
"""Initialize per-flow OntoRAG components.
|
||||||
if self.initialized:
|
|
||||||
return
|
Each flow gets its own vector store and embedder to support
|
||||||
|
different embedding models across flows.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
flow: Flow object for this processing context
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
flow_id: Identifier for this flow's components
|
||||||
|
"""
|
||||||
|
# Use flow object as identifier
|
||||||
|
flow_id = id(flow)
|
||||||
|
|
||||||
|
if flow_id in self.flow_components:
|
||||||
|
return flow_id # Already initialized for this flow
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# Initialize ontology loader (no ConfigTableStore needed)
|
logger.info(f"Initializing components for flow {flow_id}")
|
||||||
self.ontology_loader = OntologyLoader()
|
|
||||||
logger.info("Ontology loader initialized")
|
|
||||||
|
|
||||||
# Initialize vector store (FAISS only, no fallback)
|
# Initialize vector store (FAISS only, no fallback)
|
||||||
vector_store = InMemoryVectorStore(
|
vector_store = InMemoryVectorStore(
|
||||||
|
|
@ -109,37 +119,46 @@ class Processor(FlowProcessor):
|
||||||
# Use embeddings client directly (no wrapper needed)
|
# Use embeddings client directly (no wrapper needed)
|
||||||
embeddings_client = flow("embeddings-request")
|
embeddings_client = flow("embeddings-request")
|
||||||
|
|
||||||
self.ontology_embedder = OntologyEmbedder(
|
ontology_embedder = OntologyEmbedder(
|
||||||
embedding_service=embeddings_client,
|
embedding_service=embeddings_client,
|
||||||
vector_store=vector_store
|
vector_store=vector_store
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Embed all loaded ontologies for this flow
|
||||||
|
if self.ontology_loader.get_all_ontologies():
|
||||||
|
logger.info(f"Embedding ontologies for flow {flow_id}")
|
||||||
|
for ont_id, ontology in self.ontology_loader.get_all_ontologies().items():
|
||||||
|
await ontology_embedder.embed_ontology(ontology)
|
||||||
|
logger.info(f"Embedded {ontology_embedder.get_embedded_count()} ontology elements for flow {flow_id}")
|
||||||
|
|
||||||
# Initialize ontology selector
|
# Initialize ontology selector
|
||||||
self.ontology_selector = OntologySelector(
|
ontology_selector = OntologySelector(
|
||||||
ontology_embedder=self.ontology_embedder,
|
ontology_embedder=ontology_embedder,
|
||||||
ontology_loader=self.ontology_loader,
|
ontology_loader=self.ontology_loader,
|
||||||
top_k=self.top_k,
|
top_k=self.top_k,
|
||||||
similarity_threshold=self.similarity_threshold
|
similarity_threshold=self.similarity_threshold
|
||||||
)
|
)
|
||||||
|
|
||||||
self.initialized = True
|
# Store flow-specific components
|
||||||
logger.info("OntoRAG components initialized successfully")
|
self.flow_components[flow_id] = {
|
||||||
|
'embedder': ontology_embedder,
|
||||||
|
'vector_store': vector_store,
|
||||||
|
'selector': ontology_selector
|
||||||
|
}
|
||||||
|
|
||||||
# Process pending config if available
|
logger.info(f"Flow {flow_id} components initialized successfully")
|
||||||
if self.pending_config:
|
return flow_id
|
||||||
logger.info("Processing pending config from startup")
|
|
||||||
config, version = self.pending_config
|
|
||||||
self.pending_config = None
|
|
||||||
await self.on_ontology_config(config, version)
|
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Failed to initialize OntoRAG components: {e}", exc_info=True)
|
logger.error(f"Failed to initialize flow {flow_id} components: {e}", exc_info=True)
|
||||||
raise
|
raise
|
||||||
|
|
||||||
async def on_ontology_config(self, config, version):
|
async def on_ontology_config(self, config, version):
|
||||||
"""
|
"""
|
||||||
Handle ontology configuration updates from ConfigPush queue.
|
Handle ontology configuration updates from ConfigPush queue.
|
||||||
|
|
||||||
|
Parses and stores ontologies. Embedding happens per-flow on first message.
|
||||||
|
|
||||||
Called automatically when:
|
Called automatically when:
|
||||||
- Processor starts (gets full config history via start_of_messages=True)
|
- Processor starts (gets full config history via start_of_messages=True)
|
||||||
- Config service pushes updates (immediate event-driven notification)
|
- Config service pushes updates (immediate event-driven notification)
|
||||||
|
|
@ -161,12 +180,6 @@ class Processor(FlowProcessor):
|
||||||
logger.warning("No 'ontology' section in config")
|
logger.warning("No 'ontology' section in config")
|
||||||
return
|
return
|
||||||
|
|
||||||
# Check if components are initialized
|
|
||||||
if not self.ontology_loader:
|
|
||||||
logger.debug("Components not yet initialized, storing config for later processing")
|
|
||||||
self.pending_config = (config, version)
|
|
||||||
return
|
|
||||||
|
|
||||||
ontology_configs = config["ontology"]
|
ontology_configs = config["ontology"]
|
||||||
|
|
||||||
# Parse ontology definitions
|
# Parse ontology definitions
|
||||||
|
|
@ -196,20 +209,10 @@ class Processor(FlowProcessor):
|
||||||
# Update ontology loader's internal state
|
# Update ontology loader's internal state
|
||||||
self.ontology_loader.update_ontologies(ontologies)
|
self.ontology_loader.update_ontologies(ontologies)
|
||||||
|
|
||||||
# Re-embed changed ontologies
|
# Clear all flow components to force re-embedding with new ontologies
|
||||||
if self.ontology_embedder:
|
if added_ids or removed_ids or updated_ids:
|
||||||
# Remove embeddings for deleted ontologies
|
logger.info("Clearing flow components to trigger re-embedding")
|
||||||
for ont_id in removed_ids:
|
self.flow_components.clear()
|
||||||
self.ontology_embedder.remove_ontology(ont_id)
|
|
||||||
|
|
||||||
# Embed new and updated ontologies
|
|
||||||
for ont_id in added_ids | updated_ids:
|
|
||||||
if ont_id in self.ontology_loader.get_all_ontologies():
|
|
||||||
await self.ontology_embedder.embed_ontology(
|
|
||||||
self.ontology_loader.get_ontology(ont_id)
|
|
||||||
)
|
|
||||||
|
|
||||||
logger.info(f"Re-embedded ontologies, total elements: {self.ontology_embedder.get_embedded_count()}")
|
|
||||||
|
|
||||||
# Update tracking
|
# Update tracking
|
||||||
self.current_ontology_version = version
|
self.current_ontology_version = version
|
||||||
|
|
@ -225,9 +228,9 @@ class Processor(FlowProcessor):
|
||||||
v = msg.value()
|
v = msg.value()
|
||||||
logger.info(f"Extracting ontology-based triples from {v.metadata.id}...")
|
logger.info(f"Extracting ontology-based triples from {v.metadata.id}...")
|
||||||
|
|
||||||
# Initialize components if needed
|
# Initialize flow-specific components if needed
|
||||||
if not self.initialized:
|
flow_id = await self.initialize_flow_components(flow)
|
||||||
await self.initialize_components(flow)
|
components = self.flow_components[flow_id]
|
||||||
|
|
||||||
chunk = v.chunk.decode("utf-8")
|
chunk = v.chunk.decode("utf-8")
|
||||||
logger.debug(f"Processing chunk: {chunk[:200]}...")
|
logger.debug(f"Processing chunk: {chunk[:200]}...")
|
||||||
|
|
@ -237,8 +240,8 @@ class Processor(FlowProcessor):
|
||||||
segments = self.text_processor.process_chunk(chunk, extract_phrases=True)
|
segments = self.text_processor.process_chunk(chunk, extract_phrases=True)
|
||||||
logger.debug(f"Split chunk into {len(segments)} segments")
|
logger.debug(f"Split chunk into {len(segments)} segments")
|
||||||
|
|
||||||
# Select relevant ontology subset
|
# Select relevant ontology subset (using flow-specific selector)
|
||||||
ontology_subsets = await self.ontology_selector.select_ontology_subset(segments)
|
ontology_subsets = await components['selector'].select_ontology_subset(segments)
|
||||||
|
|
||||||
if not ontology_subsets:
|
if not ontology_subsets:
|
||||||
logger.warning("No relevant ontology elements found for chunk")
|
logger.warning("No relevant ontology elements found for chunk")
|
||||||
|
|
@ -252,7 +255,7 @@ class Processor(FlowProcessor):
|
||||||
|
|
||||||
# Merge subsets if multiple ontologies matched
|
# Merge subsets if multiple ontologies matched
|
||||||
if len(ontology_subsets) > 1:
|
if len(ontology_subsets) > 1:
|
||||||
ontology_subset = self.ontology_selector.merge_subsets(ontology_subsets)
|
ontology_subset = components['selector'].merge_subsets(ontology_subsets)
|
||||||
else:
|
else:
|
||||||
ontology_subset = ontology_subsets[0]
|
ontology_subset = ontology_subsets[0]
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue