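Fix crash (defer ontology config until components are initialized; prefer NLTK punkt_tab with punkt fallback)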

This commit is contained in:
Cyber MacGeddon 2025-11-12 16:15:22 +00:00
parent dfd7ad3a56
commit d624247dbd
2 changed files with 30 additions and 6 deletions

View file

@ -88,6 +88,7 @@ class Processor(FlowProcessor):
# Track loaded ontology version # Track loaded ontology version
self.current_ontology_version = None self.current_ontology_version = None
self.loaded_ontology_ids = set() self.loaded_ontology_ids = set()
self.pending_config = None # Store config until components initialized
async def initialize_components(self, flow): async def initialize_components(self, flow):
"""Initialize OntoRAG components.""" """Initialize OntoRAG components."""
@ -124,8 +125,12 @@ class Processor(FlowProcessor):
self.initialized = True self.initialized = True
logger.info("OntoRAG components initialized successfully") logger.info("OntoRAG components initialized successfully")
# NOTE: Ontologies will be loaded via on_ontology_config() handler # Process pending config if available
# when ConfigPush messages arrive (including initial config on startup) if self.pending_config:
logger.info("Processing pending config from startup")
config, version = self.pending_config
self.pending_config = None
await self.on_ontology_config(config, version)
except Exception as e: except Exception as e:
logger.error(f"Failed to initialize OntoRAG components: {e}", exc_info=True) logger.error(f"Failed to initialize OntoRAG components: {e}", exc_info=True)
@ -156,6 +161,12 @@ class Processor(FlowProcessor):
logger.warning("No 'ontology' section in config") logger.warning("No 'ontology' section in config")
return return
# Check if components are initialized
if not self.ontology_loader:
logger.debug("Components not yet initialized, storing config for later processing")
self.pending_config = (config, version)
return
ontology_configs = config["ontology"] ontology_configs = config["ontology"]
# Parse ontology definitions # Parse ontology definitions

View file

@ -14,9 +14,16 @@ logger = logging.getLogger(__name__)
# Ensure required NLTK data is downloaded # Ensure required NLTK data is downloaded
try: try:
nltk.data.find('tokenizers/punkt') nltk.data.find('tokenizers/punkt_tab')
except LookupError: except LookupError:
nltk.download('punkt', quiet=True) try:
nltk.download('punkt_tab', quiet=True)
except:
# Fallback to older punkt if punkt_tab not available
try:
nltk.download('punkt', quiet=True)
except:
pass
try: try:
nltk.data.find('taggers/averaged_perceptron_tagger') nltk.data.find('taggers/averaged_perceptron_tagger')
@ -44,8 +51,14 @@ class SentenceSplitter:
def __init__(self): def __init__(self):
"""Initialize sentence splitter.""" """Initialize sentence splitter."""
self.sent_detector = nltk.data.load('tokenizers/punkt/english.pickle') try:
logger.info("Using NLTK sentence tokenizer") # Try newer punkt_tab first
self.sent_detector = nltk.data.load('tokenizers/punkt_tab/english/')
logger.info("Using NLTK sentence tokenizer (punkt_tab)")
except:
# Fallback to older punkt
self.sent_detector = nltk.data.load('tokenizers/punkt/english.pickle')
logger.info("Using NLTK sentence tokenizer (punkt)")
def split(self, text: str) -> List[str]: def split(self, text: str) -> List[str]:
"""Split text into sentences. """Split text into sentences.