Fix crash

Cyber MacGeddon 2025-11-12 16:15:22 +00:00
parent dfd7ad3a56
commit d624247dbd
2 changed files with 30 additions and 6 deletions


@@ -88,6 +88,7 @@ class Processor(FlowProcessor):
        # Track loaded ontology version
        self.current_ontology_version = None
        self.loaded_ontology_ids = set()
        self.pending_config = None  # Store config until components initialized

    async def initialize_components(self, flow):
        """Initialize OntoRAG components."""
@@ -124,8 +125,12 @@ class Processor(FlowProcessor):
            self.initialized = True
            logger.info("OntoRAG components initialized successfully")

            # NOTE: Ontologies will be loaded via on_ontology_config() handler
            # when ConfigPush messages arrive (including initial config on startup)
            # Process pending config if available
            if self.pending_config:
                logger.info("Processing pending config from startup")
                config, version = self.pending_config
                self.pending_config = None
                await self.on_ontology_config(config, version)

        except Exception as e:
            logger.error(f"Failed to initialize OntoRAG components: {e}", exc_info=True)
@@ -156,6 +161,12 @@ class Processor(FlowProcessor):
            logger.warning("No 'ontology' section in config")
            return

        # Check if components are initialized
        if not self.ontology_loader:
            logger.debug("Components not yet initialized, storing config for later processing")
            self.pending_config = (config, version)
            return

        ontology_configs = config["ontology"]

        # Parse ontology definitions
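
The change in this first file defers ontology config handling until the components exist: on_ontology_config() buffers the (config, version) pair in pending_config while ontology_loader is still None, and initialize_components() replays it once setup succeeds. Below is a minimal standalone sketch of that buffering pattern; the stripped-down Processor body and the object() stand-in for the loader are illustrative assumptions, not the full implementation from this repository.

import asyncio
import logging

logger = logging.getLogger(__name__)


class Processor:
    """Stripped-down sketch of the deferred-config pattern (hypothetical)."""

    def __init__(self):
        self.ontology_loader = None   # created later by initialize_components()
        self.pending_config = None    # holds (config, version) that arrived too early

    async def initialize_components(self):
        self.ontology_loader = object()   # stand-in for the real loader component
        # Replay any config that was pushed before the components existed.
        if self.pending_config:
            config, version = self.pending_config
            self.pending_config = None
            await self.on_ontology_config(config, version)

    async def on_ontology_config(self, config, version):
        if not self.ontology_loader:
            # Components not ready yet: buffer instead of dereferencing a None loader.
            self.pending_config = (config, version)
            return
        logger.info("Applying ontology config version %s", version)
        # ... parse config["ontology"] and load ontologies here ...


async def main():
    p = Processor()
    await p.on_ontology_config({"ontology": {}}, version=1)  # arrives before init
    await p.initialize_components()                          # pending config is replayed

asyncio.run(main())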


@@ -14,9 +14,16 @@ logger = logging.getLogger(__name__)

# Ensure required NLTK data is downloaded
try:
    nltk.data.find('tokenizers/punkt')
    nltk.data.find('tokenizers/punkt_tab')
except LookupError:
    nltk.download('punkt', quiet=True)
    try:
        nltk.download('punkt_tab', quiet=True)
    except:
        # Fallback to older punkt if punkt_tab not available
        try:
            nltk.download('punkt', quiet=True)
        except:
            pass

try:
    nltk.data.find('taggers/averaged_perceptron_tagger')
@@ -44,8 +51,14 @@ class SentenceSplitter:

    def __init__(self):
        """Initialize sentence splitter."""
        self.sent_detector = nltk.data.load('tokenizers/punkt/english.pickle')
        logger.info("Using NLTK sentence tokenizer")
        try:
            # Try newer punkt_tab first
            self.sent_detector = nltk.data.load('tokenizers/punkt_tab/english/')
            logger.info("Using NLTK sentence tokenizer (punkt_tab)")
        except:
            # Fallback to older punkt
            self.sent_detector = nltk.data.load('tokenizers/punkt/english.pickle')
            logger.info("Using NLTK sentence tokenizer (punkt)")

    def split(self, text: str) -> List[str]:
        """Split text into sentences.