Fix NLTK tagger resource lookup and switch embedding calls to per-text embed()

This commit is contained in:
Cyber MacGeddon 2025-11-12 16:50:47 +00:00
parent 0410e7dc9d
commit 4fb602242d
2 changed files with 20 additions and 6 deletions

View file

@@ -3,6 +3,7 @@ Ontology embedder component for OntoRAG system.
Generates and stores embeddings for ontology elements.
"""
import asyncio
import logging
import numpy as np
from typing import Dict, List, Any, Optional
@@ -152,8 +153,12 @@ class OntologyEmbedder:
# Get embeddings for batch
texts = [elem['text'] for elem in batch]
try:
# Call embedding service (async)
embeddings = await self.embedding_service.embed_batch(texts)
# Call embedding service for each text (EmbeddingsClient.embed() is single-text)
embedding_tasks = [self.embedding_service.embed(text) for text in texts]
embeddings_list = await asyncio.gather(*embedding_tasks)
# Convert to numpy array
embeddings = np.array(embeddings_list)
# Store in vector store
ids = [elem['id'] for elem in batch]
@@ -226,8 +231,10 @@ class OntologyEmbedder:
return None
try:
embeddings = await self.embedding_service.embed_batch(texts)
return embeddings
# EmbeddingsClient.embed() is single-text, so call in parallel
embedding_tasks = [self.embedding_service.embed(text) for text in texts]
embeddings_list = await asyncio.gather(*embedding_tasks)
return np.array(embeddings_list)
except Exception as e:
logger.error(f"Failed to embed texts: {e}")
return None

View file

@@ -26,9 +26,16 @@ except LookupError:
pass
try:
nltk.data.find('taggers/averaged_perceptron_tagger')
nltk.data.find('taggers/averaged_perceptron_tagger_eng')
except LookupError:
nltk.download('averaged_perceptron_tagger', quiet=True)
try:
nltk.download('averaged_perceptron_tagger_eng', quiet=True)
except:
# Fall back to the older resource name
try:
nltk.download('averaged_perceptron_tagger', quiet=True)
except:
pass
try:
nltk.data.find('corpora/stopwords')