mirror of
https://github.com/trustgraph-ai/trustgraph.git
synced 2026-05-05 21:32:37 +02:00
Fix nltk stuff
This commit is contained in:
parent
0410e7dc9d
commit
4fb602242d
2 changed files with 20 additions and 6 deletions
|
|
@@ -3,6 +3,7 @@ Ontology embedder component for OntoRAG system.
|
||||||
Generates and stores embeddings for ontology elements.
|
Generates and stores embeddings for ontology elements.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
import asyncio
|
||||||
import logging
|
import logging
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from typing import Dict, List, Any, Optional
|
from typing import Dict, List, Any, Optional
|
||||||
|
|
@@ -152,8 +153,12 @@ class OntologyEmbedder:
|
||||||
# Get embeddings for batch
|
# Get embeddings for batch
|
||||||
texts = [elem['text'] for elem in batch]
|
texts = [elem['text'] for elem in batch]
|
||||||
try:
|
try:
|
||||||
# Call embedding service (async)
|
# Call embedding service for each text (EmbeddingsClient.embed() is single-text)
|
||||||
embeddings = await self.embedding_service.embed_batch(texts)
|
embedding_tasks = [self.embedding_service.embed(text) for text in texts]
|
||||||
|
embeddings_list = await asyncio.gather(*embedding_tasks)
|
||||||
|
|
||||||
|
# Convert to numpy array
|
||||||
|
embeddings = np.array(embeddings_list)
|
||||||
|
|
||||||
# Store in vector store
|
# Store in vector store
|
||||||
ids = [elem['id'] for elem in batch]
|
ids = [elem['id'] for elem in batch]
|
||||||
|
|
@@ -226,8 +231,10 @@ class OntologyEmbedder:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
try:
|
try:
|
||||||
embeddings = await self.embedding_service.embed_batch(texts)
|
# EmbeddingsClient.embed() is single-text, so call in parallel
|
||||||
return embeddings
|
embedding_tasks = [self.embedding_service.embed(text) for text in texts]
|
||||||
|
embeddings_list = await asyncio.gather(*embedding_tasks)
|
||||||
|
return np.array(embeddings_list)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Failed to embed texts: {e}")
|
logger.error(f"Failed to embed texts: {e}")
|
||||||
return None
|
return None
|
||||||
|
|
|
||||||
|
|
@@ -26,9 +26,16 @@ except LookupError:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
try:
|
try:
|
||||||
nltk.data.find('taggers/averaged_perceptron_tagger')
|
nltk.data.find('taggers/averaged_perceptron_tagger_eng')
|
||||||
except LookupError:
|
except LookupError:
|
||||||
|
try:
|
||||||
|
nltk.download('averaged_perceptron_tagger_eng', quiet=True)
|
||||||
|
except:
|
||||||
|
# Fallback to older name
|
||||||
|
try:
|
||||||
nltk.download('averaged_perceptron_tagger', quiet=True)
|
nltk.download('averaged_perceptron_tagger', quiet=True)
|
||||||
|
except:
|
||||||
|
pass
|
||||||
|
|
||||||
try:
|
try:
|
||||||
nltk.data.find('corpora/stopwords')
|
nltk.data.find('corpora/stopwords')
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue