mirror of
https://github.com/trustgraph-ai/trustgraph.git
synced 2026-05-04 12:52:36 +02:00
Fix dimension stuff
This commit is contained in:
parent
92647cf088
commit
e00d69fb82
2 changed files with 22 additions and 10 deletions
|
|
@@ -93,7 +93,8 @@ class Processor(FlowProcessor):
|
|||
"""Initialize per-flow OntoRAG components.
|
||||
|
||||
Each flow gets its own vector store and embedder to support
|
||||
different embedding models across flows.
|
||||
different embedding models across flows. The vector store dimension
|
||||
is auto-detected from the embeddings service.
|
||||
|
||||
Args:
|
||||
flow: Flow object for this processing context
|
||||
|
|
@@ -110,15 +111,22 @@ class Processor(FlowProcessor):
|
|||
try:
|
||||
logger.info(f"Initializing components for flow {flow_id}")
|
||||
|
||||
# Initialize vector store (FAISS only, no fallback)
|
||||
vector_store = InMemoryVectorStore(
|
||||
dimension=1536, # text-embedding-3-small
|
||||
index_type='flat'
|
||||
)
|
||||
|
||||
# Use embeddings client directly (no wrapper needed)
|
||||
embeddings_client = flow("embeddings-request")
|
||||
|
||||
# Detect embedding dimension by embedding a test string
|
||||
logger.info("Detecting embedding dimension from embeddings service...")
|
||||
test_embedding_response = await embeddings_client.embed("test")
|
||||
test_embedding = test_embedding_response[0] # Extract from [[vector]]
|
||||
dimension = len(test_embedding)
|
||||
logger.info(f"Detected embedding dimension: {dimension}")
|
||||
|
||||
# Initialize vector store with detected dimension
|
||||
vector_store = InMemoryVectorStore(
|
||||
dimension=dimension,
|
||||
index_type='flat'
|
||||
)
|
||||
|
||||
ontology_embedder = OntologyEmbedder(
|
||||
embedding_service=embeddings_client,
|
||||
vector_store=vector_store
|
||||
|
|
@@ -143,10 +151,11 @@ class Processor(FlowProcessor):
|
|||
self.flow_components[flow_id] = {
|
||||
'embedder': ontology_embedder,
|
||||
'vector_store': vector_store,
|
||||
'selector': ontology_selector
|
||||
'selector': ontology_selector,
|
||||
'dimension': dimension
|
||||
}
|
||||
|
||||
logger.info(f"Flow {flow_id} components initialized successfully")
|
||||
logger.info(f"Flow {flow_id} components initialized successfully (dimension={dimension})")
|
||||
return flow_id
|
||||
|
||||
except Exception as e:
|
||||
|
|
|
|||
|
|
@@ -164,6 +164,9 @@ class OntologyEmbedder:
|
|||
# Convert to numpy array
|
||||
embeddings = np.array(embeddings_list)
|
||||
|
||||
# Log embedding shape for debugging
|
||||
logger.debug(f"Embeddings shape: {embeddings.shape}, expected: ({len(batch)}, {self.vector_store.dimension})")
|
||||
|
||||
# Store in vector store
|
||||
ids = [elem['id'] for elem in batch]
|
||||
metadata_list = [elem['metadata'] for elem in batch]
|
||||
|
|
@@ -174,7 +177,7 @@ class OntologyEmbedder:
|
|||
logger.debug(f"Embedded batch of {len(batch)} elements from ontology {ontology.id}")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to embed batch for ontology {ontology.id}: {e}")
|
||||
logger.error(f"Failed to embed batch for ontology {ontology.id}: {e}", exc_info=True)
|
||||
|
||||
self.embedded_ontologies.add(ontology.id)
|
||||
logger.info(f"Embedded {embedded_count} elements from ontology {ontology.id}")
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue