mirror of
https://github.com/trustgraph-ai/trustgraph.git
synced 2026-05-11 16:22:37 +02:00
Fix dimension stuff
This commit is contained in:
parent
92647cf088
commit
e00d69fb82
2 changed files with 22 additions and 10 deletions
|
|
@@ -93,7 +93,8 @@ class Processor(FlowProcessor):
|
||||||
"""Initialize per-flow OntoRAG components.
|
"""Initialize per-flow OntoRAG components.
|
||||||
|
|
||||||
Each flow gets its own vector store and embedder to support
|
Each flow gets its own vector store and embedder to support
|
||||||
different embedding models across flows.
|
different embedding models across flows. The vector store dimension
|
||||||
|
is auto-detected from the embeddings service.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
flow: Flow object for this processing context
|
flow: Flow object for this processing context
|
||||||
|
|
@@ -110,15 +111,22 @@ class Processor(FlowProcessor):
|
||||||
try:
|
try:
|
||||||
logger.info(f"Initializing components for flow {flow_id}")
|
logger.info(f"Initializing components for flow {flow_id}")
|
||||||
|
|
||||||
# Initialize vector store (FAISS only, no fallback)
|
|
||||||
vector_store = InMemoryVectorStore(
|
|
||||||
dimension=1536, # text-embedding-3-small
|
|
||||||
index_type='flat'
|
|
||||||
)
|
|
||||||
|
|
||||||
# Use embeddings client directly (no wrapper needed)
|
# Use embeddings client directly (no wrapper needed)
|
||||||
embeddings_client = flow("embeddings-request")
|
embeddings_client = flow("embeddings-request")
|
||||||
|
|
||||||
|
# Detect embedding dimension by embedding a test string
|
||||||
|
logger.info("Detecting embedding dimension from embeddings service...")
|
||||||
|
test_embedding_response = await embeddings_client.embed("test")
|
||||||
|
test_embedding = test_embedding_response[0] # Extract from [[vector]]
|
||||||
|
dimension = len(test_embedding)
|
||||||
|
logger.info(f"Detected embedding dimension: {dimension}")
|
||||||
|
|
||||||
|
# Initialize vector store with detected dimension
|
||||||
|
vector_store = InMemoryVectorStore(
|
||||||
|
dimension=dimension,
|
||||||
|
index_type='flat'
|
||||||
|
)
|
||||||
|
|
||||||
ontology_embedder = OntologyEmbedder(
|
ontology_embedder = OntologyEmbedder(
|
||||||
embedding_service=embeddings_client,
|
embedding_service=embeddings_client,
|
||||||
vector_store=vector_store
|
vector_store=vector_store
|
||||||
|
|
@@ -143,10 +151,11 @@ class Processor(FlowProcessor):
|
||||||
self.flow_components[flow_id] = {
|
self.flow_components[flow_id] = {
|
||||||
'embedder': ontology_embedder,
|
'embedder': ontology_embedder,
|
||||||
'vector_store': vector_store,
|
'vector_store': vector_store,
|
||||||
'selector': ontology_selector
|
'selector': ontology_selector,
|
||||||
|
'dimension': dimension
|
||||||
}
|
}
|
||||||
|
|
||||||
logger.info(f"Flow {flow_id} components initialized successfully")
|
logger.info(f"Flow {flow_id} components initialized successfully (dimension={dimension})")
|
||||||
return flow_id
|
return flow_id
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
|
|
||||||
|
|
@@ -164,6 +164,9 @@ class OntologyEmbedder:
|
||||||
# Convert to numpy array
|
# Convert to numpy array
|
||||||
embeddings = np.array(embeddings_list)
|
embeddings = np.array(embeddings_list)
|
||||||
|
|
||||||
|
# Log embedding shape for debugging
|
||||||
|
logger.debug(f"Embeddings shape: {embeddings.shape}, expected: ({len(batch)}, {self.vector_store.dimension})")
|
||||||
|
|
||||||
# Store in vector store
|
# Store in vector store
|
||||||
ids = [elem['id'] for elem in batch]
|
ids = [elem['id'] for elem in batch]
|
||||||
metadata_list = [elem['metadata'] for elem in batch]
|
metadata_list = [elem['metadata'] for elem in batch]
|
||||||
|
|
@@ -174,7 +177,7 @@ class OntologyEmbedder:
|
||||||
logger.debug(f"Embedded batch of {len(batch)} elements from ontology {ontology.id}")
|
logger.debug(f"Embedded batch of {len(batch)} elements from ontology {ontology.id}")
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Failed to embed batch for ontology {ontology.id}: {e}")
|
logger.error(f"Failed to embed batch for ontology {ontology.id}: {e}", exc_info=True)
|
||||||
|
|
||||||
self.embedded_ontologies.add(ontology.id)
|
self.embedded_ontologies.add(ontology.id)
|
||||||
logger.info(f"Embedded {embedded_count} elements from ontology {ontology.id}")
|
logger.info(f"Embedded {embedded_count} elements from ontology {ontology.id}")
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue