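Fix embedding dimension handling: auto-detect the vector store dimension from the embeddings service instead of hard-coding 1536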

This commit is contained in:
Cyber MacGeddon 2025-11-12 16:59:18 +00:00
parent 92647cf088
commit e00d69fb82
2 changed files with 22 additions and 10 deletions

View file

@@ -93,7 +93,8 @@ class Processor(FlowProcessor):
"""Initialize per-flow OntoRAG components.
Each flow gets its own vector store and embedder to support
different embedding models across flows.
different embedding models across flows. The vector store dimension
is auto-detected from the embeddings service.
Args:
flow: Flow object for this processing context
@@ -110,15 +111,22 @@ class Processor(FlowProcessor):
try:
logger.info(f"Initializing components for flow {flow_id}")
# Initialize vector store (FAISS only, no fallback)
vector_store = InMemoryVectorStore(
dimension=1536, # text-embedding-3-small
index_type='flat'
)
# Use embeddings client directly (no wrapper needed)
embeddings_client = flow("embeddings-request")
# Detect embedding dimension by embedding a test string
logger.info("Detecting embedding dimension from embeddings service...")
test_embedding_response = await embeddings_client.embed("test")
test_embedding = test_embedding_response[0] # Extract from [[vector]]
dimension = len(test_embedding)
logger.info(f"Detected embedding dimension: {dimension}")
# Initialize vector store with detected dimension
vector_store = InMemoryVectorStore(
dimension=dimension,
index_type='flat'
)
ontology_embedder = OntologyEmbedder(
embedding_service=embeddings_client,
vector_store=vector_store
@@ -143,10 +151,11 @@ class Processor(FlowProcessor):
self.flow_components[flow_id] = {
'embedder': ontology_embedder,
'vector_store': vector_store,
'selector': ontology_selector
'selector': ontology_selector,
'dimension': dimension
}
logger.info(f"Flow {flow_id} components initialized successfully")
logger.info(f"Flow {flow_id} components initialized successfully (dimension={dimension})")
return flow_id
except Exception as e:

View file

@@ -164,6 +164,9 @@ class OntologyEmbedder:
# Convert to numpy array
embeddings = np.array(embeddings_list)
# Log embedding shape for debugging
logger.debug(f"Embeddings shape: {embeddings.shape}, expected: ({len(batch)}, {self.vector_store.dimension})")
# Store in vector store
ids = [elem['id'] for elem in batch]
metadata_list = [elem['metadata'] for elem in batch]
@@ -174,7 +177,7 @@ class OntologyEmbedder:
logger.debug(f"Embedded batch of {len(batch)} elements from ontology {ontology.id}")
except Exception as e:
logger.error(f"Failed to embed batch for ontology {ontology.id}: {e}")
logger.error(f"Failed to embed batch for ontology {ontology.id}: {e}", exc_info=True)
self.embedded_ontologies.add(ontology.id)
logger.info(f"Embedded {embedded_count} elements from ontology {ontology.id}")