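Fix embedding dimension handling: detect the vector store dimension from the embeddings service instead of hard-coding 1536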

This commit is contained in:
Cyber MacGeddon 2025-11-12 16:59:18 +00:00
parent 92647cf088
commit e00d69fb82
2 changed files with 22 additions and 10 deletions

View file

@ -93,7 +93,8 @@ class Processor(FlowProcessor):
"""Initialize per-flow OntoRAG components. """Initialize per-flow OntoRAG components.
Each flow gets its own vector store and embedder to support Each flow gets its own vector store and embedder to support
different embedding models across flows. different embedding models across flows. The vector store dimension
is auto-detected from the embeddings service.
Args: Args:
flow: Flow object for this processing context flow: Flow object for this processing context
@ -110,15 +111,22 @@ class Processor(FlowProcessor):
try: try:
logger.info(f"Initializing components for flow {flow_id}") logger.info(f"Initializing components for flow {flow_id}")
# Initialize vector store (FAISS only, no fallback)
vector_store = InMemoryVectorStore(
dimension=1536, # text-embedding-3-small
index_type='flat'
)
# Use embeddings client directly (no wrapper needed) # Use embeddings client directly (no wrapper needed)
embeddings_client = flow("embeddings-request") embeddings_client = flow("embeddings-request")
# Detect embedding dimension by embedding a test string
logger.info("Detecting embedding dimension from embeddings service...")
test_embedding_response = await embeddings_client.embed("test")
test_embedding = test_embedding_response[0] # Extract from [[vector]]
dimension = len(test_embedding)
logger.info(f"Detected embedding dimension: {dimension}")
# Initialize vector store with detected dimension
vector_store = InMemoryVectorStore(
dimension=dimension,
index_type='flat'
)
ontology_embedder = OntologyEmbedder( ontology_embedder = OntologyEmbedder(
embedding_service=embeddings_client, embedding_service=embeddings_client,
vector_store=vector_store vector_store=vector_store
@ -143,10 +151,11 @@ class Processor(FlowProcessor):
self.flow_components[flow_id] = { self.flow_components[flow_id] = {
'embedder': ontology_embedder, 'embedder': ontology_embedder,
'vector_store': vector_store, 'vector_store': vector_store,
'selector': ontology_selector 'selector': ontology_selector,
'dimension': dimension
} }
logger.info(f"Flow {flow_id} components initialized successfully") logger.info(f"Flow {flow_id} components initialized successfully (dimension={dimension})")
return flow_id return flow_id
except Exception as e: except Exception as e:

View file

@ -164,6 +164,9 @@ class OntologyEmbedder:
# Convert to numpy array # Convert to numpy array
embeddings = np.array(embeddings_list) embeddings = np.array(embeddings_list)
# Log embedding shape for debugging
logger.debug(f"Embeddings shape: {embeddings.shape}, expected: ({len(batch)}, {self.vector_store.dimension})")
# Store in vector store # Store in vector store
ids = [elem['id'] for elem in batch] ids = [elem['id'] for elem in batch]
metadata_list = [elem['metadata'] for elem in batch] metadata_list = [elem['metadata'] for elem in batch]
@ -174,7 +177,7 @@ class OntologyEmbedder:
logger.debug(f"Embedded batch of {len(batch)} elements from ontology {ontology.id}") logger.debug(f"Embedded batch of {len(batch)} elements from ontology {ontology.id}")
except Exception as e: except Exception as e:
logger.error(f"Failed to embed batch for ontology {ontology.id}: {e}") logger.error(f"Failed to embed batch for ontology {ontology.id}: {e}", exc_info=True)
self.embedded_ontologies.add(ontology.id) self.embedded_ontologies.add(ontology.id)
logger.info(f"Embedded {embedded_count} elements from ontology {ontology.id}") logger.info(f"Embedded {embedded_count} elements from ontology {ontology.id}")