Ontology matching not working: add debug logging

This commit is contained in:
Cyber MacGeddon 2025-11-12 17:03:22 +00:00
parent e00d69fb82
commit 3ff16fdcf6

View file

@@ -81,6 +81,15 @@ class OntologySelector:
relevant_elements = set() relevant_elements = set()
element_scores = defaultdict(float) element_scores = defaultdict(float)
# Check if vector store has any elements
vector_store = self.embedder.get_vector_store()
store_size = vector_store.size()
logger.debug(f"Vector store size: {store_size} elements")
if store_size == 0:
logger.warning("Vector store is empty - no ontology elements embedded")
return relevant_elements
# Process each segment # Process each segment
for segment in segments: for segment in segments:
# Get embedding for segment # Get embedding for segment
@@ -89,13 +98,25 @@ class OntologySelector:
logger.warning(f"Failed to embed segment: {segment.text[:50]}...") logger.warning(f"Failed to embed segment: {segment.text[:50]}...")
continue continue
# Search vector store logger.debug(f"Searching for segment: {segment.text[:100]}... (embedding shape: {embedding.shape})")
results = self.embedder.get_vector_store().search(
# Search vector store with no threshold to see all scores
all_results = vector_store.search(
embedding=embedding, embedding=embedding,
top_k=self.top_k, top_k=self.top_k,
threshold=self.similarity_threshold threshold=0.0 # Get all results to see scores
) )
# Log top scores for debugging
if all_results:
top_scores = [r.score for r in all_results[:3]]
logger.debug(f"Top 3 scores for segment: {top_scores}, threshold={self.similarity_threshold}")
# Filter by threshold
results = [r for r in all_results if r.score >= self.similarity_threshold]
logger.debug(f"Found {len(results)} results above threshold (out of {len(all_results)} total)")
# Process results # Process results
for result in results: for result in results:
metadata = result.metadata metadata = result.metadata
@@ -109,7 +130,7 @@ class OntologySelector:
# Track scores for ranking # Track scores for ranking
element_scores[element_key] = max(element_scores[element_key], result.score) element_scores[element_key] = max(element_scores[element_key], result.score)
logger.debug(f"Found {len(relevant_elements)} relevant elements from {len(segments)} segments") logger.info(f"Found {len(relevant_elements)} relevant elements from {len(segments)} segments")
return relevant_elements return relevant_elements
def _build_ontology_subsets(self, relevant_elements: Set[Tuple[str, str, str, Dict]]) -> List[OntologySubset]: def _build_ontology_subsets(self, relevant_elements: Set[Tuple[str, str, str, Dict]]) -> List[OntologySubset]: