mirror of
https://github.com/trustgraph-ai/trustgraph.git
synced 2026-04-30 10:56:23 +02:00
Add debug logging to investigate ontology matching not working
This commit is contained in:
parent
e00d69fb82
commit
3ff16fdcf6
1 changed file with 25 additions and 4 deletions
|
|
@@ -81,6 +81,15 @@ class OntologySelector:
|
||||||
relevant_elements = set()
|
relevant_elements = set()
|
||||||
element_scores = defaultdict(float)
|
element_scores = defaultdict(float)
|
||||||
|
|
||||||
|
# Check if vector store has any elements
|
||||||
|
vector_store = self.embedder.get_vector_store()
|
||||||
|
store_size = vector_store.size()
|
||||||
|
logger.debug(f"Vector store size: {store_size} elements")
|
||||||
|
|
||||||
|
if store_size == 0:
|
||||||
|
logger.warning("Vector store is empty - no ontology elements embedded")
|
||||||
|
return relevant_elements
|
||||||
|
|
||||||
# Process each segment
|
# Process each segment
|
||||||
for segment in segments:
|
for segment in segments:
|
||||||
# Get embedding for segment
|
# Get embedding for segment
|
||||||
|
|
@@ -89,13 +98,25 @@ class OntologySelector:
|
||||||
logger.warning(f"Failed to embed segment: {segment.text[:50]}...")
|
logger.warning(f"Failed to embed segment: {segment.text[:50]}...")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Search vector store
|
logger.debug(f"Searching for segment: {segment.text[:100]}... (embedding shape: {embedding.shape})")
|
||||||
results = self.embedder.get_vector_store().search(
|
|
||||||
|
# Search vector store with no threshold to see all scores
|
||||||
|
all_results = vector_store.search(
|
||||||
embedding=embedding,
|
embedding=embedding,
|
||||||
top_k=self.top_k,
|
top_k=self.top_k,
|
||||||
threshold=self.similarity_threshold
|
threshold=0.0 # Get all results to see scores
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Log top scores for debugging
|
||||||
|
if all_results:
|
||||||
|
top_scores = [r.score for r in all_results[:3]]
|
||||||
|
logger.debug(f"Top 3 scores for segment: {top_scores}, threshold={self.similarity_threshold}")
|
||||||
|
|
||||||
|
# Filter by threshold
|
||||||
|
results = [r for r in all_results if r.score >= self.similarity_threshold]
|
||||||
|
|
||||||
|
logger.debug(f"Found {len(results)} results above threshold (out of {len(all_results)} total)")
|
||||||
|
|
||||||
# Process results
|
# Process results
|
||||||
for result in results:
|
for result in results:
|
||||||
metadata = result.metadata
|
metadata = result.metadata
|
||||||
|
|
@@ -109,7 +130,7 @@ class OntologySelector:
|
||||||
# Track scores for ranking
|
# Track scores for ranking
|
||||||
element_scores[element_key] = max(element_scores[element_key], result.score)
|
element_scores[element_key] = max(element_scores[element_key], result.score)
|
||||||
|
|
||||||
logger.debug(f"Found {len(relevant_elements)} relevant elements from {len(segments)} segments")
|
logger.info(f"Found {len(relevant_elements)} relevant elements from {len(segments)} segments")
|
||||||
return relevant_elements
|
return relevant_elements
|
||||||
|
|
||||||
def _build_ontology_subsets(self, relevant_elements: Set[Tuple[str, str, str, Dict]]) -> List[OntologySubset]:
|
def _build_ontology_subsets(self, relevant_elements: Set[Tuple[str, str, str, Dict]]) -> List[OntologySubset]:
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue