Fixing prompt invocation

2026-07-17 01:01:03 +02:00 · 2025-11-12 17:17:25 +00:00 · 2025-11-12 17:17:25 +00:00 · 2356fce876
commit 2356fce876
parent 3ff16fdcf6
2 changed files with 18 additions and 17 deletions
--- a/trustgraph-flow/trustgraph/extract/kg/ontology/extract.py
+++ b/trustgraph-flow/trustgraph/extract/kg/ontology/extract.py
@@ -83,7 +83,7 @@ class Processor(FlowProcessor):
        # Configuration
        self.top_k = params.get("top_k", 10)
-        self.similarity_threshold = params.get("similarity_threshold", 0.7)
+        self.similarity_threshold = params.get("similarity_threshold", 0.3)
        # Track loaded ontology version
        self.current_ontology_version = None
@@ -272,13 +272,15 @@ class Processor(FlowProcessor):
                        f"{len(ontology_subset.object_properties)} object properties, "
                        f"{len(ontology_subset.datatype_properties)} datatype properties")
-            # Build extraction prompt
+            # Build extraction prompt variables
-            prompt = self.build_extraction_prompt(chunk, ontology_subset)
+            prompt_variables = self.build_extraction_variables(chunk, ontology_subset)
            # Call prompt service for extraction
            try:
-                triples_response = await flow("prompt-request").extract_ontology_triples(
+                # Use prompt() method with extract-with-ontologies prompt ID
-                    prompt=prompt
+                triples_response = await flow("prompt-request").prompt(
+                    id="extract-with-ontologies",
+                    variables=prompt_variables
                )
                logger.debug(f"Extraction response: {triples_response}")
@@ -477,8 +479,8 @@ TRIPLES (JSON array):"""
        parser.add_argument(
            '--similarity-threshold',
            type=float,
-            default=0.7,
+            default=0.3,
-            help='Similarity threshold for ontology matching (default: 0.7)'
+            help='Similarity threshold for ontology matching (default: 0.3, range: 0.0-1.0)'
        )
        FlowProcessor.add_args(parser)
--- a/trustgraph-flow/trustgraph/extract/kg/ontology/ontology_selector.py
+++ b/trustgraph-flow/trustgraph/extract/kg/ontology/ontology_selector.py
@@ -84,22 +84,20 @@ class OntologySelector:
        # Check if vector store has any elements
        vector_store = self.embedder.get_vector_store()
        store_size = vector_store.size()
-        logger.debug(f"Vector store size: {store_size} elements")
+        logger.info(f"Vector store size: {store_size} elements")
        if store_size == 0:
            logger.warning("Vector store is empty - no ontology elements embedded")
            return relevant_elements
-        # Process each segment
+        # Process each segment (log first few for debugging)
-        for segment in segments:
+        for i, segment in enumerate(segments):
            # Get embedding for segment
            embedding = await self.embedder.embed_text(segment.text)
            if embedding is None:
                logger.warning(f"Failed to embed segment: {segment.text[:50]}...")
                continue
            logger.debug(f"Searching for segment: {segment.text[:100]}... (embedding shape: {embedding.shape})")
            # Search vector store with no threshold to see all scores
            all_results = vector_store.search(
                embedding=embedding,
@@ -107,16 +105,17 @@ class OntologySelector:
                threshold=0.0  # Get all results to see scores
            )
-            # Log top scores for debugging
+            # Log top scores for first 3 segments to debug
-            if all_results:
+            if i < 3 and all_results:
                top_scores = [r.score for r in all_results[:3]]
-                logger.debug(f"Top 3 scores for segment: {top_scores}, threshold={self.similarity_threshold}")
+                top_elements = [r.metadata['element'] for r in all_results[:3]]
+                logger.info(f"Segment {i}: '{segment.text[:60]}...'")
+                logger.info(f"  Top 3 scores: {top_scores} (threshold={self.similarity_threshold})")
+                logger.info(f"  Top 3 elements: {top_elements}")
            # Filter by threshold
            results = [r for r in all_results if r.score >= self.similarity_threshold]
            logger.debug(f"Found {len(results)} results above threshold (out of {len(all_results)} total)")
            # Process results
            for result in results:
                metadata = result.metadata