Fixing prompt invocation

This commit is contained in:
Cyber MacGeddon 2025-11-12 17:17:25 +00:00
parent 3ff16fdcf6
commit 2356fce876
2 changed files with 18 additions and 17 deletions

View file

@@ -83,7 +83,7 @@ class Processor(FlowProcessor):
# Configuration # Configuration
self.top_k = params.get("top_k", 10) self.top_k = params.get("top_k", 10)
self.similarity_threshold = params.get("similarity_threshold", 0.7) self.similarity_threshold = params.get("similarity_threshold", 0.3)
# Track loaded ontology version # Track loaded ontology version
self.current_ontology_version = None self.current_ontology_version = None
@@ -272,13 +272,15 @@ class Processor(FlowProcessor):
f"{len(ontology_subset.object_properties)} object properties, " f"{len(ontology_subset.object_properties)} object properties, "
f"{len(ontology_subset.datatype_properties)} datatype properties") f"{len(ontology_subset.datatype_properties)} datatype properties")
# Build extraction prompt # Build extraction prompt variables
prompt = self.build_extraction_prompt(chunk, ontology_subset) prompt_variables = self.build_extraction_variables(chunk, ontology_subset)
# Call prompt service for extraction # Call prompt service for extraction
try: try:
triples_response = await flow("prompt-request").extract_ontology_triples( # Use prompt() method with extract-with-ontologies prompt ID
prompt=prompt triples_response = await flow("prompt-request").prompt(
id="extract-with-ontologies",
variables=prompt_variables
) )
logger.debug(f"Extraction response: {triples_response}") logger.debug(f"Extraction response: {triples_response}")
@@ -477,8 +479,8 @@ TRIPLES (JSON array):"""
parser.add_argument( parser.add_argument(
'--similarity-threshold', '--similarity-threshold',
type=float, type=float,
default=0.7, default=0.3,
help='Similarity threshold for ontology matching (default: 0.7)' help='Similarity threshold for ontology matching (default: 0.3, range: 0.0-1.0)'
) )
FlowProcessor.add_args(parser) FlowProcessor.add_args(parser)

View file

@@ -84,22 +84,20 @@ class OntologySelector:
# Check if vector store has any elements # Check if vector store has any elements
vector_store = self.embedder.get_vector_store() vector_store = self.embedder.get_vector_store()
store_size = vector_store.size() store_size = vector_store.size()
logger.debug(f"Vector store size: {store_size} elements") logger.info(f"Vector store size: {store_size} elements")
if store_size == 0: if store_size == 0:
logger.warning("Vector store is empty - no ontology elements embedded") logger.warning("Vector store is empty - no ontology elements embedded")
return relevant_elements return relevant_elements
# Process each segment # Process each segment (log first few for debugging)
for segment in segments: for i, segment in enumerate(segments):
# Get embedding for segment # Get embedding for segment
embedding = await self.embedder.embed_text(segment.text) embedding = await self.embedder.embed_text(segment.text)
if embedding is None: if embedding is None:
logger.warning(f"Failed to embed segment: {segment.text[:50]}...") logger.warning(f"Failed to embed segment: {segment.text[:50]}...")
continue continue
logger.debug(f"Searching for segment: {segment.text[:100]}... (embedding shape: {embedding.shape})")
# Search vector store with no threshold to see all scores # Search vector store with no threshold to see all scores
all_results = vector_store.search( all_results = vector_store.search(
embedding=embedding, embedding=embedding,
@@ -107,16 +105,17 @@ class OntologySelector:
threshold=0.0 # Get all results to see scores threshold=0.0 # Get all results to see scores
) )
# Log top scores for debugging # Log top scores for first 3 segments to debug
if all_results: if i < 3 and all_results:
top_scores = [r.score for r in all_results[:3]] top_scores = [r.score for r in all_results[:3]]
logger.debug(f"Top 3 scores for segment: {top_scores}, threshold={self.similarity_threshold}") top_elements = [r.metadata['element'] for r in all_results[:3]]
logger.info(f"Segment {i}: '{segment.text[:60]}...'")
logger.info(f" Top 3 scores: {top_scores} (threshold={self.similarity_threshold})")
logger.info(f" Top 3 elements: {top_elements}")
# Filter by threshold # Filter by threshold
results = [r for r in all_results if r.score >= self.similarity_threshold] results = [r for r in all_results if r.score >= self.similarity_threshold]
logger.debug(f"Found {len(results)} results above threshold (out of {len(all_results)} total)")
# Process results # Process results
for result in results: for result in results:
metadata = result.metadata metadata = result.metadata