Update ontology prompt

This commit is contained in:
Cyber MacGeddon 2025-11-12 17:19:50 +00:00
parent 2356fce876
commit bf067e1d2d

View file

@ -317,73 +317,22 @@ class Processor(FlowProcessor):
[] []
) )
def build_extraction_prompt(self, chunk: str, ontology_subset: OntologySubset) -> str: def build_extraction_variables(self, chunk: str, ontology_subset: OntologySubset) -> Dict[str, Any]:
"""Build prompt for ontology-based extraction.""" """Build variables for ontology-based extraction prompt template.
# Format classes
classes_str = self.format_classes(ontology_subset.classes)
# Format properties Args:
obj_props_str = self.format_properties( chunk: Text chunk to extract from
ontology_subset.object_properties, ontology_subset: Relevant ontology elements
"object"
)
dt_props_str = self.format_properties(
ontology_subset.datatype_properties,
"datatype"
)
prompt = f"""Extract knowledge triples from the following text using ONLY the provided ontology elements. Returns:
Dict with template variables: text, classes, object_properties, datatype_properties
ONTOLOGY CLASSES: """
{classes_str} return {
"text": chunk,
OBJECT PROPERTIES (connect entities): "classes": ontology_subset.classes,
{obj_props_str} "object_properties": ontology_subset.object_properties,
"datatype_properties": ontology_subset.datatype_properties
DATATYPE PROPERTIES (entity attributes): }
{dt_props_str}
RULES:
1. Only use classes defined above for entity types
2. Only use properties defined above for relationships and attributes
3. Respect domain and range constraints
4. Output format: JSON array of {{"subject": "", "predicate": "", "object": ""}}
5. For class instances, use rdf:type as predicate
6. Include rdfs:label for new entities
TEXT:
{chunk}
TRIPLES (JSON array):"""
return prompt
def format_classes(self, classes: Dict[str, Any]) -> str:
    """Render ontology classes as a bulleted list for the extraction prompt.

    Args:
        classes: Mapping of class identifier to its definition mapping.
            The ``comment`` and ``subclass_of`` keys are read when present.

    Returns:
        The string ``"None"`` when ``classes`` is empty or ``None``;
        otherwise one ``- <id> (subclass of <parent>): <comment>`` line
        per class, joined with newlines.
    """
    if not classes:
        return "None"

    lines = []
    for class_id, definition in classes.items():
        # Use ``or`` instead of a .get() default: a key that is present
        # but null/empty must fall back too, otherwise the literal text
        # "None" ends up in the prompt.
        comment = definition.get('comment') or ''
        parent = definition.get('subclass_of') or 'Thing'
        lines.append(f"- {class_id} (subclass of {parent}): {comment}")
    return '\n'.join(lines)
def format_properties(self, properties: Dict[str, Any], prop_type: str) -> str:
    """Render ontology properties as a bulleted list for the extraction prompt.

    Args:
        properties: Mapping of property identifier to its definition
            mapping. The ``comment``, ``domain`` and ``range`` keys are
            read when present.
        prop_type: Kind of property being formatted. Accepted for
            interface compatibility; it does not affect the rendered text.

    Returns:
        The string ``"None"`` when ``properties`` is empty or ``None``;
        otherwise one ``- <id> (<domain> -> <range>): <comment>`` line per
        property, joined with newlines.
    """
    if not properties:
        return "None"

    lines = []
    for prop_id, definition in properties.items():
        # Use ``or`` instead of a .get() default: a key that is present
        # but null/empty must fall back too, otherwise the literal text
        # "None" ends up in the prompt as a domain or range.
        comment = definition.get('comment') or ''
        domain = definition.get('domain') or 'Any'
        range_val = definition.get('range') or 'Any'
        lines.append(f"- {prop_id} ({domain} -> {range_val}): {comment}")
    return '\n'.join(lines)
def parse_and_validate_triples(self, triples_response: List[Any], def parse_and_validate_triples(self, triples_response: List[Any],
ontology_subset: OntologySubset) -> List[Triple]: ontology_subset: OntologySubset) -> List[Triple]: