Update ontology prompt

2026-05-06 13:52:38 +02:00 · 2025-11-12 17:19:50 +00:00 · 2025-11-12 17:19:50 +00:00 · bf067e1d2d
commit bf067e1d2d
parent 2356fce876
1 changed file with 14 additions and 65 deletions
--- a/trustgraph-flow/trustgraph/extract/kg/ontology/extract.py
+++ b/trustgraph-flow/trustgraph/extract/kg/ontology/extract.py
@ -317,73 +317,22 @@ class Processor(FlowProcessor):
                []
            )
-    def build_extraction_prompt(self, chunk: str, ontology_subset: OntologySubset) -> str:
+    def build_extraction_variables(self, chunk: str, ontology_subset: OntologySubset) -> Dict[str, Any]:
-        """Build prompt for ontology-based extraction."""
+        """Build variables for ontology-based extraction prompt template.
        # Format classes
        classes_str = self.format_classes(ontology_subset.classes)
-        # Format properties
+        Args:
-        obj_props_str = self.format_properties(
+            chunk: Text chunk to extract from
-            ontology_subset.object_properties,
+            ontology_subset: Relevant ontology elements
            "object"
        )
        dt_props_str = self.format_properties(
            ontology_subset.datatype_properties,
            "datatype"
        )
-        prompt = f"""Extract knowledge triples from the following text using ONLY the provided ontology elements.
+        Returns:
-
+            Dict with template variables: text, classes, object_properties, datatype_properties
-ONTOLOGY CLASSES:
+        """
-{classes_str}
+        return {
-
+            "text": chunk,
-OBJECT PROPERTIES (connect entities):
+            "classes": ontology_subset.classes,
-{obj_props_str}
+            "object_properties": ontology_subset.object_properties,
-
+            "datatype_properties": ontology_subset.datatype_properties
-DATATYPE PROPERTIES (entity attributes):
+        }
 {dt_props_str}
 RULES:
 1. Only use classes defined above for entity types
 2. Only use properties defined above for relationships and attributes
 3. Respect domain and range constraints
 4. Output format: JSON array of {{"subject": "", "predicate": "", "object": ""}}
 5. For class instances, use rdf:type as predicate
 6. Include rdfs:label for new entities
 TEXT:
 {chunk}
 TRIPLES (JSON array):"""
        return prompt
    def format_classes(self, classes: Dict[str, Any]) -> str:
        """Render ontology classes as a bulleted text block for the prompt.

        Args:
            classes: Mapping of class id to a definition mapping. The
                optional 'comment' and 'subclass_of' entries are read via
                ``.get``; missing ones fall back to '' and 'Thing'.

        Returns:
            One "- <id> (subclass of <parent>): <comment>" line per class,
            joined with newlines, or the literal string "None" when
            ``classes`` is empty or otherwise falsy.
        """
        def render(name: str, spec: Any) -> str:
            # One bullet line per class, with defaults for absent fields.
            description = spec.get('comment', '')
            superclass = spec.get('subclass_of', 'Thing')
            return f"- {name} (subclass of {superclass}): {description}"

        if classes:
            return '\n'.join(
                render(name, spec) for name, spec in classes.items()
            )
        return "None"
    def format_properties(self, properties: Dict[str, Any], prop_type: str) -> str:
        """Render ontology properties as a bulleted text block for the prompt.

        Args:
            properties: Mapping of property id to a definition mapping. The
                optional 'comment', 'domain' and 'range' entries are read
                via ``.get``; missing ones fall back to '', 'Any' and 'Any'.
            prop_type: Property kind label supplied by the caller. It is
                accepted for interface compatibility but not used in the
                rendered output.

        Returns:
            One "- <id> (<domain> -> <range>): <comment>" line per property,
            joined with newlines, or the literal string "None" when
            ``properties`` is empty or otherwise falsy.
        """
        def render(name: str, spec: Any) -> str:
            # One bullet line per property, with defaults for absent fields.
            description = spec.get('comment', '')
            source = spec.get('domain', 'Any')
            target = spec.get('range', 'Any')
            return f"- {name} ({source} -> {target}): {description}"

        if properties:
            return '\n'.join(
                render(name, spec) for name, spec in properties.items()
            )
        return "None"
    def parse_and_validate_triples(self, triples_response: List[Any],
                                  ontology_subset: OntologySubset) -> List[Triple]: