mirror of
https://github.com/trustgraph-ai/trustgraph.git
synced 2026-05-06 13:52:38 +02:00
Update ontology prompt
This commit is contained in:
parent
2356fce876
commit
bf067e1d2d
1 changed file with 14 additions and 65 deletions
|
|
@@ -317,73 +317,22 @@ class Processor(FlowProcessor):
|
||||||
[]
|
[]
|
||||||
)
|
)
|
||||||
|
|
||||||
def build_extraction_prompt(self, chunk: str, ontology_subset: OntologySubset) -> str:
|
def build_extraction_variables(self, chunk: str, ontology_subset: OntologySubset) -> Dict[str, Any]:
|
||||||
"""Build prompt for ontology-based extraction."""
|
"""Build variables for ontology-based extraction prompt template.
|
||||||
# Format classes
|
|
||||||
classes_str = self.format_classes(ontology_subset.classes)
|
|
||||||
|
|
||||||
# Format properties
|
Args:
|
||||||
obj_props_str = self.format_properties(
|
chunk: Text chunk to extract from
|
||||||
ontology_subset.object_properties,
|
ontology_subset: Relevant ontology elements
|
||||||
"object"
|
|
||||||
)
|
|
||||||
dt_props_str = self.format_properties(
|
|
||||||
ontology_subset.datatype_properties,
|
|
||||||
"datatype"
|
|
||||||
)
|
|
||||||
|
|
||||||
prompt = f"""Extract knowledge triples from the following text using ONLY the provided ontology elements.
|
Returns:
|
||||||
|
Dict with template variables: text, classes, object_properties, datatype_properties
|
||||||
ONTOLOGY CLASSES:
|
"""
|
||||||
{classes_str}
|
return {
|
||||||
|
"text": chunk,
|
||||||
OBJECT PROPERTIES (connect entities):
|
"classes": ontology_subset.classes,
|
||||||
{obj_props_str}
|
"object_properties": ontology_subset.object_properties,
|
||||||
|
"datatype_properties": ontology_subset.datatype_properties
|
||||||
DATATYPE PROPERTIES (entity attributes):
|
}
|
||||||
{dt_props_str}
|
|
||||||
|
|
||||||
RULES:
|
|
||||||
1. Only use classes defined above for entity types
|
|
||||||
2. Only use properties defined above for relationships and attributes
|
|
||||||
3. Respect domain and range constraints
|
|
||||||
4. Output format: JSON array of {{"subject": "", "predicate": "", "object": ""}}
|
|
||||||
5. For class instances, use rdf:type as predicate
|
|
||||||
6. Include rdfs:label for new entities
|
|
||||||
|
|
||||||
TEXT:
|
|
||||||
{chunk}
|
|
||||||
|
|
||||||
TRIPLES (JSON array):"""
|
|
||||||
|
|
||||||
return prompt
|
|
||||||
|
|
||||||
def format_classes(self, classes: Dict[str, Any]) -> str:
|
|
||||||
"""Format classes for prompt."""
|
|
||||||
if not classes:
|
|
||||||
return "None"
|
|
||||||
|
|
||||||
lines = []
|
|
||||||
for class_id, definition in classes.items():
|
|
||||||
comment = definition.get('comment', '')
|
|
||||||
parent = definition.get('subclass_of', 'Thing')
|
|
||||||
lines.append(f"- {class_id} (subclass of {parent}): {comment}")
|
|
||||||
|
|
||||||
return '\n'.join(lines)
|
|
||||||
|
|
||||||
def format_properties(self, properties: Dict[str, Any], prop_type: str) -> str:
|
|
||||||
"""Format properties for prompt."""
|
|
||||||
if not properties:
|
|
||||||
return "None"
|
|
||||||
|
|
||||||
lines = []
|
|
||||||
for prop_id, definition in properties.items():
|
|
||||||
comment = definition.get('comment', '')
|
|
||||||
domain = definition.get('domain', 'Any')
|
|
||||||
range_val = definition.get('range', 'Any')
|
|
||||||
lines.append(f"- {prop_id} ({domain} -> {range_val}): {comment}")
|
|
||||||
|
|
||||||
return '\n'.join(lines)
|
|
||||||
|
|
||||||
def parse_and_validate_triples(self, triples_response: List[Any],
|
def parse_and_validate_triples(self, triples_response: List[Any],
|
||||||
ontology_subset: OntologySubset) -> List[Triple]:
|
ontology_subset: OntologySubset) -> List[Triple]:
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue