mirror of
https://github.com/trustgraph-ai/trustgraph.git
synced 2026-04-25 00:16:23 +02:00
Enhance retrieval pipelines: 4-stage GraphRAG, DocRAG grounding (#697)
Enhance retrieval pipelines: 4-stage GraphRAG, DocRAG grounding, consistent PROV-O

GraphRAG:
- Split retrieval into 4 prompt stages: extract-concepts, kg-edge-scoring, kg-edge-reasoning, kg-synthesis (was single-stage)
- Add concept extraction (grounding) for per-concept embedding
- Filter main query to default graph, ignoring provenance/explainability edges
- Add source document edges to knowledge graph

DocumentRAG:
- Add grounding step with concept extraction, matching GraphRAG's pattern: Question → Grounding → Exploration → Synthesis
- Per-concept embedding and chunk retrieval with deduplication

Cross-pipeline:
- Make PROV-O derivation links consistent: wasGeneratedBy for first entity from Activity, wasDerivedFrom for entity-to-entity chains
- Update CLIs (tg-invoke-agent, tg-invoke-graph-rag, tg-invoke-document-rag) for new explainability structure
- Fix all affected unit and integration tests
This commit is contained in:
parent
29b4300808
commit
a115ec06ab
25 changed files with 1537 additions and 1008 deletions
|
|
@ -75,9 +75,11 @@ from .explainability import (
|
|||
ExplainabilityClient,
|
||||
ExplainEntity,
|
||||
Question,
|
||||
Grounding,
|
||||
Exploration,
|
||||
Focus,
|
||||
Synthesis,
|
||||
Reflection,
|
||||
Analysis,
|
||||
Conclusion,
|
||||
EdgeSelection,
|
||||
|
|
|
|||
|
|
@ -18,25 +18,28 @@ TG_EDGE_COUNT = TG + "edgeCount"
|
|||
TG_SELECTED_EDGE = TG + "selectedEdge"
|
||||
TG_EDGE = TG + "edge"
|
||||
TG_REASONING = TG + "reasoning"
|
||||
TG_CONTENT = TG + "content"
|
||||
TG_DOCUMENT = TG + "document"
|
||||
TG_CONCEPT = TG + "concept"
|
||||
TG_ENTITY = TG + "entity"
|
||||
TG_CHUNK_COUNT = TG + "chunkCount"
|
||||
TG_SELECTED_CHUNK = TG + "selectedChunk"
|
||||
TG_THOUGHT = TG + "thought"
|
||||
TG_ACTION = TG + "action"
|
||||
TG_ARGUMENTS = TG + "arguments"
|
||||
TG_OBSERVATION = TG + "observation"
|
||||
TG_ANSWER = TG + "answer"
|
||||
TG_THOUGHT_DOCUMENT = TG + "thoughtDocument"
|
||||
TG_OBSERVATION_DOCUMENT = TG + "observationDocument"
|
||||
|
||||
# Entity types
|
||||
TG_QUESTION = TG + "Question"
|
||||
TG_GROUNDING = TG + "Grounding"
|
||||
TG_EXPLORATION = TG + "Exploration"
|
||||
TG_FOCUS = TG + "Focus"
|
||||
TG_SYNTHESIS = TG + "Synthesis"
|
||||
TG_ANALYSIS = TG + "Analysis"
|
||||
TG_CONCLUSION = TG + "Conclusion"
|
||||
TG_ANSWER_TYPE = TG + "Answer"
|
||||
TG_REFLECTION_TYPE = TG + "Reflection"
|
||||
TG_THOUGHT_TYPE = TG + "Thought"
|
||||
TG_OBSERVATION_TYPE = TG + "Observation"
|
||||
TG_GRAPH_RAG_QUESTION = TG + "GraphRagQuestion"
|
||||
TG_DOC_RAG_QUESTION = TG + "DocRagQuestion"
|
||||
TG_AGENT_QUESTION = TG + "AgentQuestion"
|
||||
|
|
@ -73,12 +76,16 @@ class ExplainEntity:
|
|||
|
||||
if TG_GRAPH_RAG_QUESTION in types or TG_DOC_RAG_QUESTION in types or TG_AGENT_QUESTION in types:
|
||||
return Question.from_triples(uri, triples, types)
|
||||
elif TG_GROUNDING in types:
|
||||
return Grounding.from_triples(uri, triples)
|
||||
elif TG_EXPLORATION in types:
|
||||
return Exploration.from_triples(uri, triples)
|
||||
elif TG_FOCUS in types:
|
||||
return Focus.from_triples(uri, triples)
|
||||
elif TG_SYNTHESIS in types:
|
||||
return Synthesis.from_triples(uri, triples)
|
||||
elif TG_REFLECTION_TYPE in types:
|
||||
return Reflection.from_triples(uri, triples)
|
||||
elif TG_ANALYSIS in types:
|
||||
return Analysis.from_triples(uri, triples)
|
||||
elif TG_CONCLUSION in types:
|
||||
|
|
@ -124,16 +131,38 @@ class Question(ExplainEntity):
|
|||
)
|
||||
|
||||
|
||||
@dataclass
|
||||
class Grounding(ExplainEntity):
|
||||
"""Grounding entity - concept decomposition of the query."""
|
||||
concepts: List[str] = field(default_factory=list)
|
||||
|
||||
@classmethod
|
||||
def from_triples(cls, uri: str, triples: List[Tuple[str, str, Any]]) -> "Grounding":
|
||||
concepts = []
|
||||
|
||||
for s, p, o in triples:
|
||||
if p == TG_CONCEPT:
|
||||
concepts.append(o)
|
||||
|
||||
return cls(
|
||||
uri=uri,
|
||||
entity_type="grounding",
|
||||
concepts=concepts
|
||||
)
|
||||
|
||||
|
||||
@dataclass
|
||||
class Exploration(ExplainEntity):
|
||||
"""Exploration entity - edges/chunks retrieved from the knowledge store."""
|
||||
edge_count: int = 0
|
||||
chunk_count: int = 0
|
||||
entities: List[str] = field(default_factory=list)
|
||||
|
||||
@classmethod
|
||||
def from_triples(cls, uri: str, triples: List[Tuple[str, str, Any]]) -> "Exploration":
|
||||
edge_count = 0
|
||||
chunk_count = 0
|
||||
entities = []
|
||||
|
||||
for s, p, o in triples:
|
||||
if p == TG_EDGE_COUNT:
|
||||
|
|
@ -146,12 +175,15 @@ class Exploration(ExplainEntity):
|
|||
chunk_count = int(o)
|
||||
except (ValueError, TypeError):
|
||||
pass
|
||||
elif p == TG_ENTITY:
|
||||
entities.append(o)
|
||||
|
||||
return cls(
|
||||
uri=uri,
|
||||
entity_type="exploration",
|
||||
edge_count=edge_count,
|
||||
chunk_count=chunk_count
|
||||
chunk_count=chunk_count,
|
||||
entities=entities
|
||||
)
|
||||
|
||||
|
||||
|
|
@ -180,94 +212,104 @@ class Focus(ExplainEntity):
|
|||
@dataclass
|
||||
class Synthesis(ExplainEntity):
|
||||
"""Synthesis entity - the final answer."""
|
||||
content: str = ""
|
||||
document_uri: str = "" # Reference to librarian document
|
||||
|
||||
@classmethod
|
||||
def from_triples(cls, uri: str, triples: List[Tuple[str, str, Any]]) -> "Synthesis":
|
||||
content = ""
|
||||
document_uri = ""
|
||||
|
||||
for s, p, o in triples:
|
||||
if p == TG_CONTENT:
|
||||
content = o
|
||||
elif p == TG_DOCUMENT:
|
||||
if p == TG_DOCUMENT:
|
||||
document_uri = o
|
||||
|
||||
return cls(
|
||||
uri=uri,
|
||||
entity_type="synthesis",
|
||||
content=content,
|
||||
document_uri=document_uri
|
||||
)
|
||||
|
||||
|
||||
@dataclass
|
||||
class Reflection(ExplainEntity):
|
||||
"""Reflection entity - intermediate commentary (Thought or Observation)."""
|
||||
document_uri: str = "" # Reference to content in librarian
|
||||
reflection_type: str = "" # "thought" or "observation"
|
||||
|
||||
@classmethod
|
||||
def from_triples(cls, uri: str, triples: List[Tuple[str, str, Any]]) -> "Reflection":
|
||||
document_uri = ""
|
||||
reflection_type = ""
|
||||
|
||||
types = [o for s, p, o in triples if p == RDF_TYPE]
|
||||
|
||||
if TG_THOUGHT_TYPE in types:
|
||||
reflection_type = "thought"
|
||||
elif TG_OBSERVATION_TYPE in types:
|
||||
reflection_type = "observation"
|
||||
|
||||
for s, p, o in triples:
|
||||
if p == TG_DOCUMENT:
|
||||
document_uri = o
|
||||
|
||||
return cls(
|
||||
uri=uri,
|
||||
entity_type="reflection",
|
||||
document_uri=document_uri,
|
||||
reflection_type=reflection_type
|
||||
)
|
||||
|
||||
|
||||
@dataclass
|
||||
class Analysis(ExplainEntity):
|
||||
"""Analysis entity - one think/act/observe cycle (Agent only)."""
|
||||
thought: str = ""
|
||||
action: str = ""
|
||||
arguments: str = "" # JSON string
|
||||
observation: str = ""
|
||||
thought_document_uri: str = "" # Reference to thought in librarian
|
||||
observation_document_uri: str = "" # Reference to observation in librarian
|
||||
thought_uri: str = "" # URI of thought sub-entity
|
||||
observation_uri: str = "" # URI of observation sub-entity
|
||||
|
||||
@classmethod
|
||||
def from_triples(cls, uri: str, triples: List[Tuple[str, str, Any]]) -> "Analysis":
|
||||
thought = ""
|
||||
action = ""
|
||||
arguments = ""
|
||||
observation = ""
|
||||
thought_document_uri = ""
|
||||
observation_document_uri = ""
|
||||
thought_uri = ""
|
||||
observation_uri = ""
|
||||
|
||||
for s, p, o in triples:
|
||||
if p == TG_THOUGHT:
|
||||
thought = o
|
||||
elif p == TG_ACTION:
|
||||
if p == TG_ACTION:
|
||||
action = o
|
||||
elif p == TG_ARGUMENTS:
|
||||
arguments = o
|
||||
elif p == TG_THOUGHT:
|
||||
thought_uri = o
|
||||
elif p == TG_OBSERVATION:
|
||||
observation = o
|
||||
elif p == TG_THOUGHT_DOCUMENT:
|
||||
thought_document_uri = o
|
||||
elif p == TG_OBSERVATION_DOCUMENT:
|
||||
observation_document_uri = o
|
||||
observation_uri = o
|
||||
|
||||
return cls(
|
||||
uri=uri,
|
||||
entity_type="analysis",
|
||||
thought=thought,
|
||||
action=action,
|
||||
arguments=arguments,
|
||||
observation=observation,
|
||||
thought_document_uri=thought_document_uri,
|
||||
observation_document_uri=observation_document_uri
|
||||
thought_uri=thought_uri,
|
||||
observation_uri=observation_uri
|
||||
)
|
||||
|
||||
|
||||
@dataclass
|
||||
class Conclusion(ExplainEntity):
|
||||
"""Conclusion entity - final answer (Agent only)."""
|
||||
answer: str = ""
|
||||
document_uri: str = "" # Reference to librarian document
|
||||
|
||||
@classmethod
|
||||
def from_triples(cls, uri: str, triples: List[Tuple[str, str, Any]]) -> "Conclusion":
|
||||
answer = ""
|
||||
document_uri = ""
|
||||
|
||||
for s, p, o in triples:
|
||||
if p == TG_ANSWER:
|
||||
answer = o
|
||||
elif p == TG_DOCUMENT:
|
||||
if p == TG_DOCUMENT:
|
||||
document_uri = o
|
||||
|
||||
return cls(
|
||||
uri=uri,
|
||||
entity_type="conclusion",
|
||||
answer=answer,
|
||||
document_uri=document_uri
|
||||
)
|
||||
|
||||
|
|
@ -543,42 +585,29 @@ class ExplainabilityClient:
|
|||
o_label = self.resolve_label(edge.get("o", ""), user, collection)
|
||||
return (s_label, p_label, o_label)
|
||||
|
||||
def fetch_synthesis_content(
|
||||
def fetch_document_content(
|
||||
self,
|
||||
synthesis: Synthesis,
|
||||
document_uri: str,
|
||||
api: Any,
|
||||
user: Optional[str] = None,
|
||||
max_content: int = 10000
|
||||
) -> str:
|
||||
"""
|
||||
Fetch the content for a Synthesis entity.
|
||||
|
||||
If synthesis has inline content, returns that.
|
||||
If synthesis has a document_uri, fetches from librarian with retry.
|
||||
Fetch content from the librarian by document URI.
|
||||
|
||||
Args:
|
||||
synthesis: The Synthesis entity
|
||||
document_uri: The document URI in the librarian
|
||||
api: TrustGraph Api instance for librarian access
|
||||
user: User identifier for librarian
|
||||
max_content: Maximum content length to return
|
||||
|
||||
Returns:
|
||||
The synthesis content as a string
|
||||
The document content as a string
|
||||
"""
|
||||
# If inline content exists, use it
|
||||
if synthesis.content:
|
||||
if len(synthesis.content) > max_content:
|
||||
return synthesis.content[:max_content] + "... [truncated]"
|
||||
return synthesis.content
|
||||
|
||||
# Otherwise fetch from librarian
|
||||
if not synthesis.document_uri:
|
||||
if not document_uri:
|
||||
return ""
|
||||
|
||||
# Extract document ID from URI (e.g., "urn:document:abc123" -> "abc123")
|
||||
doc_id = synthesis.document_uri
|
||||
if doc_id.startswith("urn:document:"):
|
||||
doc_id = doc_id[len("urn:document:"):]
|
||||
doc_id = document_uri
|
||||
|
||||
# Retry fetching from librarian for eventual consistency
|
||||
for attempt in range(self.max_retries):
|
||||
|
|
@ -603,129 +632,6 @@ class ExplainabilityClient:
|
|||
|
||||
return ""
|
||||
|
||||
def fetch_conclusion_content(
|
||||
self,
|
||||
conclusion: Conclusion,
|
||||
api: Any,
|
||||
user: Optional[str] = None,
|
||||
max_content: int = 10000
|
||||
) -> str:
|
||||
"""
|
||||
Fetch the content for a Conclusion entity (Agent final answer).
|
||||
|
||||
If conclusion has inline answer, returns that.
|
||||
If conclusion has a document_uri, fetches from librarian with retry.
|
||||
|
||||
Args:
|
||||
conclusion: The Conclusion entity
|
||||
api: TrustGraph Api instance for librarian access
|
||||
user: User identifier for librarian
|
||||
max_content: Maximum content length to return
|
||||
|
||||
Returns:
|
||||
The conclusion answer as a string
|
||||
"""
|
||||
# If inline answer exists, use it
|
||||
if conclusion.answer:
|
||||
if len(conclusion.answer) > max_content:
|
||||
return conclusion.answer[:max_content] + "... [truncated]"
|
||||
return conclusion.answer
|
||||
|
||||
# Otherwise fetch from librarian
|
||||
if not conclusion.document_uri:
|
||||
return ""
|
||||
|
||||
# Use document URI directly (it's already a full URN)
|
||||
doc_id = conclusion.document_uri
|
||||
|
||||
# Retry fetching from librarian for eventual consistency
|
||||
for attempt in range(self.max_retries):
|
||||
try:
|
||||
library = api.library()
|
||||
content_bytes = library.get_document_content(user=user, id=doc_id)
|
||||
|
||||
# Decode as text
|
||||
try:
|
||||
content = content_bytes.decode('utf-8')
|
||||
if len(content) > max_content:
|
||||
return content[:max_content] + "... [truncated]"
|
||||
return content
|
||||
except UnicodeDecodeError:
|
||||
return f"[Binary: {len(content_bytes)} bytes]"
|
||||
|
||||
except Exception as e:
|
||||
if attempt < self.max_retries - 1:
|
||||
time.sleep(self.retry_delay)
|
||||
continue
|
||||
return f"[Error fetching content: {e}]"
|
||||
|
||||
return ""
|
||||
|
||||
def fetch_analysis_content(
|
||||
self,
|
||||
analysis: Analysis,
|
||||
api: Any,
|
||||
user: Optional[str] = None,
|
||||
max_content: int = 10000
|
||||
) -> None:
|
||||
"""
|
||||
Fetch thought and observation content for an Analysis entity.
|
||||
|
||||
If analysis has inline content, uses that.
|
||||
If analysis has document URIs, fetches from librarian with retry.
|
||||
Modifies the analysis object in place.
|
||||
|
||||
Args:
|
||||
analysis: The Analysis entity (modified in place)
|
||||
api: TrustGraph Api instance for librarian access
|
||||
user: User identifier for librarian
|
||||
max_content: Maximum content length to return
|
||||
"""
|
||||
# Fetch thought if needed
|
||||
if not analysis.thought and analysis.thought_document_uri:
|
||||
doc_id = analysis.thought_document_uri
|
||||
for attempt in range(self.max_retries):
|
||||
try:
|
||||
library = api.library()
|
||||
content_bytes = library.get_document_content(user=user, id=doc_id)
|
||||
try:
|
||||
content = content_bytes.decode('utf-8')
|
||||
if len(content) > max_content:
|
||||
analysis.thought = content[:max_content] + "... [truncated]"
|
||||
else:
|
||||
analysis.thought = content
|
||||
break
|
||||
except UnicodeDecodeError:
|
||||
analysis.thought = f"[Binary: {len(content_bytes)} bytes]"
|
||||
break
|
||||
except Exception as e:
|
||||
if attempt < self.max_retries - 1:
|
||||
time.sleep(self.retry_delay)
|
||||
continue
|
||||
analysis.thought = f"[Error fetching thought: {e}]"
|
||||
|
||||
# Fetch observation if needed
|
||||
if not analysis.observation and analysis.observation_document_uri:
|
||||
doc_id = analysis.observation_document_uri
|
||||
for attempt in range(self.max_retries):
|
||||
try:
|
||||
library = api.library()
|
||||
content_bytes = library.get_document_content(user=user, id=doc_id)
|
||||
try:
|
||||
content = content_bytes.decode('utf-8')
|
||||
if len(content) > max_content:
|
||||
analysis.observation = content[:max_content] + "... [truncated]"
|
||||
else:
|
||||
analysis.observation = content
|
||||
break
|
||||
except UnicodeDecodeError:
|
||||
analysis.observation = f"[Binary: {len(content_bytes)} bytes]"
|
||||
break
|
||||
except Exception as e:
|
||||
if attempt < self.max_retries - 1:
|
||||
time.sleep(self.retry_delay)
|
||||
continue
|
||||
analysis.observation = f"[Error fetching observation: {e}]"
|
||||
|
||||
def fetch_graphrag_trace(
|
||||
self,
|
||||
|
|
@ -739,7 +645,7 @@ class ExplainabilityClient:
|
|||
"""
|
||||
Fetch the complete GraphRAG trace starting from a question URI.
|
||||
|
||||
Follows the provenance chain: Question -> Exploration -> Focus -> Synthesis
|
||||
Follows the provenance chain: Question -> Grounding -> Exploration -> Focus -> Synthesis
|
||||
|
||||
Args:
|
||||
question_uri: The question entity URI
|
||||
|
|
@ -750,13 +656,14 @@ class ExplainabilityClient:
|
|||
max_content: Maximum content length for synthesis
|
||||
|
||||
Returns:
|
||||
Dict with question, exploration, focus, synthesis entities
|
||||
Dict with question, grounding, exploration, focus, synthesis entities
|
||||
"""
|
||||
if graph is None:
|
||||
graph = "urn:graph:retrieval"
|
||||
|
||||
trace = {
|
||||
"question": None,
|
||||
"grounding": None,
|
||||
"exploration": None,
|
||||
"focus": None,
|
||||
"synthesis": None,
|
||||
|
|
@ -768,8 +675,8 @@ class ExplainabilityClient:
|
|||
return trace
|
||||
trace["question"] = question
|
||||
|
||||
# Find exploration: ?exploration prov:wasGeneratedBy question_uri
|
||||
exploration_triples = self.flow.triples_query(
|
||||
# Find grounding: ?grounding prov:wasGeneratedBy question_uri
|
||||
grounding_triples = self.flow.triples_query(
|
||||
p=PROV_WAS_GENERATED_BY,
|
||||
o=question_uri,
|
||||
g=graph,
|
||||
|
|
@ -778,6 +685,30 @@ class ExplainabilityClient:
|
|||
limit=10
|
||||
)
|
||||
|
||||
if grounding_triples:
|
||||
grounding_uris = [
|
||||
extract_term_value(t.get("s", {}))
|
||||
for t in grounding_triples
|
||||
]
|
||||
for gnd_uri in grounding_uris:
|
||||
grounding = self.fetch_entity(gnd_uri, graph, user, collection)
|
||||
if isinstance(grounding, Grounding):
|
||||
trace["grounding"] = grounding
|
||||
break
|
||||
|
||||
if not trace["grounding"]:
|
||||
return trace
|
||||
|
||||
# Find exploration: ?exploration prov:wasDerivedFrom grounding_uri
|
||||
exploration_triples = self.flow.triples_query(
|
||||
p=PROV_WAS_DERIVED_FROM,
|
||||
o=trace["grounding"].uri,
|
||||
g=graph,
|
||||
user=user,
|
||||
collection=collection,
|
||||
limit=10
|
||||
)
|
||||
|
||||
if exploration_triples:
|
||||
exploration_uris = [
|
||||
extract_term_value(t.get("s", {}))
|
||||
|
|
@ -834,11 +765,6 @@ class ExplainabilityClient:
|
|||
for synth_uri in synthesis_uris:
|
||||
synthesis = self.fetch_entity(synth_uri, graph, user, collection)
|
||||
if isinstance(synthesis, Synthesis):
|
||||
# Fetch content if needed
|
||||
if api and not synthesis.content and synthesis.document_uri:
|
||||
synthesis.content = self.fetch_synthesis_content(
|
||||
synthesis, api, user, max_content
|
||||
)
|
||||
trace["synthesis"] = synthesis
|
||||
break
|
||||
|
||||
|
|
@ -928,11 +854,6 @@ class ExplainabilityClient:
|
|||
for synth_uri in synthesis_uris:
|
||||
synthesis = self.fetch_entity(synth_uri, graph, user, collection)
|
||||
if isinstance(synthesis, Synthesis):
|
||||
# Fetch content if needed
|
||||
if api and not synthesis.content and synthesis.document_uri:
|
||||
synthesis.content = self.fetch_synthesis_content(
|
||||
synthesis, api, user, max_content
|
||||
)
|
||||
trace["synthesis"] = synthesis
|
||||
break
|
||||
|
||||
|
|
@ -978,20 +899,43 @@ class ExplainabilityClient:
|
|||
return trace
|
||||
trace["question"] = question
|
||||
|
||||
# Follow the chain of wasDerivedFrom
|
||||
# Follow the chain: wasGeneratedBy for first hop, wasDerivedFrom after
|
||||
current_uri = session_uri
|
||||
is_first = True
|
||||
max_iterations = 50 # Safety limit
|
||||
|
||||
for _ in range(max_iterations):
|
||||
# Find entity derived from current
|
||||
derived_triples = self.flow.triples_query(
|
||||
p=PROV_WAS_DERIVED_FROM,
|
||||
o=current_uri,
|
||||
g=graph,
|
||||
user=user,
|
||||
collection=collection,
|
||||
limit=10
|
||||
)
|
||||
# First hop uses wasGeneratedBy (entity←activity),
|
||||
# subsequent hops use wasDerivedFrom (entity←entity)
|
||||
if is_first:
|
||||
derived_triples = self.flow.triples_query(
|
||||
p=PROV_WAS_GENERATED_BY,
|
||||
o=current_uri,
|
||||
g=graph,
|
||||
user=user,
|
||||
collection=collection,
|
||||
limit=10
|
||||
)
|
||||
# Fall back to wasDerivedFrom for backwards compatibility
|
||||
if not derived_triples:
|
||||
derived_triples = self.flow.triples_query(
|
||||
p=PROV_WAS_DERIVED_FROM,
|
||||
o=current_uri,
|
||||
g=graph,
|
||||
user=user,
|
||||
collection=collection,
|
||||
limit=10
|
||||
)
|
||||
is_first = False
|
||||
else:
|
||||
derived_triples = self.flow.triples_query(
|
||||
p=PROV_WAS_DERIVED_FROM,
|
||||
o=current_uri,
|
||||
g=graph,
|
||||
user=user,
|
||||
collection=collection,
|
||||
limit=10
|
||||
)
|
||||
|
||||
if not derived_triples:
|
||||
break
|
||||
|
|
@ -1003,19 +947,9 @@ class ExplainabilityClient:
|
|||
entity = self.fetch_entity(derived_uri, graph, user, collection)
|
||||
|
||||
if isinstance(entity, Analysis):
|
||||
# Fetch thought/observation content from librarian if needed
|
||||
if api:
|
||||
self.fetch_analysis_content(
|
||||
entity, api, user=user, max_content=max_content
|
||||
)
|
||||
trace["iterations"].append(entity)
|
||||
current_uri = derived_uri
|
||||
elif isinstance(entity, Conclusion):
|
||||
# Fetch answer content from librarian if needed
|
||||
if api and not entity.answer and entity.document_uri:
|
||||
entity.answer = self.fetch_conclusion_content(
|
||||
entity, api, user=user, max_content=max_content
|
||||
)
|
||||
trace["conclusion"] = entity
|
||||
break
|
||||
else:
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
|
||||
from . request_response_spec import RequestResponse, RequestResponseSpec
|
||||
from .. schema import TriplesQueryRequest, TriplesQueryResponse, Term, IRI, LITERAL
|
||||
from .. schema import TriplesQueryRequest, TriplesQueryResponse, Term, IRI, LITERAL, TRIPLE
|
||||
from .. knowledge import Uri, Literal
|
||||
|
||||
|
||||
|
|
@ -22,9 +22,11 @@ def to_value(x):
|
|||
|
||||
|
||||
def from_value(x):
|
||||
"""Convert Uri, Literal, or string to schema Term."""
|
||||
"""Convert Uri, Literal, string, or Term to schema Term."""
|
||||
if x is None:
|
||||
return None
|
||||
if isinstance(x, Term):
|
||||
return x
|
||||
if isinstance(x, Uri):
|
||||
return Term(type=IRI, iri=str(x))
|
||||
elif isinstance(x, Literal):
|
||||
|
|
@ -41,7 +43,7 @@ def from_value(x):
|
|||
class TriplesClient(RequestResponse):
|
||||
async def query(self, s=None, p=None, o=None, limit=20,
|
||||
user="trustgraph", collection="default",
|
||||
timeout=30):
|
||||
timeout=30, g=None):
|
||||
|
||||
resp = await self.request(
|
||||
TriplesQueryRequest(
|
||||
|
|
@ -51,6 +53,7 @@ class TriplesClient(RequestResponse):
|
|||
limit = limit,
|
||||
user = user,
|
||||
collection = collection,
|
||||
g = g,
|
||||
),
|
||||
timeout=timeout
|
||||
)
|
||||
|
|
@ -68,7 +71,7 @@ class TriplesClient(RequestResponse):
|
|||
async def query_stream(self, s=None, p=None, o=None, limit=20,
|
||||
user="trustgraph", collection="default",
|
||||
batch_size=20, timeout=30,
|
||||
batch_callback=None):
|
||||
batch_callback=None, g=None):
|
||||
"""
|
||||
Streaming triple query - calls callback for each batch as it arrives.
|
||||
|
||||
|
|
@ -80,6 +83,8 @@ class TriplesClient(RequestResponse):
|
|||
batch_size: Triples per batch
|
||||
timeout: Request timeout in seconds
|
||||
batch_callback: Async callback(batch, is_final) called for each batch
|
||||
g: Graph filter. ""=default graph only, None=all graphs,
|
||||
or a specific graph IRI.
|
||||
|
||||
Returns:
|
||||
List[Triple]: All triples (flattened) if no callback provided
|
||||
|
|
@ -112,6 +117,7 @@ class TriplesClient(RequestResponse):
|
|||
collection=collection,
|
||||
streaming=True,
|
||||
batch_size=batch_size,
|
||||
g=g,
|
||||
),
|
||||
timeout=timeout,
|
||||
recipient=recipient,
|
||||
|
|
|
|||
|
|
@ -84,6 +84,7 @@ class GraphRagRequestTranslator(MessageTranslator):
|
|||
triple_limit=int(data.get("triple-limit", 30)),
|
||||
max_subgraph_size=int(data.get("max-subgraph-size", 1000)),
|
||||
max_path_length=int(data.get("max-path-length", 2)),
|
||||
edge_limit=int(data.get("edge-limit", 25)),
|
||||
streaming=data.get("streaming", False)
|
||||
)
|
||||
|
||||
|
|
@ -96,6 +97,7 @@ class GraphRagRequestTranslator(MessageTranslator):
|
|||
"triple-limit": obj.triple_limit,
|
||||
"max-subgraph-size": obj.max_subgraph_size,
|
||||
"max-path-length": obj.max_path_length,
|
||||
"edge-limit": obj.edge_limit,
|
||||
"streaming": getattr(obj, "streaming", False)
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -42,15 +42,19 @@ from . uris import (
|
|||
agent_uri,
|
||||
# Query-time provenance URIs (GraphRAG)
|
||||
question_uri,
|
||||
grounding_uri,
|
||||
exploration_uri,
|
||||
focus_uri,
|
||||
synthesis_uri,
|
||||
# Agent provenance URIs
|
||||
agent_session_uri,
|
||||
agent_iteration_uri,
|
||||
agent_thought_uri,
|
||||
agent_observation_uri,
|
||||
agent_final_uri,
|
||||
# Document RAG provenance URIs
|
||||
docrag_question_uri,
|
||||
docrag_grounding_uri,
|
||||
docrag_exploration_uri,
|
||||
docrag_synthesis_uri,
|
||||
)
|
||||
|
|
@ -74,18 +78,19 @@ from . namespaces import (
|
|||
# Extraction provenance entity types
|
||||
TG_DOCUMENT_TYPE, TG_PAGE_TYPE, TG_CHUNK_TYPE, TG_SUBGRAPH_TYPE,
|
||||
# Query-time provenance predicates (GraphRAG)
|
||||
TG_QUERY, TG_EDGE_COUNT, TG_SELECTED_EDGE, TG_REASONING, TG_CONTENT,
|
||||
TG_QUERY, TG_CONCEPT, TG_ENTITY,
|
||||
TG_EDGE_COUNT, TG_SELECTED_EDGE, TG_REASONING,
|
||||
# Query-time provenance predicates (DocumentRAG)
|
||||
TG_CHUNK_COUNT, TG_SELECTED_CHUNK,
|
||||
# Explainability entity types
|
||||
TG_QUESTION, TG_EXPLORATION, TG_FOCUS, TG_SYNTHESIS,
|
||||
TG_QUESTION, TG_GROUNDING, TG_EXPLORATION, TG_FOCUS, TG_SYNTHESIS,
|
||||
TG_ANALYSIS, TG_CONCLUSION,
|
||||
# Unifying types
|
||||
TG_ANSWER_TYPE, TG_REFLECTION_TYPE, TG_THOUGHT_TYPE, TG_OBSERVATION_TYPE,
|
||||
# Question subtypes (to distinguish retrieval mechanism)
|
||||
TG_GRAPH_RAG_QUESTION, TG_DOC_RAG_QUESTION, TG_AGENT_QUESTION,
|
||||
# Agent provenance predicates
|
||||
TG_THOUGHT, TG_ACTION, TG_ARGUMENTS, TG_OBSERVATION, TG_ANSWER,
|
||||
# Agent document references
|
||||
TG_THOUGHT_DOCUMENT, TG_OBSERVATION_DOCUMENT,
|
||||
TG_THOUGHT, TG_ACTION, TG_ARGUMENTS, TG_OBSERVATION,
|
||||
# Document reference predicate
|
||||
TG_DOCUMENT,
|
||||
# Named graphs
|
||||
|
|
@ -99,6 +104,7 @@ from . triples import (
|
|||
subgraph_provenance_triples,
|
||||
# Query-time provenance triple builders (GraphRAG)
|
||||
question_triples,
|
||||
grounding_triples,
|
||||
exploration_triples,
|
||||
focus_triples,
|
||||
synthesis_triples,
|
||||
|
|
@ -139,15 +145,19 @@ __all__ = [
|
|||
"agent_uri",
|
||||
# Query-time provenance URIs
|
||||
"question_uri",
|
||||
"grounding_uri",
|
||||
"exploration_uri",
|
||||
"focus_uri",
|
||||
"synthesis_uri",
|
||||
# Agent provenance URIs
|
||||
"agent_session_uri",
|
||||
"agent_iteration_uri",
|
||||
"agent_thought_uri",
|
||||
"agent_observation_uri",
|
||||
"agent_final_uri",
|
||||
# Document RAG provenance URIs
|
||||
"docrag_question_uri",
|
||||
"docrag_grounding_uri",
|
||||
"docrag_exploration_uri",
|
||||
"docrag_synthesis_uri",
|
||||
# Namespaces
|
||||
|
|
@ -164,18 +174,19 @@ __all__ = [
|
|||
# Extraction provenance entity types
|
||||
"TG_DOCUMENT_TYPE", "TG_PAGE_TYPE", "TG_CHUNK_TYPE", "TG_SUBGRAPH_TYPE",
|
||||
# Query-time provenance predicates (GraphRAG)
|
||||
"TG_QUERY", "TG_EDGE_COUNT", "TG_SELECTED_EDGE", "TG_REASONING", "TG_CONTENT",
|
||||
"TG_QUERY", "TG_CONCEPT", "TG_ENTITY",
|
||||
"TG_EDGE_COUNT", "TG_SELECTED_EDGE", "TG_REASONING",
|
||||
# Query-time provenance predicates (DocumentRAG)
|
||||
"TG_CHUNK_COUNT", "TG_SELECTED_CHUNK",
|
||||
# Explainability entity types
|
||||
"TG_QUESTION", "TG_EXPLORATION", "TG_FOCUS", "TG_SYNTHESIS",
|
||||
"TG_QUESTION", "TG_GROUNDING", "TG_EXPLORATION", "TG_FOCUS", "TG_SYNTHESIS",
|
||||
"TG_ANALYSIS", "TG_CONCLUSION",
|
||||
# Unifying types
|
||||
"TG_ANSWER_TYPE", "TG_REFLECTION_TYPE", "TG_THOUGHT_TYPE", "TG_OBSERVATION_TYPE",
|
||||
# Question subtypes
|
||||
"TG_GRAPH_RAG_QUESTION", "TG_DOC_RAG_QUESTION", "TG_AGENT_QUESTION",
|
||||
# Agent provenance predicates
|
||||
"TG_THOUGHT", "TG_ACTION", "TG_ARGUMENTS", "TG_OBSERVATION", "TG_ANSWER",
|
||||
# Agent document references
|
||||
"TG_THOUGHT_DOCUMENT", "TG_OBSERVATION_DOCUMENT",
|
||||
"TG_THOUGHT", "TG_ACTION", "TG_ARGUMENTS", "TG_OBSERVATION",
|
||||
# Document reference predicate
|
||||
"TG_DOCUMENT",
|
||||
# Named graphs
|
||||
|
|
@ -186,6 +197,7 @@ __all__ = [
|
|||
"subgraph_provenance_triples",
|
||||
# Query-time provenance triple builders (GraphRAG)
|
||||
"question_triples",
|
||||
"grounding_triples",
|
||||
"exploration_triples",
|
||||
"focus_triples",
|
||||
"synthesis_triples",
|
||||
|
|
|
|||
|
|
@ -15,10 +15,11 @@ from .. schema import Triple, Term, IRI, LITERAL
|
|||
|
||||
from . namespaces import (
|
||||
RDF_TYPE, RDFS_LABEL,
|
||||
PROV_ACTIVITY, PROV_ENTITY, PROV_WAS_DERIVED_FROM, PROV_STARTED_AT_TIME,
|
||||
TG_QUERY, TG_THOUGHT, TG_ACTION, TG_ARGUMENTS, TG_OBSERVATION, TG_ANSWER,
|
||||
PROV_ACTIVITY, PROV_ENTITY, PROV_WAS_DERIVED_FROM,
|
||||
PROV_WAS_GENERATED_BY, PROV_STARTED_AT_TIME,
|
||||
TG_QUERY, TG_THOUGHT, TG_ACTION, TG_ARGUMENTS, TG_OBSERVATION,
|
||||
TG_QUESTION, TG_ANALYSIS, TG_CONCLUSION, TG_DOCUMENT,
|
||||
TG_THOUGHT_DOCUMENT, TG_OBSERVATION_DOCUMENT,
|
||||
TG_ANSWER_TYPE, TG_REFLECTION_TYPE, TG_THOUGHT_TYPE, TG_OBSERVATION_TYPE,
|
||||
TG_AGENT_QUESTION,
|
||||
)
|
||||
|
||||
|
|
@ -73,12 +74,13 @@ def agent_session_triples(
|
|||
|
||||
def agent_iteration_triples(
|
||||
iteration_uri: str,
|
||||
parent_uri: str,
|
||||
thought: str = "",
|
||||
question_uri: Optional[str] = None,
|
||||
previous_uri: Optional[str] = None,
|
||||
action: str = "",
|
||||
arguments: Dict[str, Any] = None,
|
||||
observation: str = "",
|
||||
thought_uri: Optional[str] = None,
|
||||
thought_document_id: Optional[str] = None,
|
||||
observation_uri: Optional[str] = None,
|
||||
observation_document_id: Optional[str] = None,
|
||||
) -> List[Triple]:
|
||||
"""
|
||||
|
|
@ -86,19 +88,22 @@ def agent_iteration_triples(
|
|||
|
||||
Creates:
|
||||
- Entity declaration with tg:Analysis type
|
||||
- wasDerivedFrom link to parent (previous iteration or session)
|
||||
- Thought, action, arguments, and observation data
|
||||
- Document references for thought/observation when stored in librarian
|
||||
- wasGeneratedBy link to question (if first iteration)
|
||||
- wasDerivedFrom link to previous iteration (if not first)
|
||||
- Action and arguments metadata
|
||||
- Thought sub-entity (tg:Reflection, tg:Thought) with librarian document
|
||||
- Observation sub-entity (tg:Reflection, tg:Observation) with librarian document
|
||||
|
||||
Args:
|
||||
iteration_uri: URI of this iteration (from agent_iteration_uri)
|
||||
parent_uri: URI of the parent (previous iteration or session)
|
||||
thought: The agent's reasoning/thought (used if thought_document_id not provided)
|
||||
question_uri: URI of the question activity (for first iteration)
|
||||
previous_uri: URI of the previous iteration (for subsequent iterations)
|
||||
action: The tool/action name
|
||||
arguments: Arguments passed to the tool (will be JSON-encoded)
|
||||
observation: The result/observation from the tool (used if observation_document_id not provided)
|
||||
thought_document_id: Optional document URI for thought in librarian (preferred)
|
||||
observation_document_id: Optional document URI for observation in librarian (preferred)
|
||||
thought_uri: URI for the thought sub-entity
|
||||
thought_document_id: Document URI for thought in librarian
|
||||
observation_uri: URI for the observation sub-entity
|
||||
observation_document_id: Document URI for observation in librarian
|
||||
|
||||
Returns:
|
||||
List of Triple objects
|
||||
|
|
@ -110,45 +115,70 @@ def agent_iteration_triples(
|
|||
_triple(iteration_uri, RDF_TYPE, _iri(PROV_ENTITY)),
|
||||
_triple(iteration_uri, RDF_TYPE, _iri(TG_ANALYSIS)),
|
||||
_triple(iteration_uri, RDFS_LABEL, _literal(f"Analysis: {action}")),
|
||||
_triple(iteration_uri, PROV_WAS_DERIVED_FROM, _iri(parent_uri)),
|
||||
_triple(iteration_uri, TG_ACTION, _literal(action)),
|
||||
_triple(iteration_uri, TG_ARGUMENTS, _literal(json.dumps(arguments))),
|
||||
]
|
||||
|
||||
# Thought: use document reference or inline
|
||||
if thought_document_id:
|
||||
triples.append(_triple(iteration_uri, TG_THOUGHT_DOCUMENT, _iri(thought_document_id)))
|
||||
elif thought:
|
||||
triples.append(_triple(iteration_uri, TG_THOUGHT, _literal(thought)))
|
||||
if question_uri:
|
||||
triples.append(
|
||||
_triple(iteration_uri, PROV_WAS_GENERATED_BY, _iri(question_uri))
|
||||
)
|
||||
elif previous_uri:
|
||||
triples.append(
|
||||
_triple(iteration_uri, PROV_WAS_DERIVED_FROM, _iri(previous_uri))
|
||||
)
|
||||
|
||||
# Observation: use document reference or inline
|
||||
if observation_document_id:
|
||||
triples.append(_triple(iteration_uri, TG_OBSERVATION_DOCUMENT, _iri(observation_document_id)))
|
||||
elif observation:
|
||||
triples.append(_triple(iteration_uri, TG_OBSERVATION, _literal(observation)))
|
||||
# Thought sub-entity
|
||||
if thought_uri:
|
||||
triples.extend([
|
||||
_triple(iteration_uri, TG_THOUGHT, _iri(thought_uri)),
|
||||
_triple(thought_uri, RDF_TYPE, _iri(TG_REFLECTION_TYPE)),
|
||||
_triple(thought_uri, RDF_TYPE, _iri(TG_THOUGHT_TYPE)),
|
||||
_triple(thought_uri, RDFS_LABEL, _literal("Thought")),
|
||||
_triple(thought_uri, PROV_WAS_GENERATED_BY, _iri(iteration_uri)),
|
||||
])
|
||||
if thought_document_id:
|
||||
triples.append(
|
||||
_triple(thought_uri, TG_DOCUMENT, _iri(thought_document_id))
|
||||
)
|
||||
|
||||
# Observation sub-entity
|
||||
if observation_uri:
|
||||
triples.extend([
|
||||
_triple(iteration_uri, TG_OBSERVATION, _iri(observation_uri)),
|
||||
_triple(observation_uri, RDF_TYPE, _iri(TG_REFLECTION_TYPE)),
|
||||
_triple(observation_uri, RDF_TYPE, _iri(TG_OBSERVATION_TYPE)),
|
||||
_triple(observation_uri, RDFS_LABEL, _literal("Observation")),
|
||||
_triple(observation_uri, PROV_WAS_GENERATED_BY, _iri(iteration_uri)),
|
||||
])
|
||||
if observation_document_id:
|
||||
triples.append(
|
||||
_triple(observation_uri, TG_DOCUMENT, _iri(observation_document_id))
|
||||
)
|
||||
|
||||
return triples
|
||||
|
||||
|
||||
def agent_final_triples(
|
||||
final_uri: str,
|
||||
parent_uri: str,
|
||||
answer: str = "",
|
||||
question_uri: Optional[str] = None,
|
||||
previous_uri: Optional[str] = None,
|
||||
document_id: Optional[str] = None,
|
||||
) -> List[Triple]:
|
||||
"""
|
||||
Build triples for an agent final answer (Conclusion).
|
||||
|
||||
Creates:
|
||||
- Entity declaration with tg:Conclusion type
|
||||
- wasDerivedFrom link to parent (last iteration or session)
|
||||
- Either document reference (if document_id provided) or inline answer
|
||||
- Entity declaration with tg:Conclusion and tg:Answer types
|
||||
- wasGeneratedBy link to question (if no iterations)
|
||||
- wasDerivedFrom link to last iteration (if iterations exist)
|
||||
- Document reference to librarian
|
||||
|
||||
Args:
|
||||
final_uri: URI of the final answer (from agent_final_uri)
|
||||
parent_uri: URI of the parent (last iteration or session if no iterations)
|
||||
answer: The final answer text (used if document_id not provided)
|
||||
document_id: Optional document URI in librarian (preferred)
|
||||
question_uri: URI of the question activity (if no iterations)
|
||||
previous_uri: URI of the last iteration (if iterations exist)
|
||||
document_id: Librarian document ID for the answer content
|
||||
|
||||
Returns:
|
||||
List of Triple objects
|
||||
|
|
@ -156,15 +186,20 @@ def agent_final_triples(
|
|||
triples = [
|
||||
_triple(final_uri, RDF_TYPE, _iri(PROV_ENTITY)),
|
||||
_triple(final_uri, RDF_TYPE, _iri(TG_CONCLUSION)),
|
||||
_triple(final_uri, RDF_TYPE, _iri(TG_ANSWER_TYPE)),
|
||||
_triple(final_uri, RDFS_LABEL, _literal("Conclusion")),
|
||||
_triple(final_uri, PROV_WAS_DERIVED_FROM, _iri(parent_uri)),
|
||||
]
|
||||
|
||||
if question_uri:
|
||||
triples.append(
|
||||
_triple(final_uri, PROV_WAS_GENERATED_BY, _iri(question_uri))
|
||||
)
|
||||
elif previous_uri:
|
||||
triples.append(
|
||||
_triple(final_uri, PROV_WAS_DERIVED_FROM, _iri(previous_uri))
|
||||
)
|
||||
|
||||
if document_id:
|
||||
# Store reference to document in librarian (as IRI)
|
||||
triples.append(_triple(final_uri, TG_DOCUMENT, _iri(document_id)))
|
||||
elif answer:
|
||||
# Fallback: store inline answer
|
||||
triples.append(_triple(final_uri, TG_ANSWER, _literal(answer)))
|
||||
|
||||
return triples
|
||||
|
|
|
|||
|
|
@ -60,11 +60,12 @@ TG_SOURCE_CHAR_LENGTH = TG + "sourceCharLength"
|
|||
|
||||
# Query-time provenance predicates (GraphRAG)
|
||||
TG_QUERY = TG + "query"
|
||||
TG_CONCEPT = TG + "concept"
|
||||
TG_ENTITY = TG + "entity"
|
||||
TG_EDGE_COUNT = TG + "edgeCount"
|
||||
TG_SELECTED_EDGE = TG + "selectedEdge"
|
||||
TG_EDGE = TG + "edge"
|
||||
TG_REASONING = TG + "reasoning"
|
||||
TG_CONTENT = TG + "content"
|
||||
TG_DOCUMENT = TG + "document" # Reference to document in librarian
|
||||
|
||||
# Query-time provenance predicates (DocumentRAG)
|
||||
|
|
@ -79,27 +80,29 @@ TG_SUBGRAPH_TYPE = TG + "Subgraph"
|
|||
|
||||
# Explainability entity types (shared)
|
||||
TG_QUESTION = TG + "Question"
|
||||
TG_GROUNDING = TG + "Grounding"
|
||||
TG_EXPLORATION = TG + "Exploration"
|
||||
TG_FOCUS = TG + "Focus"
|
||||
TG_SYNTHESIS = TG + "Synthesis"
|
||||
TG_ANALYSIS = TG + "Analysis"
|
||||
TG_CONCLUSION = TG + "Conclusion"
|
||||
|
||||
# Unifying types for answer and intermediate commentary
|
||||
TG_ANSWER_TYPE = TG + "Answer" # Final answer (Synthesis, Conclusion)
|
||||
TG_REFLECTION_TYPE = TG + "Reflection" # Intermediate commentary (Thought, Observation)
|
||||
TG_THOUGHT_TYPE = TG + "Thought" # Agent reasoning
|
||||
TG_OBSERVATION_TYPE = TG + "Observation" # Agent tool result
|
||||
|
||||
# Question subtypes (to distinguish retrieval mechanism)
|
||||
TG_GRAPH_RAG_QUESTION = TG + "GraphRagQuestion"
|
||||
TG_DOC_RAG_QUESTION = TG + "DocRagQuestion"
|
||||
TG_AGENT_QUESTION = TG + "AgentQuestion"
|
||||
|
||||
# Agent provenance predicates
|
||||
TG_THOUGHT = TG + "thought"
|
||||
TG_THOUGHT = TG + "thought" # Links iteration to thought sub-entity
|
||||
TG_ACTION = TG + "action"
|
||||
TG_ARGUMENTS = TG + "arguments"
|
||||
TG_OBSERVATION = TG + "observation"
|
||||
TG_ANSWER = TG + "answer"
|
||||
|
||||
# Agent document references (for librarian storage)
|
||||
TG_THOUGHT_DOCUMENT = TG + "thoughtDocument"
|
||||
TG_OBSERVATION_DOCUMENT = TG + "observationDocument"
|
||||
TG_OBSERVATION = TG + "observation" # Links iteration to observation sub-entity
|
||||
|
||||
# Named graph URIs for RDF datasets
|
||||
# These separate different types of data while keeping them in the same collection
|
||||
|
|
|
|||
|
|
@ -20,12 +20,15 @@ from . namespaces import (
|
|||
# Extraction provenance entity types
|
||||
TG_DOCUMENT_TYPE, TG_PAGE_TYPE, TG_CHUNK_TYPE, TG_SUBGRAPH_TYPE,
|
||||
# Query-time provenance predicates (GraphRAG)
|
||||
TG_QUERY, TG_EDGE_COUNT, TG_SELECTED_EDGE, TG_EDGE, TG_REASONING, TG_CONTENT,
|
||||
TG_QUERY, TG_CONCEPT, TG_ENTITY,
|
||||
TG_EDGE_COUNT, TG_SELECTED_EDGE, TG_EDGE, TG_REASONING,
|
||||
TG_DOCUMENT,
|
||||
# Query-time provenance predicates (DocumentRAG)
|
||||
TG_CHUNK_COUNT, TG_SELECTED_CHUNK,
|
||||
# Explainability entity types
|
||||
TG_QUESTION, TG_EXPLORATION, TG_FOCUS, TG_SYNTHESIS,
|
||||
TG_QUESTION, TG_GROUNDING, TG_EXPLORATION, TG_FOCUS, TG_SYNTHESIS,
|
||||
# Unifying types
|
||||
TG_ANSWER_TYPE,
|
||||
# Question subtypes
|
||||
TG_GRAPH_RAG_QUESTION, TG_DOC_RAG_QUESTION,
|
||||
)
|
||||
|
|
@ -347,35 +350,78 @@ def question_triples(
|
|||
]
|
||||
|
||||
|
||||
def grounding_triples(
    grounding_uri: str,
    question_uri: str,
    concepts: List[str],
) -> List[Triple]:
    """
    Build triples for a grounding entity (concept decomposition of query).

    Creates:
    - Entity declaration for grounding (PROV_ENTITY and TG_GROUNDING types)
    - "Grounding" label
    - wasGeneratedBy link to question
    - One concept literal for each extracted concept

    Args:
        grounding_uri: URI of the grounding entity (from grounding_uri)
        question_uri: URI of the parent question
        concepts: List of concept strings extracted from the query

    Returns:
        List of Triple objects
    """
    triples = [
        _triple(grounding_uri, RDF_TYPE, _iri(PROV_ENTITY)),
        _triple(grounding_uri, RDF_TYPE, _iri(TG_GROUNDING)),
        _triple(grounding_uri, RDFS_LABEL, _literal("Grounding")),
        _triple(grounding_uri, PROV_WAS_GENERATED_BY, _iri(question_uri)),
    ]

    # One literal per concept, emitted in input order; duplicates in
    # `concepts` are passed through unchanged.
    triples.extend(
        _triple(grounding_uri, TG_CONCEPT, _literal(concept))
        for concept in concepts
    )

    return triples
|
||||
|
||||
|
||||
def exploration_triples(
|
||||
exploration_uri: str,
|
||||
question_uri: str,
|
||||
grounding_uri: str,
|
||||
edge_count: int,
|
||||
entities: Optional[List[str]] = None,
|
||||
) -> List[Triple]:
|
||||
"""
|
||||
Build triples for an exploration entity (all edges retrieved from subgraph).
|
||||
|
||||
Creates:
|
||||
- Entity declaration for exploration
|
||||
- wasGeneratedBy link to question
|
||||
- wasDerivedFrom link to grounding
|
||||
- Edge count metadata
|
||||
- Entity IRIs for each seed entity
|
||||
|
||||
Args:
|
||||
exploration_uri: URI of the exploration entity (from exploration_uri)
|
||||
question_uri: URI of the parent question
|
||||
grounding_uri: URI of the parent grounding entity
|
||||
edge_count: Number of edges retrieved
|
||||
entities: Optional list of seed entity URIs
|
||||
|
||||
Returns:
|
||||
List of Triple objects
|
||||
"""
|
||||
return [
|
||||
triples = [
|
||||
_triple(exploration_uri, RDF_TYPE, _iri(PROV_ENTITY)),
|
||||
_triple(exploration_uri, RDF_TYPE, _iri(TG_EXPLORATION)),
|
||||
_triple(exploration_uri, RDFS_LABEL, _literal("Exploration")),
|
||||
_triple(exploration_uri, PROV_WAS_GENERATED_BY, _iri(question_uri)),
|
||||
_triple(exploration_uri, PROV_WAS_DERIVED_FROM, _iri(grounding_uri)),
|
||||
_triple(exploration_uri, TG_EDGE_COUNT, _literal(edge_count)),
|
||||
]
|
||||
|
||||
if entities:
|
||||
for entity in entities:
|
||||
triples.append(_triple(exploration_uri, TG_ENTITY, _iri(entity)))
|
||||
|
||||
return triples
|
||||
|
||||
|
||||
def _quoted_triple(s: str, p: str, o: str) -> Term:
|
||||
"""Create a quoted triple term (RDF-star) from string values."""
|
||||
|
|
@ -454,22 +500,20 @@ def focus_triples(
|
|||
def synthesis_triples(
|
||||
synthesis_uri: str,
|
||||
focus_uri: str,
|
||||
answer_text: str = "",
|
||||
document_id: Optional[str] = None,
|
||||
) -> List[Triple]:
|
||||
"""
|
||||
Build triples for a synthesis entity (final answer text).
|
||||
Build triples for a synthesis entity (final answer).
|
||||
|
||||
Creates:
|
||||
- Entity declaration for synthesis
|
||||
- Entity declaration for synthesis with tg:Answer type
|
||||
- wasDerivedFrom link to focus
|
||||
- Either document reference (if document_id provided) or inline content
|
||||
- Document reference to librarian
|
||||
|
||||
Args:
|
||||
synthesis_uri: URI of the synthesis entity (from synthesis_uri)
|
||||
focus_uri: URI of the parent focus entity
|
||||
answer_text: The synthesized answer text (used if no document_id)
|
||||
document_id: Optional librarian document ID (preferred over inline content)
|
||||
document_id: Librarian document ID for the answer content
|
||||
|
||||
Returns:
|
||||
List of Triple objects
|
||||
|
|
@ -477,16 +521,13 @@ def synthesis_triples(
|
|||
triples = [
|
||||
_triple(synthesis_uri, RDF_TYPE, _iri(PROV_ENTITY)),
|
||||
_triple(synthesis_uri, RDF_TYPE, _iri(TG_SYNTHESIS)),
|
||||
_triple(synthesis_uri, RDF_TYPE, _iri(TG_ANSWER_TYPE)),
|
||||
_triple(synthesis_uri, RDFS_LABEL, _literal("Synthesis")),
|
||||
_triple(synthesis_uri, PROV_WAS_DERIVED_FROM, _iri(focus_uri)),
|
||||
]
|
||||
|
||||
if document_id:
|
||||
# Store reference to document in librarian (as IRI)
|
||||
triples.append(_triple(synthesis_uri, TG_DOCUMENT, _iri(document_id)))
|
||||
elif answer_text:
|
||||
# Fallback: store inline content
|
||||
triples.append(_triple(synthesis_uri, TG_CONTENT, _literal(answer_text)))
|
||||
|
||||
return triples
|
||||
|
||||
|
|
@ -533,7 +574,7 @@ def docrag_question_triples(
|
|||
|
||||
def docrag_exploration_triples(
|
||||
exploration_uri: str,
|
||||
question_uri: str,
|
||||
grounding_uri: str,
|
||||
chunk_count: int,
|
||||
chunk_ids: Optional[List[str]] = None,
|
||||
) -> List[Triple]:
|
||||
|
|
@ -542,12 +583,12 @@ def docrag_exploration_triples(
|
|||
|
||||
Creates:
|
||||
- Entity declaration with tg:Exploration type
|
||||
- wasGeneratedBy link to question
|
||||
- wasDerivedFrom link to grounding
|
||||
- Chunk count and optional chunk references
|
||||
|
||||
Args:
|
||||
exploration_uri: URI of the exploration entity
|
||||
question_uri: URI of the parent question
|
||||
grounding_uri: URI of the parent grounding entity
|
||||
chunk_count: Number of chunks retrieved
|
||||
chunk_ids: Optional list of chunk URIs/IDs
|
||||
|
||||
|
|
@ -558,7 +599,7 @@ def docrag_exploration_triples(
|
|||
_triple(exploration_uri, RDF_TYPE, _iri(PROV_ENTITY)),
|
||||
_triple(exploration_uri, RDF_TYPE, _iri(TG_EXPLORATION)),
|
||||
_triple(exploration_uri, RDFS_LABEL, _literal("Exploration")),
|
||||
_triple(exploration_uri, PROV_WAS_GENERATED_BY, _iri(question_uri)),
|
||||
_triple(exploration_uri, PROV_WAS_DERIVED_FROM, _iri(grounding_uri)),
|
||||
_triple(exploration_uri, TG_CHUNK_COUNT, _literal(chunk_count)),
|
||||
]
|
||||
|
||||
|
|
@ -573,22 +614,20 @@ def docrag_exploration_triples(
|
|||
def docrag_synthesis_triples(
|
||||
synthesis_uri: str,
|
||||
exploration_uri: str,
|
||||
answer_text: str = "",
|
||||
document_id: Optional[str] = None,
|
||||
) -> List[Triple]:
|
||||
"""
|
||||
Build triples for a document RAG synthesis entity (final answer).
|
||||
|
||||
Creates:
|
||||
- Entity declaration with tg:Synthesis type
|
||||
- Entity declaration with tg:Synthesis and tg:Answer types
|
||||
- wasDerivedFrom link to exploration (skips focus step)
|
||||
- Either document reference or inline content
|
||||
- Document reference to librarian
|
||||
|
||||
Args:
|
||||
synthesis_uri: URI of the synthesis entity
|
||||
exploration_uri: URI of the parent exploration entity
|
||||
answer_text: The synthesized answer text (used if no document_id)
|
||||
document_id: Optional librarian document ID (preferred over inline content)
|
||||
document_id: Librarian document ID for the answer content
|
||||
|
||||
Returns:
|
||||
List of Triple objects
|
||||
|
|
@ -596,13 +635,12 @@ def docrag_synthesis_triples(
|
|||
triples = [
|
||||
_triple(synthesis_uri, RDF_TYPE, _iri(PROV_ENTITY)),
|
||||
_triple(synthesis_uri, RDF_TYPE, _iri(TG_SYNTHESIS)),
|
||||
_triple(synthesis_uri, RDF_TYPE, _iri(TG_ANSWER_TYPE)),
|
||||
_triple(synthesis_uri, RDFS_LABEL, _literal("Synthesis")),
|
||||
_triple(synthesis_uri, PROV_WAS_DERIVED_FROM, _iri(exploration_uri)),
|
||||
]
|
||||
|
||||
if document_id:
|
||||
triples.append(_triple(synthesis_uri, TG_DOCUMENT, _iri(document_id)))
|
||||
elif answer_text:
|
||||
triples.append(_triple(synthesis_uri, TG_CONTENT, _literal(answer_text)))
|
||||
|
||||
return triples
|
||||
|
|
|
|||
|
|
@ -68,6 +68,7 @@ def agent_uri(component_name: str) -> str:
|
|||
#
|
||||
# Terminology:
|
||||
# Question - What was asked, the anchor for everything
|
||||
# Grounding - Decomposing the question into concepts
|
||||
# Exploration - Casting wide, what do we know about this space
|
||||
# Focus - Closing down, what's actually relevant here
|
||||
# Synthesis - Weaving the relevant pieces into an answer
|
||||
|
|
@ -87,6 +88,19 @@ def question_uri(session_id: str = None) -> str:
|
|||
return f"urn:trustgraph:question:{session_id}"
|
||||
|
||||
|
||||
def grounding_uri(session_id: str) -> str:
    """
    Generate URI for a grounding entity (concept decomposition of query).

    Args:
        session_id: The session UUID (same as question_uri).

    Returns:
        URN in format: urn:trustgraph:prov:grounding:{uuid}
    """
    return f"urn:trustgraph:prov:grounding:{session_id}"
|
||||
|
||||
|
||||
def exploration_uri(session_id: str) -> str:
|
||||
"""
|
||||
Generate URI for an exploration entity (edges retrieved from subgraph).
|
||||
|
|
@ -173,6 +187,34 @@ def agent_iteration_uri(session_id: str, iteration_num: int) -> str:
|
|||
return f"urn:trustgraph:agent:{session_id}/i{iteration_num}"
|
||||
|
||||
|
||||
def agent_thought_uri(session_id: str, iteration_num: int) -> str:
    """
    Generate URI for an agent thought sub-entity.

    The result is the iteration URN for the same session and iteration
    number with a "/thought" suffix.

    Args:
        session_id: The session UUID.
        iteration_num: 1-based iteration number.

    Returns:
        URN in format: urn:trustgraph:agent:{uuid}/i{num}/thought
    """
    return f"urn:trustgraph:agent:{session_id}/i{iteration_num}/thought"
|
||||
|
||||
|
||||
def agent_observation_uri(session_id: str, iteration_num: int) -> str:
    """
    Generate URI for an agent observation sub-entity.

    The result is the iteration URN for the same session and iteration
    number with an "/observation" suffix.

    Args:
        session_id: The session UUID.
        iteration_num: 1-based iteration number.

    Returns:
        URN in format: urn:trustgraph:agent:{uuid}/i{num}/observation
    """
    return f"urn:trustgraph:agent:{session_id}/i{iteration_num}/observation"
|
||||
|
||||
|
||||
def agent_final_uri(session_id: str) -> str:
|
||||
"""
|
||||
Generate URI for an agent final answer.
|
||||
|
|
@ -205,6 +247,19 @@ def docrag_question_uri(session_id: str = None) -> str:
|
|||
return f"urn:trustgraph:docrag:{session_id}"
|
||||
|
||||
|
||||
def docrag_grounding_uri(session_id: str) -> str:
    """
    Generate URI for a document RAG grounding entity (concept decomposition).

    Args:
        session_id: The session UUID.

    Returns:
        URN in format: urn:trustgraph:docrag:{uuid}/grounding
    """
    return f"urn:trustgraph:docrag:{session_id}/grounding"
|
||||
|
||||
|
||||
def docrag_exploration_uri(session_id: str) -> str:
|
||||
"""
|
||||
Generate URI for a document RAG exploration entity (chunks retrieved).
|
||||
|
|
|
|||
|
|
@ -25,6 +25,8 @@ from . namespaces import (
|
|||
TG_LLM_MODEL, TG_ONTOLOGY, TG_EMBEDDING_MODEL,
|
||||
TG_SOURCE_TEXT, TG_SOURCE_CHAR_OFFSET, TG_SOURCE_CHAR_LENGTH,
|
||||
TG_DOCUMENT_TYPE, TG_PAGE_TYPE, TG_CHUNK_TYPE, TG_SUBGRAPH_TYPE,
|
||||
TG_CONCEPT, TG_ENTITY, TG_GROUNDING,
|
||||
TG_ANSWER_TYPE, TG_REFLECTION_TYPE, TG_THOUGHT_TYPE, TG_OBSERVATION_TYPE,
|
||||
)
|
||||
|
||||
|
||||
|
|
@ -80,6 +82,11 @@ TG_CLASS_LABELS = [
|
|||
_label_triple(TG_PAGE_TYPE, "Page"),
|
||||
_label_triple(TG_CHUNK_TYPE, "Chunk"),
|
||||
_label_triple(TG_SUBGRAPH_TYPE, "Subgraph"),
|
||||
_label_triple(TG_GROUNDING, "Grounding"),
|
||||
_label_triple(TG_ANSWER_TYPE, "Answer"),
|
||||
_label_triple(TG_REFLECTION_TYPE, "Reflection"),
|
||||
_label_triple(TG_THOUGHT_TYPE, "Thought"),
|
||||
_label_triple(TG_OBSERVATION_TYPE, "Observation"),
|
||||
]
|
||||
|
||||
# TrustGraph predicate labels
|
||||
|
|
@ -100,6 +107,8 @@ TG_PREDICATE_LABELS = [
|
|||
_label_triple(TG_SOURCE_TEXT, "source text"),
|
||||
_label_triple(TG_SOURCE_CHAR_OFFSET, "source character offset"),
|
||||
_label_triple(TG_SOURCE_CHAR_LENGTH, "source character length"),
|
||||
_label_triple(TG_CONCEPT, "concept"),
|
||||
_label_triple(TG_ENTITY, "entity"),
|
||||
]
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -15,6 +15,7 @@ class GraphRagQuery:
|
|||
triple_limit: int = 0
|
||||
max_subgraph_size: int = 0
|
||||
max_path_length: int = 0
|
||||
edge_limit: int = 0
|
||||
streaming: bool = False
|
||||
|
||||
@dataclass
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue