From b2ef7bbb8c05c147332214506db57e810ab15603 Mon Sep 17 00:00:00 2001 From: cybermaggedon Date: Mon, 9 Mar 2026 11:07:32 +0000 Subject: [PATCH] Fix doc embeddings invocation (#672) * Fix doc embeddings invocation * Tidy query embeddings invocation --- trustgraph-base/trustgraph/api/flow.py | 5 +++-- .../trustgraph/api/socket_client.py | 2 +- .../cli/invoke_document_embeddings.py | 10 ++++++---- .../trustgraph/cli/invoke_graph_embeddings.py | 19 +++++++++++++++++-- 4 files changed, 27 insertions(+), 9 deletions(-) diff --git a/trustgraph-base/trustgraph/api/flow.py b/trustgraph-base/trustgraph/api/flow.py index 49e2f9fa..f20d4d56 100644 --- a/trustgraph-base/trustgraph/api/flow.py +++ b/trustgraph-base/trustgraph/api/flow.py @@ -599,6 +599,7 @@ class FlowInstance: collection="scientists", limit=5 ) + # results contains {"entities": [{"entity": {...}, "score": 0.95}, ...]} ``` """ @@ -633,7 +634,7 @@ class FlowInstance: limit: Maximum number of results (default: 10) Returns: - dict: Query results with chunk_ids of matching document chunks + dict: Query results with chunks containing chunk_id and score Example: ```python @@ -644,7 +645,7 @@ class FlowInstance: collection="research-papers", limit=5 ) - # results contains {"chunk_ids": ["doc1/p0/c0", "doc2/p1/c3", ...]} + # results contains {"chunks": [{"chunk_id": "doc1/p0/c0", "score": 0.95}, ...]} ``` """ diff --git a/trustgraph-base/trustgraph/api/socket_client.py b/trustgraph-base/trustgraph/api/socket_client.py index 113ebe35..26c241a7 100644 --- a/trustgraph-base/trustgraph/api/socket_client.py +++ b/trustgraph-base/trustgraph/api/socket_client.py @@ -695,7 +695,7 @@ class SocketFlowInstance: collection="research-papers", limit=5 ) - # results contains {"chunk_ids": ["doc1/p0/c0", ...]} + # results contains {"chunks": [{"chunk_id": "...", "score": 0.95}, ...]} ``` """ # First convert text to embedding vector diff --git a/trustgraph-cli/trustgraph/cli/invoke_document_embeddings.py b/trustgraph-cli/trustgraph/cli/invoke_document_embeddings.py index b3eef8a6..43bcc985 100644 --- a/trustgraph-cli/trustgraph/cli/invoke_document_embeddings.py +++ b/trustgraph-cli/trustgraph/cli/invoke_document_embeddings.py @@ -26,12 +26,14 @@ def query(url, flow_id, query_text, user, collection, limit, token=None): limit=limit ) - chunk_ids = result.get("chunk_ids", []) - if not chunk_ids: + chunks = result.get("chunks", []) + if not chunks: print("No matching chunks found.") else: - for i, chunk_id in enumerate(chunk_ids, 1): - print(f"{i}. {chunk_id}") + for i, chunk in enumerate(chunks, 1): + chunk_id = chunk.get("chunk_id", "") + score = chunk.get("score", 0.0) + print(f"{i}. {chunk_id} (score: {score:.4f})") finally: # Clean up socket connection diff --git a/trustgraph-cli/trustgraph/cli/invoke_graph_embeddings.py b/trustgraph-cli/trustgraph/cli/invoke_graph_embeddings.py index ae195007..5b0f4c67 100644 --- a/trustgraph-cli/trustgraph/cli/invoke_graph_embeddings.py +++ b/trustgraph-cli/trustgraph/cli/invoke_graph_embeddings.py @@ -27,8 +27,23 @@ def query(url, flow_id, query_text, user, collection, limit, token=None): ) entities = result.get("entities", []) - for entity in entities: - print(entity) + if not entities: + print("No matching entities found.") + else: + for i, match in enumerate(entities, 1): + entity = match.get("entity", {}) + score = match.get("score", 0.0) + # Format entity based on type (wire format uses compact keys) + term_type = entity.get("t", "") + if term_type == "i": # IRI + entity_str = entity.get("i", "") + elif term_type == "l": # Literal + entity_str = f'"{entity.get("v", "")}"' + elif term_type == "b": # Blank node + entity_str = f'_:{entity.get("d", "")}' + else: + entity_str = str(entity) + print(f"{i}. {entity_str} (score: {score:.4f})") finally: # Clean up socket connection