Fix doc embeddings invocation (#672)

* Fix doc embeddings invocation
* Tidy query embeddings invocation
2026-04-25 00:16:23 +02:00 · 2026-03-09 11:07:32 +00:00 · 2026-03-09 11:07:32 +00:00 · b2ef7bbb8c
commit b2ef7bbb8c
parent f2ae0e8623
4 changed files with 27 additions and 9 deletions
--- a/trustgraph-base/trustgraph/api/flow.py
+++ b/trustgraph-base/trustgraph/api/flow.py
@@ -599,6 +599,7 @@ class FlowInstance:
                collection="scientists",
                limit=5
            )
+            # results contains {"entities": [{"entity": {...}, "score": 0.95}, ...]}
            ```
        """
@@ -633,7 +634,7 @@ class FlowInstance:
            limit: Maximum number of results (default: 10)
        Returns:
-            dict: Query results with chunk_ids of matching document chunks
+            dict: Query results with chunks containing chunk_id and score
        Example:
            ```python
@@ -644,7 +645,7 @@ class FlowInstance:
                collection="research-papers",
                limit=5
            )
-            # results contains {"chunk_ids": ["doc1/p0/c0", "doc2/p1/c3", ...]}
+            # results contains {"chunks": [{"chunk_id": "doc1/p0/c0", "score": 0.95}, ...]}
            ```
        """
--- a/trustgraph-base/trustgraph/api/socket_client.py
+++ b/trustgraph-base/trustgraph/api/socket_client.py
@@ -695,7 +695,7 @@ class SocketFlowInstance:
                collection="research-papers",
                limit=5
            )
-            # results contains {"chunk_ids": ["doc1/p0/c0", ...]}
+            # results contains {"chunks": [{"chunk_id": "...", "score": 0.95}, ...]}
            ```
        """
        # First convert text to embedding vector
--- a/trustgraph-cli/trustgraph/cli/invoke_document_embeddings.py
+++ b/trustgraph-cli/trustgraph/cli/invoke_document_embeddings.py
@@ -26,12 +26,14 @@ def query(url, flow_id, query_text, user, collection, limit, token=None):
            limit=limit
        )
-        chunk_ids = result.get("chunk_ids", [])
+        chunks = result.get("chunks", [])
-        if not chunk_ids:
+        if not chunks:
            print("No matching chunks found.")
        else:
-            for i, chunk_id in enumerate(chunk_ids, 1):
+            for i, chunk in enumerate(chunks, 1):
-                print(f"{i}. {chunk_id}")
+                chunk_id = chunk.get("chunk_id", "")
+                score = chunk.get("score", 0.0)
+                print(f"{i}. {chunk_id} (score: {score:.4f})")
    finally:
        # Clean up socket connection
--- a/trustgraph-cli/trustgraph/cli/invoke_graph_embeddings.py
+++ b/trustgraph-cli/trustgraph/cli/invoke_graph_embeddings.py
@@ -27,8 +27,23 @@ def query(url, flow_id, query_text, user, collection, limit, token=None):
        )
        entities = result.get("entities", [])
-        for entity in entities:
+        if not entities:
-            print(entity)
+            print("No matching entities found.")
+        else:
+            for i, match in enumerate(entities, 1):
+                entity = match.get("entity", {})
+                score = match.get("score", 0.0)
+                # Format entity based on type (wire format uses compact keys)
+                term_type = entity.get("t", "")
+                if term_type == "i":  # IRI
+                    entity_str = entity.get("i", "")
+                elif term_type == "l":  # Literal
+                    entity_str = f'"{entity.get("v", "")}"'
+                elif term_type == "b":  # Blank node
+                    entity_str = f'_:{entity.get("d", "")}'
+                else:
+                    entity_str = str(entity)
+                print(f"{i}. {entity_str} (score: {score:.4f})")
    finally:
        # Clean up socket connection