Fix doc embeddings invocation (#672)

* Fix doc embeddings invocation
* Tidy query embeddings invocation
2026-07-25 05:01:01 +02:00 · 2026-03-09 11:07:32 +00:00 · 2026-03-09 11:07:32 +00:00 · b2ef7bbb8c
commit b2ef7bbb8c
parent f2ae0e8623
4 changed files with 27 additions and 9 deletions
--- a/trustgraph-base/trustgraph/api/flow.py
+++ b/trustgraph-base/trustgraph/api/flow.py
@@ -599,6 +599,7 @@ class FlowInstance:
                collection="scientists",
                limit=5
            )
+            # results contains {"entities": [{"entity": {...}, "score": 0.95}, ...]}
            ```
        """

@@ -633,7 +634,7 @@ class FlowInstance:
            limit: Maximum number of results (default: 10)

        Returns:
-            dict: Query results with chunk_ids of matching document chunks
+            dict: Query results with chunks containing chunk_id and score

        Example:
            ```python
@@ -644,7 +645,7 @@ class FlowInstance:
                collection="research-papers",
                limit=5
            )
-            # results contains {"chunk_ids": ["doc1/p0/c0", "doc2/p1/c3", ...]}
+            # results contains {"chunks": [{"chunk_id": "doc1/p0/c0", "score": 0.95}, ...]}
            ```
        """

--- a/trustgraph-base/trustgraph/api/socket_client.py
+++ b/trustgraph-base/trustgraph/api/socket_client.py
@@ -695,7 +695,7 @@ class SocketFlowInstance:
                collection="research-papers",
                limit=5
            )
-            # results contains {"chunk_ids": ["doc1/p0/c0", ...]}
+            # results contains {"chunks": [{"chunk_id": "...", "score": 0.95}, ...]}
            ```
        """
        # First convert text to embedding vector
--- a/trustgraph-cli/trustgraph/cli/invoke_document_embeddings.py
+++ b/trustgraph-cli/trustgraph/cli/invoke_document_embeddings.py
@@ -26,12 +26,14 @@ def query(url, flow_id, query_text, user, collection, limit, token=None):
            limit=limit
        )

-        chunk_ids = result.get("chunk_ids", [])
-        if not chunk_ids:
+        chunks = result.get("chunks", [])
+        if not chunks:
            print("No matching chunks found.")
        else:
-            for i, chunk_id in enumerate(chunk_ids, 1):
-                print(f"{i}. {chunk_id}")
+            for i, chunk in enumerate(chunks, 1):
+                chunk_id = chunk.get("chunk_id", "")
+                score = chunk.get("score", 0.0)
+                print(f"{i}. {chunk_id} (score: {score:.4f})")

    finally:
        # Clean up socket connection
--- a/trustgraph-cli/trustgraph/cli/invoke_graph_embeddings.py
+++ b/trustgraph-cli/trustgraph/cli/invoke_graph_embeddings.py
@@ -27,8 +27,23 @@ def query(url, flow_id, query_text, user, collection, limit, token=None):
        )

        entities = result.get("entities", [])
-        for entity in entities:
-            print(entity)
+        if not entities:
+            print("No matching entities found.")
+        else:
+            for i, match in enumerate(entities, 1):
+                entity = match.get("entity", {})
+                score = match.get("score", 0.0)
+                # Format entity based on type (wire format uses compact keys)
+                term_type = entity.get("t", "")
+                if term_type == "i":  # IRI
+                    entity_str = entity.get("i", "")
+                elif term_type == "l":  # Literal
+                    entity_str = f'"{entity.get("v", "")}"'
+                elif term_type == "b":  # Blank node
+                    entity_str = f'_:{entity.get("d", "")}'
+                else:
+                    entity_str = str(entity)
+                print(f"{i}. {entity_str} (score: {score:.4f})")

    finally:
        # Clean up socket connection