Row embeddings APIs exposed (#646)

* Added row embeddings API and CLI support * Updated protocol specs * Row embeddings agent tool * Add new agent tool to CLI
2026-06-14 01:05:14 +02:00 · 2026-02-23 21:52:56 +00:00 · 2026-02-23 21:52:56 +00:00 · 4bbc6d844f
commit 4bbc6d844f
parent 1809c1f56d
25 changed files with 1090 additions and 29 deletions
--- a/trustgraph-base/trustgraph/api/socket_client.py
+++ b/trustgraph-base/trustgraph/api/socket_client.py
@@ -881,3 +881,73 @@ class SocketFlowInstance:
        request.update(kwargs)

        return self.client._send_request_sync("mcp-tool", self.flow_id, request, False)
+
+    def row_embeddings_query(
+        self,
+        text: str,
+        schema_name: str,
+        user: str = "trustgraph",
+        collection: str = "default",
+        index_name: Optional[str] = None,
+        limit: int = 10,
+        **kwargs: Any
+    ) -> Dict[str, Any]:
+        """
+        Query row data using semantic similarity on indexed fields.
+
+        Finds rows whose indexed field values are semantically similar to the
+        input text, using vector embeddings. This enables fuzzy/semantic matching
+        on structured data.
+
+        Args:
+            text: Query text for semantic search
+            schema_name: Schema name to search within
+            user: User/keyspace identifier (default: "trustgraph")
+            collection: Collection identifier (default: "default")
+            index_name: Optional index name to filter search to specific index
+            limit: Maximum number of results (default: 10)
+            **kwargs: Additional parameters passed to the service
+
+        Returns:
+            dict: Query results with matches containing index_name, index_value,
+                  text, and score
+
+        Example:
+            ```python
+            socket = api.socket()
+            flow = socket.flow("default")
+
+            # Search for customers by name similarity
+            results = flow.row_embeddings_query(
+                text="John Smith",
+                schema_name="customers",
+                user="trustgraph",
+                collection="sales",
+                limit=5
+            )
+
+            # Filter to specific index
+            results = flow.row_embeddings_query(
+                text="machine learning engineer",
+                schema_name="employees",
+                index_name="job_title",
+                limit=10
+            )
+            ```
+        """
+        # First convert text to embeddings vectors
+        emb_result = self.embeddings(text=text)
+        vectors = emb_result.get("vectors", [])
+
+        request = {
+            "vectors": vectors,
+            "schema_name": schema_name,
+            "user": user,
+            "collection": collection,
+            "limit": limit
+        }
+        if index_name:
+            request["index_name"] = index_name
+        request.update(kwargs)
+
+        return self.client._send_request_sync("row-embeddings", self.flow_id, request, False)