Row embeddings APIs exposed (#646)

* Added row embeddings API and CLI support
* Updated protocol specs
* Added row embeddings agent tool
* Added new agent tool to CLI
2026-07-03 23:11:00 +02:00 · 2026-02-23 21:52:56 +00:00 · 2026-02-23 21:52:56 +00:00 · 4bbc6d844f
commit 4bbc6d844f
parent 1809c1f56d
25 changed files with 1090 additions and 29 deletions
--- a/trustgraph-base/trustgraph/api/async_flow.py
+++ b/trustgraph-base/trustgraph/api/async_flow.py
@ -766,3 +766,63 @@ class AsyncFlowInstance:
        request_data.update(kwargs)

        return await self.request("rows", request_data)
+
+    async def row_embeddings_query(
+        self, text: str, schema_name: str, user: str = "trustgraph",
+        collection: str = "default", index_name: Optional[str] = None,
+        limit: int = 10, **kwargs: Any
+    ):
+        """
+        Run a semantic (embedding-based) search over indexed row data.
+
+        The text is first embedded with ``self.embeddings``; the vectors are
+        then sent to the "row-embeddings" service to find similar rows.
+
+        Args:
+            text: Query text to embed and search with
+            schema_name: Name of the schema to search within
+            user: User identifier (default: "trustgraph")
+            collection: Collection identifier (default: "default")
+            index_name: Optional index name; when set (non-empty), it is
+                forwarded so the search is restricted to that index
+            limit: Maximum number of results to return (default: 10)
+            **kwargs: Extra request fields; these are merged last and so
+                override any of the fields above on a key clash
+
+        Returns:
+            dict: Service response; matches carry index_name, index_value,
+                  text, and score
+
+        Example:
+            ```python
+            async_flow = await api.async_flow()
+            flow = async_flow.id("default")
+
+            # Search for customers by name similarity
+            results = await flow.row_embeddings_query(
+                text="John Smith",
+                schema_name="customers",
+                user="trustgraph",
+                collection="sales",
+                limit=5
+            )
+
+            for match in results.get("matches", []):
+                print(f"{match['index_name']}: {match['index_value']} (score: {match['score']})")
+            ```
+        """
+        # Step 1: embed the query text.
+        embedded = await self.embeddings(text=text)
+        index_filter = {"index_name": index_name} if index_name else {}
+        # Step 2: assemble the request; later entries win on key clashes.
+        payload = {
+            "vectors": embedded.get("vectors", []),
+            "schema_name": schema_name,
+            "user": user,
+            "collection": collection,
+            "limit": limit,
+            **index_filter,
+            **kwargs,
+        }
+
+        return await self.request("row-embeddings", payload)
--- a/trustgraph-base/trustgraph/api/async_socket_client.py
+++ b/trustgraph-base/trustgraph/api/async_socket_client.py
@ -345,3 +345,26 @@ class AsyncSocketFlowInstance:
        request.update(kwargs)

        return await self.client._send_request("mcp-tool", self.flow_id, request)
+
+    async def row_embeddings_query(
+        self, text: str, schema_name: str, user: str = "trustgraph",
+        collection: str = "default", index_name: Optional[str] = None,
+        limit: int = 10, **kwargs
+    ):
+        """Embed ``text`` and query the "row-embeddings" service with it.
+
+        ``index_name`` is sent only when truthy; ``kwargs`` are merged last.
+        """
+        embedded = await self.embeddings(text=text)
+        index_filter = {"index_name": index_name} if index_name else {}
+        payload = {
+            "vectors": embedded.get("vectors", []),
+            "schema_name": schema_name,
+            "user": user,
+            "collection": collection,
+            "limit": limit,
+            **index_filter,
+            **kwargs,
+        }
+
+        return await self.client._send_request("row-embeddings", self.flow_id, payload)
--- a/trustgraph-base/trustgraph/api/flow.py
+++ b/trustgraph-base/trustgraph/api/flow.py
@ -1297,3 +1297,78 @@ class FlowInstance:

        return response["schema-matches"]

+    def row_embeddings_query(
+            self, text, schema_name, user="trustgraph", collection="default",
+            index_name=None, limit=10
+    ):
+        """
+        Query row data using semantic similarity on indexed fields.
+
+        The text is embedded with ``self.embeddings`` and the vectors are sent
+        to the "service/row-embeddings" endpoint, enabling fuzzy/semantic
+        matching on structured data.
+
+        Args:
+            text: Query text for semantic search
+            schema_name: Schema name to search within
+            user: User/keyspace identifier (default: "trustgraph")
+            collection: Collection identifier (default: "default")
+            index_name: Optional index name; sent only when non-empty
+            limit: Maximum number of results (default: 10)
+
+        Returns:
+            dict: Query results with matches containing index_name, index_value,
+                  text, and score
+
+        Raises:
+            ProtocolException: If the response has a non-empty "error" entry
+
+        Example:
+            ```python
+            flow = api.flow().id("default")
+
+            # Search for customers by name similarity
+            results = flow.row_embeddings_query(
+                text="John Smith",
+                schema_name="customers",
+                user="trustgraph",
+                collection="sales",
+                limit=5
+            )
+
+            # Filter to specific index
+            results = flow.row_embeddings_query(
+                text="machine learning engineer",
+                schema_name="employees",
+                index_name="job_title",
+                limit=10
+            )
+            ```
+        """
+        # First convert text to embeddings vectors
+        emb_result = self.embeddings(text=text)
+        vectors = emb_result.get("vectors", [])
+
+        # Named `request_data` rather than `input` to avoid shadowing the builtin
+        request_data = {
+            "vectors": vectors,
+            "schema_name": schema_name,
+            "user": user,
+            "collection": collection,
+            "limit": limit
+        }
+
+        if index_name:
+            request_data["index_name"] = index_name
+
+        response = self.request("service/row-embeddings", request_data)
+
+        # Check for system-level error
+        if "error" in response and response["error"]:
+            error = response["error"]
+            error_type = error.get("type", "unknown")
+            error_message = error.get("message", "Unknown error")
+            raise ProtocolException(f"{error_type}: {error_message}")
+
+        return response
+
--- a/trustgraph-base/trustgraph/api/socket_client.py
+++ b/trustgraph-base/trustgraph/api/socket_client.py
@ -881,3 +881,73 @@ class SocketFlowInstance:
        request.update(kwargs)

        return self.client._send_request_sync("mcp-tool", self.flow_id, request, False)
+
+    def row_embeddings_query(
+        self,
+        text: str,
+        schema_name: str,
+        user: str = "trustgraph",
+        collection: str = "default",
+        index_name: Optional[str] = None,
+        limit: int = 10,
+        **kwargs: Any
+    ) -> Dict[str, Any]:
+        """
+        Search structured rows by semantic similarity to a piece of text.
+
+        The text is embedded with ``self.embeddings`` and the resulting
+        vectors are sent to the "row-embeddings" service through
+        ``self.client._send_request_sync`` for this flow.
+
+        Args:
+            text: Query text to embed and search with
+            schema_name: Name of the schema to search within
+            user: User/keyspace identifier (default: "trustgraph")
+            collection: Collection identifier (default: "default")
+            index_name: Optional index name; sent only when non-empty, to
+                restrict the search to that index
+            limit: Maximum number of results (default: 10)
+            **kwargs: Extra request fields, merged last so they take precedence
+
+        Returns:
+            dict: Query results with matches containing index_name, index_value,
+                  text, and score
+
+        Example:
+            ```python
+            socket = api.socket()
+            flow = socket.flow("default")
+
+            # Search for customers by name similarity
+            results = flow.row_embeddings_query(
+                text="John Smith",
+                schema_name="customers",
+                user="trustgraph",
+                collection="sales",
+                limit=5
+            )
+
+            # Filter to specific index
+            results = flow.row_embeddings_query(
+                text="machine learning engineer",
+                schema_name="employees",
+                index_name="job_title",
+                limit=10
+            )
+            ```
+        """
+        # Embed the query text, then build the request in one expression.
+        embedded = self.embeddings(text=text)
+        index_filter = {"index_name": index_name} if index_name else {}
+
+        payload = {
+            "vectors": embedded.get("vectors", []),
+            "schema_name": schema_name,
+            "user": user,
+            "collection": collection,
+            "limit": limit,
+            **index_filter,
+            **kwargs,
+        }
+
+        return self.client._send_request_sync("row-embeddings", self.flow_id, payload, False)