Row embeddings APIs exposed (#646)

* Added row embeddings API and CLI support
* Updated protocol specs
* Row embeddings agent tool
* Add new agent tool to CLI
2026-06-11 07:45:13 +02:00 · 2026-02-23 21:52:56 +00:00 · 2026-02-23 21:52:56 +00:00 · 4bbc6d844f
commit 4bbc6d844f
parent 1809c1f56d
25 changed files with 1090 additions and 29 deletions
--- a/trustgraph-cli/trustgraph/cli/invoke_row_embeddings.py
+++ b/trustgraph-cli/trustgraph/cli/invoke_row_embeddings.py
@ -0,0 +1,126 @@
+"""
+Queries row data by text similarity using vector embeddings on indexed fields.
+Returns matching rows with their index values and similarity scores.
+"""
+
+import argparse
+import os
+from trustgraph.api import Api
+
+# Command-line defaults are read from the environment; the URL falls back to
+# a localhost address and the token to None when the variables are unset.
+default_url = os.getenv("TRUSTGRAPH_URL", 'http://localhost:8088/')
+default_token = os.getenv("TRUSTGRAPH_TOKEN", None)
+
+def query(url, flow_id, query_text, schema_name, user, collection, index_name, limit, token=None):
+    """
+    Run a row embeddings query against a flow and print the matches.
+
+    Creates an API client for `url` (authenticated with `token`), opens a
+    socket, and calls `row_embeddings_query` on the flow `flow_id`, passing
+    `query_text`, `schema_name`, `user`, `collection`, `index_name` and
+    `limit` straight through. For every entry in the result's "matches"
+    list the index name, index value, text and score (to four decimal
+    places) are printed. A result without a "matches" key prints nothing.
+
+    The socket is closed on the way out, whether the query succeeds or
+    raises; exceptions are propagated to the caller.
+    """
+
+    # Create API client
+    api = Api(url=url, token=token)
+    socket = api.socket()
+
+    try:
+        # Resolve the flow inside the try block so the socket is still
+        # closed if the flow lookup itself fails
+        flow = socket.flow(flow_id)
+
+        # Call row embeddings query service
+        result = flow.row_embeddings_query(
+            text=query_text,
+            schema_name=schema_name,
+            user=user,
+            collection=collection,
+            index_name=index_name,
+            limit=limit
+        )
+
+        for match in result.get("matches", []):
+            print(f"Index: {match['index_name']}")
+            print(f"  Values: {match['index_value']}")
+            print(f"  Text: {match['text']}")
+            print(f"  Score: {match['score']:.4f}")
+            print()
+
+    finally:
+        # Clean up socket connection
+        socket.close()
+
+def main():
+    """
+    Command-line entry point for tg-invoke-row-embeddings.
+
+    Parses the command-line options and runs query() with them. Any
+    exception raised while doing so is caught and printed rather than
+    propagated.
+    """
+
+    parser = argparse.ArgumentParser(
+        prog='tg-invoke-row-embeddings',
+        description=__doc__,
+    )
+
+    # (flags, options) pairs, registered with the parser in the order listed
+    option_table = [
+        (
+            ('-u', '--url'),
+            dict(default=default_url, help=f'API URL (default: {default_url})'),
+        ),
+        (
+            ('-t', '--token'),
+            dict(
+                default=default_token,
+                help='Authentication token (default: $TRUSTGRAPH_TOKEN)',
+            ),
+        ),
+        (
+            ('-f', '--flow-id'),
+            dict(default="default", help='Flow ID (default: default)'),
+        ),
+        (
+            ('-U', '--user'),
+            dict(default="trustgraph", help='User/keyspace (default: trustgraph)'),
+        ),
+        (
+            ('-c', '--collection'),
+            dict(default="default", help='Collection (default: default)'),
+        ),
+        (
+            ('-s', '--schema-name'),
+            dict(required=True, help='Schema name to search within (required)'),
+        ),
+        (
+            ('-i', '--index-name'),
+            dict(default=None, help='Index name to filter search (optional)'),
+        ),
+        (
+            ('-l', '--limit'),
+            dict(type=int, default=10, help='Maximum number of results (default: 10)'),
+        ),
+        (
+            ('query',),
+            dict(nargs=1, help='Query text to search for similar row index values'),
+        ),
+    ]
+
+    for flags, options in option_table:
+        parser.add_argument(*flags, **options)
+
+    args = parser.parse_args()
+
+    try:
+
+        # The positional argument is declared with nargs=1, so it arrives
+        # as a one-element list
+        call_args = {
+            "url": args.url,
+            "flow_id": args.flow_id,
+            "query_text": args.query[0],
+            "schema_name": args.schema_name,
+            "user": args.user,
+            "collection": args.collection,
+            "index_name": args.index_name,
+            "limit": args.limit,
+            "token": args.token,
+        }
+
+        query(**call_args)
+
+    except Exception as e:
+
+        print("Exception:", e, flush=True)
+
+# Run the command-line entry point only when executed as a script
+if __name__ == "__main__":
+    main()
--- a/trustgraph-cli/trustgraph/cli/set_tool.py
+++ b/trustgraph-cli/trustgraph/cli/set_tool.py
@ -2,8 +2,9 @@
 Configures and registers tools in the TrustGraph system.

 This script allows you to define agent tools with various types including:
- knowledge-query: Query knowledge bases  
+- knowledge-query: Query knowledge bases
 - structured-query: Query structured data using natural language
+- row-embeddings-query: Semantic search on structured data indexes
 - text-completion: Text generation
 - mcp-tool: Reference to MCP (Model Context Protocol) tools
 - prompt: Prompt template execution
@ -64,6 +65,9 @@ def set_tool(
        mcp_tool : str,
        collection : str,
        template : str,
+        schema_name : str,
+        index_name : str,
+        limit : int,
        arguments : List[Argument],
        group : List[str],
        state : str,
@ -89,6 +93,12 @@ def set_tool(

    if template: object["template"] = template

+    if schema_name: object["schema-name"] = schema_name
+
+    if index_name: object["index-name"] = index_name
+
+    if limit: object["limit"] = limit
+
    if arguments:
        object["arguments"] = [
            {
@ -120,30 +130,37 @@ def main():
        description=__doc__,
        epilog=textwrap.dedent('''
            Valid tool types:
-              knowledge-query    - Query knowledge bases (fixed args)
-              structured-query   - Query structured data using natural language (fixed args)
-              text-completion    - Text completion/generation (fixed args)
-              mcp-tool           - Model Control Protocol tool (configurable args)
-              prompt             - Prompt template query (configurable args)
-            
-            Note: Tools marked "(fixed args)" have predefined arguments and don't need 
+              knowledge-query      - Query knowledge bases (fixed args)
+              structured-query     - Query structured data using natural language (fixed args)
+              row-embeddings-query - Semantic search on structured data indexes (fixed args)
+              text-completion      - Text completion/generation (fixed args)
+              mcp-tool             - Model Control Protocol tool (configurable args)
+              prompt               - Prompt template query (configurable args)
+
+            Note: Tools marked "(fixed args)" have predefined arguments and don't need
            --argument specified. Tools marked "(configurable args)" require --argument.
-            
+
            Valid argument types:
-              string            - String/text parameter  
+              string            - String/text parameter
              number            - Numeric parameter
-            
+
            Examples:
              %(prog)s --id weather_tool --name get_weather \\
                       --type knowledge-query \\
                       --description "Get weather information for a location" \\
                       --collection weather_data
-              
+
              %(prog)s --id data_query_tool --name query_data \\
                       --type structured-query \\
                       --description "Query structured data using natural language" \\
                       --collection sales_data
-              
+
+              %(prog)s --id customer_search --name find_customer \\
+                       --type row-embeddings-query \\
+                       --description "Find customers by name using semantic search" \\
+                       --schema-name customers --collection sales \\
+                       --index-name full_name --limit 20
+
              %(prog)s --id calc_tool --name calculate --type mcp-tool \\
                       --description "Perform mathematical calculations" \\
                       --mcp-tool calculator \\
@ -181,7 +198,7 @@ def main():

    parser.add_argument(
        '--type',
-        help=f'Tool type, one of: knowledge-query, structured-query, text-completion, mcp-tool, prompt',
+        help=f'Tool type, one of: knowledge-query, structured-query, row-embeddings-query, text-completion, mcp-tool, prompt',
    )

    parser.add_argument(
@ -191,7 +208,23 @@ def main():

    parser.add_argument(
        '--collection',
-        help=f'For knowledge-query and structured-query types: collection to query',
+        help=f'For knowledge-query, structured-query, and row-embeddings-query types: collection to query',
+    )
+
+    parser.add_argument(
+        '--schema-name',
+        help=f'For row-embeddings-query type: schema name to search within (required)',
+    )
+
+    parser.add_argument(
+        '--index-name',
+        help=f'For row-embeddings-query type: specific index to filter search (optional)',
+    )
+
+    parser.add_argument(
+        '--limit',
+        type=int,
+        help=f'For row-embeddings-query type: maximum results to return (default: 10)',
    )

    parser.add_argument(
@ -227,7 +260,8 @@ def main():
    try:

        valid_types = [
-            "knowledge-query", "structured-query", "text-completion", "mcp-tool", "prompt"
+            "knowledge-query", "structured-query", "row-embeddings-query",
+            "text-completion", "mcp-tool", "prompt"
        ]

        if args.id is None:
@ -261,6 +295,9 @@ def main():
            mcp_tool=mcp_tool,
            collection=args.collection,
            template=args.template,
+            schema_name=args.schema_name,
+            index_name=args.index_name,
+            limit=args.limit,
            arguments=arguments,
            group=args.group,
            state=args.state,
--- a/trustgraph-cli/trustgraph/cli/show_tools.py
+++ b/trustgraph-cli/trustgraph/cli/show_tools.py
@ -4,8 +4,9 @@ Displays the current agent tool configurations
 Shows all configured tools including their types:
 - knowledge-query: Tools that query knowledge bases
 - structured-query: Tools that query structured data using natural language
+- row-embeddings-query: Tools for semantic search on structured data indexes
 - text-completion: Tools for text generation
- mcp-tool: References to MCP (Model Context Protocol) tools  
+- mcp-tool: References to MCP (Model Context Protocol) tools
 - prompt: Tools that execute prompt templates
 """

@ -41,11 +42,19 @@ def show_config(url, token=None):

        if tp == "mcp-tool":
            table.append(("mcp-tool", data["mcp-tool"]))
-          
-        if tp == "knowledge-query" or tp == "structured-query":
+
+        if tp in ("knowledge-query", "structured-query", "row-embeddings-query"):
            if "collection" in data:
                table.append(("collection", data["collection"]))

+        if tp == "row-embeddings-query":
+            if "schema-name" in data:
+                table.append(("schema-name", data["schema-name"]))
+            if "index-name" in data:
+                table.append(("index-name", data["index-name"]))
+            if "limit" in data:
+                table.append(("limit", data["limit"]))
+
        if tp == "prompt":
            table.append(("template", data["template"]))
            for n, arg in enumerate(data["arguments"]):