mirror of
https://github.com/trustgraph-ai/trustgraph.git
synced 2026-04-25 00:16:23 +02:00
Row embeddings APIs exposed (#646)
* Added row embeddings API and CLI support * Updated protocol specs * Row embeddings agent tool * Add new agent tool to CLI
This commit is contained in:
parent
1809c1f56d
commit
4bbc6d844f
25 changed files with 1090 additions and 29 deletions
|
|
@ -0,0 +1,39 @@
|
|||
# Request body schema for the row-embeddings query service.
# Only `vectors` and `schema_name` are mandatory; the remaining
# properties are optional and most declare a default.
type: object
description: |
  Row embeddings query request - find similar rows by vector similarity on indexed fields.
  Enables semantic/fuzzy matching on structured data.
required:
  - vectors
  - schema_name
properties:

  # --- what to search for -------------------------------------------
  vectors:
    type: array
    description: Query embedding vector
    items:
      type: number
    example: [0.023, -0.142, 0.089, 0.234, -0.067, 0.156]

  # --- where to search ----------------------------------------------
  schema_name:
    type: string
    description: Schema name to search within
    example: 'customers'
  index_name:
    # Not listed under `required`; omit to search without an index filter.
    type: string
    description: Optional index name to filter search to specific index
    example: 'full_name'

  # --- result sizing ------------------------------------------------
  limit:
    type: integer
    description: Maximum number of matches to return
    default: 10
    minimum: 1
    maximum: 1000
    example: 20

  # --- scoping ------------------------------------------------------
  user:
    type: string
    description: User identifier
    default: 'trustgraph'
    example: 'alice'
  collection:
    type: string
    description: Collection to search
    # The default value is the literal string "default".
    default: 'default'
    example: 'sales'
|
||||
|
|
@ -0,0 +1,53 @@
|
|||
# Response body schema for the row-embeddings query service.
# Declares two optional properties: `error` and `matches`.
type: object
description: Row embeddings query response with matching row index information
properties:

  # Error details object.
  error:
    type: object
    description: Error information if query failed
    properties:
      type:
        type: string
        description: Error type identifier
        example: 'row-embeddings-query-error'
      message:
        type: string
        description: Human-readable error message
        example: 'Schema not found'

  # One entry per matching row index entry.
  matches:
    type: array
    description: List of matching row index entries with similarity scores
    items:
      type: object
      properties:
        index_name:
          type: string
          description: Name of the indexed field(s)
          example: 'full_name'
        index_value:
          # An array because the description allows several indexed fields.
          type: array
          description: Values of the indexed fields for this row
          items:
            type: string
          example:
            - 'John'
            - 'Smith'
        text:
          type: string
          description: The text that was embedded for this index entry
          example: 'John Smith'
        score:
          type: number
          description: Similarity score (higher is more similar)
          example: 0.89

# Whole-object example: three matches listed from highest to lowest score.
example:
  matches:
    - index_name: 'full_name'
      index_value:
        - 'John'
        - 'Smith'
      text: 'John Smith'
      score: 0.95
    - index_name: 'full_name'
      index_value:
        - 'Jon'
        - 'Smythe'
      text: 'Jon Smythe'
      score: 0.82
    - index_name: 'full_name'
      index_value:
        - 'Jonathan'
        - 'Schmidt'
      text: 'Jonathan Schmidt'
      score: 0.76
|
||||
|
|
@ -133,6 +133,8 @@ paths:
|
|||
$ref: './paths/flow/graph-embeddings.yaml'
|
||||
/api/v1/flow/{flow}/service/document-embeddings:
|
||||
$ref: './paths/flow/document-embeddings.yaml'
|
||||
/api/v1/flow/{flow}/service/row-embeddings:
|
||||
$ref: './paths/flow/row-embeddings.yaml'
|
||||
/api/v1/flow/{flow}/service/text-load:
|
||||
$ref: './paths/flow/text-load.yaml'
|
||||
/api/v1/flow/{flow}/service/document-load:
|
||||
|
|
|
|||
101
specs/api/paths/flow/row-embeddings.yaml
Normal file
101
specs/api/paths/flow/row-embeddings.yaml
Normal file
|
|
@ -0,0 +1,101 @@
|
|||
# Path item for the row-embeddings flow service: a single POST operation
# that takes a RowEmbeddingsQueryRequest body and returns a
# RowEmbeddingsQueryResponse.
post:
  tags:
    - Flow Services
  summary: Row Embeddings Query - semantic search on structured data
  description: |
    Query row embeddings to find similar rows by vector similarity on indexed fields.
    Enables fuzzy/semantic matching on structured data.

    ## Row Embeddings Query Overview

    Find rows whose indexed field values are semantically similar to a query:
    - **Input**: Query embedding vector, schema name, optional index filter
    - **Search**: Compare against stored row index embeddings
    - **Output**: Matching rows with index values and similarity scores

    Core component of semantic search on structured data.

    ## Use Cases

    - **Fuzzy name matching**: Find customers by approximate name
    - **Semantic field search**: Find products by description similarity
    - **Data deduplication**: Identify potential duplicate records
    - **Entity resolution**: Match records across datasets

    ## Process

    1. Obtain query embedding (via embeddings service)
    2. Query stored row index embeddings for the specified schema
    3. Calculate cosine similarity
    4. Return top N most similar index entries
    5. Use index values to retrieve full rows via GraphQL

    ## Response Format

    Each match includes:
    - `index_name`: The indexed field(s) that matched
    - `index_value`: The actual values for those fields
    - `text`: The text that was embedded
    - `score`: Similarity score (higher = more similar)

  operationId: rowEmbeddingsQueryService
  security:
    - bearerAuth: []

  parameters:
    # Fills the {flow} placeholder of the route this path item is mounted on.
    - name: flow
      in: path
      required: true
      schema:
        type: string
      description: Flow instance ID
      example: 'my-flow'

  requestBody:
    required: true
    content:
      application/json:
        schema:
          $ref: '../../components/schemas/embeddings-query/RowEmbeddingsQueryRequest.yaml'
        examples:
          # Search by schema only, with user and collection given explicitly.
          basicQuery:
            summary: Find similar customer names
            value:
              vectors: [0.023, -0.142, 0.089, 0.234, -0.067, 0.156, 0.201, -0.178]
              schema_name: 'customers'
              limit: 10
              user: 'alice'
              collection: 'sales'
          # Same kind of search narrowed to one index; user and collection
          # are omitted from this example.
          filteredQuery:
            summary: Search specific index
            value:
              vectors: [0.1, -0.2, 0.3, -0.4, 0.5]
              schema_name: 'products'
              index_name: 'description'
              limit: 20

  responses:
    # Status codes are quoted so they stay string keys.
    '200':
      description: Successful response
      content:
        application/json:
          schema:
            $ref: '../../components/schemas/embeddings-query/RowEmbeddingsQueryResponse.yaml'
          examples:
            similarRows:
              summary: Similar rows found
              value:
                matches:
                  - index_name: 'full_name'
                    index_value:
                      - 'John'
                      - 'Smith'
                    text: 'John Smith'
                    score: 0.95
                  - index_name: 'full_name'
                    index_value:
                      - 'Jon'
                      - 'Smythe'
                    text: 'Jon Smythe'
                    score: 0.82
                  - index_name: 'full_name'
                    index_value:
                      - 'Jonathan'
                      - 'Schmidt'
                    text: 'Jonathan Schmidt'
                    score: 0.76
    '401':
      $ref: '../../components/responses/Unauthorized.yaml'
    '500':
      $ref: '../../components/responses/Error.yaml'
|
||||
|
|
@ -31,6 +31,7 @@ payload:
|
|||
- $ref: './requests/StructuredDiagRequest.yaml'
|
||||
- $ref: './requests/GraphEmbeddingsRequest.yaml'
|
||||
- $ref: './requests/DocumentEmbeddingsRequest.yaml'
|
||||
- $ref: './requests/RowEmbeddingsRequest.yaml'
|
||||
- $ref: './requests/TextLoadRequest.yaml'
|
||||
- $ref: './requests/DocumentLoadRequest.yaml'
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,30 @@
|
|||
# WebSocket envelope for a row-embeddings request: routing fields
# (`id`, `service`, `flow`) wrapped around the query payload (`request`).
# All four properties are required.
type: object
description: WebSocket request for row-embeddings service (flow-hosted service)
required:
  - id
  - service
  - flow
  - request
properties:

  # --- routing ------------------------------------------------------
  id:
    type: string
    description: Unique request identifier
  service:
    type: string
    # `const` pins this field to a single allowed value.
    const: 'row-embeddings'
    description: Service identifier for row-embeddings service
  flow:
    type: string
    description: Flow ID

  # --- payload ------------------------------------------------------
  request:
    # The payload schema is the one defined in the REST API spec tree.
    $ref: '../../../../api/components/schemas/embeddings-query/RowEmbeddingsQueryRequest.yaml'

examples:
  - id: 'req-1'
    service: 'row-embeddings'
    flow: 'my-flow'
    request:
      vectors: [0.023, -0.142, 0.089, 0.234]
      schema_name: 'customers'
      limit: 10
      user: 'trustgraph'
      collection: 'default'
|
||||
Loading…
Add table
Add a link
Reference in a new issue