mirror of
https://github.com/trustgraph-ai/trustgraph.git
synced 2026-04-26 00:46:22 +02:00
REST API OpenAPI spec (#612)
* OpenAPI spec in specs/api. Checked lint with redoc.
This commit is contained in:
parent
62b754d788
commit
fce43ae035
84 changed files with 5638 additions and 0 deletions
130
specs/api/paths/flow/agent.yaml
Normal file
130
specs/api/paths/flow/agent.yaml
Normal file
|
|
@ -0,0 +1,130 @@
|
|||
# Agent flow service: POST operation (specs/api/paths/flow/agent.yaml).
# Request/response schemas and the 401/500 responses live in separate files
# and are pulled in through relative $ref paths.
post:
  tags:
    - Flow Services
  summary: Agent service - conversational AI with reasoning
  # Markdown body; the literal block scalar keeps its line breaks as written.
  description: |
    AI agent that can understand questions, reason about them, and take actions.

    ## Agent Overview

    The agent service provides a conversational AI that:
    - Understands natural language questions
    - Reasons about problems using thoughts
    - Takes actions to gather information
    - Provides coherent answers

    ## Request Format

    Send a question with optional:
    - **state**: Continue from previous conversation
    - **history**: Previous agent steps for context
    - **group**: Collaborative agent identifiers
    - **streaming**: Enable streaming responses

    ## Response Modes

    ### Streaming Mode (streaming: true)
    Responses arrive as chunks with `chunk-type`:
    - `thought`: Agent's reasoning process
    - `action`: Action being taken
    - `observation`: Result from action
    - `answer`: Final response to user
    - `error`: Error occurred

    Each chunk may have multiple messages. Check flags:
    - `end-of-message`: Current chunk type complete
    - `end-of-dialog`: Entire conversation complete

    ### Legacy Mode (streaming: false)
    Single response with:
    - `answer`: Complete answer
    - `thought`: Reasoning (if any)
    - `observation`: Observations (if any)

    ## Multi-turn Conversations

    Include `history` array with previous steps to maintain context.
    Each step has: thought, action, arguments, observation.

  operationId: agentService
  # bearerAuth is referenced by name only; the scheme itself is declared
  # outside this file. The empty list means no scopes are requested.
  security:
    - bearerAuth: []
  parameters:
    # Path parameter selecting the flow instance.
    - name: flow
      in: path
      required: true
      schema:
        type: string
      description: Flow instance ID
      example: my-flow
  requestBody:
    required: true
    content:
      application/json:
        schema:
          $ref: '../../components/schemas/agent/AgentRequest.yaml'
        examples:
          simpleQuestion:
            summary: Simple question
            value:
              question: What is the capital of France?
              user: alice
          streamingQuestion:
            summary: Question with streaming enabled
            value:
              question: Explain quantum computing
              user: alice
              streaming: true
          conversationWithHistory:
            summary: Multi-turn conversation
            value:
              question: And what about its population?
              user: alice
              # One earlier agent step supplied as context.
              history:
                - thought: User is asking about the capital of France
                  action: search
                  arguments:
                    query: 'capital of France'
                  observation: 'Paris is the capital of France'
                  user: alice
  responses:
    '200':
      description: Successful response
      content:
        application/json:
          schema:
            $ref: '../../components/schemas/agent/AgentResponse.yaml'
          examples:
            streamingThought:
              summary: Streaming thought chunk
              value:
                chunk-type: thought
                content: I need to search for information about quantum computing
                end-of-message: false
                end-of-dialog: false
            streamingAnswer:
              summary: Streaming answer chunk
              value:
                chunk-type: answer
                content: Quantum computing uses quantum mechanics principles...
                end-of-message: false
                end-of-dialog: false
            # Empty content with both end flags set marks the end of the stream.
            streamingComplete:
              summary: Streaming complete marker
              value:
                chunk-type: answer
                content: ''
                end-of-message: true
                end-of-dialog: true
            legacyResponse:
              summary: Legacy non-streaming response
              value:
                answer: Paris is the capital of France.
                thought: User is asking about the capital of France
                observation: ''
                end-of-message: false
                end-of-dialog: false
    '401':
      $ref: '../../components/responses/Unauthorized.yaml'
    '500':
      $ref: '../../components/responses/Error.yaml'
103
specs/api/paths/flow/document-embeddings.yaml
Normal file
103
specs/api/paths/flow/document-embeddings.yaml
Normal file
|
|
@ -0,0 +1,103 @@
|
|||
# Document embeddings query: POST operation
# (specs/api/paths/flow/document-embeddings.yaml).
# Schemas and the 401/500 responses are external files referenced by $ref.
post:
  tags:
    - Flow Services
  summary: Document Embeddings Query - find similar text chunks
  # Markdown body; the literal block scalar keeps its line breaks as written.
  description: |
    Query document embeddings to find similar text chunks by vector similarity.

    ## Document Embeddings Query Overview

    Find document chunks semantically similar to a query vector:
    - **Input**: Query embedding vector
    - **Search**: Compare against stored chunk embeddings
    - **Output**: Most similar text chunks

    Core component of document RAG retrieval.

    ## Use Cases

    - **Document retrieval**: Find relevant passages
    - **Semantic search**: Search by meaning not keywords
    - **Context gathering**: Get text for RAG
    - **Similar content**: Discover related documents

    ## Process

    1. Obtain query embedding (via embeddings service)
    2. Query stored document chunk embeddings
    3. Calculate cosine similarity
    4. Return top N most similar chunks
    5. Use chunks as context for generation

    ## Chunking

    Documents are split into chunks during indexing:
    - Typical size: 200-1000 tokens
    - Overlap between chunks for continuity
    - Each chunk has own embedding

    Queries return individual chunks, not full documents.

    ## Similarity Scoring

    Uses cosine similarity:
    - Results ordered by similarity
    - No explicit scores in response
    - Limit controls result count

    ## Output Format

    Returns text chunks as strings:
    - Raw chunk text
    - No metadata (source, position, etc.)
    - Use for LLM context directly

  operationId: documentEmbeddingsQueryService
  # bearerAuth is referenced by name only; the scheme is declared elsewhere.
  security:
    - bearerAuth: []
  parameters:
    # Path parameter selecting the flow instance.
    - name: flow
      in: path
      required: true
      schema:
        type: string
      description: Flow instance ID
      example: my-flow
  requestBody:
    required: true
    content:
      application/json:
        schema:
          $ref: '../../components/schemas/embeddings-query/DocumentEmbeddingsQueryRequest.yaml'
        examples:
          basicQuery:
            summary: Find similar chunks
            value:
              vectors: [0.023, -0.142, 0.089, 0.234, -0.067, 0.156, 0.201, -0.178]
              limit: 10
              user: alice
              collection: research
          largeQuery:
            summary: Larger result set
            value:
              vectors: [0.1, -0.2, 0.3, -0.4, 0.5]
              limit: 30
  responses:
    '200':
      description: Successful response
      content:
        application/json:
          schema:
            $ref: '../../components/schemas/embeddings-query/DocumentEmbeddingsQueryResponse.yaml'
          examples:
            similarChunks:
              summary: Similar document chunks
              value:
                # Folded scalars (>-): each entry is a single-line string;
                # the wrapping here is for source readability only.
                chunks:
                  - >-
                    Quantum computing uses quantum mechanics principles like
                    superposition and entanglement for computation. Unlike
                    classical bits, quantum bits (qubits) can exist in multiple
                    states simultaneously.
                  - >-
                    Neural networks are computing systems inspired by biological
                    neural networks. They consist of interconnected nodes
                    organized in layers that process information through
                    weighted connections.
                  - >-
                    Machine learning algorithms learn patterns from data without
                    being explicitly programmed. They improve their performance
                    through experience and exposure to training data.
    '401':
      $ref: '../../components/responses/Unauthorized.yaml'
    '500':
      $ref: '../../components/responses/Error.yaml'
119
specs/api/paths/flow/document-load.yaml
Normal file
119
specs/api/paths/flow/document-load.yaml
Normal file
|
|
@ -0,0 +1,119 @@
|
|||
# Document load: POST operation (specs/api/paths/flow/document-load.yaml).
# Accepts a base64-encoded document and answers 202 with an empty JSON object.
# The request schema and the 401/500 responses are external files referenced
# by $ref.
post:
  tags:
    - Flow Services
  summary: Document Load - load binary documents (PDF, etc.)
  # Markdown body. The Python snippet carries its own import and an indented
  # `with` body so it runs as pasted.
  description: |
    Load binary documents (PDF, Word, etc.) into processing pipeline.

    ## Document Load Overview

    Fire-and-forget binary document loading:
    - **Input**: Document data (base64 encoded)
    - **Process**: Extract text, chunk, embed, store
    - **Output**: None (202 Accepted)

    Asynchronous processing for PDF and other binary formats.

    ## Processing Pipeline

    Documents go through:
    1. **Text extraction**: PDF→text, DOCX→text, etc.
    2. **Chunking**: Split into overlapping chunks
    3. **Embedding**: Generate vectors for each chunk
    4. **Storage**: Store chunks + embeddings
    5. **Indexing**: Make searchable

    Pipeline runs asynchronously.

    ## Supported Formats

    - **PDF**: Portable Document Format
    - **DOCX**: Microsoft Word
    - **HTML**: Web pages
    - Other formats via extractors

    Format detected from content, not extension.

    ## Binary Encoding

    Documents must be base64 encoded:
    ```python
    import base64

    with open('document.pdf', 'rb') as f:
        doc_bytes = f.read()
    encoded = base64.b64encode(doc_bytes).decode('utf-8')
    ```

    ## Metadata

    Optional RDF triples:
    - Document properties
    - Source information
    - Custom attributes

    ## Use Cases

    - **PDF ingestion**: Process research papers
    - **Document libraries**: Index document collections
    - **Content migration**: Import from other systems
    - **Automated processing**: Batch document loading

    ## No Response Data

    Returns 202 Accepted immediately:
    - Document queued
    - Processing happens asynchronously
    - No status tracking
    - Query later to verify indexed

  operationId: documentLoadService
  # bearerAuth is referenced by name only; the scheme is declared elsewhere.
  security:
    - bearerAuth: []
  parameters:
    # Path parameter selecting the flow instance.
    - name: flow
      in: path
      required: true
      schema:
        type: string
      description: Flow instance ID
      example: my-flow
  requestBody:
    required: true
    content:
      application/json:
        schema:
          $ref: '../../components/schemas/loading/DocumentLoadRequest.yaml'
        examples:
          loadPdf:
            summary: Load PDF document
            value:
              data: JVBERi0xLjQKJeLjz9MKMSAwIG9iago8PC9UeXBlL0NhdGFsb2cvUGFnZXMgMiAwIFI+PmVuZG9iagoyIDAgb2JqCjw8L1R5cGUvUGFnZXMvS2lkc1szIDAgUl0vQ291bnQgMT4+ZW5kb2JqCg==
              id: doc-789
              user: alice
              collection: research
          withMetadata:
            summary: Load with metadata
            value:
              # The trailing "..." marks the payload as abbreviated in this
              # example.
              data: JVBERi0xLjQKJeLjz9MK...
              id: doc-101112
              user: bob
              collection: papers
              # Each entry is an s/p/o triple; every term carries a value `v`
              # and a boolean `e`.
              metadata:
                - s:
                    v: 'doc-101112'
                    e: false
                  p:
                    v: 'http://purl.org/dc/terms/title'
                    e: true
                  o:
                    v: 'Quantum Entanglement Research'
                    e: false
                - s:
                    v: 'doc-101112'
                    e: false
                  p:
                    v: 'http://purl.org/dc/terms/date'
                    e: true
                  o:
                    # Quoted so the date stays a string.
                    v: '2024-01-15'
                    e: false
  responses:
    '202':
      description: Document accepted for processing
      content:
        application/json:
          schema:
            type: object
            properties: {}
          example: {}
    '401':
      $ref: '../../components/responses/Unauthorized.yaml'
    '500':
      $ref: '../../components/responses/Error.yaml'
107
specs/api/paths/flow/document-rag.yaml
Normal file
107
specs/api/paths/flow/document-rag.yaml
Normal file
|
|
@ -0,0 +1,107 @@
|
|||
# Document RAG: POST operation (specs/api/paths/flow/document-rag.yaml).
# Schemas and the 401/500 responses are external files referenced by $ref.
post:
  tags:
    - Flow Services
  summary: Document RAG - retrieve and generate from documents
  # Markdown body; the literal block scalar keeps its line breaks as written.
  description: |
    Retrieval-Augmented Generation over document embeddings.

    ## Document RAG Overview

    Document RAG combines:
    1. **Retrieval**: Search document embeddings using semantic similarity
    2. **Generation**: Use LLM to synthesize answer from retrieved documents

    This provides grounded answers based on your document corpus.

    ## Query Process

    1. Convert query to embedding
    2. Search document embeddings for most similar chunks
    3. Retrieve top N document chunks (configurable via doc-limit)
    4. Pass query + retrieved context to LLM
    5. Generate answer grounded in documents

    ## Streaming

    Enable `streaming: true` to receive the answer as it's generated:
    - Multiple messages with `response` content
    - Final message with `end-of-stream: true`

    Without streaming, returns complete answer in single response.

    ## Parameters

    - **doc-limit**: Controls retrieval depth (1-100, default 20)
      - Higher = more context but slower
      - Lower = faster but may miss relevant info
    - **collection**: Target specific document collection
    - **user**: Multi-tenant isolation

  operationId: documentRagService
  # bearerAuth is referenced by name only; the scheme is declared elsewhere.
  security:
    - bearerAuth: []
  parameters:
    # Path parameter selecting the flow instance.
    - name: flow
      in: path
      required: true
      schema:
        type: string
      description: Flow instance ID
      example: my-flow
  requestBody:
    required: true
    content:
      application/json:
        schema:
          $ref: '../../components/schemas/rag/DocumentRagRequest.yaml'
        examples:
          basicQuery:
            summary: Basic document query
            value:
              query: What are the key findings in the research papers?
              user: alice
              collection: research
          streamingQuery:
            summary: Streaming query
            value:
              query: Summarize the main conclusions
              user: alice
              collection: research
              doc-limit: 15
              streaming: true
          limitedRetrieval:
            summary: Query with limited retrieval
            value:
              query: What is quantum entanglement?
              doc-limit: 5
  responses:
    '200':
      description: Successful response
      content:
        application/json:
          schema:
            $ref: '../../components/schemas/rag/DocumentRagResponse.yaml'
          examples:
            completeResponse:
              summary: Complete non-streaming response
              value:
                # Literal block: the answer keeps its line breaks.
                response: |
                  The research papers present three key findings:
                  1. Quantum entanglement exhibits non-local correlations
                  2. Bell's inequality is violated in experimental tests
                  3. Applications in quantum cryptography are promising
                end-of-stream: false
            streamingChunk:
              summary: Streaming response chunk
              value:
                response: 'The research papers present three'
                end-of-stream: false
            # Empty response text with end-of-stream set closes the stream.
            streamingComplete:
              summary: Streaming complete marker
              value:
                response: ''
                end-of-stream: true
    '401':
      $ref: '../../components/responses/Unauthorized.yaml'
    '500':
      $ref: '../../components/responses/Error.yaml'
85
specs/api/paths/flow/embeddings.yaml
Normal file
85
specs/api/paths/flow/embeddings.yaml
Normal file
|
|
@ -0,0 +1,85 @@
|
|||
# Embeddings: POST operation (specs/api/paths/flow/embeddings.yaml).
# Schemas and the 401/500 responses are external files referenced by $ref.
post:
  tags:
    - Flow Services
  summary: Embeddings - text to vector conversion
  # Markdown body; the literal block scalar keeps its line breaks as written.
  description: |
    Convert text to embedding vectors for semantic similarity search.

    ## Embeddings Overview

    Embeddings transform text into dense vector representations that:
    - Capture semantic meaning
    - Enable similarity comparisons via cosine distance
    - Support semantic search and retrieval
    - Power RAG systems

    ## Use Cases

    - **Document indexing**: Convert documents to vectors for storage
    - **Query encoding**: Convert search queries for similarity matching
    - **Semantic similarity**: Find related texts via vector distance
    - **Clustering**: Group similar content
    - **Classification**: Use as features for ML models

    ## Vector Dimensions

    Dimension count depends on embedding model:
    - text-embedding-ada-002: 1536 dimensions
    - text-embedding-3-small: 1536 dimensions
    - text-embedding-3-large: 3072 dimensions
    - Custom models: Varies

    ## Single Request

    Unlike batch embedding APIs, this endpoint processes one text at a time.
    For bulk operations, use document-load or text-load services.

  operationId: embeddingsService
  # bearerAuth is referenced by name only; the scheme is declared elsewhere.
  security:
    - bearerAuth: []
  parameters:
    # Path parameter selecting the flow instance.
    - name: flow
      in: path
      required: true
      schema:
        type: string
      description: Flow instance ID
      example: my-flow
  requestBody:
    required: true
    content:
      application/json:
        schema:
          $ref: '../../components/schemas/embeddings/EmbeddingsRequest.yaml'
        examples:
          shortText:
            summary: Short text embedding
            value:
              text: Machine learning
          sentence:
            summary: Sentence embedding
            value:
              text: Quantum computing uses quantum mechanics principles for computation.
          paragraph:
            summary: Paragraph embedding
            value:
              # Literal block: a three-line text, line breaks preserved.
              text: |
                Neural networks are computing systems inspired by biological neural networks.
                They consist of interconnected nodes (neurons) organized in layers.
                Through training, they learn to recognize patterns and make predictions.
  responses:
    '200':
      description: Successful response
      content:
        application/json:
          schema:
            $ref: '../../components/schemas/embeddings/EmbeddingsResponse.yaml'
          examples:
            embeddingVector:
              summary: Embedding vector
              value:
                vectors: [0.023, -0.142, 0.089, 0.234, -0.067, 0.156, 0.201, -0.178, 0.045, 0.312]
    '401':
      $ref: '../../components/responses/Unauthorized.yaml'
    '500':
      $ref: '../../components/responses/Error.yaml'
95
specs/api/paths/flow/graph-embeddings.yaml
Normal file
95
specs/api/paths/flow/graph-embeddings.yaml
Normal file
|
|
@ -0,0 +1,95 @@
|
|||
# Graph embeddings query: POST operation
# (specs/api/paths/flow/graph-embeddings.yaml).
# Schemas and the 401/500 responses are external files referenced by $ref.
post:
  tags:
    - Flow Services
  summary: Graph Embeddings Query - find similar entities
  # Markdown body; the literal block scalar keeps its line breaks as written.
  description: |
    Query graph embeddings to find similar entities by vector similarity.

    ## Graph Embeddings Query Overview

    Find entities semantically similar to a query vector:
    - **Input**: Query embedding vector
    - **Search**: Compare against stored entity embeddings
    - **Output**: Most similar entities (RDF URIs)

    Core component of graph RAG retrieval.

    ## Use Cases

    - **Entity discovery**: Find related entities
    - **Concept expansion**: Discover similar concepts
    - **Graph exploration**: Navigate by semantic similarity
    - **RAG retrieval**: Get entities for context

    ## Process

    1. Obtain query embedding (via embeddings service)
    2. Query stored entity embeddings
    3. Calculate cosine similarity
    4. Return top N most similar entities
    5. Use entities to retrieve triples/subgraph

    ## Similarity Scoring

    Uses cosine similarity between vectors:
    - Results ordered by similarity (most similar first)
    - No explicit similarity scores returned
    - Limit controls result count

    ## Entity Format

    Returns RDF values (entities):
    - URI entities: `{v: "https://...", e: true}`
    - These are references to knowledge graph entities
    - Use with triples query to get entity details

  operationId: graphEmbeddingsQueryService
  # bearerAuth is referenced by name only; the scheme is declared elsewhere.
  security:
    - bearerAuth: []
  parameters:
    # Path parameter selecting the flow instance.
    - name: flow
      in: path
      required: true
      schema:
        type: string
      description: Flow instance ID
      example: my-flow
  requestBody:
    required: true
    content:
      application/json:
        schema:
          $ref: '../../components/schemas/embeddings-query/GraphEmbeddingsQueryRequest.yaml'
        examples:
          basicQuery:
            summary: Find similar entities
            value:
              vectors: [0.023, -0.142, 0.089, 0.234, -0.067, 0.156, 0.201, -0.178]
              limit: 10
              user: alice
              collection: research
          largeQuery:
            summary: Larger result set
            value:
              vectors: [0.1, -0.2, 0.3, -0.4, 0.5]
              limit: 50
  responses:
    '200':
      description: Successful response
      content:
        application/json:
          schema:
            $ref: '../../components/schemas/embeddings-query/GraphEmbeddingsQueryResponse.yaml'
          examples:
            similarEntities:
              summary: Similar entities found
              value:
                # Each entity is a value `v` (a URI here) plus boolean `e`.
                entities:
                  - v: 'https://example.com/person/alice'
                    e: true
                  - v: 'https://example.com/person/bob'
                    e: true
                  - v: 'https://example.com/concept/quantum-computing'
                    e: true
                  - v: 'https://example.com/concept/machine-learning'
                    e: true
    '401':
      $ref: '../../components/responses/Unauthorized.yaml'
    '500':
      $ref: '../../components/responses/Error.yaml'
127
specs/api/paths/flow/graph-rag.yaml
Normal file
127
specs/api/paths/flow/graph-rag.yaml
Normal file
|
|
@ -0,0 +1,127 @@
|
|||
# Graph RAG: POST operation (specs/api/paths/flow/graph-rag.yaml).
# Schemas and the 401/500 responses are external files referenced by $ref.
post:
  tags:
    - Flow Services
  summary: Graph RAG - retrieve and generate from knowledge graph
  # Markdown body; the literal block scalar keeps its line breaks as written.
  description: |
    Retrieval-Augmented Generation over knowledge graph.

    ## Graph RAG Overview

    Graph RAG combines:
    1. **Retrieval**: Find relevant entities and subgraph from knowledge graph
    2. **Generation**: Use LLM to reason over graph structure and generate answer

    This provides graph-aware answers that leverage relationships and structure.

    ## Query Process

    1. Identify relevant entities from query (using embeddings)
    2. Retrieve connected subgraph around entities
    3. Optionally traverse paths up to max-path-length hops
    4. Limit subgraph size to stay within context window
    5. Pass query + graph structure to LLM
    6. Generate answer incorporating graph relationships

    ## Streaming

    Enable `streaming: true` to receive the answer as it's generated:
    - Multiple messages with `response` content
    - Final message with `end-of-stream: true`

    Without streaming, returns complete answer in single response.

    ## Parameters

    Control retrieval scope with multiple knobs:
    - **entity-limit**: How many starting entities to find (1-200, default 50)
    - **triple-limit**: Triples per entity (1-100, default 30)
    - **max-subgraph-size**: Total subgraph cap (10-5000, default 1000)
    - **max-path-length**: Graph traversal depth (1-5, default 2)

    Higher limits = more context but:
    - Slower retrieval
    - Larger context for LLM
    - May hit context window limits

    ## Use Cases

    Best for queries requiring:
    - Relationship understanding ("How are X and Y connected?")
    - Multi-hop reasoning ("What's the path from A to B?")
    - Structural analysis ("What are the main entities related to X?")

  operationId: graphRagService
  # bearerAuth is referenced by name only; the scheme is declared elsewhere.
  security:
    - bearerAuth: []
  parameters:
    # Path parameter selecting the flow instance.
    - name: flow
      in: path
      required: true
      schema:
        type: string
      description: Flow instance ID
      example: my-flow
  requestBody:
    required: true
    content:
      application/json:
        schema:
          $ref: '../../components/schemas/rag/GraphRagRequest.yaml'
        examples:
          basicQuery:
            summary: Basic graph query
            value:
              query: What connections exist between quantum physics and computer science?
              user: alice
              collection: research
          streamingQuery:
            summary: Streaming query with custom limits
            value:
              query: Trace the historical development of AI from Turing to modern LLMs
              user: alice
              collection: research
              entity-limit: 40
              triple-limit: 25
              max-subgraph-size: 800
              max-path-length: 3
              streaming: true
          focusedQuery:
            summary: Focused query with tight limits
            value:
              query: What is the immediate relationship between entity A and B?
              entity-limit: 10
              triple-limit: 15
              max-subgraph-size: 200
              max-path-length: 1
  responses:
    '200':
      description: Successful response
      content:
        application/json:
          schema:
            $ref: '../../components/schemas/rag/GraphRagResponse.yaml'
          examples:
            completeResponse:
              summary: Complete non-streaming response
              value:
                # Literal block: the answer keeps its line breaks.
                response: |
                  Quantum physics and computer science intersect primarily through quantum computing.
                  The knowledge graph shows connections through:
                  - Quantum algorithms (Shor's algorithm, Grover's algorithm)
                  - Quantum information theory
                  - Computational complexity theory
                end-of-stream: false
            streamingChunk:
              summary: Streaming response chunk
              value:
                response: 'Quantum physics and computer science intersect'
                end-of-stream: false
            # Empty response text with end-of-stream set closes the stream.
            streamingComplete:
              summary: Streaming complete marker
              value:
                response: ''
                end-of-stream: true
    '401':
      $ref: '../../components/responses/Unauthorized.yaml'
    '500':
      $ref: '../../components/responses/Error.yaml'
119
specs/api/paths/flow/mcp-tool.yaml
Normal file
119
specs/api/paths/flow/mcp-tool.yaml
Normal file
|
|
@ -0,0 +1,119 @@
|
|||
# MCP tool: POST operation (specs/api/paths/flow/mcp-tool.yaml).
# Schemas and the 401/500 responses are external files referenced by $ref.
post:
  tags:
    - Flow Services
  summary: MCP Tool - execute Model Context Protocol tools
  # Markdown body; the literal block scalar keeps its line breaks as written.
  description: |
    Execute MCP (Model Context Protocol) tools for agent capabilities.

    ## MCP Tool Overview

    MCP tools provide agent capabilities through standardized protocol:
    - **Search tools**: Web search, document search
    - **Data tools**: Database queries, API calls
    - **Action tools**: File operations, system commands
    - **Integration tools**: Third-party service connectors

    Tools extend agent capabilities beyond pure LLM generation.

    ## Tool Execution

    Tools are:
    1. Registered via MCP protocol
    2. Discovered by agent
    3. Called with structured parameters
    4. Return text or structured results

    ## Request Format

    - **name**: Tool identifier (e.g., "search", "calculator", "weather")
    - **parameters**: Tool-specific arguments as JSON object

    ## Response Format

    Tools can return:
    - **text**: Plain text result (simple tools)
    - **object**: Structured JSON result (complex tools)

    ## Tool Registration

    Tools are registered via MCP server configuration:
    - Define tool schema (name, parameters, description)
    - Implement tool handler
    - Register with MCP server
    - Agent discovers and uses tool

    ## Use Cases

    - **Web search**: Find external information
    - **Calculator**: Perform calculations
    - **Database query**: Retrieve structured data
    - **API integration**: Call external services
    - **File operations**: Read/write files
    - **Code execution**: Run scripts

  operationId: mcpToolService
  # bearerAuth is referenced by name only; the scheme is declared elsewhere.
  security:
    - bearerAuth: []
  parameters:
    # Path parameter selecting the flow instance.
    - name: flow
      in: path
      required: true
      schema:
        type: string
      description: Flow instance ID
      example: my-flow
  requestBody:
    required: true
    content:
      application/json:
        schema:
          $ref: '../../components/schemas/mcp-tool/McpToolRequest.yaml'
        examples:
          searchTool:
            summary: Search tool execution
            value:
              name: search
              parameters:
                query: quantum computing
                limit: 10
          calculatorTool:
            summary: Calculator tool
            value:
              name: calculator
              parameters:
                # Quoted so the leading parenthesis and `*` read as plain text.
                expression: '(42 * 7) + 15'
          weatherTool:
            summary: Weather tool
            value:
              name: weather
              parameters:
                location: San Francisco
                units: celsius
  responses:
    '200':
      description: Successful response
      content:
        application/json:
          schema:
            $ref: '../../components/schemas/mcp-tool/McpToolResponse.yaml'
          examples:
            textResponse:
              summary: Text result
              value:
                text: The result is 309
            objectResponse:
              summary: Structured result
              value:
                object:
                  results:
                    - title: Introduction to Quantum Computing
                      url: 'https://example.com/qc-intro'
                      snippet: Quantum computing uses quantum mechanics...
                    - title: Quantum Algorithms
                      url: 'https://example.com/qc-algos'
                      snippet: Key algorithms include Shor's and Grover's...
                  total: 10
    '401':
      $ref: '../../components/responses/Unauthorized.yaml'
    '500':
      $ref: '../../components/responses/Error.yaml'
148
specs/api/paths/flow/nlp-query.yaml
Normal file
148
specs/api/paths/flow/nlp-query.yaml
Normal file
|
|
@ -0,0 +1,148 @@
|
|||
post:
|
||||
tags:
|
||||
- Flow Services
|
||||
summary: NLP Query - natural language to structured query
|
||||
description: |
|
||||
Convert natural language questions to structured GraphQL queries.
|
||||
|
||||
## NLP Query Overview
|
||||
|
||||
Transforms user questions into executable GraphQL:
|
||||
- **Natural input**: Ask questions in plain English
|
||||
- **Structured output**: Get GraphQL query + variables
|
||||
- **Schema-aware**: Uses knowledge graph schema
|
||||
- **Confidence scoring**: Know how well the question was understood
|
||||
|
||||
Enables non-technical users to query the knowledge graph.
|
||||
|
||||
## Process
|
||||
|
||||
1. Parse natural language question
|
||||
2. Identify entities and relationships
|
||||
3. Map to GraphQL schema types
|
||||
4. Generate query with variables
|
||||
5. Return query + confidence score
|
||||
|
||||
## Using Results
|
||||
|
||||
Generated query can be:
|
||||
- Executed via objects query service
|
||||
- Inspected and modified if needed
|
||||
- Cached for similar questions
|
||||
|
||||
Example workflow:
|
||||
```
|
||||
1. User asks: "Who does Alice know?"
|
||||
2. NLP Query generates GraphQL
|
||||
3. Execute via /api/v1/flow/{flow}/service/objects
|
||||
4. Return results to user
|
||||
```
|
||||
|
||||
## Schema Detection
|
||||
|
||||
Response includes `detected-schemas` array showing:
|
||||
- Which types were identified
|
||||
- What entities were matched
|
||||
- Schema coverage of question
|
||||
|
||||
Helps understand query scope.
|
||||
|
||||
## Confidence Scores
|
||||
|
||||
- **0.9-1.0**: High confidence, likely correct
|
||||
- **0.7-0.9**: Good confidence, probably correct
|
||||
- **0.5-0.7**: Medium confidence, may need review
|
||||
- **< 0.5**: Low confidence, likely incorrect
|
||||
|
||||
Low scores suggest:
|
||||
- Ambiguous question
|
||||
- Missing schema coverage
|
||||
- Complex query structure
|
||||
|
||||
operationId: nlpQueryService
|
||||
security:
|
||||
- bearerAuth: []
|
||||
parameters:
|
||||
- name: flow
|
||||
in: path
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
description: Flow instance ID
|
||||
example: my-flow
|
||||
requestBody:
|
||||
required: true
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '../../components/schemas/query/NlpQueryRequest.yaml'
|
||||
examples:
|
||||
simpleQuestion:
|
||||
summary: Simple relationship question
|
||||
value:
|
||||
question: Who does Alice know?
|
||||
max-results: 50
|
||||
complexQuestion:
|
||||
summary: Multi-hop relationship
|
||||
value:
|
||||
question: What companies employ people that Alice knows?
|
||||
max-results: 100
|
||||
filterQuestion:
|
||||
summary: Question with filters
|
||||
value:
|
||||
question: Which engineers does Bob collaborate with?
|
||||
responses:
|
||||
'200':
|
||||
description: Successful response
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '../../components/schemas/query/NlpQueryResponse.yaml'
|
||||
examples:
|
||||
successfulQuery:
|
||||
summary: Successful query generation
|
||||
value:
|
||||
graphql-query: |
|
||||
query GetConnections($person: ID!) {
|
||||
person(id: $person) {
|
||||
knows { name email }
|
||||
}
|
||||
}
|
||||
variables:
|
||||
person: "https://example.com/person/alice"
|
||||
detected-schemas: ["Person"]
|
||||
confidence: 0.92
|
||||
complexQuery:
|
||||
summary: Complex multi-hop query
|
||||
value:
|
||||
graphql-query: |
|
||||
query GetCompanies($person: ID!) {
|
||||
person(id: $person) {
|
||||
knows {
|
||||
worksFor {
|
||||
name
|
||||
industry
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
variables:
|
||||
person: "https://example.com/person/alice"
|
||||
detected-schemas: ["Person", "Organization"]
|
||||
confidence: 0.85
|
||||
lowConfidence:
|
||||
summary: Low confidence result
|
||||
value:
|
||||
graphql-query: |
|
||||
query Search {
|
||||
search(term: "unknown entities") {
|
||||
results
|
||||
}
|
||||
}
|
||||
variables: {}
|
||||
detected-schemas: []
|
||||
confidence: 0.43
|
||||
'401':
|
||||
$ref: '../../components/responses/Unauthorized.yaml'
|
||||
'500':
|
||||
$ref: '../../components/responses/Error.yaml'
|
||||
166
specs/api/paths/flow/objects.yaml
Normal file
166
specs/api/paths/flow/objects.yaml
Normal file
|
|
@ -0,0 +1,166 @@
|
|||
post:
|
||||
tags:
|
||||
- Flow Services
|
||||
summary: Objects query - GraphQL over knowledge graph
|
||||
description: |
|
||||
Query knowledge graph using GraphQL for object-oriented data access.
|
||||
|
||||
## Objects Query Overview
|
||||
|
||||
GraphQL interface to knowledge graph:
|
||||
- **Schema-driven**: Predefined types and relationships
|
||||
- **Flexible queries**: Request exactly what you need
|
||||
- **Nested data**: Traverse relationships in single query
|
||||
- **Type-safe**: Strong typing with introspection
|
||||
|
||||
Abstracts RDF triples into a familiar object model.
|
||||
|
||||
## GraphQL Benefits
|
||||
|
||||
Compared to triples query:
|
||||
- **Developer-friendly**: Objects instead of triples
|
||||
- **Efficient**: Get related data in one query
|
||||
- **Typed**: Schema defines available fields
|
||||
- **Discoverable**: Introspection for tooling
|
||||
|
||||
## Query Structure
|
||||
|
||||
Standard GraphQL query format:
|
||||
```graphql
|
||||
query OperationName($var: Type!) {
|
||||
fieldName(arg: $var) {
|
||||
subField1
|
||||
subField2
|
||||
nestedObject {
|
||||
nestedField
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Variables
|
||||
|
||||
Pass variables for parameterized queries:
|
||||
```json
|
||||
{
|
||||
"query": "query GetPerson($id: ID!) { person(id: $id) { name } }",
|
||||
"variables": {"id": "https://example.com/person/alice"}
|
||||
}
|
||||
```
|
||||
|
||||
## Error Handling
|
||||
|
||||
The response distinguishes two kinds of error:
|
||||
- **Field errors**: Invalid query, missing fields (in `errors` array)
|
||||
- **System errors**: Connection issues, timeouts (in `error` object)
|
||||
|
||||
Partial data may be returned with field errors.
|
||||
|
||||
## Schema Definition
|
||||
|
||||
Schema defines available types via config service.
|
||||
Use introspection query to discover schema.
|
||||
|
||||
operationId: objectsQueryService
|
||||
security:
|
||||
- bearerAuth: []
|
||||
parameters:
|
||||
- name: flow
|
||||
in: path
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
description: Flow instance ID
|
||||
example: my-flow
|
||||
requestBody:
|
||||
required: true
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '../../components/schemas/query/ObjectsQueryRequest.yaml'
|
||||
examples:
|
||||
simpleQuery:
|
||||
summary: Simple query
|
||||
value:
|
||||
query: |
|
||||
{
|
||||
person(id: "https://example.com/person/alice") {
|
||||
name
|
||||
email
|
||||
}
|
||||
}
|
||||
user: alice
|
||||
collection: research
|
||||
queryWithVariables:
|
||||
summary: Query with variables
|
||||
value:
|
||||
query: |
|
||||
query GetPerson($id: ID!) {
|
||||
person(id: $id) {
|
||||
name
|
||||
email
|
||||
knows {
|
||||
name
|
||||
}
|
||||
}
|
||||
}
|
||||
variables:
|
||||
id: "https://example.com/person/alice"
|
||||
operation-name: GetPerson
|
||||
nestedQuery:
|
||||
summary: Nested relationship query
|
||||
value:
|
||||
query: |
|
||||
{
|
||||
person(id: "https://example.com/person/alice") {
|
||||
name
|
||||
knows {
|
||||
name
|
||||
worksFor {
|
||||
name
|
||||
location
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
responses:
|
||||
'200':
|
||||
description: Successful response
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '../../components/schemas/query/ObjectsQueryResponse.yaml'
|
||||
examples:
|
||||
successfulQuery:
|
||||
summary: Successful query
|
||||
value:
|
||||
data:
|
||||
person:
|
||||
name: Alice
|
||||
email: alice@example.com
|
||||
knows:
|
||||
- name: Bob
|
||||
- name: Carol
|
||||
extensions:
|
||||
execution_time_ms: "42"
|
||||
queryWithFieldErrors:
|
||||
summary: Query with field errors
|
||||
value:
|
||||
data:
|
||||
person:
|
||||
name: Alice
|
||||
email: null
|
||||
errors:
|
||||
- message: Cannot query field 'nonexistent' on type 'Person'
|
||||
path: ["person", "nonexistent"]
|
||||
systemError:
|
||||
summary: System error
|
||||
value:
|
||||
data: null
|
||||
error:
|
||||
type: TIMEOUT_ERROR
|
||||
message: Query execution timeout after 30s
|
||||
'401':
|
||||
$ref: '../../components/responses/Unauthorized.yaml'
|
||||
'500':
|
||||
$ref: '../../components/responses/Error.yaml'
|
||||
143
specs/api/paths/flow/prompt.yaml
Normal file
143
specs/api/paths/flow/prompt.yaml
Normal file
|
|
@ -0,0 +1,143 @@
|
|||
post:
|
||||
tags:
|
||||
- Flow Services
|
||||
summary: Prompt service - template-based generation
|
||||
description: |
|
||||
Execute stored prompt templates with variable substitution.
|
||||
|
||||
## Prompt Service Overview
|
||||
|
||||
The prompt service enables:
|
||||
- Reusable prompt templates stored in configuration
|
||||
- Variable substitution for dynamic prompts
|
||||
- Consistent prompt engineering across requests
|
||||
- Text or structured object outputs
|
||||
|
||||
## Template System
|
||||
|
||||
Prompts are stored via config service (`/api/v1/config`) with:
|
||||
- **id**: Unique prompt identifier
|
||||
- **template**: Prompt text with `{variable}` placeholders
|
||||
- **system**: Optional system prompt
|
||||
- **output_format**: "text" or "object"
|
||||
|
||||
Example template:
|
||||
```
|
||||
Summarize the following document in {max_length} words:
|
||||
|
||||
{document}
|
||||
```
|
||||
|
||||
## Variable Substitution
|
||||
|
||||
Two ways to pass variables:
|
||||
|
||||
1. **terms** (explicit JSON strings):
|
||||
```json
|
||||
{
|
||||
"terms": {
|
||||
"document": "\"Text here...\"",
|
||||
"max_length": "\"200\""
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
2. **variables** (auto-converted):
|
||||
```json
|
||||
{
|
||||
"variables": {
|
||||
"document": "Text here...",
|
||||
"max_length": 200
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Output Types
|
||||
|
||||
- **text**: Plain text response in `text` field
|
||||
- **object**: Structured JSON in `object` field (as string)
|
||||
|
||||
## Streaming
|
||||
|
||||
Enable `streaming: true` to receive response incrementally.
|
||||
|
||||
## Use Cases
|
||||
|
||||
- Document summarization
|
||||
- Entity extraction
|
||||
- Classification tasks
|
||||
- Data transformation
|
||||
- Any repeatable LLM task with consistent prompting
|
||||
|
||||
operationId: promptService
|
||||
security:
|
||||
- bearerAuth: []
|
||||
parameters:
|
||||
- name: flow
|
||||
in: path
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
description: Flow instance ID
|
||||
example: my-flow
|
||||
requestBody:
|
||||
required: true
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '../../components/schemas/prompt/PromptRequest.yaml'
|
||||
examples:
|
||||
withTerms:
|
||||
summary: Using terms (JSON strings)
|
||||
value:
|
||||
id: summarize-document
|
||||
terms:
|
||||
document: '"This document discusses quantum computing, covering qubits, superposition, and entanglement. Applications include cryptography and optimization."'
|
||||
max_length: '"50"'
|
||||
withVariables:
|
||||
summary: Using variables (auto-converted)
|
||||
value:
|
||||
id: extract-entities
|
||||
variables:
|
||||
text: A paper by Einstein on relativity published in 1905.
|
||||
entity_types: ["person", "year", "topic"]
|
||||
streaming:
|
||||
summary: Streaming response
|
||||
value:
|
||||
id: generate-report
|
||||
variables:
|
||||
data: {revenue: 1000000, growth: 15}
|
||||
format: executive summary
|
||||
streaming: true
|
||||
responses:
|
||||
'200':
|
||||
description: Successful response
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '../../components/schemas/prompt/PromptResponse.yaml'
|
||||
examples:
|
||||
textResponse:
|
||||
summary: Text output
|
||||
value:
|
||||
text: This document provides an overview of quantum computing fundamentals and cryptographic applications.
|
||||
end-of-stream: false
|
||||
objectResponse:
|
||||
summary: Structured output
|
||||
value:
|
||||
object: '{"entities": [{"type": "person", "value": "Einstein"}, {"type": "year", "value": "1905"}, {"type": "topic", "value": "relativity"}]}'
|
||||
end-of-stream: false
|
||||
streamingChunk:
|
||||
summary: Streaming chunk
|
||||
value:
|
||||
text: This document provides an overview
|
||||
end-of-stream: false
|
||||
streamingComplete:
|
||||
summary: Streaming complete
|
||||
value:
|
||||
text: ""
|
||||
end-of-stream: true
|
||||
'401':
|
||||
$ref: '../../components/responses/Unauthorized.yaml'
|
||||
'500':
|
||||
$ref: '../../components/responses/Error.yaml'
|
||||
172
specs/api/paths/flow/structured-diag.yaml
Normal file
172
specs/api/paths/flow/structured-diag.yaml
Normal file
|
|
@ -0,0 +1,172 @@
|
|||
post:
|
||||
tags:
|
||||
- Flow Services
|
||||
summary: Structured Diag - analyze structured data formats
|
||||
description: |
|
||||
Analyze and understand structured data (CSV, JSON, XML).
|
||||
|
||||
## Structured Diag Overview
|
||||
|
||||
Helps process unknown structured data:
|
||||
- **Detect format**: Identify CSV, JSON, or XML
|
||||
- **Generate schema**: Create descriptor from sample
|
||||
- **Match schemas**: Find existing schemas that fit data
|
||||
- **Full diagnosis**: Complete analysis in one call
|
||||
|
||||
Essential for data ingestion pipelines.
|
||||
|
||||
## Operations
|
||||
|
||||
### detect-type
|
||||
Identify data format from sample:
|
||||
- Input: Data sample
|
||||
- Output: Format (csv/json/xml) + confidence
|
||||
- Use when: Format is unknown
|
||||
|
||||
### generate-descriptor
|
||||
Create schema descriptor:
|
||||
- Input: Sample + known type
|
||||
- Output: Field definitions, types, structure
|
||||
- Use when: Need to understand data structure
|
||||
|
||||
### diagnose (recommended)
|
||||
Combined analysis:
|
||||
- Input: Data sample
|
||||
- Output: Format + descriptor + metadata
|
||||
- Use when: Starting from scratch
|
||||
|
||||
### schema-selection
|
||||
Find matching schemas:
|
||||
- Input: Data sample
|
||||
- Output: List of schema IDs that match
|
||||
- Use when: Have existing schemas, need to match data
|
||||
|
||||
## Data Types
|
||||
|
||||
Supported formats:
|
||||
- **CSV**: Comma-separated values (or custom delimiter)
|
||||
- **JSON**: JSON objects or arrays
|
||||
- **XML**: XML documents
|
||||
|
||||
## Options
|
||||
|
||||
Format-specific options:
|
||||
- **CSV**: delimiter, has_header, quote_char
|
||||
- **JSON**: array_path (for nested arrays)
|
||||
- **XML**: root_element, record_path
|
||||
|
||||
## Workflow Example
|
||||
|
||||
1. Receive unknown data file
|
||||
2. Call diagnose operation with sample
|
||||
3. Get format + schema descriptor
|
||||
4. Use descriptor to process full dataset
|
||||
5. Load data via document-load or text-load
|
||||
|
||||
operationId: structuredDiagService
|
||||
security:
|
||||
- bearerAuth: []
|
||||
parameters:
|
||||
- name: flow
|
||||
in: path
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
description: Flow instance ID
|
||||
example: my-flow
|
||||
requestBody:
|
||||
required: true
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '../../components/schemas/diag/StructuredDiagRequest.yaml'
|
||||
examples:
|
||||
detectType:
|
||||
summary: Detect data type
|
||||
value:
|
||||
operation: detect-type
|
||||
sample: |
|
||||
name,age,email
|
||||
Alice,30,alice@example.com
|
||||
Bob,25,bob@example.com
|
||||
generateDescriptor:
|
||||
summary: Generate schema descriptor
|
||||
value:
|
||||
operation: generate-descriptor
|
||||
sample: |
|
||||
name,age,email
|
||||
Alice,30,alice@example.com
|
||||
type: csv
|
||||
schema-name: person-records
|
||||
options:
|
||||
delimiter: ","
|
||||
has_header: "true"
|
||||
diagnose:
|
||||
summary: Full diagnosis
|
||||
value:
|
||||
operation: diagnose
|
||||
sample: |
|
||||
[
|
||||
{"name": "Alice", "age": 30},
|
||||
{"name": "Bob", "age": 25}
|
||||
]
|
||||
schemaSelection:
|
||||
summary: Find matching schemas
|
||||
value:
|
||||
operation: schema-selection
|
||||
sample: |
|
||||
name,email,phone
|
||||
Alice,alice@example.com,555-1234
|
||||
responses:
|
||||
'200':
|
||||
description: Successful response
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '../../components/schemas/diag/StructuredDiagResponse.yaml'
|
||||
examples:
|
||||
detectedType:
|
||||
summary: Type detection result
|
||||
value:
|
||||
operation: detect-type
|
||||
detected-type: csv
|
||||
confidence: 0.95
|
||||
generatedDescriptor:
|
||||
summary: Generated descriptor
|
||||
value:
|
||||
operation: generate-descriptor
|
||||
descriptor:
|
||||
schema_name: person-records
|
||||
type: csv
|
||||
fields:
|
||||
- {name: name, type: string}
|
||||
- {name: age, type: integer}
|
||||
- {name: email, type: string}
|
||||
metadata:
|
||||
field_count: "3"
|
||||
has_header: "true"
|
||||
fullDiagnosis:
|
||||
summary: Complete diagnosis
|
||||
value:
|
||||
operation: diagnose
|
||||
detected-type: json
|
||||
confidence: 0.98
|
||||
descriptor:
|
||||
type: json
|
||||
structure: array_of_objects
|
||||
fields:
|
||||
- {name: name, type: string}
|
||||
- {name: age, type: integer}
|
||||
metadata:
|
||||
record_count: "2"
|
||||
schemaMatches:
|
||||
summary: Schema selection results
|
||||
value:
|
||||
operation: schema-selection
|
||||
schema-matches:
|
||||
- person-schema-v1
|
||||
- contact-schema-v2
|
||||
'401':
|
||||
$ref: '../../components/responses/Unauthorized.yaml'
|
||||
'500':
|
||||
$ref: '../../components/responses/Error.yaml'
|
||||
134
specs/api/paths/flow/structured-query.yaml
Normal file
134
specs/api/paths/flow/structured-query.yaml
Normal file
|
|
@ -0,0 +1,134 @@
|
|||
post:
|
||||
tags:
|
||||
- Flow Services
|
||||
summary: Structured Query - question to results (all-in-one)
|
||||
description: |
|
||||
Ask natural language questions and get results directly.
|
||||
|
||||
## Structured Query Overview
|
||||
|
||||
Runs the following steps in one call:
|
||||
1. **NLP Query**: Generate GraphQL from question
|
||||
2. **Objects Query**: Execute generated query
|
||||
3. **Return Results**: Direct answer data
|
||||
|
||||
Simplest way to query the knowledge graph with natural language.
|
||||
|
||||
## Comparison with Other Services
|
||||
|
||||
### Structured Query (this service)
|
||||
- **Input**: Natural language question
|
||||
- **Output**: Query results (data)
|
||||
- **Use when**: Want simple, direct answers
|
||||
|
||||
### NLP Query + Objects Query (separate calls)
|
||||
- **Step 1**: Convert question → GraphQL
|
||||
- **Step 2**: Execute GraphQL → results
|
||||
- **Use when**: Need to inspect/modify query before execution
|
||||
|
||||
### Triples Query (low-level)
|
||||
- **Input**: RDF pattern
|
||||
- **Output**: Matching triples
|
||||
- **Use when**: Need precise control over graph queries
|
||||
|
||||
## Response Format
|
||||
|
||||
Returns a GraphQL-style response:
|
||||
- **data**: Query results (null if error)
|
||||
- **errors**: Field-level errors (array of strings)
|
||||
- **error**: System-level error (generation or execution failure)
|
||||
|
||||
## Error Handling
|
||||
|
||||
Three types of errors:
|
||||
1. **Query generation failed**: Couldn't understand question
|
||||
- Error in `error` object
|
||||
- data = null
|
||||
2. **Query execution failed**: Generated query had errors
|
||||
- Errors in `errors` array
|
||||
- data may be partial
|
||||
3. **System error**: Infrastructure issue
|
||||
- Error in `error` object
|
||||
|
||||
## Performance
|
||||
|
||||
Convenience vs control trade-off:
|
||||
- **Faster development**: One call instead of two
|
||||
- **Less control**: Can't inspect/modify generated query
|
||||
- **Simpler code**: No need to handle intermediate steps
|
||||
|
||||
operationId: structuredQueryService
|
||||
security:
|
||||
- bearerAuth: []
|
||||
parameters:
|
||||
- name: flow
|
||||
in: path
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
description: Flow instance ID
|
||||
example: my-flow
|
||||
requestBody:
|
||||
required: true
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '../../components/schemas/query/StructuredQueryRequest.yaml'
|
||||
examples:
|
||||
simpleQuestion:
|
||||
summary: Simple relationship question
|
||||
value:
|
||||
question: Who does Alice know?
|
||||
user: alice
|
||||
collection: research
|
||||
complexQuestion:
|
||||
summary: Complex multi-hop question
|
||||
value:
|
||||
question: What companies employ engineers that Bob collaborates with?
|
||||
user: bob
|
||||
collection: work
|
||||
filterQuestion:
|
||||
summary: Question with implicit filters
|
||||
value:
|
||||
question: Which researchers work on quantum computing?
|
||||
responses:
|
||||
'200':
|
||||
description: Successful response
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '../../components/schemas/query/StructuredQueryResponse.yaml'
|
||||
examples:
|
||||
successfulQuery:
|
||||
summary: Successful query with results
|
||||
value:
|
||||
data:
|
||||
person:
|
||||
name: Alice
|
||||
knows:
|
||||
- name: Bob
|
||||
email: bob@example.com
|
||||
- name: Carol
|
||||
email: carol@example.com
|
||||
errors: []
|
||||
partialResults:
|
||||
summary: Partial results with errors
|
||||
value:
|
||||
data:
|
||||
person:
|
||||
name: Alice
|
||||
knows: null
|
||||
errors:
|
||||
- Cannot query field 'nonexistent' on type 'Person'
|
||||
generationFailed:
|
||||
summary: Query generation failed
|
||||
value:
|
||||
data: null
|
||||
errors: []
|
||||
error:
|
||||
type: QUERY_GENERATION_ERROR
|
||||
message: Could not understand question structure
|
||||
'401':
|
||||
$ref: '../../components/responses/Unauthorized.yaml'
|
||||
'500':
|
||||
$ref: '../../components/responses/Error.yaml'
|
||||
125
specs/api/paths/flow/text-completion.yaml
Normal file
125
specs/api/paths/flow/text-completion.yaml
Normal file
|
|
@ -0,0 +1,125 @@
|
|||
post:
|
||||
tags:
|
||||
- Flow Services
|
||||
summary: Text completion - direct LLM generation
|
||||
description: |
|
||||
Direct text completion using LLM without retrieval augmentation.
|
||||
|
||||
## Text Completion Overview
|
||||
|
||||
Pure LLM generation for:
|
||||
- General knowledge questions
|
||||
- Creative writing
|
||||
- Code generation
|
||||
- Analysis and reasoning
|
||||
- Any task not requiring specific document/graph context
|
||||
|
||||
## System vs Prompt
|
||||
|
||||
- **system**: Sets LLM behavior, role, constraints
|
||||
- "You are a helpful assistant"
|
||||
- "You are an expert Python developer"
|
||||
- "Respond in JSON format"
|
||||
- **prompt**: The actual user request/question
|
||||
|
||||
## Streaming
|
||||
|
||||
Enable `streaming: true` to receive tokens as generated:
|
||||
- Multiple messages with partial `response`
|
||||
- Final message with `end-of-stream: true`
|
||||
|
||||
Without streaming, returns complete response in single message.
|
||||
|
||||
## Token Counting
|
||||
|
||||
Response includes token usage:
|
||||
- `in-token`: Input tokens (system + prompt)
|
||||
- `out-token`: Generated tokens
|
||||
- Useful for cost tracking and optimization
|
||||
|
||||
## When to Use
|
||||
|
||||
Use text-completion when:
|
||||
- No specific context needed (general knowledge)
|
||||
- System prompt provides sufficient context
|
||||
- Want direct control over prompting
|
||||
|
||||
Use document-rag/graph-rag when:
|
||||
- Need to ground response in specific documents
|
||||
- Want to leverage knowledge graph relationships
|
||||
- Require citations or provenance
|
||||
|
||||
operationId: textCompletionService
|
||||
security:
|
||||
- bearerAuth: []
|
||||
parameters:
|
||||
- name: flow
|
||||
in: path
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
description: Flow instance ID
|
||||
example: my-flow
|
||||
requestBody:
|
||||
required: true
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '../../components/schemas/text-completion/TextCompletionRequest.yaml'
|
||||
examples:
|
||||
basicCompletion:
|
||||
summary: Basic text completion
|
||||
value:
|
||||
system: You are a helpful assistant that provides concise answers.
|
||||
prompt: Explain the concept of recursion in programming.
|
||||
codeGeneration:
|
||||
summary: Code generation with streaming
|
||||
value:
|
||||
system: You are an expert Python developer. Provide clean, well-documented code.
|
||||
prompt: Write a function to calculate the Fibonacci sequence using memoization.
|
||||
streaming: true
|
||||
jsonResponse:
|
||||
summary: Structured output request
|
||||
value:
|
||||
system: You are a JSON API. Respond only with valid JSON, no other text.
|
||||
prompt: |
|
||||
Extract key information from this text and return as JSON with fields:
|
||||
title, author, year, summary.
|
||||
|
||||
Text: "The Theory of Everything by Stephen Hawking (2006) explores..."
|
||||
responses:
|
||||
'200':
|
||||
description: Successful response
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '../../components/schemas/text-completion/TextCompletionResponse.yaml'
|
||||
examples:
|
||||
completeResponse:
|
||||
summary: Complete non-streaming response
|
||||
value:
|
||||
response: |
|
||||
Recursion is a programming technique where a function calls itself
|
||||
to solve a problem by breaking it down into smaller, similar subproblems.
|
||||
Each recursive call works on a simpler version until reaching a base case.
|
||||
in-token: 45
|
||||
out-token: 128
|
||||
model: gpt-4
|
||||
end-of-stream: false
|
||||
streamingChunk:
|
||||
summary: Streaming response chunk
|
||||
value:
|
||||
response: "Recursion is a programming technique"
|
||||
end-of-stream: false
|
||||
streamingComplete:
|
||||
summary: Streaming complete with tokens
|
||||
value:
|
||||
response: ""
|
||||
in-token: 45
|
||||
out-token: 128
|
||||
model: gpt-4
|
||||
end-of-stream: true
|
||||
'401':
|
||||
$ref: '../../components/responses/Unauthorized.yaml'
|
||||
'500':
|
||||
$ref: '../../components/responses/Error.yaml'
|
||||
111
specs/api/paths/flow/text-load.yaml
Normal file
111
specs/api/paths/flow/text-load.yaml
Normal file
|
|
@ -0,0 +1,111 @@
|
|||
post:
|
||||
tags:
|
||||
- Flow Services
|
||||
summary: Text Load - load text documents
|
||||
description: |
|
||||
Load text documents into processing pipeline for indexing and embedding.
|
||||
|
||||
## Text Load Overview
|
||||
|
||||
Fire-and-forget document loading:
|
||||
- **Input**: Text content (base64 encoded)
|
||||
- **Process**: Chunk, embed, store
|
||||
- **Output**: None (202 Accepted)
|
||||
|
||||
Asynchronous processing - document queued for background processing.
|
||||
|
||||
## Processing Pipeline
|
||||
|
||||
Text documents go through:
|
||||
1. **Chunking**: Split into overlapping chunks
|
||||
2. **Embedding**: Generate vectors for each chunk
|
||||
3. **Storage**: Store chunks + embeddings
|
||||
4. **Indexing**: Make searchable via document-embeddings query
|
||||
|
||||
Pipeline runs asynchronously after request returns.
|
||||
|
||||
## Text Format
|
||||
|
||||
Text must be base64 encoded:
|
||||
```
|
||||
text_content = "This is the document..."
|
||||
encoded = base64.b64encode(text_content.encode('utf-8')).decode('ascii')
|
||||
```
|
||||
|
||||
Default charset is UTF-8, specify `charset` if different.
|
||||
|
||||
## Metadata
|
||||
|
||||
Optional RDF triples describing document:
|
||||
- Title, author, date
|
||||
- Source URL
|
||||
- Custom properties
|
||||
- Used for organization and retrieval
|
||||
|
||||
## Use Cases
|
||||
|
||||
- **Document ingestion**: Add documents to knowledge base
|
||||
- **Bulk loading**: Process multiple documents
|
||||
- **Content updates**: Replace existing documents
|
||||
- **Library integration**: Load from document library
|
||||
|
||||
## No Response Data
|
||||
|
||||
Returns 202 Accepted immediately:
|
||||
- Document queued for processing
|
||||
- No synchronous result
|
||||
- No processing status
|
||||
- Run a document-embeddings query later to verify the document was indexed
|
||||
|
||||
operationId: textLoadService
|
||||
security:
|
||||
- bearerAuth: []
|
||||
parameters:
|
||||
- name: flow
|
||||
in: path
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
description: Flow instance ID
|
||||
example: my-flow
|
||||
requestBody:
|
||||
required: true
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '../../components/schemas/loading/TextLoadRequest.yaml'
|
||||
examples:
|
||||
simpleLoad:
|
||||
summary: Load text document
|
||||
value:
|
||||
text: VGhpcyBpcyB0aGUgZG9jdW1lbnQgdGV4dC4uLg==
|
||||
id: doc-123
|
||||
user: alice
|
||||
collection: research
|
||||
withMetadata:
|
||||
summary: Load with RDF metadata
|
||||
value:
|
||||
text: UXVhbnR1bSBjb21wdXRpbmcgdXNlcyBxdWFudHVtIG1lY2hhbmljcyBwcmluY2lwbGVzLi4u
|
||||
id: doc-456
|
||||
user: alice
|
||||
collection: research
|
||||
metadata:
|
||||
- s: {v: "doc-456", e: false}
|
||||
p: {v: "http://purl.org/dc/terms/title", e: true}
|
||||
o: {v: "Introduction to Quantum Computing", e: false}
|
||||
- s: {v: "doc-456", e: false}
|
||||
p: {v: "http://purl.org/dc/terms/creator", e: true}
|
||||
o: {v: "Dr. Alice Smith", e: false}
|
||||
responses:
|
||||
'202':
|
||||
description: Document accepted for processing
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
type: object
|
||||
properties: {}
|
||||
example: {}
|
||||
'401':
|
||||
$ref: '../../components/responses/Unauthorized.yaml'
|
||||
'500':
|
||||
$ref: '../../components/responses/Error.yaml'
|
||||
129
specs/api/paths/flow/triples.yaml
Normal file
129
specs/api/paths/flow/triples.yaml
Normal file
|
|
@ -0,0 +1,129 @@
|
|||
# Path item for the triples query flow service.
#
# Defines a single POST operation that matches knowledge-graph triples against
# an optional subject/predicate/object pattern and returns the matches, capped
# by `limit`.
post:
  tags:
    - Flow Services
  summary: Triples query - pattern-based graph queries
  description: |
    Query knowledge graph using subject-predicate-object patterns.

    ## Triples Query Overview

    Query RDF triples with flexible pattern matching:
    - Specify subject, predicate, and/or object
    - Any combination of filters (all optional)
    - Returns matching triples up to limit

    ## Pattern Matching

    Pattern syntax supports:
    - **All triples**: Omit all filters (returns everything up to limit)
    - **Subject match**: Specify `s` only (all triples about that subject)
    - **Predicate match**: Specify `p` only (all uses of that property)
    - **Object match**: Specify `o` only (all triples with that value)
    - **Combinations**: Any combination of s/p/o

    ## RDF Value Format

    Each component (s/p/o) uses RdfValue format:
    - **Entity/URI**: `{"v": "https://example.com/entity", "e": true}`
    - **Literal**: `{"v": "Some text", "e": false}`

    ## Query Examples

    Find all properties of an entity:
    ```json
    {"s": {"v": "https://example.com/person/alice", "e": true}}
    ```

    Find all instances of a type:
    ```json
    {
      "p": {"v": "http://www.w3.org/1999/02/22-rdf-syntax-ns#type", "e": true},
      "o": {"v": "https://example.com/type/Person", "e": true}
    }
    ```

    Find specific relationship:
    ```json
    {
      "s": {"v": "https://example.com/person/alice", "e": true},
      "p": {"v": "https://example.com/knows", "e": true}
    }
    ```

    ## Performance

    - Default limit: 10,000 triples
    - Max limit: 100,000 triples
    - More specific patterns = faster queries
    - Consider limit for large result sets

  operationId: triplesQueryService
  security:
    - bearerAuth: []
  parameters:
    # Path parameter identifying the flow instance.
    - name: flow
      in: path
      required: true
      schema:
        type: string
      description: Flow instance ID
      example: my-flow
  requestBody:
    required: true
    content:
      application/json:
        schema:
          $ref: '../../components/schemas/query/TriplesQueryRequest.yaml'
        # In every example, each s/p/o term is an RdfValue: `v` carries the
        # value and `e` is true for an entity/URI, false for a literal.
        # IRIs are quoted so that `#` and `:` can never be read as YAML syntax.
        examples:
          allTriplesAboutEntity:
            summary: All triples about an entity
            value:
              s:
                v: 'https://example.com/person/alice'
                e: true
              user: alice
              collection: research
              limit: 100
          allInstancesOfType:
            summary: Find all instances of a type
            value:
              p:
                v: 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type'
                e: true
              o:
                v: 'https://example.com/type/Person'
                e: true
              limit: 50
          specificRelationship:
            summary: Find specific relationships
            value:
              p:
                v: 'https://example.com/knows'
                e: true
              user: alice
              limit: 200
  responses:
    '200':
      description: Successful response
      content:
        application/json:
          schema:
            $ref: '../../components/schemas/query/TriplesQueryResponse.yaml'
          examples:
            matchingTriples:
              summary: Matching triples
              value:
                # The rdf:type and rdfs:label predicates use the canonical
                # http:// W3C namespace IRIs, the same form as the request
                # examples. RDF compares IRIs as exact strings, so an https://
                # variant would denote a different predicate.
                response:
                  - s: {v: "https://example.com/person/alice", e: true}
                    p: {v: "http://www.w3.org/1999/02/22-rdf-syntax-ns#type", e: true}
                    o: {v: "https://example.com/type/Person", e: true}
                  - s: {v: "https://example.com/person/alice", e: true}
                    p: {v: "http://www.w3.org/2000/01/rdf-schema#label", e: true}
                    o: {v: "Alice", e: false}
                  - s: {v: "https://example.com/person/alice", e: true}
                    p: {v: "https://example.com/knows", e: true}
                    o: {v: "https://example.com/person/bob", e: true}
    '401':
      $ref: '../../components/responses/Unauthorized.yaml'
    '500':
      $ref: '../../components/responses/Error.yaml'
|
||||
Loading…
Add table
Add a link
Reference in a new issue