From 01aeede78bcb4b87615d69c85e0ff594aede423a Mon Sep 17 00:00:00 2001 From: cybermaggedon Date: Thu, 4 Dec 2025 17:38:57 +0000 Subject: [PATCH] Python API implements streaming interfaces (#577) * Tech spec * Python CLI utilities updated to use the API including streaming features * Added type safety to Python API * Completed missing auth token support in CLI --- docs/tech-specs/python-api-refactor.md | 1508 +++++++++++++++++ tests/unit/test_python_api_client.py | 446 +++++ trustgraph-base/trustgraph/api/__init__.py | 79 +- trustgraph-base/trustgraph/api/api.py | 112 +- .../trustgraph/api/async_bulk_client.py | 131 ++ trustgraph-base/trustgraph/api/async_flow.py | 245 +++ .../trustgraph/api/async_metrics.py | 33 + .../trustgraph/api/async_socket_client.py | 335 ++++ trustgraph-base/trustgraph/api/bulk_client.py | 270 +++ trustgraph-base/trustgraph/api/flow.py | 15 + trustgraph-base/trustgraph/api/metrics.py | 27 + .../trustgraph/api/socket_client.py | 445 +++++ trustgraph-base/trustgraph/api/types.py | 32 +- .../trustgraph/cli/delete_config_item.py | 12 +- .../trustgraph/cli/get_config_item.py | 12 +- trustgraph-cli/trustgraph/cli/get_kg_core.py | 13 +- trustgraph-cli/trustgraph/cli/invoke_agent.py | 169 +- .../trustgraph/cli/invoke_document_rag.py | 142 +- .../trustgraph/cli/invoke_graph_rag.py | 172 +- trustgraph-cli/trustgraph/cli/invoke_llm.py | 94 +- .../trustgraph/cli/invoke_prompt.py | 118 +- .../trustgraph/cli/list_config_items.py | 12 +- .../trustgraph/cli/load_knowledge.py | 191 ++- .../trustgraph/cli/load_sample_documents.py | 12 +- .../trustgraph/cli/load_structured_data.py | 99 +- trustgraph-cli/trustgraph/cli/load_turtle.py | 127 +- .../trustgraph/cli/put_config_item.py | 12 +- .../trustgraph/cli/put_flow_class.py | 12 +- trustgraph-cli/trustgraph/cli/put_kg_core.py | 13 +- .../trustgraph/cli/remove_library_document.py | 13 +- .../trustgraph/cli/set_collection.py | 14 +- trustgraph-cli/trustgraph/cli/set_mcp_tool.py | 13 +- 
trustgraph-cli/trustgraph/cli/set_prompt.py | 21 +- .../trustgraph/cli/set_token_costs.py | 11 +- trustgraph-cli/trustgraph/cli/set_tool.py | 11 +- trustgraph-cli/trustgraph/cli/show_config.py | 12 +- .../trustgraph/cli/show_flow_classes.py | 12 +- .../trustgraph/cli/show_flow_state.py | 13 +- trustgraph-cli/trustgraph/cli/show_flows.py | 12 +- trustgraph-cli/trustgraph/cli/show_graph.py | 12 +- .../trustgraph/cli/show_kg_cores.py | 15 +- .../trustgraph/cli/show_library_documents.py | 15 +- .../trustgraph/cli/show_library_processing.py | 13 +- .../trustgraph/cli/show_mcp_tools.py | 12 +- .../trustgraph/cli/show_parameter_types.py | 19 +- trustgraph-cli/trustgraph/cli/show_prompts.py | 12 +- .../trustgraph/cli/show_token_costs.py | 12 +- trustgraph-cli/trustgraph/cli/show_tools.py | 12 +- trustgraph-cli/trustgraph/cli/start_flow.py | 12 +- .../cli/start_library_processing.py | 14 +- trustgraph-cli/trustgraph/cli/stop_flow.py | 12 +- .../trustgraph/cli/stop_library_processing.py | 12 +- .../trustgraph/cli/unload_kg_core.py | 12 +- 53 files changed, 4489 insertions(+), 715 deletions(-) create mode 100644 docs/tech-specs/python-api-refactor.md create mode 100644 tests/unit/test_python_api_client.py create mode 100644 trustgraph-base/trustgraph/api/async_bulk_client.py create mode 100644 trustgraph-base/trustgraph/api/async_flow.py create mode 100644 trustgraph-base/trustgraph/api/async_metrics.py create mode 100644 trustgraph-base/trustgraph/api/async_socket_client.py create mode 100644 trustgraph-base/trustgraph/api/bulk_client.py create mode 100644 trustgraph-base/trustgraph/api/metrics.py create mode 100644 trustgraph-base/trustgraph/api/socket_client.py diff --git a/docs/tech-specs/python-api-refactor.md b/docs/tech-specs/python-api-refactor.md new file mode 100644 index 00000000..6fcf2f22 --- /dev/null +++ b/docs/tech-specs/python-api-refactor.md @@ -0,0 +1,1508 @@ +# Python API Refactor Technical Specification + +## Overview + +This specification describes a 
comprehensive refactor of the TrustGraph Python API client library to achieve feature parity with the API Gateway and add support for modern real-time communication patterns. + +The refactor addresses four primary use cases: + +1. **Streaming LLM Interactions**: Enable real-time streaming of LLM responses (agent, graph RAG, document RAG, text completion, prompts) with ~60x lower latency (500ms vs 30s for first token) +2. **Bulk Data Operations**: Support efficient bulk import/export of triples, graph embeddings, and document embeddings for large-scale knowledge graph management +3. **Feature Parity**: Ensure every API Gateway endpoint has a corresponding Python API method, including graph embeddings query +4. **Persistent Connections**: Enable WebSocket-based communication for multiplexed requests and reduced connection overhead + +## Goals + +- **Feature Parity**: Every Gateway API service has a corresponding Python API method +- **Streaming Support**: All streaming-capable services (agent, RAG, text completion, prompt) support streaming in Python API +- **WebSocket Transport**: Add optional WebSocket transport layer for persistent connections and multiplexing +- **Bulk Operations**: Add efficient bulk import/export for triples, graph embeddings, and document embeddings +- **Full Async Support**: Complete async/await implementation for all interfaces (REST, WebSocket, bulk operations, metrics) +- **Backward Compatibility**: Existing code continues to work without modification +- **Type Safety**: Maintain type-safe interfaces with dataclasses and type hints +- **Progressive Enhancement**: Streaming and async are opt-in via explicit interface selection +- **Performance**: Achieve 60x latency improvement for streaming operations +- **Modern Python**: Support for both sync and async paradigms for maximum flexibility + +## Background + +### Current State + +The Python API (`trustgraph-base/trustgraph/api/`) is a REST-only client library with the following modules: + +- 
`flow.py`: Flow management and flow-scoped services (50 methods) +- `library.py`: Document library operations (9 methods) +- `knowledge.py`: KG core management (4 methods) +- `collection.py`: Collection metadata (3 methods) +- `config.py`: Configuration management (6 methods) +- `types.py`: Data type definitions (5 dataclasses) + +**Total Operations**: 50/59 (85% coverage) + +### Current Limitations + +**Missing Operations**: +- Graph embeddings query (semantic search over graph entities) +- Bulk import/export for triples, graph embeddings, document embeddings, entity contexts, objects +- Metrics endpoint + +**Missing Capabilities**: +- Streaming support for LLM services +- WebSocket transport +- Multiplexed concurrent requests +- Persistent connections + +**Performance Issues**: +- High latency for LLM interactions (~30s time-to-first-token) +- Inefficient bulk data transfer (REST request per item) +- Connection overhead for multiple sequential operations + +**User Experience Issues**: +- No real-time feedback during LLM generation +- Cannot cancel long-running LLM operations +- Poor scalability for bulk operations + +### Impact + +The November 2024 streaming enhancement to the Gateway API provided 60x latency improvement (500ms vs 30s first token) for LLM interactions, but Python API users cannot leverage this capability. This creates a significant experience gap between Python and non-Python users. + +## Technical Design + +### Architecture + +The refactored Python API uses a **modular interface approach** with separate objects for different communication patterns. All interfaces are available in both **synchronous and asynchronous** variants: + +1. **REST Interface** (existing, enhanced) + - **Sync**: `api.flow()`, `api.library()`, `api.knowledge()`, `api.collection()`, `api.config()` + - **Async**: `api.async_flow()` + - Synchronous/asynchronous request/response + - Simple connection model + - Default for backward compatibility + +2. 
**WebSocket Interface** (new) + - **Sync**: `api.socket()` + - **Async**: `api.async_socket()` + - Persistent connection + - Multiplexed requests + - Streaming support + - Same method signatures as REST where functionality overlaps + +3. **Bulk Operations Interface** (new) + - **Sync**: `api.bulk()` + - **Async**: `api.async_bulk()` + - WebSocket-based for efficiency + - Iterator/AsyncIterator-based import/export + - Handles large datasets + +4. **Metrics Interface** (new) + - **Sync**: `api.metrics()` + - **Async**: `api.async_metrics()` + - Prometheus metrics access + +```python +import asyncio + +# Synchronous interfaces +api = Api(url="http://localhost:8088/") + +# REST (existing, unchanged) +flow = api.flow().id("default") +response = flow.agent(question="...", user="...") + +# WebSocket (new) +socket_flow = api.socket().flow("default") +response = socket_flow.agent(question="...", user="...") +for chunk in socket_flow.agent(question="...", user="...", streaming=True): + print(chunk) + +# Bulk operations (new) +bulk = api.bulk() +bulk.import_triples(flow="default", triples=triple_generator()) + +# Asynchronous interfaces +async def main(): + api = Api(url="http://localhost:8088/") + + # Async REST (new) + flow = api.async_flow().id("default") + response = await flow.agent(question="...", user="...") + + # Async WebSocket (new) + socket_flow = api.async_socket().flow("default") + async for chunk in socket_flow.agent(question="...", streaming=True): + print(chunk) + + # Async bulk operations (new) + bulk = api.async_bulk() + await bulk.import_triples(flow="default", triples=async_triple_generator()) + +asyncio.run(main()) +``` + +**Key Design Principles**: +- **Same URL for all interfaces**: `Api(url="http://localhost:8088/")` works for all +- **Sync/Async symmetry**: Every interface has both sync and async variants with identical method signatures +- **Identical signatures**: Where functionality overlaps, method signatures are identical between REST and 
WebSocket, sync and async +- **Progressive enhancement**: Choose interface based on needs (REST for simple, WebSocket for streaming, Bulk for large datasets, async for modern frameworks) +- **Explicit intent**: `api.socket()` signals WebSocket, `api.async_socket()` signals async WebSocket +- **Backward compatible**: Existing code unchanged + +### Components + +#### 1. Core API Class (Modified) + +Module: `trustgraph-base/trustgraph/api/api.py` + +**Enhanced API Class**: + +```python +class Api: + def __init__(self, url: str, timeout: int = 60, token: Optional[str] = None): + self.url = url + self.timeout = timeout + self.token = token # Optional bearer token for REST, query param for WebSocket + self._socket_client = None + self._bulk_client = None + self._async_flow = None + self._async_socket_client = None + self._async_bulk_client = None + + # Existing synchronous methods (unchanged) + def flow(self) -> Flow: + """Synchronous REST-based flow interface""" + pass + + def library(self) -> Library: + """Synchronous REST-based library interface""" + pass + + def knowledge(self) -> Knowledge: + """Synchronous REST-based knowledge interface""" + pass + + def collection(self) -> Collection: + """Synchronous REST-based collection interface""" + pass + + def config(self) -> Config: + """Synchronous REST-based config interface""" + pass + + # New synchronous methods + def socket(self) -> SocketClient: + """Synchronous WebSocket-based interface for streaming operations""" + if self._socket_client is None: + self._socket_client = SocketClient(self.url, self.timeout, self.token) + return self._socket_client + + def bulk(self) -> BulkClient: + """Synchronous bulk operations interface for import/export""" + if self._bulk_client is None: + self._bulk_client = BulkClient(self.url, self.timeout, self.token) + return self._bulk_client + + def metrics(self) -> Metrics: + """Synchronous metrics interface""" + return Metrics(self.url, self.timeout, self.token) + + # New asynchronous 
methods + def async_flow(self) -> AsyncFlow: + """Asynchronous REST-based flow interface""" + if self._async_flow is None: + self._async_flow = AsyncFlow(self.url, self.timeout, self.token) + return self._async_flow + + def async_socket(self) -> AsyncSocketClient: + """Asynchronous WebSocket-based interface for streaming operations""" + if self._async_socket_client is None: + self._async_socket_client = AsyncSocketClient(self.url, self.timeout, self.token) + return self._async_socket_client + + def async_bulk(self) -> AsyncBulkClient: + """Asynchronous bulk operations interface for import/export""" + if self._async_bulk_client is None: + self._async_bulk_client = AsyncBulkClient(self.url, self.timeout, self.token) + return self._async_bulk_client + + def async_metrics(self) -> AsyncMetrics: + """Asynchronous metrics interface""" + return AsyncMetrics(self.url, self.timeout, self.token) + + # Resource management + def close(self) -> None: + """Close all synchronous connections""" + if self._socket_client: + self._socket_client.close() + if self._bulk_client: + self._bulk_client.close() + + async def aclose(self) -> None: + """Close all asynchronous connections""" + if self._async_socket_client: + await self._async_socket_client.aclose() + if self._async_bulk_client: + await self._async_bulk_client.aclose() + if self._async_flow: + await self._async_flow.aclose() + + def __enter__(self): + return self + + def __exit__(self, *args): + self.close() + + async def __aenter__(self): + return self + + async def __aexit__(self, *args): + await self.aclose() +``` + +#### 2. 
Synchronous WebSocket Client + +Module: `trustgraph-base/trustgraph/api/socket_client.py` (new) + +**SocketClient Class**: + +```python +class SocketClient: + """Synchronous WebSocket client""" + def __init__(self, url: str, timeout: int, token: Optional[str]): + self.url = self._convert_to_ws_url(url) + self.timeout = timeout + self.token = token + self._connection = None + self._request_counter = 0 + + def flow(self, flow_id: str) -> SocketFlowInstance: + """Get flow instance for WebSocket operations""" + return SocketFlowInstance(self, flow_id) + + def _connect(self) -> WebSocket: + """Establish WebSocket connection (lazy)""" + # Uses asyncio.run() internally to wrap async websockets library + pass + + def _send_request( + self, + service: str, + flow: Optional[str], + request: Dict[str, Any], + streaming: bool = False + ) -> Union[Dict[str, Any], Iterator[Dict[str, Any]]]: + """Send request and handle response/streaming""" + # Synchronous wrapper around async WebSocket calls + pass + + def close(self) -> None: + """Close WebSocket connection""" + pass + +class SocketFlowInstance: + """Synchronous WebSocket flow instance with same interface as REST FlowInstance""" + def __init__(self, client: SocketClient, flow_id: str): + self.client = client + self.flow_id = flow_id + + # Same method signatures as FlowInstance + def agent( + self, + question: str, + user: str, + state: Optional[Dict[str, Any]] = None, + group: Optional[str] = None, + history: Optional[List[Dict[str, Any]]] = None, + streaming: bool = False, + **kwargs + ) -> Union[Dict[str, Any], Iterator[Dict[str, Any]]]: + """Agent with optional streaming""" + pass + + def text_completion( + self, + system: str, + prompt: str, + streaming: bool = False, + **kwargs + ) -> Union[str, Iterator[str]]: + """Text completion with optional streaming""" + pass + + # ... similar for graph_rag, document_rag, prompt, etc. 
+``` + +**Key Features**: +- Lazy connection (only connects when first request sent) +- Request multiplexing (up to 15 concurrent) +- Automatic reconnection on disconnect +- Streaming response parsing +- Thread-safe operation +- Synchronous wrapper around async websockets library + +#### 3. Asynchronous WebSocket Client + +Module: `trustgraph-base/trustgraph/api/async_socket_client.py` (new) + +**AsyncSocketClient Class**: + +```python +class AsyncSocketClient: + """Asynchronous WebSocket client""" + def __init__(self, url: str, timeout: int, token: Optional[str]): + self.url = self._convert_to_ws_url(url) + self.timeout = timeout + self.token = token + self._connection = None + self._request_counter = 0 + + def flow(self, flow_id: str) -> AsyncSocketFlowInstance: + """Get async flow instance for WebSocket operations""" + return AsyncSocketFlowInstance(self, flow_id) + + async def _connect(self) -> WebSocket: + """Establish WebSocket connection (lazy)""" + # Native async websockets library + pass + + async def _send_request( + self, + service: str, + flow: Optional[str], + request: Dict[str, Any], + streaming: bool = False + ) -> Union[Dict[str, Any], AsyncIterator[Dict[str, Any]]]: + """Send request and handle response/streaming""" + pass + + async def aclose(self) -> None: + """Close WebSocket connection""" + pass + +class AsyncSocketFlowInstance: + """Asynchronous WebSocket flow instance""" + def __init__(self, client: AsyncSocketClient, flow_id: str): + self.client = client + self.flow_id = flow_id + + # Same method signatures as FlowInstance (but async) + async def agent( + self, + question: str, + user: str, + state: Optional[Dict[str, Any]] = None, + group: Optional[str] = None, + history: Optional[List[Dict[str, Any]]] = None, + streaming: bool = False, + **kwargs + ) -> Union[Dict[str, Any], AsyncIterator[Dict[str, Any]]]: + """Agent with optional streaming""" + pass + + async def text_completion( + self, + system: str, + prompt: str, + streaming: bool = 
False, + **kwargs + ) -> Union[str, AsyncIterator[str]]: + """Text completion with optional streaming""" + pass + + # ... similar for graph_rag, document_rag, prompt, etc. +``` + +**Key Features**: +- Native async/await support +- Efficient for async applications (FastAPI, aiohttp) +- No thread blocking +- Same interface as sync version +- AsyncIterator for streaming + +#### 4. Synchronous Bulk Operations Client + +Module: `trustgraph-base/trustgraph/api/bulk_client.py` (new) + +**BulkClient Class**: + +```python +class BulkClient: + """Synchronous bulk operations client""" + def __init__(self, url: str, timeout: int, token: Optional[str]): + self.url = self._convert_to_ws_url(url) + self.timeout = timeout + self.token = token + + def import_triples( + self, + flow: str, + triples: Iterator[Triple], + **kwargs + ) -> None: + """Bulk import triples via WebSocket""" + pass + + def export_triples( + self, + flow: str, + **kwargs + ) -> Iterator[Triple]: + """Bulk export triples via WebSocket""" + pass + + def import_graph_embeddings( + self, + flow: str, + embeddings: Iterator[Dict[str, Any]], + **kwargs + ) -> None: + """Bulk import graph embeddings via WebSocket""" + pass + + def export_graph_embeddings( + self, + flow: str, + **kwargs + ) -> Iterator[Dict[str, Any]]: + """Bulk export graph embeddings via WebSocket""" + pass + + # ... similar for document embeddings, entity contexts, objects + + def close(self) -> None: + """Close connections""" + pass +``` + +**Key Features**: +- Iterator-based for constant memory usage +- Dedicated WebSocket connections per operation +- Progress tracking (optional callback) +- Error handling with partial success reporting + +#### 5. 
Asynchronous Bulk Operations Client + +Module: `trustgraph-base/trustgraph/api/async_bulk_client.py` (new) + +**AsyncBulkClient Class**: + +```python +class AsyncBulkClient: + """Asynchronous bulk operations client""" + def __init__(self, url: str, timeout: int, token: Optional[str]): + self.url = self._convert_to_ws_url(url) + self.timeout = timeout + self.token = token + + async def import_triples( + self, + flow: str, + triples: AsyncIterator[Triple], + **kwargs + ) -> None: + """Bulk import triples via WebSocket""" + pass + + async def export_triples( + self, + flow: str, + **kwargs + ) -> AsyncIterator[Triple]: + """Bulk export triples via WebSocket""" + pass + + async def import_graph_embeddings( + self, + flow: str, + embeddings: AsyncIterator[Dict[str, Any]], + **kwargs + ) -> None: + """Bulk import graph embeddings via WebSocket""" + pass + + async def export_graph_embeddings( + self, + flow: str, + **kwargs + ) -> AsyncIterator[Dict[str, Any]]: + """Bulk export graph embeddings via WebSocket""" + pass + + # ... similar for document embeddings, entity contexts, objects + + async def aclose(self) -> None: + """Close connections""" + pass +``` + +**Key Features**: +- AsyncIterator-based for constant memory usage +- Efficient for async applications +- Native async/await support +- Same interface as sync version + +#### 6. REST Flow API (Synchronous - Unchanged) + +Module: `trustgraph-base/trustgraph/api/flow.py` + +The REST Flow API remains **completely unchanged** for backward compatibility. All existing methods continue to work: + +- `Flow.list()`, `Flow.start()`, `Flow.stop()`, etc. +- `FlowInstance.agent()`, `FlowInstance.text_completion()`, `FlowInstance.graph_rag()`, etc. +- All existing signatures and return types preserved + +**New**: Add `graph_embeddings_query()` to REST FlowInstance for feature parity: + +```python +class FlowInstance: + # All existing methods unchanged... 
+ + # New: Graph embeddings query (REST) + def graph_embeddings_query( + self, + text: str, + user: str, + collection: str, + limit: int = 10, + **kwargs + ) -> List[Dict[str, Any]]: + """Query graph embeddings for semantic search""" + # Calls POST /api/v1/flow/{flow}/service/graph-embeddings + pass +``` + +#### 7. Asynchronous REST Flow API + +Module: `trustgraph-base/trustgraph/api/async_flow.py` (new) + +**AsyncFlow and AsyncFlowInstance Classes**: + +```python +class AsyncFlow: + """Asynchronous REST-based flow interface""" + def __init__(self, url: str, timeout: int, token: Optional[str]): + self.url = url + self.timeout = timeout + self.token = token + + async def list(self) -> List[Dict[str, Any]]: + """List all flows""" + pass + + async def get(self, id: str) -> Dict[str, Any]: + """Get flow definition""" + pass + + async def start(self, class_name: str, id: str, description: str, parameters: Dict) -> None: + """Start a flow""" + pass + + async def stop(self, id: str) -> None: + """Stop a flow""" + pass + + def id(self, flow_id: str) -> AsyncFlowInstance: + """Get async flow instance""" + return AsyncFlowInstance(self.url, self.timeout, self.token, flow_id) + + async def aclose(self) -> None: + """Close connection""" + pass + +class AsyncFlowInstance: + """Asynchronous REST flow instance""" + + async def agent( + self, + question: str, + user: str, + state: Optional[Dict[str, Any]] = None, + group: Optional[str] = None, + history: Optional[List[Dict[str, Any]]] = None, + **kwargs + ) -> Dict[str, Any]: + """Async agent execution""" + pass + + async def text_completion( + self, + system: str, + prompt: str, + **kwargs + ) -> str: + """Async text completion""" + pass + + async def graph_rag( + self, + question: str, + user: str, + collection: str, + **kwargs + ) -> str: + """Async graph RAG""" + pass + + # ... 
all other FlowInstance methods as async versions +``` + +**Key Features**: +- Native async HTTP using `aiohttp` or `httpx` +- Same method signatures as sync REST API +- No streaming (use `async_socket()` for streaming) +- Efficient for async applications + +#### 8. Metrics API + +Module: `trustgraph-base/trustgraph/api/metrics.py` (new) + +**Synchronous Metrics**: + +```python +class Metrics: + def __init__(self, url: str, timeout: int, token: Optional[str]): + self.url = url + self.timeout = timeout + self.token = token + + def get(self) -> str: + """Get Prometheus metrics as text""" + # Call GET /api/metrics + pass +``` + +**Asynchronous Metrics**: + +```python +class AsyncMetrics: + def __init__(self, url: str, timeout: int, token: Optional[str]): + self.url = url + self.timeout = timeout + self.token = token + + async def get(self) -> str: + """Get Prometheus metrics as text""" + # Call GET /api/metrics + pass +``` + +#### 9. Enhanced Types + +Module: `trustgraph-base/trustgraph/api/types.py` (modified) + +**New Types**: + +```python +from typing import Iterator, Optional, Union, Dict, Any +import dataclasses + +@dataclasses.dataclass +class StreamingChunk: + """Base class for streaming chunks""" + content: str + end_of_message: bool = False + +@dataclasses.dataclass +class AgentThought(StreamingChunk): + """Agent reasoning chunk""" + chunk_type: str = "thought" + +@dataclasses.dataclass +class AgentObservation(StreamingChunk): + """Agent tool observation chunk""" + chunk_type: str = "observation" + +@dataclasses.dataclass +class AgentAnswer(StreamingChunk): + """Agent final answer chunk""" + chunk_type: str = "final-answer" + end_of_dialog: bool = False + +@dataclasses.dataclass +class RAGChunk(StreamingChunk): + """RAG streaming chunk""" + end_of_stream: bool = False + error: Optional[Dict[str, str]] = None + +# Type aliases for clarity +AgentStream = Iterator[Union[AgentThought, AgentObservation, AgentAnswer]] +RAGStream = Iterator[RAGChunk] +CompletionStream = 
Iterator[str] +``` + +### Implementation Approach + +#### Phase 1: Core API Enhancement (Week 1) + +1. Add `socket()`, `bulk()`, and `metrics()` methods to `Api` class +2. Implement lazy initialization for WebSocket and bulk clients +3. Add context manager support (`__enter__`, `__exit__`) +4. Add `close()` method for cleanup +5. Add unit tests for API class enhancements +6. Verify backward compatibility + +**Backward Compatibility**: Zero breaking changes. New methods only. + +#### Phase 2: WebSocket Client (Week 2-3) + +1. Implement `SocketClient` class with connection management +2. Implement `SocketFlowInstance` with same method signatures as `FlowInstance` +3. Add request multiplexing support (up to 15 concurrent) +4. Add streaming response parsing for different chunk types +5. Add automatic reconnection logic +6. Add unit and integration tests +7. Document WebSocket usage patterns + +**Backward Compatibility**: New interface only. Zero impact on existing code. + +#### Phase 3: Streaming Support (Week 3-4) + +1. Add streaming chunk type classes (`AgentThought`, `AgentObservation`, `AgentAnswer`, `RAGChunk`) +2. Implement streaming response parsing in `SocketClient` +3. Add streaming parameter to all LLM methods in `SocketFlowInstance` +4. Handle error cases during streaming +5. Add unit and integration tests for streaming +6. Add streaming examples to documentation + +**Backward Compatibility**: New interface only. Existing REST API unchanged. + +#### Phase 4: Bulk Operations (Week 4-5) + +1. Implement `BulkClient` class +2. Add bulk import/export methods for triples, embeddings, contexts, objects +3. 
Implement iterator-based processing for constant memory +4. Add progress tracking (optional callback) +5. Add error handling with partial success reporting +6. Add unit and integration tests +7. Add bulk operation examples + +**Backward Compatibility**: New interface only. Zero impact on existing code. + +#### Phase 5: Feature Parity & Polish (Week 5) + +1. Add `graph_embeddings_query()` to REST `FlowInstance` +2. Implement `Metrics` class +3. Add comprehensive integration tests +4. Performance benchmarking +5. Update all documentation +6. Create migration guide + +**Backward Compatibility**: New methods only. Zero impact on existing code. + +### Data Models + +#### Interface Selection + +```python +# Single API instance, same URL for all interfaces +api = Api(url="http://localhost:8088/") + +# Synchronous interfaces +rest_flow = api.flow().id("default") # Sync REST +socket_flow = api.socket().flow("default") # Sync WebSocket +bulk = api.bulk() # Sync bulk operations +metrics = api.metrics() # Sync metrics + +# Asynchronous interfaces +async_rest_flow = api.async_flow().id("default") # Async REST +async_socket_flow = api.async_socket().flow("default") # Async WebSocket +async_bulk = api.async_bulk() # Async bulk operations +async_metrics = api.async_metrics() # Async metrics +``` + +#### Streaming Response Types + +**Agent Streaming**: + +```python +api = Api(url="http://localhost:8088/") + +# REST interface - non-streaming (existing) +rest_flow = api.flow().id("default") +response = rest_flow.agent(question="What is ML?", user="user123") +print(response["response"]) + +# WebSocket interface - non-streaming (same signature) +socket_flow = api.socket().flow("default") +response = socket_flow.agent(question="What is ML?", user="user123") +print(response["response"]) + +# WebSocket interface - streaming (new) +for chunk in socket_flow.agent(question="What is ML?", user="user123", streaming=True): + if isinstance(chunk, AgentThought): + print(f"Thinking: 
{chunk.content}") + elif isinstance(chunk, AgentObservation): + print(f"Observed: {chunk.content}") + elif isinstance(chunk, AgentAnswer): + print(f"Answer: {chunk.content}") + if chunk.end_of_dialog: + break +``` + +**RAG Streaming**: + +```python +api = Api(url="http://localhost:8088/") + +# REST interface - non-streaming (existing) +rest_flow = api.flow().id("default") +response = rest_flow.graph_rag(question="What is Python?", user="user123", collection="default") +print(response) + +# WebSocket interface - streaming (new) +socket_flow = api.socket().flow("default") +for chunk in socket_flow.graph_rag( + question="What is Python?", + user="user123", + collection="default", + streaming=True +): + print(chunk.content, end="", flush=True) + if chunk.end_of_stream: + break +``` + +**Bulk Operations (Synchronous)**: + +```python +api = Api(url="http://localhost:8088/") + +# Bulk import triples +def triple_generator(): + yield Triple(s="http://ex.com/alice", p="http://ex.com/type", o="Person") + yield Triple(s="http://ex.com/alice", p="http://ex.com/name", o="Alice") + yield Triple(s="http://ex.com/bob", p="http://ex.com/type", o="Person") + +bulk = api.bulk() +bulk.import_triples(flow="default", triples=triple_generator()) + +# Bulk export triples +for triple in bulk.export_triples(flow="default"): + print(f"{triple.s} -> {triple.p} -> {triple.o}") +``` + +**Bulk Operations (Asynchronous)**: + +```python +import asyncio + +async def main(): + api = Api(url="http://localhost:8088/") + + # Async bulk import triples + async def async_triple_generator(): + yield Triple(s="http://ex.com/alice", p="http://ex.com/type", o="Person") + yield Triple(s="http://ex.com/alice", p="http://ex.com/name", o="Alice") + yield Triple(s="http://ex.com/bob", p="http://ex.com/type", o="Person") + + bulk = api.async_bulk() + await bulk.import_triples(flow="default", triples=async_triple_generator()) + + # Async bulk export triples + async for triple in bulk.export_triples(flow="default"): + 
print(f"{triple.s} -> {triple.p} -> {triple.o}") + +asyncio.run(main()) +``` + +**Async REST Example**: + +```python +import asyncio + +async def main(): + api = Api(url="http://localhost:8088/") + + # Async REST flow operations + flow = api.async_flow().id("default") + response = await flow.agent(question="What is ML?", user="user123") + print(response["response"]) + +asyncio.run(main()) +``` + +**Async WebSocket Streaming Example**: + +```python +import asyncio + +async def main(): + api = Api(url="http://localhost:8088/") + + # Async WebSocket streaming + socket = api.async_socket() + flow = socket.flow("default") + + async for chunk in flow.agent(question="What is ML?", user="user123", streaming=True): + if isinstance(chunk, AgentAnswer): + print(chunk.content, end="", flush=True) + if chunk.end_of_dialog: + break + +asyncio.run(main()) +``` + +### APIs + +#### New APIs + +1. **Core API Class**: + - **Synchronous**: + - `Api.socket()` - Get synchronous WebSocket client + - `Api.bulk()` - Get synchronous bulk operations client + - `Api.metrics()` - Get synchronous metrics client + - `Api.close()` - Close all synchronous connections + - Context manager support (`__enter__`, `__exit__`) + - **Asynchronous**: + - `Api.async_flow()` - Get asynchronous REST flow client + - `Api.async_socket()` - Get asynchronous WebSocket client + - `Api.async_bulk()` - Get asynchronous bulk operations client + - `Api.async_metrics()` - Get asynchronous metrics client + - `Api.aclose()` - Close all asynchronous connections + - Async context manager support (`__aenter__`, `__aexit__`) + +2. 
**Synchronous WebSocket Client**: + - `SocketClient.flow(flow_id)` - Get WebSocket flow instance + - `SocketFlowInstance.agent(..., streaming: bool = False)` - Agent with optional streaming + - `SocketFlowInstance.text_completion(..., streaming: bool = False)` - Text completion with optional streaming + - `SocketFlowInstance.graph_rag(..., streaming: bool = False)` - Graph RAG with optional streaming + - `SocketFlowInstance.document_rag(..., streaming: bool = False)` - Document RAG with optional streaming + - `SocketFlowInstance.prompt(..., streaming: bool = False)` - Prompt with optional streaming + - `SocketFlowInstance.graph_embeddings_query()` - Graph embeddings query + - All other FlowInstance methods with identical signatures + +3. **Asynchronous WebSocket Client**: + - `AsyncSocketClient.flow(flow_id)` - Get async WebSocket flow instance + - `AsyncSocketFlowInstance.agent(..., streaming: bool = False)` - Async agent with optional streaming + - `AsyncSocketFlowInstance.text_completion(..., streaming: bool = False)` - Async text completion with optional streaming + - `AsyncSocketFlowInstance.graph_rag(..., streaming: bool = False)` - Async graph RAG with optional streaming + - `AsyncSocketFlowInstance.document_rag(..., streaming: bool = False)` - Async document RAG with optional streaming + - `AsyncSocketFlowInstance.prompt(..., streaming: bool = False)` - Async prompt with optional streaming + - `AsyncSocketFlowInstance.graph_embeddings_query()` - Async graph embeddings query + - All other FlowInstance methods as async versions + +4. 
**Synchronous Bulk Operations Client**: + - `BulkClient.import_triples(flow, triples)` - Bulk triple import + - `BulkClient.export_triples(flow)` - Bulk triple export + - `BulkClient.import_graph_embeddings(flow, embeddings)` - Bulk graph embeddings import + - `BulkClient.export_graph_embeddings(flow)` - Bulk graph embeddings export + - `BulkClient.import_document_embeddings(flow, embeddings)` - Bulk document embeddings import + - `BulkClient.export_document_embeddings(flow)` - Bulk document embeddings export + - `BulkClient.import_entity_contexts(flow, contexts)` - Bulk entity contexts import + - `BulkClient.export_entity_contexts(flow)` - Bulk entity contexts export + - `BulkClient.import_objects(flow, objects)` - Bulk objects import + +5. **Asynchronous Bulk Operations Client**: + - `AsyncBulkClient.import_triples(flow, triples)` - Async bulk triple import + - `AsyncBulkClient.export_triples(flow)` - Async bulk triple export + - `AsyncBulkClient.import_graph_embeddings(flow, embeddings)` - Async bulk graph embeddings import + - `AsyncBulkClient.export_graph_embeddings(flow)` - Async bulk graph embeddings export + - `AsyncBulkClient.import_document_embeddings(flow, embeddings)` - Async bulk document embeddings import + - `AsyncBulkClient.export_document_embeddings(flow)` - Async bulk document embeddings export + - `AsyncBulkClient.import_entity_contexts(flow, contexts)` - Async bulk entity contexts import + - `AsyncBulkClient.export_entity_contexts(flow)` - Async bulk entity contexts export + - `AsyncBulkClient.import_objects(flow, objects)` - Async bulk objects import + +6. 
**Asynchronous REST Flow Client**: + - `AsyncFlow.list()` - Async list all flows + - `AsyncFlow.get(id)` - Async get flow definition + - `AsyncFlow.start(...)` - Async start flow + - `AsyncFlow.stop(id)` - Async stop flow + - `AsyncFlow.id(flow_id)` - Get async flow instance + - `AsyncFlowInstance.agent(...)` - Async agent execution + - `AsyncFlowInstance.text_completion(...)` - Async text completion + - `AsyncFlowInstance.graph_rag(...)` - Async graph RAG + - All other FlowInstance methods as async versions + +7. **Metrics Clients**: + - `Metrics.get()` - Synchronous Prometheus metrics + - `AsyncMetrics.get()` - Asynchronous Prometheus metrics + +8. **REST Flow API Enhancement**: + - `FlowInstance.graph_embeddings_query()` - Graph embeddings query (sync feature parity) + - `AsyncFlowInstance.graph_embeddings_query()` - Graph embeddings query (async feature parity) + +#### Modified APIs + +1. **Constructor** (minor enhancement): + ```python + Api(url: str, timeout: int = 60, token: Optional[str] = None) + ``` + - Added `token` parameter (optional, for authentication) + - If `None` (default): No authentication used + - If specified: Used as bearer token for REST (`Authorization: Bearer <token>`), query param for WebSocket (`?token=<token>`) + - No other changes - fully backward compatible + +2. 
**No Breaking Changes**: + - All existing REST API methods unchanged + - All existing signatures preserved + - All existing return types preserved + +### Implementation Details + +#### Error Handling + +**WebSocket Connection Errors**: +```python +try: + api = Api(url="http://localhost:8088/") + socket = api.socket() + socket_flow = socket.flow("default") + response = socket_flow.agent(question="...", user="user123") +except ConnectionError as e: + print(f"WebSocket connection failed: {e}") + print("Hint: Ensure Gateway is running and WebSocket endpoint is accessible") +``` + +**Graceful Fallback**: +```python +api = Api(url="http://localhost:8088/") + +try: + # Try WebSocket streaming first + socket_flow = api.socket().flow("default") + for chunk in socket_flow.agent(question="...", user="...", streaming=True): + print(chunk.content) +except ConnectionError: + # Fall back to REST non-streaming + print("WebSocket unavailable, falling back to REST") + rest_flow = api.flow().id("default") + response = rest_flow.agent(question="...", user="...") + print(response["response"]) +``` + +**Partial Streaming Errors**: +```python +api = Api(url="http://localhost:8088/") +socket_flow = api.socket().flow("default") + +accumulated = [] +try: + for chunk in socket_flow.graph_rag(question="...", streaming=True): + accumulated.append(chunk.content) + if chunk.error: + print(f"Error occurred: {chunk.error}") + print(f"Partial response: {''.join(accumulated)}") + break +except Exception as e: + print(f"Streaming error: {e}") + print(f"Partial response: {''.join(accumulated)}") +``` + +#### Resource Management + +**Context Manager Support**: +```python +# Automatic cleanup +with Api(url="http://localhost:8088/") as api: + socket_flow = api.socket().flow("default") + response = socket_flow.agent(question="...", user="user123") +# All connections automatically closed + +# Manual cleanup +api = Api(url="http://localhost:8088/") +try: + socket_flow = api.socket().flow("default") + 
response = socket_flow.agent(question="...", user="user123") +finally: + api.close() # Explicitly close all connections (WebSocket, bulk, etc.) +``` + +#### Threading and Concurrency + +**Thread Safety**: +- Each `Api` instance maintains its own connection +- WebSocket transport uses locks for thread-safe request multiplexing +- Multiple threads can share an `Api` instance safely +- Streaming iterators are not thread-safe (consume from single thread) + +**Async Support** (future consideration): +```python +# Phase 2 enhancement (not in initial scope) +import asyncio + +async def main(): + api = await AsyncApi(url="ws://localhost:8088/") + flow = api.flow().id("default") + + async for chunk in flow.agent(question="...", streaming=True): + print(chunk.content) + + await api.close() + +asyncio.run(main()) +``` + +## Security Considerations + +### Authentication + +**Token Parameter**: +```python +# No authentication (default) +api = Api(url="http://localhost:8088/") + +# With authentication +api = Api(url="http://localhost:8088/", token="mytoken") +``` + +**REST Transport**: +- Bearer token via `Authorization` header +- Applied automatically to all REST requests +- Format: `Authorization: Bearer <token>` + +**WebSocket Transport**: +- Token via query parameter appended to WebSocket URL +- Applied automatically during connection establishment +- Format: `ws://localhost:8088/api/v1/socket?token=<token>` + +**Implementation**: +```python +class SocketClient: + def _connect(self) -> WebSocket: + # Construct WebSocket URL with optional token + ws_url = f"{self.url}/api/v1/socket" + if self.token: + ws_url = f"{ws_url}?token={self.token}" + # Connect to WebSocket + return websocket.connect(ws_url) +``` + +**Example**: +```python +# REST with auth +api = Api(url="http://localhost:8088/", token="mytoken") +flow = api.flow().id("default") +# All REST calls include: Authorization: Bearer mytoken + +# WebSocket with auth +socket = api.socket() +# Connects to: 
ws://localhost:8088/api/v1/socket?token=mytoken +``` + +### Secure Communication + +- Support both WS (WebSocket) and WSS (WebSocket Secure) schemes +- TLS certificate validation for WSS connections +- Optional certificate verification disable for development (with warning) + +### Input Validation + +- Validate URL schemes (http, https, ws, wss) +- Validate transport parameter values +- Validate streaming parameter combinations +- Validate bulk import data types + +## Performance Considerations + +### Latency Improvements + +**Streaming LLM Operations**: +- **Time-to-first-token**: ~500ms (vs ~30s non-streaming) +- **Improvement**: 60x faster perceived performance +- **Applicable to**: Agent, Graph RAG, Document RAG, Text Completion, Prompt + +**Persistent Connections**: +- **Connection overhead**: Eliminated for subsequent requests +- **WebSocket handshake**: One-time cost (~100ms) +- **Applicable to**: All operations when using WebSocket transport + +### Throughput Improvements + +**Bulk Operations**: +- **Triples import**: ~10,000 triples/second (vs ~100/second with REST per-item) +- **Embeddings import**: ~5,000 embeddings/second (vs ~50/second with REST per-item) +- **Improvement**: 100x throughput for bulk operations + +**Request Multiplexing**: +- **Concurrent requests**: Up to 15 simultaneous requests over single connection +- **Connection reuse**: No connection overhead for concurrent operations + +### Memory Considerations + +**Streaming Responses**: +- Constant memory usage (process chunks as they arrive) +- No buffering of complete response +- Suitable for very long outputs (>1MB) + +**Bulk Operations**: +- Iterator-based processing (constant memory) +- No loading of entire dataset into memory +- Suitable for datasets with millions of items + +### Benchmarks (Expected) + +| Operation | REST (existing) | WebSocket (streaming) | Improvement | +|-----------|----------------|----------------------|-------------| +| Agent (time-to-first-token) | 30s | 0.5s | 
60x | +| Graph RAG (time-to-first-token) | 25s | 0.5s | 50x | +| Import 10K triples | 100s | 1s | 100x | +| Import 1M triples | 10,000s (2.7h) | 100s (1.6m) | 100x | +| 10 concurrent small requests | 5s (sequential) | 0.5s (parallel) | 10x | + +## Testing Strategy + +### Unit Tests + +**Transport Layer** (`test_transport.py`): +- Test REST transport request/response +- Test WebSocket transport connection +- Test WebSocket transport reconnection +- Test request multiplexing +- Test streaming response parsing +- Mock WebSocket server for deterministic tests + +**API Methods** (`test_flow.py`, `test_library.py`, etc.): +- Test new methods with mocked transport +- Test streaming parameter handling +- Test bulk operation iterators +- Test error handling + +**Types** (`test_types.py`): +- Test new streaming chunk types +- Test type serialization/deserialization + +### Integration Tests + +**End-to-End REST** (`test_integration_rest.py`): +- Test all operations against real Gateway (REST mode) +- Verify backward compatibility +- Test error conditions + +**End-to-End WebSocket** (`test_integration_websocket.py`): +- Test all operations against real Gateway (WebSocket mode) +- Test streaming operations +- Test bulk operations +- Test concurrent requests +- Test connection recovery + +**Streaming Services** (`test_streaming_integration.py`): +- Test agent streaming (thoughts, observations, answers) +- Test RAG streaming (incremental chunks) +- Test text completion streaming (token-by-token) +- Test prompt streaming +- Test error handling during streaming + +**Bulk Operations** (`test_bulk_integration.py`): +- Test bulk import/export of triples (1K, 10K, 100K items) +- Test bulk import/export of embeddings +- Test memory usage during bulk operations +- Test progress tracking + +### Performance Tests + +**Latency Benchmarks** (`test_performance_latency.py`): +- Measure time-to-first-token (streaming vs non-streaming) +- Measure connection overhead (REST vs WebSocket) +- 
Compare against expected benchmarks + +**Throughput Benchmarks** (`test_performance_throughput.py`): +- Measure bulk import throughput +- Measure request multiplexing efficiency +- Compare against expected benchmarks + +### Compatibility Tests + +**Backward Compatibility** (`test_backward_compatibility.py`): +- Run existing test suite against refactored API +- Verify zero breaking changes +- Test migration path for common patterns + +## Migration Plan + +### Phase 1: Transparent Migration (Default) + +**No code changes required**. Existing code continues to work: + +```python +# Existing code works unchanged +api = Api(url="http://localhost:8088/") +flow = api.flow().id("default") +response = flow.agent(question="What is ML?", user="user123") +``` + +### Phase 2: Opt-in Streaming (Simple) + +**Use `api.socket()` interface** to enable streaming: + +```python +# Before: Non-streaming REST +api = Api(url="http://localhost:8088/") +rest_flow = api.flow().id("default") +response = rest_flow.agent(question="What is ML?", user="user123") +print(response["response"]) + +# After: Streaming WebSocket (same parameters!) 
+api = Api(url="http://localhost:8088/") # Same URL +socket_flow = api.socket().flow("default") + +for chunk in socket_flow.agent(question="What is ML?", user="user123", streaming=True): + if isinstance(chunk, AgentAnswer): + print(chunk.content, end="", flush=True) +``` + +**Key Points**: +- Same URL for both REST and WebSocket +- Same method signatures (easy migration) +- Just add `.socket()` and `streaming=True` + +### Phase 3: Bulk Operations (New Capability) + +**Use `api.bulk()` interface** for large datasets: + +```python +# Before: Inefficient per-item operations +api = Api(url="http://localhost:8088/") +flow = api.flow().id("default") + +for triple in my_large_triple_list: + # Slow per-item operations + # (no direct bulk insert in REST API) + pass + +# After: Efficient bulk loading +api = Api(url="http://localhost:8088/") # Same URL +bulk = api.bulk() + +# This is fast (10,000 triples/second) +bulk.import_triples(flow="default", triples=iter(my_large_triple_list)) +``` + +### Documentation Updates + +1. **README.md**: Add streaming and WebSocket examples +2. **API Reference**: Document all new methods and parameters +3. **Migration Guide**: Step-by-step guide for enabling streaming +4. **Examples**: Add example scripts for common patterns +5. **Performance Guide**: Document expected performance improvements + +### Deprecation Policy + +**No deprecations**. All existing APIs remain supported. This is a pure enhancement. 
+ +## Timeline + +### Week 1: Foundation +- Transport abstraction layer +- Refactor existing REST code +- Unit tests for transport layer +- Backward compatibility verification + +### Week 2: WebSocket Transport +- WebSocket transport implementation +- Connection management and reconnection +- Request multiplexing +- Unit and integration tests + +### Week 3: Streaming Support +- Add streaming parameter to LLM methods +- Implement streaming response parsing +- Add streaming chunk types +- Streaming integration tests + +### Week 4: Bulk Operations +- Add bulk import/export methods +- Implement iterator-based operations +- Performance testing +- Bulk operation integration tests + +### Week 5: Feature Parity & Documentation +- Add graph embeddings query +- Add metrics API +- Comprehensive documentation +- Migration guide +- Release candidate + +### Week 6: Release +- Final integration testing +- Performance benchmarking +- Release documentation +- Community announcement + +**Total Duration**: 6 weeks + +## Open Questions + +### API Design Questions + +1. **Async Support**: ✅ **RESOLVED** - Full async support included in initial release + - All interfaces have async variants: `async_flow()`, `async_socket()`, `async_bulk()`, `async_metrics()` + - Provides complete symmetry between sync and async APIs + - Essential for modern async frameworks (FastAPI, aiohttp) + +2. **Progress Tracking**: Should bulk operations support progress callbacks? + ```python + def progress_callback(processed: int, total: Optional[int]): + print(f"Processed {processed} items") + + bulk.import_triples(flow="default", triples=triples, on_progress=progress_callback) + ``` + - **Recommendation**: Add in Phase 2. Not critical for initial release. + +3. **Streaming Timeout**: How should we handle timeouts for streaming operations? + - **Recommendation**: Use same timeout as non-streaming, but reset on each chunk received. + +4. **Chunk Buffering**: Should we buffer chunks or yield immediately? 
+ - **Recommendation**: Yield immediately for lowest latency. + +5. **Global Services via WebSocket**: Should `api.socket()` support global services (library, knowledge, collection, config) or only flow-scoped services? + - **Recommendation**: Start with flow-scoped only (where streaming matters). Add global services if needed in Phase 2. + +### Implementation Questions + +1. **WebSocket Library**: Should we use `websockets`, `websocket-client`, or `aiohttp`? + - **Recommendation**: `websockets` (async, mature, well-maintained). Wrap in sync interface using `asyncio.run()`. + +2. **Connection Pooling**: Should we support multiple concurrent `Api` instances sharing a connection pool? + - **Recommendation**: Defer to Phase 2. Each `Api` instance has its own connections initially. + +3. **Connection Reuse**: Should `SocketClient` and `BulkClient` share the same WebSocket connection, or use separate connections? + - **Recommendation**: Separate connections. Simpler implementation, clearer separation of concerns. + +4. **Lazy vs Eager Connection**: Should WebSocket connection be established in `api.socket()` or on first request? + - **Recommendation**: Lazy (on first request). Avoids connection overhead if user only uses REST methods. + +### Testing Questions + +1. **Mock Gateway**: Should we create a lightweight mock Gateway for testing, or test against real Gateway? + - **Recommendation**: Both. Use mocks for unit tests, real Gateway for integration tests. + +2. **Performance Regression Tests**: Should we add automated performance regression testing to CI? + - **Recommendation**: Yes, but with generous thresholds to account for CI environment variability. 
+ +## References + +### Related Tech Specs +- `docs/tech-specs/streaming-llm-responses.md` - Streaming implementation in Gateway +- `docs/tech-specs/rag-streaming-support.md` - RAG streaming support + +### Implementation Files +- `trustgraph-base/trustgraph/api/` - Python API source +- `trustgraph-flow/trustgraph/gateway/` - Gateway source +- `trustgraph-flow/trustgraph/gateway/dispatch/mux.py` - WebSocket multiplexer reference implementation + +### Documentation +- `docs/apiSpecification.md` - Complete API reference +- `docs/api-status-summary.md` - API status summary +- `README.websocket` - WebSocket protocol documentation +- `STREAMING-IMPLEMENTATION-NOTES.txt` - Streaming implementation notes + +### External Libraries +- `websockets` - Python WebSocket library (https://websockets.readthedocs.io/) +- `requests` - Python HTTP library (existing) diff --git a/tests/unit/test_python_api_client.py b/tests/unit/test_python_api_client.py new file mode 100644 index 00000000..f86ae3da --- /dev/null +++ b/tests/unit/test_python_api_client.py @@ -0,0 +1,446 @@ +""" +Unit tests for TrustGraph Python API client library + +These tests use mocks and do not require a running server. 
+""" + +import pytest +from unittest.mock import Mock, patch, MagicMock, call +import json + +from trustgraph.api import ( + Api, + Triple, + AgentThought, + AgentObservation, + AgentAnswer, + RAGChunk, +) + + +class TestApiInstantiation: + """Test Api class instantiation and configuration""" + + def test_api_instantiation_defaults(self): + """Test Api with default parameters""" + api = Api() + assert api.url == "http://localhost:8088/api/v1/" + assert api.timeout == 60 + assert api.token is None + + def test_api_instantiation_with_url(self): + """Test Api with custom URL""" + api = Api(url="http://test-server:9000/") + assert api.url == "http://test-server:9000/api/v1/" + + def test_api_instantiation_with_url_trailing_slash(self): + """Test Api adds trailing slash if missing""" + api = Api(url="http://test-server:9000") + assert api.url == "http://test-server:9000/api/v1/" + + def test_api_instantiation_with_token(self): + """Test Api with authentication token""" + api = Api(token="test-token-123") + assert api.token == "test-token-123" + + def test_api_instantiation_with_timeout(self): + """Test Api with custom timeout""" + api = Api(timeout=120) + assert api.timeout == 120 + + +class TestApiLazyInitialization: + """Test lazy initialization of client components""" + + def test_socket_client_lazy_init(self): + """Test socket client is created on first access""" + api = Api(url="http://test/", token="token") + + assert api._socket_client is None + socket = api.socket() + assert api._socket_client is not None + assert socket is api._socket_client + + # Second access returns same instance + socket2 = api.socket() + assert socket2 is socket + + def test_bulk_client_lazy_init(self): + """Test bulk client is created on first access""" + api = Api(url="http://test/") + + assert api._bulk_client is None + bulk = api.bulk() + assert api._bulk_client is not None + + def test_async_flow_lazy_init(self): + """Test async flow is created on first access""" + api = 
Api(url="http://test/") + + assert api._async_flow is None + async_flow = api.async_flow() + assert api._async_flow is not None + + def test_metrics_lazy_init(self): + """Test metrics client is created on first access""" + api = Api(url="http://test/") + + assert api._metrics is None + metrics = api.metrics() + assert api._metrics is not None + + +class TestApiContextManager: + """Test context manager functionality""" + + def test_sync_context_manager(self): + """Test synchronous context manager""" + with Api(url="http://test/") as api: + assert api is not None + assert isinstance(api, Api) + # Should exit cleanly + + @pytest.mark.asyncio + async def test_async_context_manager(self): + """Test asynchronous context manager""" + async with Api(url="http://test/") as api: + assert api is not None + assert isinstance(api, Api) + # Should exit cleanly + + +class TestFlowClient: + """Test Flow client functionality""" + + @patch('requests.post') + def test_flow_list(self, mock_post): + """Test listing flows""" + mock_post.return_value.status_code = 200 + mock_post.return_value.json.return_value = {"flow-ids": ["flow1", "flow2"]} + + api = Api(url="http://test/") + flows = api.flow().list() + + assert flows == ["flow1", "flow2"] + assert mock_post.called + + @patch('requests.post') + def test_flow_list_with_token(self, mock_post): + """Test flow listing includes auth token""" + mock_post.return_value.status_code = 200 + mock_post.return_value.json.return_value = {"flow-ids": []} + + api = Api(url="http://test/", token="my-token") + api.flow().list() + + # Verify Authorization header was set + call_args = mock_post.call_args + headers = call_args[1]['headers'] if 'headers' in call_args[1] else {} + assert 'Authorization' in headers + assert headers['Authorization'] == 'Bearer my-token' + + @patch('requests.post') + def test_flow_get(self, mock_post): + """Test getting flow definition""" + flow_def = {"name": "test-flow", "description": "Test"} + 
mock_post.return_value.status_code = 200 + mock_post.return_value.json.return_value = {"flow": json.dumps(flow_def)} + + api = Api(url="http://test/") + result = api.flow().get("test-flow") + + assert result == flow_def + + def test_flow_instance_creation(self): + """Test creating flow instance""" + api = Api(url="http://test/") + flow_instance = api.flow().id("my-flow") + + assert flow_instance is not None + assert flow_instance.id == "my-flow" + + def test_flow_instance_has_methods(self): + """Test flow instance has expected methods""" + api = Api(url="http://test/") + flow_instance = api.flow().id("my-flow") + + expected_methods = [ + 'text_completion', 'agent', 'graph_rag', 'document_rag', + 'graph_embeddings_query', 'embeddings', 'prompt', + 'triples_query', 'objects_query' + ] + + for method in expected_methods: + assert hasattr(flow_instance, method), f"Missing method: {method}" + + +class TestSocketClient: + """Test WebSocket client functionality""" + + def test_socket_client_url_conversion_http(self): + """Test HTTP URL converted to WebSocket""" + api = Api(url="http://test-server:8088/") + socket = api.socket() + + assert socket.url.startswith("ws://") + assert "test-server" in socket.url + + def test_socket_client_url_conversion_https(self): + """Test HTTPS URL converted to secure WebSocket""" + api = Api(url="https://test-server:8088/") + socket = api.socket() + + assert socket.url.startswith("wss://") + + def test_socket_client_token_passed(self): + """Test token is passed to socket client""" + api = Api(url="http://test/", token="socket-token") + socket = api.socket() + + assert socket.token == "socket-token" + + def test_socket_flow_instance(self): + """Test creating socket flow instance""" + api = Api(url="http://test/") + socket = api.socket() + flow_instance = socket.flow("test-flow") + + assert flow_instance is not None + assert flow_instance.flow_id == "test-flow" + + def test_socket_flow_has_methods(self): + """Test socket flow instance has 
expected methods""" + api = Api(url="http://test/") + flow_instance = api.socket().flow("test-flow") + + expected_methods = [ + 'agent', 'text_completion', 'graph_rag', 'document_rag', + 'prompt', 'graph_embeddings_query', 'embeddings', + 'triples_query', 'objects_query', 'mcp_tool' + ] + + for method in expected_methods: + assert hasattr(flow_instance, method), f"Missing method: {method}" + + +class TestBulkClient: + """Test bulk operations client""" + + def test_bulk_client_url_conversion(self): + """Test bulk client uses WebSocket URL""" + api = Api(url="http://test/") + bulk = api.bulk() + + assert bulk.url.startswith("ws://") + + def test_bulk_client_has_import_methods(self): + """Test bulk client has import methods""" + api = Api(url="http://test/") + bulk = api.bulk() + + import_methods = [ + 'import_triples', + 'import_graph_embeddings', + 'import_document_embeddings', + 'import_entity_contexts', + 'import_objects' + ] + + for method in import_methods: + assert hasattr(bulk, method), f"Missing method: {method}" + + def test_bulk_client_has_export_methods(self): + """Test bulk client has export methods""" + api = Api(url="http://test/") + bulk = api.bulk() + + export_methods = [ + 'export_triples', + 'export_graph_embeddings', + 'export_document_embeddings', + 'export_entity_contexts' + ] + + for method in export_methods: + assert hasattr(bulk, method), f"Missing method: {method}" + + +class TestMetricsClient: + """Test metrics client""" + + @patch('requests.get') + def test_metrics_get(self, mock_get): + """Test getting metrics""" + mock_get.return_value.status_code = 200 + mock_get.return_value.text = "# HELP metric_name\nmetric_name 42" + + api = Api(url="http://test/") + metrics_text = api.metrics().get() + + assert "metric_name" in metrics_text + assert mock_get.called + + @patch('requests.get') + def test_metrics_with_token(self, mock_get): + """Test metrics request includes token""" + mock_get.return_value.status_code = 200 + 
mock_get.return_value.text = "metrics" + + api = Api(url="http://test/", token="metrics-token") + api.metrics().get() + + # Verify token in headers + call_args = mock_get.call_args + headers = call_args[1].get('headers', {}) + assert 'Authorization' in headers + + +class TestStreamingTypes: + """Test streaming chunk types""" + + def test_agent_thought_creation(self): + """Test creating AgentThought chunk""" + chunk = AgentThought(content="thinking...", end_of_message=False) + + assert chunk.content == "thinking..." + assert chunk.end_of_message is False + assert chunk.chunk_type == "thought" + + def test_agent_observation_creation(self): + """Test creating AgentObservation chunk""" + chunk = AgentObservation(content="observing...", end_of_message=False) + + assert chunk.content == "observing..." + assert chunk.chunk_type == "observation" + + def test_agent_answer_creation(self): + """Test creating AgentAnswer chunk""" + chunk = AgentAnswer( + content="answer", + end_of_message=True, + end_of_dialog=True + ) + + assert chunk.content == "answer" + assert chunk.end_of_message is True + assert chunk.end_of_dialog is True + assert chunk.chunk_type == "final-answer" + + def test_rag_chunk_creation(self): + """Test creating RAGChunk""" + chunk = RAGChunk( + content="response chunk", + end_of_stream=False, + error=None + ) + + assert chunk.content == "response chunk" + assert chunk.end_of_stream is False + assert chunk.error is None + + def test_rag_chunk_with_error(self): + """Test RAGChunk with error""" + error_dict = {"type": "error", "message": "failed"} + chunk = RAGChunk( + content="", + end_of_stream=True, + error=error_dict + ) + + assert chunk.error == error_dict + + +class TestTripleType: + """Test Triple data structure""" + + def test_triple_creation(self): + """Test creating Triple""" + triple = Triple(s="subject", p="predicate", o="object") + + assert triple.s == "subject" + assert triple.p == "predicate" + assert triple.o == "object" + + def 
test_triple_with_uris(self): + """Test Triple with URI values""" + triple = Triple( + s="http://example.org/entity1", + p="http://example.org/relation", + o="http://example.org/entity2" + ) + + assert triple.s.startswith("http://") + assert triple.p.startswith("http://") + assert triple.o.startswith("http://") + + +class TestAsyncClients: + """Test async client availability""" + + def test_async_flow_creation(self): + """Test creating async flow client""" + api = Api(url="http://test/") + async_flow = api.async_flow() + + assert async_flow is not None + + def test_async_socket_creation(self): + """Test creating async socket client""" + api = Api(url="http://test/") + async_socket = api.async_socket() + + assert async_socket is not None + assert async_socket.url.startswith("ws://") + + def test_async_bulk_creation(self): + """Test creating async bulk client""" + api = Api(url="http://test/") + async_bulk = api.async_bulk() + + assert async_bulk is not None + + def test_async_metrics_creation(self): + """Test creating async metrics client""" + api = Api(url="http://test/") + async_metrics = api.async_metrics() + + assert async_metrics is not None + + +class TestErrorHandling: + """Test error handling""" + + @patch('requests.post') + def test_protocol_exception_on_non_200(self, mock_post): + """Test ProtocolException raised on non-200 status""" + from trustgraph.api.exceptions import ProtocolException + + mock_post.return_value.status_code = 500 + + api = Api(url="http://test/") + + with pytest.raises(ProtocolException): + api.flow().list() + + @patch('requests.post') + def test_application_exception_on_error_response(self, mock_post): + """Test ApplicationException on error in response""" + from trustgraph.api.exceptions import ApplicationException + + mock_post.return_value.status_code = 200 + mock_post.return_value.json.return_value = { + "error": { + "type": "ValidationError", + "message": "Invalid input" + } + } + + api = Api(url="http://test/") + + with 
pytest.raises(ApplicationException): + api.flow().list() + + +# Run tests with: pytest tests/unit/test_python_api_client.py -v +if __name__ == "__main__": + pytest.main([__file__, "-v"]) diff --git a/trustgraph-base/trustgraph/api/__init__.py b/trustgraph-base/trustgraph/api/__init__.py index daa6a964..7a3fc86d 100644 --- a/trustgraph-base/trustgraph/api/__init__.py +++ b/trustgraph-base/trustgraph/api/__init__.py @@ -1,3 +1,80 @@ -from . api import * +# Core API +from .api import Api + +# Flow clients +from .flow import Flow, FlowInstance +from .async_flow import AsyncFlow, AsyncFlowInstance + +# WebSocket clients +from .socket_client import SocketClient, SocketFlowInstance +from .async_socket_client import AsyncSocketClient, AsyncSocketFlowInstance + +# Bulk operation clients +from .bulk_client import BulkClient +from .async_bulk_client import AsyncBulkClient + +# Metrics clients +from .metrics import Metrics +from .async_metrics import AsyncMetrics + +# Types +from .types import ( + Triple, + ConfigKey, + ConfigValue, + DocumentMetadata, + ProcessingMetadata, + CollectionMetadata, + StreamingChunk, + AgentThought, + AgentObservation, + AgentAnswer, + RAGChunk, +) + +# Exceptions +from .exceptions import ProtocolException, ApplicationException + +__all__ = [ + # Core API + "Api", + + # Flow clients + "Flow", + "FlowInstance", + "AsyncFlow", + "AsyncFlowInstance", + + # WebSocket clients + "SocketClient", + "SocketFlowInstance", + "AsyncSocketClient", + "AsyncSocketFlowInstance", + + # Bulk operation clients + "BulkClient", + "AsyncBulkClient", + + # Metrics clients + "Metrics", + "AsyncMetrics", + + # Types + "Triple", + "ConfigKey", + "ConfigValue", + "DocumentMetadata", + "ProcessingMetadata", + "CollectionMetadata", + "StreamingChunk", + "AgentThought", + "AgentObservation", + "AgentAnswer", + "RAGChunk", + + # Exceptions + "ProtocolException", + "ApplicationException", +] diff --git a/trustgraph-base/trustgraph/api/api.py 
b/trustgraph-base/trustgraph/api/api.py index b0bae8ce..d1f07513 100644 --- a/trustgraph-base/trustgraph/api/api.py +++ b/trustgraph-base/trustgraph/api/api.py @@ -3,6 +3,7 @@ import requests import json import base64 import time +from typing import Optional from . library import Library from . flow import Flow @@ -26,7 +27,7 @@ def check_error(response): class Api: - def __init__(self, url="http://localhost:8088/", timeout=60): + def __init__(self, url="http://localhost:8088/", timeout=60, token: Optional[str] = None): self.url = url @@ -36,6 +37,16 @@ class Api: self.url += "api/v1/" self.timeout = timeout + self.token = token + + # Lazy initialization for new clients + self._socket_client = None + self._bulk_client = None + self._async_flow = None + self._async_socket_client = None + self._async_bulk_client = None + self._metrics = None + self._async_metrics = None def flow(self): return Flow(api=self) @@ -50,8 +61,12 @@ class Api: url = f"{self.url}{path}" + headers = {} + if self.token: + headers["Authorization"] = f"Bearer {self.token}" + # Invoke the API, input is passed as JSON - resp = requests.post(url, json=request, timeout=self.timeout) + resp = requests.post(url, json=request, timeout=self.timeout, headers=headers) # Should be a 200 status code if resp.status_code != 200: @@ -72,3 +87,96 @@ class Api: def collection(self): return Collection(self) + + # New synchronous methods + def socket(self): + """Synchronous WebSocket-based interface for streaming operations""" + if self._socket_client is None: + from . socket_client import SocketClient + # Extract base URL (remove api/v1/ suffix) + base_url = self.url.rsplit("api/v1/", 1)[0].rstrip("/") + self._socket_client = SocketClient(base_url, self.timeout, self.token) + return self._socket_client + + def bulk(self): + """Synchronous bulk operations interface for import/export""" + if self._bulk_client is None: + from . 
bulk_client import BulkClient + # Extract base URL (remove api/v1/ suffix) + base_url = self.url.rsplit("api/v1/", 1)[0].rstrip("/") + self._bulk_client = BulkClient(base_url, self.timeout, self.token) + return self._bulk_client + + def metrics(self): + """Synchronous metrics interface""" + if self._metrics is None: + from . metrics import Metrics + # Extract base URL (remove api/v1/ suffix) + base_url = self.url.rsplit("api/v1/", 1)[0].rstrip("/") + self._metrics = Metrics(base_url, self.timeout, self.token) + return self._metrics + + # New asynchronous methods + def async_flow(self): + """Asynchronous REST-based flow interface""" + if self._async_flow is None: + from . async_flow import AsyncFlow + self._async_flow = AsyncFlow(self.url, self.timeout, self.token) + return self._async_flow + + def async_socket(self): + """Asynchronous WebSocket-based interface for streaming operations""" + if self._async_socket_client is None: + from . async_socket_client import AsyncSocketClient + # Extract base URL (remove api/v1/ suffix) + base_url = self.url.rsplit("api/v1/", 1)[0].rstrip("/") + self._async_socket_client = AsyncSocketClient(base_url, self.timeout, self.token) + return self._async_socket_client + + def async_bulk(self): + """Asynchronous bulk operations interface for import/export""" + if self._async_bulk_client is None: + from . async_bulk_client import AsyncBulkClient + # Extract base URL (remove api/v1/ suffix) + base_url = self.url.rsplit("api/v1/", 1)[0].rstrip("/") + self._async_bulk_client = AsyncBulkClient(base_url, self.timeout, self.token) + return self._async_bulk_client + + def async_metrics(self): + """Asynchronous metrics interface""" + if self._async_metrics is None: + from . 
async_metrics import AsyncMetrics + # Extract base URL (remove api/v1/ suffix) + base_url = self.url.rsplit("api/v1/", 1)[0].rstrip("/") + self._async_metrics = AsyncMetrics(base_url, self.timeout, self.token) + return self._async_metrics + + # Resource management + def close(self): + """Close all synchronous connections""" + if self._socket_client: + self._socket_client.close() + if self._bulk_client: + self._bulk_client.close() + + async def aclose(self): + """Close all asynchronous connections""" + if self._async_socket_client: + await self._async_socket_client.aclose() + if self._async_bulk_client: + await self._async_bulk_client.aclose() + if self._async_flow: + await self._async_flow.aclose() + + # Context manager support + def __enter__(self): + return self + + def __exit__(self, *args): + self.close() + + async def __aenter__(self): + return self + + async def __aexit__(self, *args): + await self.aclose() diff --git a/trustgraph-base/trustgraph/api/async_bulk_client.py b/trustgraph-base/trustgraph/api/async_bulk_client.py new file mode 100644 index 00000000..76cb9f56 --- /dev/null +++ b/trustgraph-base/trustgraph/api/async_bulk_client.py @@ -0,0 +1,131 @@ + +import json +import websockets +from typing import Optional, AsyncIterator, Dict, Any, Iterator + +from . 
class AsyncBulkClient:
    """Asynchronous bulk import/export client.

    Streams records over WebSocket endpoints of the form
    ``/api/v1/flow/<flow>/import/<kind>`` and ``/export/<kind>``.
    Import methods consume an async iterator and send one JSON text
    frame per record; export methods yield decoded JSON objects as
    frames arrive, until the server closes the connection.
    """

    def __init__(self, url: str, timeout: int, token: Optional[str]) -> None:
        self.url: str = self._convert_to_ws_url(url)
        self.timeout: int = timeout
        self.token: Optional[str] = token

    def _convert_to_ws_url(self, url: str) -> str:
        """Convert an HTTP(S) URL to the corresponding ws(s):// URL."""
        if url.startswith("http://"):
            return url.replace("http://", "ws://", 1)
        elif url.startswith("https://"):
            return url.replace("https://", "wss://", 1)
        elif url.startswith("ws://") or url.startswith("wss://"):
            return url
        else:
            # No scheme given: assume plain WebSocket
            return f"ws://{url}"

    def _endpoint(self, flow: str, direction: str, kind: str) -> str:
        """Build a bulk endpoint URL, appending the auth token as a
        query parameter when one is configured.

        direction is "import" or "export"; kind is the record type
        segment (e.g. "triples", "graph-embeddings").
        """
        ws_url = f"{self.url}/api/v1/flow/{flow}/{direction}/{kind}"
        if self.token:
            ws_url = f"{ws_url}?token={self.token}"
        return ws_url

    async def _send_stream(self, ws_url: str, payloads: AsyncIterator[Dict[str, Any]]) -> None:
        """Send each payload from an async iterator as one JSON frame."""
        async with websockets.connect(
            ws_url, ping_interval=20, ping_timeout=self.timeout
        ) as websocket:
            async for payload in payloads:
                await websocket.send(json.dumps(payload))

    async def _recv_stream(self, ws_url: str) -> AsyncIterator[Dict[str, Any]]:
        """Yield one decoded JSON object per received frame."""
        async with websockets.connect(
            ws_url, ping_interval=20, ping_timeout=self.timeout
        ) as websocket:
            async for raw_message in websocket:
                yield json.loads(raw_message)

    async def import_triples(self, flow: str, triples: AsyncIterator[Triple], **kwargs: Any) -> None:
        """Bulk import triples via WebSocket."""
        async def as_dicts() -> AsyncIterator[Dict[str, Any]]:
            # Wire format is a plain {s, p, o} object per triple
            async for triple in triples:
                yield {"s": triple.s, "p": triple.p, "o": triple.o}

        await self._send_stream(self._endpoint(flow, "import", "triples"), as_dicts())

    async def export_triples(self, flow: str, **kwargs: Any) -> AsyncIterator[Triple]:
        """Bulk export triples via WebSocket."""
        async for data in self._recv_stream(self._endpoint(flow, "export", "triples")):
            yield Triple(
                s=data.get("s", ""),
                p=data.get("p", ""),
                o=data.get("o", "")
            )

    async def import_graph_embeddings(self, flow: str, embeddings: AsyncIterator[Dict[str, Any]], **kwargs: Any) -> None:
        """Bulk import graph embeddings via WebSocket."""
        await self._send_stream(
            self._endpoint(flow, "import", "graph-embeddings"), embeddings
        )

    async def export_graph_embeddings(self, flow: str, **kwargs: Any) -> AsyncIterator[Dict[str, Any]]:
        """Bulk export graph embeddings via WebSocket."""
        async for obj in self._recv_stream(
            self._endpoint(flow, "export", "graph-embeddings")
        ):
            yield obj

    async def import_document_embeddings(self, flow: str, embeddings: AsyncIterator[Dict[str, Any]], **kwargs: Any) -> None:
        """Bulk import document embeddings via WebSocket."""
        await self._send_stream(
            self._endpoint(flow, "import", "document-embeddings"), embeddings
        )

    async def export_document_embeddings(self, flow: str, **kwargs: Any) -> AsyncIterator[Dict[str, Any]]:
        """Bulk export document embeddings via WebSocket."""
        async for obj in self._recv_stream(
            self._endpoint(flow, "export", "document-embeddings")
        ):
            yield obj

    async def import_entity_contexts(self, flow: str, contexts: AsyncIterator[Dict[str, Any]], **kwargs: Any) -> None:
        """Bulk import entity contexts via WebSocket."""
        await self._send_stream(
            self._endpoint(flow, "import", "entity-contexts"), contexts
        )

    async def export_entity_contexts(self, flow: str, **kwargs: Any) -> AsyncIterator[Dict[str, Any]]:
        """Bulk export entity contexts via WebSocket."""
        async for obj in self._recv_stream(
            self._endpoint(flow, "export", "entity-contexts")
        ):
            yield obj

    async def import_objects(self, flow: str, objects: AsyncIterator[Dict[str, Any]], **kwargs: Any) -> None:
        """Bulk import objects via WebSocket."""
        await self._send_stream(
            self._endpoint(flow, "import", "objects"), objects
        )

    async def aclose(self) -> None:
        """Close connections (each operation manages its own connection)."""
        pass
def check_error(response):
    """Raise ApplicationException if the payload carries an error object.

    Gateway errors arrive as {"error": {"type": ..., "message": ...}}.
    A malformed error object is surfaced verbatim rather than dropped.
    """
    if "error" in response:
        try:
            msg = response["error"]["message"]
            tp = response["error"]["type"]
        except (KeyError, TypeError):
            # Error payload lacks the expected shape; report it as-is
            raise ApplicationException(response["error"])

        raise ApplicationException(f"{tp}: {msg}")


class AsyncFlow:
    """Asynchronous REST-based flow interface"""

    def __init__(self, url: str, timeout: int, token: Optional[str]) -> None:
        self.url: str = url
        self.timeout: int = timeout
        self.token: Optional[str] = token

    async def request(self, path: str, request_data: Dict[str, Any]) -> Dict[str, Any]:
        """POST request_data as JSON to the Gateway API and decode the reply.

        Raises ProtocolException on a non-200 status or a non-JSON body,
        and ApplicationException when the reply carries an error object.
        """
        url = f"{self.url}{path}"

        headers = {"Content-Type": "application/json"}
        if self.token:
            # Bearer token auth, matching the synchronous Api client
            headers["Authorization"] = f"Bearer {self.token}"

        timeout = aiohttp.ClientTimeout(total=self.timeout)

        async with aiohttp.ClientSession(timeout=timeout) as session:
            async with session.post(url, json=request_data, headers=headers) as resp:
                if resp.status != 200:
                    raise ProtocolException(f"Status code {resp.status}")

                try:
                    obj = await resp.json()
                except (aiohttp.ContentTypeError, ValueError):
                    raise ProtocolException("Expected JSON response")

                check_error(obj)
                return obj

    async def list(self) -> List[str]:
        """List all flows"""
        result = await self.request("flow", {"operation": "list-flows"})
        return result.get("flow-ids", [])

    async def get(self, id: str) -> Dict[str, Any]:
        """Get flow definition"""
        result = await self.request("flow", {
            "operation": "get-flow",
            "flow-id": id
        })
        # Flow definition is carried as an embedded JSON string
        return json.loads(result.get("flow", "{}"))

    async def start(self, class_name: str, id: str, description: str, parameters: Optional[Dict] = None):
        """Start a flow"""
        request_data = {
            "operation": "start-flow",
            "flow-id": id,
            "class-name": class_name,
            "description": description
        }
        if parameters:
            # Parameters travel as an embedded JSON string
            request_data["parameters"] = json.dumps(parameters)

        await self.request("flow", request_data)

    async def stop(self, id: str):
        """Stop a flow"""
        await self.request("flow", {
            "operation": "stop-flow",
            "flow-id": id
        })

    async def list_classes(self) -> List[str]:
        """List flow classes"""
        result = await self.request("flow", {"operation": "list-classes"})
        return result.get("class-names", [])

    async def get_class(self, class_name: str) -> Dict[str, Any]:
        """Get flow class definition"""
        result = await self.request("flow", {
            "operation": "get-class",
            "class-name": class_name
        })
        return json.loads(result.get("class-definition", "{}"))

    async def put_class(self, class_name: str, definition: Dict[str, Any]):
        """Create/update flow class"""
        await self.request("flow", {
            "operation": "put-class",
            "class-name": class_name,
            "class-definition": json.dumps(definition)
        })

    async def delete_class(self, class_name: str):
        """Delete flow class"""
        await self.request("flow", {
            "operation": "delete-class",
            "class-name": class_name
        })

    def id(self, flow_id: str):
        """Get async flow instance"""
        return AsyncFlowInstance(self, flow_id)

    async def aclose(self) -> None:
        """Close connection (cleanup handled by aiohttp session)"""
        pass


class AsyncFlowInstance:
    """Asynchronous REST flow instance"""

    def __init__(self, flow: AsyncFlow, flow_id: str):
        self.flow = flow
        self.flow_id = flow_id

    async def request(self, service: str, request_data: Dict[str, Any]) -> Dict[str, Any]:
        """Make request to flow-scoped service"""
        return await self.flow.request(f"flow/{self.flow_id}/service/{service}", request_data)

    async def agent(self, question: str, user: str, state: Optional[Dict] = None,
                    group: Optional[str] = None, history: Optional[List] = None, **kwargs: Any) -> Dict[str, Any]:
        """Execute agent (non-streaming, use async_socket for streaming)"""
        request_data = {
            "question": question,
            "user": user,
            "streaming": False  # REST doesn't support streaming
        }
        if state is not None:
            request_data["state"] = state
        if group is not None:
            request_data["group"] = group
        if history is not None:
            request_data["history"] = history
        request_data.update(kwargs)

        return await self.request("agent", request_data)

    async def text_completion(self, system: str, prompt: str, **kwargs: Any) -> str:
        """Text completion (non-streaming, use async_socket for streaming)"""
        request_data = {
            "system": system,
            "prompt": prompt,
            "streaming": False
        }
        request_data.update(kwargs)

        result = await self.request("text-completion", request_data)
        return result.get("response", "")

    async def graph_rag(self, question: str, user: str, collection: str,
                        max_subgraph_size: int = 1000, max_subgraph_count: int = 5,
                        max_entity_distance: int = 3, **kwargs: Any) -> str:
        """Graph RAG (non-streaming, use async_socket for streaming)"""
        request_data = {
            "question": question,
            "user": user,
            "collection": collection,
            "max-subgraph-size": max_subgraph_size,
            "max-subgraph-count": max_subgraph_count,
            "max-entity-distance": max_entity_distance,
            "streaming": False
        }
        request_data.update(kwargs)

        result = await self.request("graph-rag", request_data)
        return result.get("response", "")

    async def document_rag(self, question: str, user: str, collection: str,
                           doc_limit: int = 10, **kwargs: Any) -> str:
        """Document RAG (non-streaming, use async_socket for streaming)"""
        request_data = {
            "question": question,
            "user": user,
            "collection": collection,
            "doc-limit": doc_limit,
            "streaming": False
        }
        request_data.update(kwargs)

        result = await self.request("document-rag", request_data)
        return result.get("response", "")

    async def graph_embeddings_query(self, text: str, user: str, collection: str, limit: int = 10, **kwargs: Any):
        """Query graph embeddings for semantic search"""
        request_data = {
            "text": text,
            "user": user,
            "collection": collection,
            "limit": limit
        }
        request_data.update(kwargs)

        return await self.request("graph-embeddings", request_data)

    async def embeddings(self, text: str, **kwargs: Any):
        """Generate text embeddings"""
        request_data = {"text": text}
        request_data.update(kwargs)

        return await self.request("embeddings", request_data)

    async def triples_query(self, s=None, p=None, o=None, user=None, collection=None, limit=100, **kwargs: Any):
        """Triple pattern query; omitted terms act as wildcards"""
        request_data = {"limit": limit}
        if s is not None:
            request_data["s"] = str(s)
        if p is not None:
            request_data["p"] = str(p)
        if o is not None:
            request_data["o"] = str(o)
        if user is not None:
            request_data["user"] = user
        if collection is not None:
            request_data["collection"] = collection
        request_data.update(kwargs)

        return await self.request("triples", request_data)

    async def objects_query(self, query: str, user: str, collection: str, variables: Optional[Dict] = None,
                            operation_name: Optional[str] = None, **kwargs: Any):
        """GraphQL query"""
        request_data = {
            "query": query,
            "user": user,
            "collection": collection
        }
        if variables:
            request_data["variables"] = variables
        if operation_name:
            request_data["operationName"] = operation_name
        request_data.update(kwargs)

        return await self.request("objects", request_data)
aiohttp.ClientTimeout(total=self.timeout) + + async with aiohttp.ClientSession(timeout=timeout) as session: + async with session.get(url, headers=headers) as resp: + if resp.status != 200: + raise Exception(f"Status code {resp.status}") + + return await resp.text() + + async def aclose(self) -> None: + """Close connections""" + pass diff --git a/trustgraph-base/trustgraph/api/async_socket_client.py b/trustgraph-base/trustgraph/api/async_socket_client.py new file mode 100644 index 00000000..b68a69d5 --- /dev/null +++ b/trustgraph-base/trustgraph/api/async_socket_client.py @@ -0,0 +1,335 @@ + +import json +import websockets +from typing import Optional, Dict, Any, AsyncIterator, Union + +from . types import AgentThought, AgentObservation, AgentAnswer, RAGChunk +from . exceptions import ProtocolException, ApplicationException + + +class AsyncSocketClient: + """Asynchronous WebSocket client""" + + def __init__(self, url: str, timeout: int, token: Optional[str]): + self.url = self._convert_to_ws_url(url) + self.timeout = timeout + self.token = token + self._request_counter = 0 + + def _convert_to_ws_url(self, url: str) -> str: + """Convert HTTP URL to WebSocket URL""" + if url.startswith("http://"): + return url.replace("http://", "ws://", 1) + elif url.startswith("https://"): + return url.replace("https://", "wss://", 1) + elif url.startswith("ws://") or url.startswith("wss://"): + return url + else: + # Assume ws:// + return f"ws://{url}" + + def flow(self, flow_id: str): + """Get async flow instance for WebSocket operations""" + return AsyncSocketFlowInstance(self, flow_id) + + async def _send_request(self, service: str, flow: Optional[str], request: Dict[str, Any]): + """Async WebSocket request implementation (non-streaming)""" + # Generate unique request ID + self._request_counter += 1 + request_id = f"req-{self._request_counter}" + + # Build WebSocket URL with optional token + ws_url = f"{self.url}/api/v1/socket" + if self.token: + ws_url = 
f"{ws_url}?token={self.token}" + + # Build request message + message = { + "id": request_id, + "service": service, + "request": request + } + if flow: + message["flow"] = flow + + # Connect and send request + async with websockets.connect(ws_url, ping_interval=20, ping_timeout=self.timeout) as websocket: + await websocket.send(json.dumps(message)) + + # Wait for single response + raw_message = await websocket.recv() + response = json.loads(raw_message) + + if response.get("id") != request_id: + raise ProtocolException(f"Response ID mismatch") + + if "error" in response: + raise ApplicationException(response["error"]) + + if "response" not in response: + raise ProtocolException(f"Missing response in message") + + return response["response"] + + async def _send_request_streaming(self, service: str, flow: Optional[str], request: Dict[str, Any]): + """Async WebSocket request implementation (streaming)""" + # Generate unique request ID + self._request_counter += 1 + request_id = f"req-{self._request_counter}" + + # Build WebSocket URL with optional token + ws_url = f"{self.url}/api/v1/socket" + if self.token: + ws_url = f"{ws_url}?token={self.token}" + + # Build request message + message = { + "id": request_id, + "service": service, + "request": request + } + if flow: + message["flow"] = flow + + # Connect and send request + async with websockets.connect(ws_url, ping_interval=20, ping_timeout=self.timeout) as websocket: + await websocket.send(json.dumps(message)) + + # Yield chunks as they arrive + async for raw_message in websocket: + response = json.loads(raw_message) + + if response.get("id") != request_id: + continue # Ignore messages for other requests + + if "error" in response: + raise ApplicationException(response["error"]) + + if "response" in response: + resp = response["response"] + + # Parse different chunk types + chunk = self._parse_chunk(resp) + yield chunk + + # Check if this is the final chunk + if resp.get("end_of_stream") or resp.get("end_of_dialog") 
class AsyncSocketFlowInstance:
    """Asynchronous WebSocket flow instance.

    Methods that accept ``streaming=True`` return an async iterator of
    content strings (call must still be awaited to obtain it); otherwise
    they await and return the complete response.
    """

    def __init__(self, client: AsyncSocketClient, flow_id: str):
        self.client = client
        self.flow_id = flow_id

    async def _stream_content(self, service: str, request: Dict[str, Any]):
        """Yield chunk.content for each streamed chunk that carries content.

        Shared helper for all streaming services; chunks without a
        ``content`` attribute (e.g. bare RAG terminators) are skipped.
        """
        async for chunk in self.client._send_request_streaming(service, self.flow_id, request):
            if hasattr(chunk, 'content'):
                yield chunk.content

    async def agent(self, question: str, user: str, state: Optional[Dict[str, Any]] = None,
                    group: Optional[str] = None, history: Optional[list] = None,
                    streaming: bool = False, **kwargs) -> Union[Dict[str, Any], AsyncIterator]:
        """Agent with optional streaming.

        Streaming yields typed chunks (thought/observation/answer), so the
        raw chunk stream is returned rather than content-only strings.
        """
        request = {
            "question": question,
            "user": user,
            "streaming": streaming
        }
        if state is not None:
            request["state"] = state
        if group is not None:
            request["group"] = group
        if history is not None:
            request["history"] = history
        request.update(kwargs)

        if streaming:
            return self.client._send_request_streaming("agent", self.flow_id, request)
        else:
            return await self.client._send_request("agent", self.flow_id, request)

    async def text_completion(self, system: str, prompt: str, streaming: bool = False, **kwargs):
        """Text completion with optional streaming"""
        request = {
            "system": system,
            "prompt": prompt,
            "streaming": streaming
        }
        request.update(kwargs)

        if streaming:
            return self._stream_content("text-completion", request)
        else:
            result = await self.client._send_request("text-completion", self.flow_id, request)
            return result.get("response", "")

    async def graph_rag(self, question: str, user: str, collection: str,
                        max_subgraph_size: int = 1000, max_subgraph_count: int = 5,
                        max_entity_distance: int = 3, streaming: bool = False, **kwargs):
        """Graph RAG with optional streaming"""
        request = {
            "question": question,
            "user": user,
            "collection": collection,
            "max-subgraph-size": max_subgraph_size,
            "max-subgraph-count": max_subgraph_count,
            "max-entity-distance": max_entity_distance,
            "streaming": streaming
        }
        request.update(kwargs)

        if streaming:
            return self._stream_content("graph-rag", request)
        else:
            result = await self.client._send_request("graph-rag", self.flow_id, request)
            return result.get("response", "")

    async def document_rag(self, question: str, user: str, collection: str,
                           doc_limit: int = 10, streaming: bool = False, **kwargs):
        """Document RAG with optional streaming"""
        request = {
            "question": question,
            "user": user,
            "collection": collection,
            "doc-limit": doc_limit,
            "streaming": streaming
        }
        request.update(kwargs)

        if streaming:
            return self._stream_content("document-rag", request)
        else:
            result = await self.client._send_request("document-rag", self.flow_id, request)
            return result.get("response", "")

    async def prompt(self, id: str, variables: Dict[str, str], streaming: bool = False, **kwargs):
        """Execute prompt with optional streaming"""
        request = {
            "id": id,
            "variables": variables,
            "streaming": streaming
        }
        request.update(kwargs)

        if streaming:
            return self._stream_content("prompt", request)
        else:
            result = await self.client._send_request("prompt", self.flow_id, request)
            return result.get("response", "")

    async def graph_embeddings_query(self, text: str, user: str, collection: str, limit: int = 10, **kwargs):
        """Query graph embeddings for semantic search"""
        request = {
            "text": text,
            "user": user,
            "collection": collection,
            "limit": limit
        }
        request.update(kwargs)

        return await self.client._send_request("graph-embeddings", self.flow_id, request)

    async def embeddings(self, text: str, **kwargs):
        """Generate text embeddings"""
        request = {"text": text}
        request.update(kwargs)

        return await self.client._send_request("embeddings", self.flow_id, request)

    async def triples_query(self, s=None, p=None, o=None, user=None, collection=None, limit=100, **kwargs):
        """Triple pattern query; omitted terms act as wildcards"""
        request = {"limit": limit}
        if s is not None:
            request["s"] = str(s)
        if p is not None:
            request["p"] = str(p)
        if o is not None:
            request["o"] = str(o)
        if user is not None:
            request["user"] = user
        if collection is not None:
            request["collection"] = collection
        request.update(kwargs)

        return await self.client._send_request("triples", self.flow_id, request)

    async def objects_query(self, query: str, user: str, collection: str, variables: Optional[Dict] = None,
                            operation_name: Optional[str] = None, **kwargs):
        """GraphQL query"""
        request = {
            "query": query,
            "user": user,
            "collection": collection
        }
        if variables:
            request["variables"] = variables
        if operation_name:
            request["operationName"] = operation_name
        request.update(kwargs)

        return await self.client._send_request("objects", self.flow_id, request)

    async def mcp_tool(self, name: str, parameters: Dict[str, Any], **kwargs):
        """Execute MCP tool"""
        request = {
            "name": name,
            "parameters": parameters
        }
        request.update(kwargs)

        return await self.client._send_request("mcp-tool", self.flow_id, request)
class BulkClient:
    """Synchronous bulk import/export client.

    Each operation opens its own WebSocket connection (driven on a private
    asyncio event loop), streams JSON messages, and closes the connection
    via the ``async with`` context manager.  No state is shared between
    operations, so the client itself holds nothing to clean up.
    """

    def __init__(self, url: str, timeout: int, token: Optional[str]) -> None:
        """
        :param url: API base URL; http/https are rewritten to ws/wss
        :param timeout: WebSocket ping timeout in seconds
        :param token: optional auth token, sent as a query parameter
        """
        self.url: str = self._convert_to_ws_url(url)
        self.timeout: int = timeout
        self.token: Optional[str] = token

    def _convert_to_ws_url(self, url: str) -> str:
        """Convert an HTTP(S) URL to the equivalent WebSocket URL."""
        if url.startswith("http://"):
            return url.replace("http://", "ws://", 1)
        if url.startswith("https://"):
            return url.replace("https://", "wss://", 1)
        if url.startswith(("ws://", "wss://")):
            return url
        # No recognised scheme: assume plain WebSocket
        return f"ws://{url}"

    def _endpoint(self, flow: str, direction: str, kind: str) -> str:
        """Build the bulk import/export URL for *flow*, appending the token.

        NOTE(review): the token travels in the query string; confirm the
        gateway keeps query strings out of its access logs.
        """
        ws_url = f"{self.url}/api/v1/flow/{flow}/{direction}/{kind}"
        if self.token:
            ws_url = f"{ws_url}?token={self.token}"
        return ws_url

    def _get_loop(self) -> asyncio.AbstractEventLoop:
        """Return an event loop that is safe to drive synchronously.

        If the current loop is already running (e.g. inside Jupyter), a
        fresh loop is created so ``run_until_complete`` does not clash.
        """
        try:
            loop = asyncio.get_event_loop()
            if loop.is_running():
                loop = asyncio.new_event_loop()
                asyncio.set_event_loop(loop)
        except RuntimeError:
            loop = asyncio.new_event_loop()
            asyncio.set_event_loop(loop)
        return loop

    def _run_async(self, coro: Coroutine[Any, Any, Any]) -> Any:
        """Run async coroutine synchronously."""
        return self._get_loop().run_until_complete(coro)

    def _drain(self, async_gen: Any) -> Iterator[Any]:
        """Synchronously yield every item of an async generator.

        The generator is always closed on exit, even when the consumer
        abandons the iteration early.
        """
        loop = self._get_loop()
        try:
            while True:
                try:
                    yield loop.run_until_complete(async_gen.__anext__())
                except StopAsyncIteration:
                    break
        finally:
            try:
                loop.run_until_complete(async_gen.aclose())
            except Exception:
                # Best effort: the generator may already be finalised
                pass

    async def _send_items(self, ws_url: str, messages: Any) -> None:
        """Connect to *ws_url* and send each message as a JSON text frame."""
        async with websockets.connect(
            ws_url, ping_interval=20, ping_timeout=self.timeout
        ) as websocket:
            for message in messages:
                await websocket.send(json.dumps(message))

    async def _recv_items(self, ws_url: str) -> Any:
        """Connect to *ws_url* and yield each frame decoded from JSON."""
        async with websockets.connect(
            ws_url, ping_interval=20, ping_timeout=self.timeout
        ) as websocket:
            async for raw_message in websocket:
                yield json.loads(raw_message)

    # --- triples --------------------------------------------------------

    def import_triples(self, flow: str, triples: Iterator["Triple"], **kwargs: Any) -> None:
        """Bulk import triples via WebSocket"""
        self._run_async(self._import_triples_async(flow, triples))

    async def _import_triples_async(self, flow: str, triples: Iterator["Triple"]) -> None:
        """Async implementation of triple import"""
        messages = ({"s": t.s, "p": t.p, "o": t.o} for t in triples)
        await self._send_items(self._endpoint(flow, "import", "triples"), messages)

    def export_triples(self, flow: str, **kwargs: Any) -> Iterator["Triple"]:
        """Bulk export triples via WebSocket"""
        yield from self._drain(self._export_triples_async(flow))

    async def _export_triples_async(self, flow: str) -> Any:
        """Async implementation of triple export"""
        async for data in self._recv_items(self._endpoint(flow, "export", "triples")):
            yield Triple(
                s=data.get("s", ""),
                p=data.get("p", ""),
                o=data.get("o", ""),
            )

    # --- graph embeddings -----------------------------------------------

    def import_graph_embeddings(self, flow: str, embeddings: Iterator[Dict[str, Any]], **kwargs: Any) -> None:
        """Bulk import graph embeddings via WebSocket"""
        self._run_async(self._import_graph_embeddings_async(flow, embeddings))

    async def _import_graph_embeddings_async(self, flow: str, embeddings: Iterator[Dict[str, Any]]) -> None:
        """Async implementation of graph embeddings import"""
        await self._send_items(
            self._endpoint(flow, "import", "graph-embeddings"), embeddings)

    def export_graph_embeddings(self, flow: str, **kwargs: Any) -> Iterator[Dict[str, Any]]:
        """Bulk export graph embeddings via WebSocket"""
        yield from self._drain(self._export_graph_embeddings_async(flow))

    async def _export_graph_embeddings_async(self, flow: str) -> Any:
        """Async implementation of graph embeddings export"""
        async for item in self._recv_items(
                self._endpoint(flow, "export", "graph-embeddings")):
            yield item

    # --- document embeddings --------------------------------------------

    def import_document_embeddings(self, flow: str, embeddings: Iterator[Dict[str, Any]], **kwargs: Any) -> None:
        """Bulk import document embeddings via WebSocket"""
        self._run_async(self._import_document_embeddings_async(flow, embeddings))

    async def _import_document_embeddings_async(self, flow: str, embeddings: Iterator[Dict[str, Any]]) -> None:
        """Async implementation of document embeddings import"""
        await self._send_items(
            self._endpoint(flow, "import", "document-embeddings"), embeddings)

    def export_document_embeddings(self, flow: str, **kwargs: Any) -> Iterator[Dict[str, Any]]:
        """Bulk export document embeddings via WebSocket"""
        yield from self._drain(self._export_document_embeddings_async(flow))

    async def _export_document_embeddings_async(self, flow: str) -> Any:
        """Async implementation of document embeddings export"""
        async for item in self._recv_items(
                self._endpoint(flow, "export", "document-embeddings")):
            yield item

    # --- entity contexts ------------------------------------------------

    def import_entity_contexts(self, flow: str, contexts: Iterator[Dict[str, Any]], **kwargs: Any) -> None:
        """Bulk import entity contexts via WebSocket"""
        self._run_async(self._import_entity_contexts_async(flow, contexts))

    async def _import_entity_contexts_async(self, flow: str, contexts: Iterator[Dict[str, Any]]) -> None:
        """Async implementation of entity contexts import"""
        await self._send_items(
            self._endpoint(flow, "import", "entity-contexts"), contexts)

    def export_entity_contexts(self, flow: str, **kwargs: Any) -> Iterator[Dict[str, Any]]:
        """Bulk export entity contexts via WebSocket"""
        yield from self._drain(self._export_entity_contexts_async(flow))

    async def _export_entity_contexts_async(self, flow: str) -> Any:
        """Async implementation of entity contexts export"""
        async for item in self._recv_items(
                self._endpoint(flow, "export", "entity-contexts")):
            yield item

    # --- objects --------------------------------------------------------

    def import_objects(self, flow: str, objects: Iterator[Dict[str, Any]], **kwargs: Any) -> None:
        """Bulk import objects via WebSocket"""
        self._run_async(self._import_objects_async(flow, objects))

    async def _import_objects_async(self, flow: str, objects: Iterator[Dict[str, Any]]) -> None:
        """Async implementation of objects import"""
        await self._send_items(self._endpoint(flow, "import", "objects"), objects)

    def close(self) -> None:
        """Close the client.

        A no-op: every operation manages its own connection via a context
        manager, so there is no persistent state to release.
        """
        pass
class SocketClient:
    """Synchronous WebSocket client (wraps the async ``websockets`` library).

    Each request opens its own connection to ``/api/v1/socket``; requests
    and responses are correlated by a per-client request ID.
    """

    def __init__(self, url: str, timeout: int, token: Optional[str]) -> None:
        """
        :param url: API base URL; http/https are rewritten to ws/wss
        :param timeout: WebSocket ping timeout in seconds
        :param token: optional auth token, sent as a query parameter
        """
        self.url: str = self._convert_to_ws_url(url)
        self.timeout: int = timeout
        self.token: Optional[str] = token
        self._connection: Optional[Any] = None
        self._request_counter: int = 0
        self._lock: Lock = Lock()
        self._loop: Optional[asyncio.AbstractEventLoop] = None

    def _convert_to_ws_url(self, url: str) -> str:
        """Convert an HTTP(S) URL to the equivalent WebSocket URL."""
        if url.startswith("http://"):
            return url.replace("http://", "ws://", 1)
        if url.startswith("https://"):
            return url.replace("https://", "wss://", 1)
        if url.startswith(("ws://", "wss://")):
            return url
        # No recognised scheme: assume plain WebSocket
        return f"ws://{url}"

    def flow(self, flow_id: str) -> "SocketFlowInstance":
        """Get a flow-scoped instance for WebSocket operations."""
        return SocketFlowInstance(self, flow_id)

    def _socket_url(self) -> str:
        """Socket endpoint URL, with the auth token appended if present.

        NOTE(review): the token travels in the query string; confirm the
        gateway keeps query strings out of its access logs.
        """
        ws_url = f"{self.url}/api/v1/socket"
        if self.token:
            ws_url = f"{ws_url}?token={self.token}"
        return ws_url

    def _next_message(
        self,
        service: str,
        flow: Optional[str],
        request: Dict[str, Any]
    ) -> Any:
        """Allocate a unique request ID and build the request envelope."""
        with self._lock:
            self._request_counter += 1
            request_id = f"req-{self._request_counter}"
        message: Dict[str, Any] = {
            "id": request_id,
            "service": service,
            "request": request,
        }
        if flow:
            message["flow"] = flow
        return request_id, message

    def _send_request_sync(
        self,
        service: str,
        flow: Optional[str],
        request: Dict[str, Any],
        streaming: bool = False
    ) -> Union[Dict[str, Any], Iterator["StreamingChunk"]]:
        """Send one request over a private event loop.

        Returns the response dict for non-streaming requests, or a lazy
        iterator of chunks for streaming requests.
        """
        try:
            loop = asyncio.get_event_loop()
            if loop.is_running():
                # Loop already running (e.g. Jupyter): use a fresh one
                loop = asyncio.new_event_loop()
                asyncio.set_event_loop(loop)
        except RuntimeError:
            loop = asyncio.new_event_loop()
            asyncio.set_event_loop(loop)

        if streaming:
            # Hand back a generator that drives the async stream lazily
            return self._streaming_generator(service, flow, request, loop)
        # Non-streaming: block until the single response arrives
        return loop.run_until_complete(
            self._send_request_async(service, flow, request))

    def _streaming_generator(
        self,
        service: str,
        flow: Optional[str],
        request: Dict[str, Any],
        loop: asyncio.AbstractEventLoop
    ) -> Iterator["StreamingChunk"]:
        """Synchronously yield chunks from the async streaming request."""
        async_gen = self._send_request_async_streaming(service, flow, request)
        try:
            while True:
                try:
                    yield loop.run_until_complete(async_gen.__anext__())
                except StopAsyncIteration:
                    break
        finally:
            # Always close the async generator, even on early abandonment
            try:
                loop.run_until_complete(async_gen.aclose())
            except Exception:
                pass

    async def _send_request_async(
        self,
        service: str,
        flow: Optional[str],
        request: Dict[str, Any]
    ) -> Dict[str, Any]:
        """Async implementation of a non-streaming request.

        :raises ProtocolException: on ID mismatch or malformed response
        :raises ApplicationException: when the server reports an error
        """
        request_id, message = self._next_message(service, flow, request)

        async with websockets.connect(
            self._socket_url(), ping_interval=20, ping_timeout=self.timeout
        ) as websocket:
            await websocket.send(json.dumps(message))

            # Exactly one response message is expected
            raw_message = await websocket.recv()
            response = json.loads(raw_message)

            if response.get("id") != request_id:
                raise ProtocolException("Response ID mismatch")

            if "error" in response:
                raise ApplicationException(response["error"])

            if "response" not in response:
                raise ProtocolException("Missing response in message")

            return response["response"]

    async def _send_request_async_streaming(
        self,
        service: str,
        flow: Optional[str],
        request: Dict[str, Any]
    ) -> "AsyncIterator[StreamingChunk]":
        """Async implementation of a streaming request.

        Yields parsed chunks until an end-of-stream/dialog marker or a
        ``complete`` envelope flag is seen.
        """
        request_id, message = self._next_message(service, flow, request)

        async with websockets.connect(
            self._socket_url(), ping_interval=20, ping_timeout=self.timeout
        ) as websocket:
            await websocket.send(json.dumps(message))

            async for raw_message in websocket:
                response = json.loads(raw_message)

                if response.get("id") != request_id:
                    # Not ours: ignore messages for other requests
                    continue

                if "error" in response:
                    raise ApplicationException(response["error"])

                if "response" in response:
                    resp = response["response"]
                    yield self._parse_chunk(resp)

                    # Stop on any terminal marker
                    if (resp.get("end_of_stream")
                            or resp.get("end_of_dialog")
                            or response.get("complete")):
                        break

    def _parse_chunk(self, resp: Dict[str, Any]) -> "StreamingChunk":
        """Parse a response payload into the matching chunk dataclass."""
        chunk_type = resp.get("chunk_type")

        if chunk_type == "thought":
            return AgentThought(
                content=resp.get("content", ""),
                end_of_message=resp.get("end_of_message", False)
            )
        elif chunk_type == "observation":
            return AgentObservation(
                content=resp.get("content", ""),
                end_of_message=resp.get("end_of_message", False)
            )
        elif chunk_type == "final-answer":
            return AgentAnswer(
                content=resp.get("content", ""),
                end_of_message=resp.get("end_of_message", False),
                end_of_dialog=resp.get("end_of_dialog", False)
            )
        else:
            # RAG-style chunk (or generic chunk)
            return RAGChunk(
                content=resp.get("chunk", ""),
                end_of_stream=resp.get("end_of_stream", False),
                error=resp.get("error")
            )

    def close(self) -> None:
        """Close the client.

        A no-op: each request manages its own connection via a context
        manager, so there is no persistent state to release.
        """
        pass


class SocketFlowInstance:
    """Flow-scoped WebSocket operations mirroring the REST FlowInstance.

    Methods that support streaming return an iterator of text when called
    with ``streaming=True`` and a plain value with ``streaming=False``.
    """

    def __init__(self, client: SocketClient, flow_id: str) -> None:
        self.client: SocketClient = client
        self.flow_id: str = flow_id

    @staticmethod
    def _content_stream(chunks: Iterator["StreamingChunk"]) -> Iterator[str]:
        """Yield just the text content of each streaming chunk.

        Streaming is delegated here deliberately: a ``yield`` placed in
        the calling method's own body would turn that method into a
        generator and silently discard its non-streaming ``return``.
        """
        for chunk in chunks:
            if hasattr(chunk, "content"):
                yield chunk.content

    def agent(
        self,
        question: str,
        user: str,
        state: Optional[Dict[str, Any]] = None,
        group: Optional[str] = None,
        history: Optional[List[Dict[str, Any]]] = None,
        streaming: bool = False,
        **kwargs: Any
    ) -> Union[Dict[str, Any], Iterator["StreamingChunk"]]:
        """Agent question/answer with optional streaming of chunks."""
        request: Dict[str, Any] = {
            "question": question,
            "user": user,
            "streaming": streaming,
        }
        if state is not None:
            request["state"] = state
        if group is not None:
            request["group"] = group
        if history is not None:
            request["history"] = history
        request.update(kwargs)

        return self.client._send_request_sync(
            "agent", self.flow_id, request, streaming)

    def text_completion(self, system: str, prompt: str, streaming: bool = False, **kwargs) -> Union[str, Iterator[str]]:
        """Text completion with optional streaming."""
        request: Dict[str, Any] = {
            "system": system,
            "prompt": prompt,
            "streaming": streaming,
        }
        request.update(kwargs)

        result = self.client._send_request_sync(
            "text-completion", self.flow_id, request, streaming)

        if streaming:
            return self._content_stream(result)
        return result.get("response", "")

    def graph_rag(
        self,
        question: str,
        user: str,
        collection: str,
        max_subgraph_size: int = 1000,
        max_subgraph_count: int = 5,
        max_entity_distance: int = 3,
        streaming: bool = False,
        **kwargs: Any
    ) -> Union[str, Iterator[str]]:
        """Graph RAG with optional streaming."""
        request: Dict[str, Any] = {
            "question": question,
            "user": user,
            "collection": collection,
            "max-subgraph-size": max_subgraph_size,
            "max-subgraph-count": max_subgraph_count,
            "max-entity-distance": max_entity_distance,
            "streaming": streaming,
        }
        request.update(kwargs)

        result = self.client._send_request_sync(
            "graph-rag", self.flow_id, request, streaming)

        if streaming:
            return self._content_stream(result)
        return result.get("response", "")

    def document_rag(
        self,
        question: str,
        user: str,
        collection: str,
        doc_limit: int = 10,
        streaming: bool = False,
        **kwargs: Any
    ) -> Union[str, Iterator[str]]:
        """Document RAG with optional streaming."""
        request: Dict[str, Any] = {
            "question": question,
            "user": user,
            "collection": collection,
            "doc-limit": doc_limit,
            "streaming": streaming,
        }
        request.update(kwargs)

        result = self.client._send_request_sync(
            "document-rag", self.flow_id, request, streaming)

        if streaming:
            return self._content_stream(result)
        return result.get("response", "")

    def prompt(
        self,
        id: str,
        variables: Dict[str, str],
        streaming: bool = False,
        **kwargs: Any
    ) -> Union[str, Iterator[str]]:
        """Execute a stored prompt with optional streaming."""
        request: Dict[str, Any] = {
            "id": id,
            "variables": variables,
            "streaming": streaming,
        }
        request.update(kwargs)

        result = self.client._send_request_sync(
            "prompt", self.flow_id, request, streaming)

        if streaming:
            return self._content_stream(result)
        return result.get("response", "")

    def graph_embeddings_query(
        self,
        text: str,
        user: str,
        collection: str,
        limit: int = 10,
        **kwargs: Any
    ) -> Dict[str, Any]:
        """Query graph embeddings for semantic search."""
        request: Dict[str, Any] = {
            "text": text,
            "user": user,
            "collection": collection,
            "limit": limit,
        }
        request.update(kwargs)

        return self.client._send_request_sync(
            "graph-embeddings", self.flow_id, request, False)

    def embeddings(self, text: str, **kwargs: Any) -> Dict[str, Any]:
        """Generate text embeddings."""
        request: Dict[str, Any] = {"text": text}
        request.update(kwargs)

        return self.client._send_request_sync(
            "embeddings", self.flow_id, request, False)

    def triples_query(
        self,
        s: Optional[str] = None,
        p: Optional[str] = None,
        o: Optional[str] = None,
        user: Optional[str] = None,
        collection: Optional[str] = None,
        limit: int = 100,
        **kwargs: Any
    ) -> Dict[str, Any]:
        """Triple pattern query; unset terms act as wildcards."""
        request: Dict[str, Any] = {"limit": limit}
        if s is not None:
            request["s"] = str(s)
        if p is not None:
            request["p"] = str(p)
        if o is not None:
            request["o"] = str(o)
        if user is not None:
            request["user"] = user
        if collection is not None:
            request["collection"] = collection
        request.update(kwargs)

        return self.client._send_request_sync(
            "triples", self.flow_id, request, False)

    def objects_query(
        self,
        query: str,
        user: str,
        collection: str,
        variables: Optional[Dict[str, Any]] = None,
        operation_name: Optional[str] = None,
        **kwargs: Any
    ) -> Dict[str, Any]:
        """GraphQL query against the objects service."""
        request: Dict[str, Any] = {
            "query": query,
            "user": user,
            "collection": collection,
        }
        if variables:
            request["variables"] = variables
        if operation_name:
            request["operationName"] = operation_name
        request.update(kwargs)

        return self.client._send_request_sync(
            "objects", self.flow_id, request, False)

    def mcp_tool(
        self,
        name: str,
        parameters: Dict[str, Any],
        **kwargs: Any
    ) -> Dict[str, Any]:
        """Execute an MCP tool by name."""
        request: Dict[str, Any] = {
            "name": name,
            "parameters": parameters,
        }
        request.update(kwargs)

        return self.client._send_request_sync(
            "mcp-tool", self.flow_id, request, False)
trustgraph.api import Api from trustgraph.api.types import ConfigKey default_url = os.getenv("TRUSTGRAPH_URL", 'http://localhost:8088/') +default_token = os.getenv("TRUSTGRAPH_TOKEN", None) -def delete_config_item(url, config_type, key): +def delete_config_item(url, config_type, key, token=None): - api = Api(url).config() + api = Api(url, token=token).config() config_key = ConfigKey(type=config_type, key=key) api.delete([config_key]) @@ -43,6 +44,12 @@ def main(): help=f'API URL (default: {default_url})', ) + parser.add_argument( + '-t', '--token', + default=default_token, + help='Authentication token (default: $TRUSTGRAPH_TOKEN)', + ) + args = parser.parse_args() try: @@ -51,6 +58,7 @@ def main(): url=args.api_url, config_type=args.type, key=args.key, + token=args.token, ) except Exception as e: diff --git a/trustgraph-cli/trustgraph/cli/get_config_item.py b/trustgraph-cli/trustgraph/cli/get_config_item.py index 832d2711..c2421e94 100644 --- a/trustgraph-cli/trustgraph/cli/get_config_item.py +++ b/trustgraph-cli/trustgraph/cli/get_config_item.py @@ -9,10 +9,11 @@ from trustgraph.api import Api from trustgraph.api.types import ConfigKey default_url = os.getenv("TRUSTGRAPH_URL", 'http://localhost:8088/') +default_token = os.getenv("TRUSTGRAPH_TOKEN", None) -def get_config_item(url, config_type, key, format_type): +def get_config_item(url, config_type, key, format_type, token=None): - api = Api(url).config() + api = Api(url, token=token).config() config_key = ConfigKey(type=config_type, key=key) values = api.get([config_key]) @@ -59,6 +60,12 @@ def main(): help=f'API URL (default: {default_url})', ) + parser.add_argument( + '-t', '--token', + default=default_token, + help='Authentication token (default: $TRUSTGRAPH_TOKEN)', + ) + args = parser.parse_args() try: @@ -68,6 +75,7 @@ def main(): config_type=args.type, key=args.key, format_type=args.format, + token=args.token, ) except Exception as e: diff --git a/trustgraph-cli/trustgraph/cli/get_kg_core.py 
b/trustgraph-cli/trustgraph/cli/get_kg_core.py index 6e0a8bc0..b75f7155 100644 --- a/trustgraph-cli/trustgraph/cli/get_kg_core.py +++ b/trustgraph-cli/trustgraph/cli/get_kg_core.py @@ -14,6 +14,7 @@ import msgpack default_url = os.getenv("TRUSTGRAPH_URL", 'ws://localhost:8088/') default_user = 'trustgraph' +default_token = os.getenv("TRUSTGRAPH_TOKEN", None) def write_triple(f, data): msg = ( @@ -51,13 +52,16 @@ def write_ge(f, data): ) f.write(msgpack.packb(msg, use_bin_type=True)) -async def fetch(url, user, id, output): +async def fetch(url, user, id, output, token=None): if not url.endswith("/"): url += "/" url = url + "api/v1/socket" + if token: + url = f"{url}?token={token}" + mid = str(uuid.uuid4()) async with connect(url) as ws: @@ -138,6 +142,12 @@ def main(): help=f'Output file' ) + parser.add_argument( + '-t', '--token', + default=default_token, + help='Authentication token (default: $TRUSTGRAPH_TOKEN)', + ) + args = parser.parse_args() try: @@ -148,6 +158,7 @@ def main(): user = args.user, id = args.id, output = args.output, + token = args.token, ) ) diff --git a/trustgraph-cli/trustgraph/cli/invoke_agent.py b/trustgraph-cli/trustgraph/cli/invoke_agent.py index e6e82edd..6af12cd5 100644 --- a/trustgraph-cli/trustgraph/cli/invoke_agent.py +++ b/trustgraph-cli/trustgraph/cli/invoke_agent.py @@ -5,12 +5,10 @@ Uses the agent service to answer a question import argparse import os import textwrap -import uuid -import asyncio -import json -from websockets.asyncio.client import connect +from trustgraph.api import Api -default_url = os.getenv("TRUSTGRAPH_URL", 'ws://localhost:8088/') +default_url = os.getenv("TRUSTGRAPH_URL", 'http://localhost:8088/') +default_token = os.getenv("TRUSTGRAPH_TOKEN", None) default_user = 'trustgraph' default_collection = 'default' @@ -99,79 +97,47 @@ def output(text, prefix="> ", width=78): ) print(out) -async def question( +def question( url, question, flow_id, user, collection, - plan=None, state=None, group=None, verbose=False, 
streaming=True + plan=None, state=None, group=None, verbose=False, streaming=True, + token=None ): - if not url.endswith("/"): - url += "/" - - url = url + "api/v1/socket" - if verbose: output(wrap(question), "\U00002753 ") print() - # Track last chunk type and current outputter for streaming - last_chunk_type = None - current_outputter = None + # Create API client + api = Api(url=url, token=token) + socket = api.socket() + flow = socket.flow(flow_id) - def think(x): - if verbose: - output(wrap(x), "\U0001f914 ") - print() + # Prepare request parameters + request_params = { + "question": question, + "user": user, + "streaming": streaming, + } - def observe(x): - if verbose: - output(wrap(x), "\U0001f4a1 ") - print() + # Only add optional fields if they have values + if state is not None: + request_params["state"] = state + if group is not None: + request_params["group"] = group - mid = str(uuid.uuid4()) + try: + # Call agent + response = flow.agent(**request_params) - async with connect(url) as ws: + # Handle streaming response + if streaming: + # Track last chunk type and current outputter for streaming + last_chunk_type = None + current_outputter = None - req = { - "id": mid, - "service": "agent", - "flow": flow_id, - "request": { - "question": question, - "user": user, - "history": [], - "streaming": streaming - } - } - - # Only add optional fields if they have values - if state is not None: - req["request"]["state"] = state - if group is not None: - req["request"]["group"] = group - - req = json.dumps(req) - - await ws.send(req) - - while True: - - msg = await ws.recv() - - obj = json.loads(msg) - - if "error" in obj: - raise RuntimeError(obj["error"]) - - if obj["id"] != mid: - print("Ignore message") - continue - - response = obj["response"] - - # Handle streaming format (new format with chunk_type) - if "chunk_type" in response: - chunk_type = response["chunk_type"] - content = response.get("content", "") + for chunk in response: + chunk_type = 
chunk.chunk_type + content = chunk.content # Check if we're switching to a new message type if last_chunk_type != chunk_type: @@ -195,33 +161,27 @@ async def question( # Output the chunk if current_outputter: current_outputter.output(content) - elif chunk_type == "answer": + elif chunk_type == "final-answer": print(content, end="", flush=True) - else: - # Handle legacy format (backward compatibility) - if "thought" in response: - think(response["thought"]) - if "observation" in response: - observe(response["observation"]) + # Close any remaining outputter + if current_outputter: + current_outputter.__exit__(None, None, None) + current_outputter = None + # Add final newline if we were outputting answer + elif last_chunk_type == "final-answer": + print() - if "answer" in response: - print(response["answer"]) + else: + # Non-streaming response + if "answer" in response: + print(response["answer"]) + if "error" in response: + raise RuntimeError(response["error"]) - if "error" in response: - raise RuntimeError(response["error"]) - - if obj["complete"]: - # Close any remaining outputter - if current_outputter: - current_outputter.__exit__(None, None, None) - current_outputter = None - # Add final newline if we were outputting answer - elif last_chunk_type == "answer": - print() - break - - await ws.close() + finally: + # Clean up socket connection + socket.close() def main(): @@ -236,6 +196,12 @@ def main(): help=f'API URL (default: {default_url})', ) + parser.add_argument( + '-t', '--token', + default=default_token, + help='Authentication token (default: $TRUSTGRAPH_TOKEN)', + ) + parser.add_argument( '-f', '--flow-id', default="default", @@ -292,19 +258,18 @@ def main(): try: - asyncio.run( - question( - url = args.url, - flow_id = args.flow_id, - question = args.question, - user = args.user, - collection = args.collection, - plan = args.plan, - state = args.state, - group = args.group, - verbose = args.verbose, - streaming = not args.no_streaming, - ) + question( + 
url = args.url, + flow_id = args.flow_id, + question = args.question, + user = args.user, + collection = args.collection, + plan = args.plan, + state = args.state, + group = args.group, + verbose = args.verbose, + streaming = not args.no_streaming, + token = args.token, ) except Exception as e: diff --git a/trustgraph-cli/trustgraph/cli/invoke_document_rag.py b/trustgraph-cli/trustgraph/cli/invoke_document_rag.py index e6a040ac..6b3a44bc 100644 --- a/trustgraph-cli/trustgraph/cli/invoke_document_rag.py +++ b/trustgraph-cli/trustgraph/cli/invoke_document_rag.py @@ -4,89 +4,50 @@ Uses the DocumentRAG service to answer a question import argparse import os -import asyncio -import json -import uuid -from websockets.asyncio.client import connect from trustgraph.api import Api default_url = os.getenv("TRUSTGRAPH_URL", 'http://localhost:8088/') +default_token = os.getenv("TRUSTGRAPH_TOKEN", None) default_user = 'trustgraph' default_collection = 'default' default_doc_limit = 10 -async def question_streaming(url, flow_id, question, user, collection, doc_limit): - """Streaming version using websockets""" +def question(url, flow_id, question, user, collection, doc_limit, streaming=True, token=None): - # Convert http:// to ws:// - if url.startswith('http://'): - url = 'ws://' + url[7:] - elif url.startswith('https://'): - url = 'wss://' + url[8:] + # Create API client + api = Api(url=url, token=token) - if not url.endswith("/"): - url += "/" + if streaming: + # Use socket client for streaming + socket = api.socket() + flow = socket.flow(flow_id) - url = url + "api/v1/socket" + try: + response = flow.document_rag( + question=question, + user=user, + collection=collection, + doc_limit=doc_limit, + streaming=True + ) - mid = str(uuid.uuid4()) + # Stream output + for chunk in response: + print(chunk.content, end="", flush=True) + print() # Final newline - async with connect(url) as ws: - req = { - "id": mid, - "service": "document-rag", - "flow": flow_id, - "request": { - "query": 
question, - "user": user, - "collection": collection, - "doc-limit": doc_limit, - "streaming": True - } - } - - req = json.dumps(req) - await ws.send(req) - - while True: - msg = await ws.recv() - obj = json.loads(msg) - - if "error" in obj: - raise RuntimeError(obj["error"]) - - if obj["id"] != mid: - print("Ignore message") - continue - - response = obj["response"] - - # Handle streaming format (chunk) - if "chunk" in response: - chunk = response["chunk"] - print(chunk, end="", flush=True) - elif "response" in response: - # Final response with complete text - # Already printed via chunks, just add newline - pass - - if obj["complete"]: - print() # Final newline - break - - await ws.close() - -def question_non_streaming(url, flow_id, question, user, collection, doc_limit): - """Non-streaming version using HTTP API""" - - api = Api(url).flow().id(flow_id) - - resp = api.document_rag( - question=question, user=user, collection=collection, - doc_limit=doc_limit, - ) - - print(resp) + finally: + socket.close() + else: + # Use REST API for non-streaming + flow = api.flow().id(flow_id) + resp = flow.document_rag( + question=question, + user=user, + collection=collection, + doc_limit=doc_limit, + ) + print(resp) def main(): @@ -101,6 +62,12 @@ def main(): help=f'API URL (default: {default_url})', ) + parser.add_argument( + '-t', '--token', + default=default_token, + help='Authentication token (default: $TRUSTGRAPH_TOKEN)', + ) + parser.add_argument( '-f', '--flow-id', default="default", @@ -127,6 +94,7 @@ def main(): parser.add_argument( '-d', '--doc-limit', + type=int, default=default_doc_limit, help=f'Document limit (default: {default_doc_limit})' ) @@ -141,30 +109,20 @@ def main(): try: - if not args.no_streaming: - asyncio.run( - question_streaming( - url=args.url, - flow_id=args.flow_id, - question=args.question, - user=args.user, - collection=args.collection, - doc_limit=args.doc_limit, - ) - ) - else: - question_non_streaming( - url=args.url, - 
flow_id=args.flow_id, - question=args.question, - user=args.user, - collection=args.collection, - doc_limit=args.doc_limit, - ) + question( + url=args.url, + flow_id=args.flow_id, + question=args.question, + user=args.user, + collection=args.collection, + doc_limit=args.doc_limit, + streaming=not args.no_streaming, + token=args.token, + ) except Exception as e: print("Exception:", e, flush=True) if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/trustgraph-cli/trustgraph/cli/invoke_graph_rag.py b/trustgraph-cli/trustgraph/cli/invoke_graph_rag.py index 45d02b6d..725206c8 100644 --- a/trustgraph-cli/trustgraph/cli/invoke_graph_rag.py +++ b/trustgraph-cli/trustgraph/cli/invoke_graph_rag.py @@ -4,13 +4,10 @@ Uses the GraphRAG service to answer a question import argparse import os -import asyncio -import json -import uuid -from websockets.asyncio.client import connect from trustgraph.api import Api default_url = os.getenv("TRUSTGRAPH_URL", 'http://localhost:8088/') +default_token = os.getenv("TRUSTGRAPH_TOKEN", None) default_user = 'trustgraph' default_collection = 'default' default_entity_limit = 50 @@ -18,89 +15,51 @@ default_triple_limit = 30 default_max_subgraph_size = 150 default_max_path_length = 2 -async def question_streaming( +def question( url, flow_id, question, user, collection, entity_limit, triple_limit, - max_subgraph_size, max_path_length + max_subgraph_size, max_path_length, streaming=True, token=None ): - """Streaming version using websockets""" - # Convert http:// to ws:// - if url.startswith('http://'): - url = 'ws://' + url[7:] - elif url.startswith('https://'): - url = 'wss://' + url[8:] + # Create API client + api = Api(url=url, token=token) - if not url.endswith("/"): - url += "/" + if streaming: + # Use socket client for streaming + socket = api.socket() + flow = socket.flow(flow_id) - url = url + "api/v1/socket" + try: + response = flow.graph_rag( + question=question, + user=user, + collection=collection, + 
entity_limit=entity_limit, + triple_limit=triple_limit, + max_subgraph_size=max_subgraph_size, + max_path_length=max_path_length, + streaming=True + ) - mid = str(uuid.uuid4()) + # Stream output + for chunk in response: + print(chunk.content, end="", flush=True) + print() # Final newline - async with connect(url) as ws: - req = { - "id": mid, - "service": "graph-rag", - "flow": flow_id, - "request": { - "query": question, - "user": user, - "collection": collection, - "entity-limit": entity_limit, - "triple-limit": triple_limit, - "max-subgraph-size": max_subgraph_size, - "max-path-length": max_path_length, - "streaming": True - } - } - - req = json.dumps(req) - await ws.send(req) - - while True: - msg = await ws.recv() - obj = json.loads(msg) - - if "error" in obj: - raise RuntimeError(obj["error"]) - - if obj["id"] != mid: - print("Ignore message") - continue - - response = obj["response"] - - # Handle streaming format (chunk) - if "chunk" in response: - chunk = response["chunk"] - print(chunk, end="", flush=True) - elif "response" in response: - # Final response with complete text - # Already printed via chunks, just add newline - pass - - if obj["complete"]: - print() # Final newline - break - - await ws.close() - -def question_non_streaming( - url, flow_id, question, user, collection, entity_limit, triple_limit, - max_subgraph_size, max_path_length -): - """Non-streaming version using HTTP API""" - - api = Api(url).flow().id(flow_id) - - resp = api.graph_rag( - question=question, user=user, collection=collection, - entity_limit=entity_limit, triple_limit=triple_limit, - max_subgraph_size=max_subgraph_size, - max_path_length=max_path_length - ) - - print(resp) + finally: + socket.close() + else: + # Use REST API for non-streaming + flow = api.flow().id(flow_id) + resp = flow.graph_rag( + question=question, + user=user, + collection=collection, + entity_limit=entity_limit, + triple_limit=triple_limit, + max_subgraph_size=max_subgraph_size, + 
max_path_length=max_path_length + ) + print(resp) def main(): @@ -115,6 +74,12 @@ def main(): help=f'API URL (default: {default_url})', ) + parser.add_argument( + '-t', '--token', + default=default_token, + help='Authentication token (default: $TRUSTGRAPH_TOKEN)', + ) + parser.add_argument( '-f', '--flow-id', default="default", @@ -141,24 +106,28 @@ def main(): parser.add_argument( '-e', '--entity-limit', + type=int, default=default_entity_limit, help=f'Entity limit (default: {default_entity_limit})' ) parser.add_argument( - '-t', '--triple-limit', + '--triple-limit', + type=int, default=default_triple_limit, help=f'Triple limit (default: {default_triple_limit})' ) parser.add_argument( '-s', '--max-subgraph-size', + type=int, default=default_max_subgraph_size, help=f'Max subgraph size (default: {default_max_subgraph_size})' ) parser.add_argument( '-p', '--max-path-length', + type=int, default=default_max_path_length, help=f'Max path length (default: {default_max_path_length})' ) @@ -173,36 +142,23 @@ def main(): try: - if not args.no_streaming: - asyncio.run( - question_streaming( - url=args.url, - flow_id=args.flow_id, - question=args.question, - user=args.user, - collection=args.collection, - entity_limit=args.entity_limit, - triple_limit=args.triple_limit, - max_subgraph_size=args.max_subgraph_size, - max_path_length=args.max_path_length, - ) - ) - else: - question_non_streaming( - url=args.url, - flow_id=args.flow_id, - question=args.question, - user=args.user, - collection=args.collection, - entity_limit=args.entity_limit, - triple_limit=args.triple_limit, - max_subgraph_size=args.max_subgraph_size, - max_path_length=args.max_path_length, - ) + question( + url=args.url, + flow_id=args.flow_id, + question=args.question, + user=args.user, + collection=args.collection, + entity_limit=args.entity_limit, + triple_limit=args.triple_limit, + max_subgraph_size=args.max_subgraph_size, + max_path_length=args.max_path_length, + streaming=not args.no_streaming, + 
token=args.token, + ) except Exception as e: print("Exception:", e, flush=True) if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/trustgraph-cli/trustgraph/cli/invoke_llm.py b/trustgraph-cli/trustgraph/cli/invoke_llm.py index da69dcd6..261993d9 100644 --- a/trustgraph-cli/trustgraph/cli/invoke_llm.py +++ b/trustgraph-cli/trustgraph/cli/invoke_llm.py @@ -5,64 +5,39 @@ and user prompt. Both arguments are required. import argparse import os -import json -import uuid -import asyncio -from websockets.asyncio.client import connect +from trustgraph.api import Api -default_url = os.getenv("TRUSTGRAPH_URL", 'ws://localhost:8088/') +default_url = os.getenv("TRUSTGRAPH_URL", 'http://localhost:8088/') +default_token = os.getenv("TRUSTGRAPH_TOKEN", None) -async def query(url, flow_id, system, prompt, streaming=True): +def query(url, flow_id, system, prompt, streaming=True, token=None): - if not url.endswith("/"): - url += "/" + # Create API client + api = Api(url=url, token=token) + socket = api.socket() + flow = socket.flow(flow_id) - url = url + "api/v1/socket" + try: + # Call text completion + response = flow.text_completion( + system=system, + prompt=prompt, + streaming=streaming + ) - mid = str(uuid.uuid4()) + if streaming: + # Stream output to stdout without newline + for chunk in response: + print(chunk.content, end="", flush=True) + # Add final newline after streaming + print() + else: + # Non-streaming: print complete response + print(response) - async with connect(url) as ws: - - req = { - "id": mid, - "service": "text-completion", - "flow": flow_id, - "request": { - "system": system, - "prompt": prompt, - "streaming": streaming - } - } - - await ws.send(json.dumps(req)) - - while True: - - msg = await ws.recv() - - obj = json.loads(msg) - - if "error" in obj: - raise RuntimeError(obj["error"]) - - if obj["id"] != mid: - continue - - if "response" in obj["response"]: - if streaming: - # Stream output to stdout without newline - 
print(obj["response"]["response"], end="", flush=True) - else: - # Non-streaming: print complete response - print(obj["response"]["response"]) - - if obj["complete"]: - if streaming: - # Add final newline after streaming - print() - break - - await ws.close() + finally: + # Clean up socket connection + socket.close() def main(): @@ -77,6 +52,12 @@ def main(): help=f'API URL (default: {default_url})', ) + parser.add_argument( + '-t', '--token', + default=default_token, + help='Authentication token (default: $TRUSTGRAPH_TOKEN)', + ) + parser.add_argument( 'system', nargs=1, @@ -105,17 +86,18 @@ def main(): try: - asyncio.run(query( + query( url=args.url, flow_id=args.flow_id, system=args.system[0], prompt=args.prompt[0], - streaming=not args.no_streaming - )) + streaming=not args.no_streaming, + token=args.token, + ) except Exception as e: print("Exception:", e, flush=True) if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/trustgraph-cli/trustgraph/cli/invoke_prompt.py b/trustgraph-cli/trustgraph/cli/invoke_prompt.py index c996c57d..b1eba5aa 100644 --- a/trustgraph-cli/trustgraph/cli/invoke_prompt.py +++ b/trustgraph-cli/trustgraph/cli/invoke_prompt.py @@ -10,76 +10,61 @@ using key=value arguments on the command line, and these replace import argparse import os import json -import uuid -import asyncio -from websockets.asyncio.client import connect +from trustgraph.api import Api -default_url = os.getenv("TRUSTGRAPH_URL", 'ws://localhost:8088/') +default_url = os.getenv("TRUSTGRAPH_URL", 'http://localhost:8088/') +default_token = os.getenv("TRUSTGRAPH_TOKEN", None) -async def query(url, flow_id, template_id, variables, streaming=True): +def query(url, flow_id, template_id, variables, streaming=True, token=None): - if not url.endswith("/"): - url += "/" + # Create API client + api = Api(url=url, token=token) + socket = api.socket() + flow = socket.flow(flow_id) - url = url + "api/v1/socket" + try: + # Call prompt + response = 
flow.prompt( + id=template_id, + variables=variables, + streaming=streaming + ) - mid = str(uuid.uuid4()) + if streaming: + full_response = {"text": "", "object": ""} - async with connect(url) as ws: + # Stream output + for chunk in response: + content = chunk.content + if content: + print(content, end="", flush=True) + full_response["text"] += content - req = { - "id": mid, - "service": "prompt", - "flow": flow_id, - "request": { - "id": template_id, - "variables": variables, - "streaming": streaming - } - } + # Check if this is an object response (JSON) + if hasattr(chunk, 'object') and chunk.object: + full_response["object"] = chunk.object - await ws.send(json.dumps(req)) + # Handle final output + if full_response["text"]: + # Add final newline after streaming text + print() + elif full_response["object"]: + # Print JSON object (pretty-printed) + print(json.dumps(json.loads(full_response["object"]), indent=4)) - full_response = {"text": "", "object": ""} - - while True: - - msg = await ws.recv() - - obj = json.loads(msg) - - if "error" in obj: - raise RuntimeError(obj["error"]) - - if obj["id"] != mid: - continue - - response = obj["response"] - - # Handle text responses (streaming) - if "text" in response and response["text"]: - if streaming: - # Stream output to stdout without newline - print(response["text"], end="", flush=True) - full_response["text"] += response["text"] - else: - # Non-streaming: print complete response + else: + # Non-streaming: handle response + if isinstance(response, str): + print(response) + elif isinstance(response, dict): + if "text" in response: print(response["text"]) + elif "object" in response: + print(json.dumps(json.loads(response["object"]), indent=4)) - # Handle object responses (JSON, never streamed) - if "object" in response and response["object"]: - full_response["object"] = response["object"] - - if obj["complete"]: - if streaming and full_response["text"]: - # Add final newline after streaming text - print() - elif 
full_response["object"]: - # Print JSON object (pretty-printed) - print(json.dumps(json.loads(full_response["object"]), indent=4)) - break - - await ws.close() + finally: + # Clean up socket connection + socket.close() def main(): @@ -94,6 +79,12 @@ def main(): help=f'API URL (default: {default_url})', ) + parser.add_argument( + '-t', '--token', + default=default_token, + help='Authentication token (default: $TRUSTGRAPH_TOKEN)', + ) + parser.add_argument( '-f', '--flow-id', default="default", @@ -135,17 +126,18 @@ specified multiple times''', try: - asyncio.run(query( + query( url=args.url, flow_id=args.flow_id, template_id=args.id[0], variables=variables, - streaming=not args.no_streaming - )) + streaming=not args.no_streaming, + token=args.token, + ) except Exception as e: print("Exception:", e, flush=True) if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/trustgraph-cli/trustgraph/cli/list_config_items.py b/trustgraph-cli/trustgraph/cli/list_config_items.py index 33e8f7ba..5cd0f233 100644 --- a/trustgraph-cli/trustgraph/cli/list_config_items.py +++ b/trustgraph-cli/trustgraph/cli/list_config_items.py @@ -8,10 +8,11 @@ import json from trustgraph.api import Api default_url = os.getenv("TRUSTGRAPH_URL", 'http://localhost:8088/') +default_token = os.getenv("TRUSTGRAPH_TOKEN", None) -def list_config_items(url, config_type, format_type): +def list_config_items(url, config_type, format_type, token=None): - api = Api(url).config() + api = Api(url, token=token).config() keys = api.list(config_type) @@ -47,6 +48,12 @@ def main(): help=f'API URL (default: {default_url})', ) + parser.add_argument( + '-t', '--token', + default=default_token, + help='Authentication token (default: $TRUSTGRAPH_TOKEN)', + ) + args = parser.parse_args() try: @@ -55,6 +62,7 @@ def main(): url=args.api_url, config_type=args.type, format_type=args.format, + token=args.token, ) except Exception as e: diff --git a/trustgraph-cli/trustgraph/cli/load_knowledge.py 
b/trustgraph-cli/trustgraph/cli/load_knowledge.py index 58081fa1..ff6ca980 100644 --- a/trustgraph-cli/trustgraph/cli/load_knowledge.py +++ b/trustgraph-cli/trustgraph/cli/load_knowledge.py @@ -2,18 +2,17 @@ Loads triples and entity contexts into the knowledge graph. """ -import asyncio import argparse import os import time import rdflib -import json -from websockets.asyncio.client import connect -from typing import List, Dict, Any +from typing import Iterator, Tuple +from trustgraph.api import Api, Triple from trustgraph.log_level import LogLevel -default_url = os.getenv("TRUSTGRAPH_URL", 'ws://localhost:8088/') +default_url = os.getenv("TRUSTGRAPH_URL", 'http://localhost:8088/') +default_token = os.getenv("TRUSTGRAPH_TOKEN", None) default_user = 'trustgraph' default_collection = 'default' @@ -26,108 +25,114 @@ class KnowledgeLoader: user, collection, document_id, - url = default_url, + url=default_url, + token=None, ): - - if not url.endswith("/"): - url += "/" - - self.triples_url = url + f"api/v1/flow/{flow}/import/triples" - self.entity_contexts_url = url + f"api/v1/flow/{flow}/import/entity-contexts" - self.files = files + self.flow = flow self.user = user self.collection = collection self.document_id = document_id + self.url = url + self.token = token - async def run(self): - - try: - # Load triples first - async with connect(self.triples_url) as ws: - for file in self.files: - await self.load_triples(file, ws) - - # Then load entity contexts - async with connect(self.entity_contexts_url) as ws: - for file in self.files: - await self.load_entity_contexts(file, ws) - - except Exception as e: - print(e, flush=True) - - async def load_triples(self, file, ws): + def load_triples_from_file(self, file) -> Iterator[Triple]: + """Generator that yields Triple objects from a Turtle file""" g = rdflib.Graph() g.parse(file, format="turtle") - def Value(value, is_uri): - return { "v": value, "e": is_uri } - for e in g: - s = Value(value=str(e[0]), is_uri=True) - p = 
Value(value=str(e[1]), is_uri=True) - if type(e[2]) == rdflib.term.URIRef: - o = Value(value=str(e[2]), is_uri=True) + # Extract subject, predicate, object + s_value = str(e[0]) + p_value = str(e[1]) + + # Check if object is a URI or literal + if isinstance(e[2], rdflib.term.URIRef): + o_value = str(e[2]) + o_is_uri = True else: - o = Value(value=str(e[2]), is_uri=False) + o_value = str(e[2]) + o_is_uri = False - req = { - "metadata": { - "id": self.document_id, - "metadata": [], - "user": self.user, - "collection": self.collection - }, - "triples": [ - { - "s": s, - "p": p, - "o": o, - } - ] - } + # Create Triple object + # Note: The Triple dataclass has 's', 'p', 'o' fields as strings + # The API will handle the metadata wrapping + yield Triple(s=s_value, p=p_value, o=o_value) - await ws.send(json.dumps(req)) - - async def load_entity_contexts(self, file, ws): - """ - Load entity contexts by extracting entities from the RDF graph - and generating contextual descriptions based on their relationships. 
- """ + def load_entity_contexts_from_file(self, file) -> Iterator[Tuple[str, str]]: + """Generator that yields (entity, context) tuples from a Turtle file""" g = rdflib.Graph() g.parse(file, format="turtle") for s, p, o in g: - # If object is a URI, do nothing + # If object is a URI, skip (we only want literal contexts) if isinstance(o, rdflib.term.URIRef): continue - - # If object is a literal, create entity context for subject with literal as context + + # If object is a literal, create entity context for subject s_str = str(s) o_str = str(o) - - req = { - "metadata": { - "id": self.document_id, - "metadata": [], - "user": self.user, - "collection": self.collection - }, - "entities": [ - { - "entity": { - "v": s_str, - "e": True - }, - "context": o_str + + yield (s_str, o_str) + + def run(self): + """Load triples and entity contexts using Python API""" + + try: + # Create API client + api = Api(url=self.url, token=self.token) + bulk = api.bulk() + + # Load triples from all files + print("Loading triples...") + for file in self.files: + print(f" Processing {file}...") + triples = self.load_triples_from_file(file) + + bulk.import_triples( + flow=self.flow, + triples=triples, + metadata={ + "id": self.document_id, + "metadata": [], + "user": self.user, + "collection": self.collection } - ] - } + ) - await ws.send(json.dumps(req)) + print("Triples loaded.") + # Load entity contexts from all files + print("Loading entity contexts...") + for file in self.files: + print(f" Processing {file}...") + + # Convert tuples to the format expected by import_entity_contexts + def entity_context_generator(): + for entity, context in self.load_entity_contexts_from_file(file): + yield { + "entity": {"v": entity, "e": True}, + "context": context + } + + bulk.import_entity_contexts( + flow=self.flow, + entities=entity_context_generator(), + metadata={ + "id": self.document_id, + "metadata": [], + "user": self.user, + "collection": self.collection + } + ) + + print("Entity contexts 
loaded.") + + except Exception as e: + print(f"Error: {e}", flush=True) + raise def main(): @@ -142,6 +147,12 @@ def main(): help=f'API URL (default: {default_url})', ) + parser.add_argument( + '-t', '--token', + default=default_token, + help='Authentication token (default: $TRUSTGRAPH_TOKEN)', + ) + parser.add_argument( '-i', '--document-id', required=True, @@ -166,7 +177,6 @@ def main(): help=f'Collection ID (default: {default_collection})' ) - parser.add_argument( 'files', nargs='+', help=f'Turtle files to load' @@ -178,15 +188,16 @@ def main(): try: loader = KnowledgeLoader( - document_id = args.document_id, - url = args.api_url, - flow = args.flow_id, - files = args.files, - user = args.user, - collection = args.collection, + document_id=args.document_id, + url=args.api_url, + token=args.token, + flow=args.flow_id, + files=args.files, + user=args.user, + collection=args.collection, ) - asyncio.run(loader.run()) + loader.run() print("Triples and entity contexts loaded.") break @@ -199,4 +210,4 @@ def main(): time.sleep(10) if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/trustgraph-cli/trustgraph/cli/load_sample_documents.py b/trustgraph-cli/trustgraph/cli/load_sample_documents.py index fd6751be..186006a8 100644 --- a/trustgraph-cli/trustgraph/cli/load_sample_documents.py +++ b/trustgraph-cli/trustgraph/cli/load_sample_documents.py @@ -13,6 +13,7 @@ from trustgraph.api.types import hash, Uri, Literal, Triple default_url = os.getenv("TRUSTGRAPH_URL", 'http://localhost:8088/') default_user = 'trustgraph' +default_token = os.getenv("TRUSTGRAPH_TOKEN", None) from requests.adapters import HTTPAdapter @@ -655,10 +656,10 @@ documents = [ class Loader: def __init__( - self, url, user + self, url, user, token=None ): - self.api = Api(url).library() + self.api = Api(url, token=token).library() self.user = user def load(self, documents): @@ -719,6 +720,12 @@ def main(): help=f'User ID (default: {default_user})' ) + parser.add_argument( 
+ '-t', '--token', + default=default_token, + help='Authentication token (default: $TRUSTGRAPH_TOKEN)', + ) + args = parser.parse_args() try: @@ -726,6 +733,7 @@ def main(): p = Loader( url=args.url, user=args.user, + token=args.token, ) p.load(documents) diff --git a/trustgraph-cli/trustgraph/cli/load_structured_data.py b/trustgraph-cli/trustgraph/cli/load_structured_data.py index 9bb9f78c..bf112417 100644 --- a/trustgraph-cli/trustgraph/cli/load_structured_data.py +++ b/trustgraph-cli/trustgraph/cli/load_structured_data.py @@ -22,6 +22,7 @@ import logging logger = logging.getLogger(__name__) default_url = os.getenv("TRUSTGRAPH_URL", 'http://localhost:8088/') +default_token = os.getenv("TRUSTGRAPH_TOKEN", None) def load_structured_data( @@ -41,7 +42,8 @@ def load_structured_data( user: str = 'trustgraph', collection: str = 'default', dry_run: bool = False, - verbose: bool = False + verbose: bool = False, + token: str = None ): """ Load structured data using a descriptor configuration. @@ -133,9 +135,9 @@ def load_structured_data( # Get batch size from descriptor batch_size = descriptor.get('output', {}).get('options', {}).get('batch_size', 1000) - + # Send to TrustGraph using shared function - imported_count = _send_to_trustgraph(output_objects, api_url, flow, batch_size) + imported_count = _send_to_trustgraph(output_objects, api_url, flow, batch_size, token=token) # Summary format_info = descriptor.get('format', {}) @@ -288,10 +290,10 @@ def load_structured_data( # Get batch size from descriptor or use default batch_size = descriptor.get('output', {}).get('options', {}).get('batch_size', 1000) - + # Send to TrustGraph print(f"šŸš€ Importing {len(output_records)} records to TrustGraph...") - imported_count = _send_to_trustgraph(output_records, api_url, flow, batch_size) + imported_count = _send_to_trustgraph(output_records, api_url, flow, batch_size, token=token) # Get summary info from descriptor format_info = descriptor.get('format', {}) @@ -571,66 +573,30 @@ 
def _process_data_pipeline(input_file, descriptor_file, user, collection, sample return output_records, descriptor -def _send_to_trustgraph(objects, api_url, flow, batch_size=1000): - """Send ExtractedObject records to TrustGraph using WebSocket""" - import json - import asyncio - from websockets.asyncio.client import connect - +def _send_to_trustgraph(objects, api_url, flow, batch_size=1000, token=None): + """Send ExtractedObject records to TrustGraph using Python API""" + from trustgraph.api import Api + try: - # Construct objects import URL similar to load_knowledge pattern - if not api_url.endswith("/"): - api_url += "/" - - # Convert HTTP URL to WebSocket URL if needed - ws_url = api_url.replace("http://", "ws://").replace("https://", "wss://") - objects_url = ws_url + f"api/v1/flow/{flow}/import/objects" - - logger.info(f"Connecting to objects import endpoint: {objects_url}") - - async def import_objects(): - async with connect(objects_url) as ws: - imported_count = 0 - - for record in objects: - try: - # Send individual ExtractedObject records - await ws.send(json.dumps(record)) - imported_count += 1 - - if imported_count % 100 == 0: - logger.debug(f"Imported {imported_count}/{len(objects)} records...") - - except Exception as e: - logger.error(f"Failed to send record {imported_count + 1}: {e}") - print(f"āŒ Failed to send record {imported_count + 1}: {e}") - - logger.info(f"Successfully imported {imported_count} records to TrustGraph") - return imported_count - - # Run the async import - imported_count = asyncio.run(import_objects()) - - # Summary total_records = len(objects) - failed_count = total_records - imported_count - + logger.info(f"Importing {total_records} records to TrustGraph...") + + # Use Python API bulk import + api = Api(api_url, token=token) + bulk = api.bulk() + + bulk.import_objects(flow=flow, objects=iter(objects)) + + logger.info(f"Successfully imported {total_records} records to TrustGraph") + + # Summary print(f"\nšŸ“Š Import 
Summary:") print(f"- Total records: {total_records}") - print(f"- Successfully imported: {imported_count}") - print(f"- Failed: {failed_count}") - - if failed_count > 0: - print(f"āš ļø {failed_count} records failed to import. Check logs for details.") - else: - print("āœ… All records imported successfully!") - - return imported_count - - except ImportError as e: - logger.error(f"Failed to import required modules: {e}") - print(f"Error: Required modules not available - {e}") - raise + print(f"- Successfully imported: {total_records}") + print("āœ… All records imported successfully!") + + return total_records + except Exception as e: logger.error(f"Failed to import data to TrustGraph: {e}") print(f"Import failed: {e}") @@ -1024,7 +990,13 @@ For more information on the descriptor format, see: '--error-file', help='Path to write error records (optional)' ) - + + parser.add_argument( + '-t', '--token', + default=default_token, + help='Authentication token (default: $TRUSTGRAPH_TOKEN)', + ) + args = parser.parse_args() # Input validation @@ -1077,7 +1049,8 @@ For more information on the descriptor format, see: user=args.user, collection=args.collection, dry_run=args.dry_run, - verbose=args.verbose + verbose=args.verbose, + token=args.token ) except FileNotFoundError as e: print(f"Error: File not found - {e}", file=sys.stderr) diff --git a/trustgraph-cli/trustgraph/cli/load_turtle.py b/trustgraph-cli/trustgraph/cli/load_turtle.py index c357c5d9..adb578f5 100644 --- a/trustgraph-cli/trustgraph/cli/load_turtle.py +++ b/trustgraph-cli/trustgraph/cli/load_turtle.py @@ -1,18 +1,18 @@ """ -Loads triples into the knowledge graph. +Loads triples into the knowledge graph from Turtle files. 
""" -import asyncio import argparse import os import time import rdflib -import json -from websockets.asyncio.client import connect +from typing import Iterator +from trustgraph.api import Api, Triple from trustgraph.log_level import LogLevel -default_url = os.getenv("TRUSTGRAPH_URL", 'ws://localhost:8088/') +default_url = os.getenv("TRUSTGRAPH_URL", 'http://localhost:8088/') +default_token = os.getenv("TRUSTGRAPH_TOKEN", None) default_user = 'trustgraph' default_collection = 'default' @@ -25,67 +25,67 @@ class Loader: user, collection, document_id, - url = default_url, + url=default_url, + token=None, ): - - if not url.endswith("/"): - url += "/" - - url = url + f"api/v1/flow/{flow}/import/triples" - - self.url = url - self.files = files + self.flow = flow self.user = user self.collection = collection self.document_id = document_id + self.url = url + self.token = token - async def run(self): - - try: - - async with connect(self.url) as ws: - for file in self.files: - await self.load_file(file, ws) - - except Exception as e: - print(e, flush=True) - - async def load_file(self, file, ws): + def load_triples_from_file(self, file) -> Iterator[Triple]: + """Generator that yields Triple objects from a Turtle file""" g = rdflib.Graph() g.parse(file, format="turtle") - def Value(value, is_uri): - return { "v": value, "e": is_uri } - - triples = [] - for e in g: - s = Value(value=str(e[0]), is_uri=True) - p = Value(value=str(e[1]), is_uri=True) - if type(e[2]) == rdflib.term.URIRef: - o = Value(value=str(e[2]), is_uri=True) + # Extract subject, predicate, object + s_value = str(e[0]) + p_value = str(e[1]) + + # Check if object is a URI or literal + if isinstance(e[2], rdflib.term.URIRef): + o_value = str(e[2]) else: - o = Value(value=str(e[2]), is_uri=False) + o_value = str(e[2]) - req = { - "metadata": { - "id": self.document_id, - "metadata": [], - "user": self.user, - "collection": self.collection - }, - "triples": [ - { - "s": s, - "p": p, - "o": o, + # Create Triple 
object + yield Triple(s=s_value, p=p_value, o=o_value) + + def run(self): + """Load triples using Python API""" + + try: + # Create API client + api = Api(url=self.url, token=self.token) + bulk = api.bulk() + + # Load triples from all files + print("Loading triples...") + for file in self.files: + print(f" Processing {file}...") + triples = self.load_triples_from_file(file) + + bulk.import_triples( + flow=self.flow, + triples=triples, + metadata={ + "id": self.document_id, + "metadata": [], + "user": self.user, + "collection": self.collection } - ] - } + ) - await ws.send(json.dumps(req)) + print("Triples loaded.") + + except Exception as e: + print(f"Error: {e}", flush=True) + raise def main(): @@ -100,6 +100,12 @@ def main(): help=f'API URL (default: {default_url})', ) + parser.add_argument( + '-t', '--token', + default=default_token, + help='Authentication token (default: $TRUSTGRAPH_TOKEN)', + ) + parser.add_argument( '-i', '--document-id', required=True, @@ -134,16 +140,17 @@ def main(): while True: try: - p = Loader( - document_id = args.document_id, - url = args.api_url, - flow = args.flow_id, - files = args.files, - user = args.user, - collection = args.collection, + loader = Loader( + document_id=args.document_id, + url=args.api_url, + token=args.token, + flow=args.flow_id, + files=args.files, + user=args.user, + collection=args.collection, ) - asyncio.run(p.run()) + loader.run() print("File loaded.") break @@ -156,4 +163,4 @@ def main(): time.sleep(10) if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/trustgraph-cli/trustgraph/cli/put_config_item.py b/trustgraph-cli/trustgraph/cli/put_config_item.py index d48e29a7..d79864a4 100644 --- a/trustgraph-cli/trustgraph/cli/put_config_item.py +++ b/trustgraph-cli/trustgraph/cli/put_config_item.py @@ -9,10 +9,11 @@ from trustgraph.api import Api from trustgraph.api.types import ConfigValue default_url = os.getenv("TRUSTGRAPH_URL", 'http://localhost:8088/') +default_token = 
os.getenv("TRUSTGRAPH_TOKEN", None) -def put_config_item(url, config_type, key, value): +def put_config_item(url, config_type, key, value, token=None): - api = Api(url).config() + api = Api(url, token=token).config() config_value = ConfigValue(type=config_type, key=key, value=value) api.put([config_value]) @@ -56,6 +57,12 @@ def main(): help=f'API URL (default: {default_url})', ) + parser.add_argument( + '-t', '--token', + default=default_token, + help='Authentication token (default: $TRUSTGRAPH_TOKEN)', + ) + args = parser.parse_args() try: @@ -70,6 +77,7 @@ def main(): config_type=args.type, key=args.key, value=value, + token=args.token, ) except Exception as e: diff --git a/trustgraph-cli/trustgraph/cli/put_flow_class.py b/trustgraph-cli/trustgraph/cli/put_flow_class.py index 5b4bc44b..6a88421d 100644 --- a/trustgraph-cli/trustgraph/cli/put_flow_class.py +++ b/trustgraph-cli/trustgraph/cli/put_flow_class.py @@ -9,10 +9,11 @@ from trustgraph.api import Api import json default_url = os.getenv("TRUSTGRAPH_URL", 'http://localhost:8088/') +default_token = os.getenv("TRUSTGRAPH_TOKEN", None) -def put_flow_class(url, class_name, config): +def put_flow_class(url, class_name, config, token=None): - api = Api(url) + api = Api(url, token=token) class_names = api.flow().put_class(class_name, config) @@ -29,6 +30,12 @@ def main(): help=f'API URL (default: {default_url})', ) + parser.add_argument( + '-t', '--token', + default=default_token, + help='Authentication token (default: $TRUSTGRAPH_TOKEN)', + ) + parser.add_argument( '-n', '--class-name', help=f'Flow class name', @@ -47,6 +54,7 @@ def main(): url=args.api_url, class_name=args.class_name, config=json.loads(args.config), + token=args.token, ) except Exception as e: diff --git a/trustgraph-cli/trustgraph/cli/put_kg_core.py b/trustgraph-cli/trustgraph/cli/put_kg_core.py index 6374e2f6..cd0738fe 100644 --- a/trustgraph-cli/trustgraph/cli/put_kg_core.py +++ b/trustgraph-cli/trustgraph/cli/put_kg_core.py @@ -13,6 +13,7 @@ 
import msgpack default_url = os.getenv("TRUSTGRAPH_URL", 'ws://localhost:8088/') default_user = 'trustgraph' +default_token = os.getenv("TRUSTGRAPH_TOKEN", None) def read_message(unpacked, id, user): @@ -47,13 +48,16 @@ def read_message(unpacked, id, user): else: raise RuntimeError("Unpacked unexpected messsage type", unpacked[0]) -async def put(url, user, id, input): +async def put(url, user, id, input, token=None): if not url.endswith("/"): url += "/" url = url + "api/v1/socket" + if token: + url = f"{url}?token={token}" + async with connect(url) as ws: @@ -160,6 +164,12 @@ def main(): help=f'Input file' ) + parser.add_argument( + '-t', '--token', + default=default_token, + help='Authentication token (default: $TRUSTGRAPH_TOKEN)', + ) + args = parser.parse_args() try: @@ -170,6 +180,7 @@ def main(): user = args.user, id = args.id, input = args.input, + token = args.token, ) ) diff --git a/trustgraph-cli/trustgraph/cli/remove_library_document.py b/trustgraph-cli/trustgraph/cli/remove_library_document.py index f6e6813c..07a1fd59 100644 --- a/trustgraph-cli/trustgraph/cli/remove_library_document.py +++ b/trustgraph-cli/trustgraph/cli/remove_library_document.py @@ -10,11 +10,12 @@ from trustgraph.api import Api default_url = os.getenv("TRUSTGRAPH_URL", 'http://localhost:8088/') default_user = 'trustgraph' +default_token = os.getenv("TRUSTGRAPH_TOKEN", None) -def remove_doc(url, user, id): +def remove_doc(url, user, id, token=None): - api = Api(url).library() + api = Api(url, token=token).library() api.remove_document(user=user, id=id) @@ -43,11 +44,17 @@ def main(): help=f'Document ID' ) + parser.add_argument( + '-t', '--token', + default=default_token, + help='Authentication token (default: $TRUSTGRAPH_TOKEN)', + ) + args = parser.parse_args() try: - remove_doc(args.url, args.user, args.identifier) + remove_doc(args.url, args.user, args.identifier, token=args.token) except Exception as e: diff --git a/trustgraph-cli/trustgraph/cli/set_collection.py 
b/trustgraph-cli/trustgraph/cli/set_collection.py index e987c4c8..1a39eb08 100644 --- a/trustgraph-cli/trustgraph/cli/set_collection.py +++ b/trustgraph-cli/trustgraph/cli/set_collection.py @@ -9,10 +9,11 @@ from trustgraph.api import Api default_url = os.getenv("TRUSTGRAPH_URL", 'http://localhost:8088/') default_user = "trustgraph" +default_token = os.getenv("TRUSTGRAPH_TOKEN", None) -def set_collection(url, user, collection, name, description, tags): +def set_collection(url, user, collection, name, description, tags, token=None): - api = Api(url).collection() + api = Api(url, token=token).collection() result = api.update_collection( user=user, @@ -82,6 +83,12 @@ def main(): help='Collection tags (can be specified multiple times)' ) + parser.add_argument( + '--token', + default=default_token, + help='Authentication token (default: $TRUSTGRAPH_TOKEN)', + ) + args = parser.parse_args() try: @@ -92,7 +99,8 @@ def main(): collection = args.collection, name = args.name, description = args.description, - tags = args.tags + tags = args.tags, + token = args.token ) except Exception as e: diff --git a/trustgraph-cli/trustgraph/cli/set_mcp_tool.py b/trustgraph-cli/trustgraph/cli/set_mcp_tool.py index 05e3823c..7976adbc 100644 --- a/trustgraph-cli/trustgraph/cli/set_mcp_tool.py +++ b/trustgraph-cli/trustgraph/cli/set_mcp_tool.py @@ -20,6 +20,7 @@ import textwrap import json default_url = os.getenv("TRUSTGRAPH_URL", 'http://localhost:8088/') +default_token = os.getenv("TRUSTGRAPH_TOKEN", None) def set_mcp_tool( url : str, @@ -27,9 +28,10 @@ def set_mcp_tool( remote_name : str, tool_url : str, auth_token : str = None, + token : str = None, ): - api = Api(url).config() + api = Api(url, token=token).config() # Build the MCP tool configuration config = { @@ -72,6 +74,12 @@ def main(): help=f'API URL (default: {default_url})', ) + parser.add_argument( + '-t', '--token', + default=default_token, + help='Authentication token (default: $TRUSTGRAPH_TOKEN)', + ) + parser.add_argument( 
'-i', '--id', required=True, @@ -116,7 +124,8 @@ def main(): id=args.id, remote_name=remote_name, tool_url=args.tool_url, - auth_token=args.auth_token + auth_token=args.auth_token, + token=args.token, ) except Exception as e: diff --git a/trustgraph-cli/trustgraph/cli/set_prompt.py b/trustgraph-cli/trustgraph/cli/set_prompt.py index f287a9cc..bffc2cf2 100644 --- a/trustgraph-cli/trustgraph/cli/set_prompt.py +++ b/trustgraph-cli/trustgraph/cli/set_prompt.py @@ -10,10 +10,11 @@ import tabulate import textwrap default_url = os.getenv("TRUSTGRAPH_URL", 'http://localhost:8088/') +default_token = os.getenv("TRUSTGRAPH_TOKEN", None) -def set_system(url, system): +def set_system(url, system, token=None): - api = Api(url).config() + api = Api(url, token=token).config() api.put([ ConfigValue(type="prompt", key="system", value=json.dumps(system)) @@ -21,9 +22,9 @@ def set_system(url, system): print("System prompt set.") -def set_prompt(url, id, prompt, response, schema): +def set_prompt(url, id, prompt, response, schema, token=None): - api = Api(url).config() + api = Api(url, token=token).config() values = api.get([ ConfigKey(type="prompt", key="template-index") @@ -71,6 +72,12 @@ def main(): help=f'API URL (default: {default_url})', ) + parser.add_argument( + '-t', '--token', + default=default_token, + help='Authentication token (default: $TRUSTGRAPH_TOKEN)', + ) + parser.add_argument( '--id', help=f'Prompt ID', @@ -103,9 +110,9 @@ def main(): if args.system: if args.id or args.prompt or args.schema or args.response: raise RuntimeError("Can't use --system with other args") - + set_system( - url=args.api_url, system=args.system + url=args.api_url, system=args.system, token=args.token ) else: @@ -130,7 +137,7 @@ def main(): set_prompt( url=args.api_url, id=args.id, prompt=args.prompt, - response=args.response, schema=schobj + response=args.response, schema=schobj, token=args.token ) except Exception as e: diff --git a/trustgraph-cli/trustgraph/cli/set_token_costs.py 
b/trustgraph-cli/trustgraph/cli/set_token_costs.py index 87a4e264..19b8c703 100644 --- a/trustgraph-cli/trustgraph/cli/set_token_costs.py +++ b/trustgraph-cli/trustgraph/cli/set_token_costs.py @@ -10,10 +10,11 @@ import tabulate import textwrap default_url = os.getenv("TRUSTGRAPH_URL", 'http://localhost:8088/') +default_token = os.getenv("TRUSTGRAPH_TOKEN", None) -def set_costs(api_url, model, input_costs, output_costs): +def set_costs(api_url, model, input_costs, output_costs, token=None): - api = Api(api_url).config() + api = Api(api_url, token=token).config() api.put([ ConfigValue( @@ -95,6 +96,12 @@ def main(): help=f'Input costs in $ per 1M tokens', ) + parser.add_argument( + '-t', '--token', + default=default_token, + help='Authentication token (default: $TRUSTGRAPH_TOKEN)', + ) + args = parser.parse_args() try: diff --git a/trustgraph-cli/trustgraph/cli/set_tool.py b/trustgraph-cli/trustgraph/cli/set_tool.py index 2174c79b..36701a8e 100644 --- a/trustgraph-cli/trustgraph/cli/set_tool.py +++ b/trustgraph-cli/trustgraph/cli/set_tool.py @@ -26,6 +26,7 @@ import textwrap import dataclasses default_url = os.getenv("TRUSTGRAPH_URL", 'http://localhost:8088/') +default_token = os.getenv("TRUSTGRAPH_TOKEN", None) @dataclasses.dataclass class Argument: @@ -67,9 +68,10 @@ def set_tool( group : List[str], state : str, applicable_states : List[str], + token : str = None, ): - api = Api(url).config() + api = Api(url, token=token).config() values = api.get([ ConfigKey(type="agent", key="tool-index") @@ -156,6 +158,12 @@ def main(): help=f'API URL (default: {default_url})', ) + parser.add_argument( + '-t', '--token', + default=default_token, + help='Authentication token (default: $TRUSTGRAPH_TOKEN)', + ) + parser.add_argument( '--id', help=f'Unique tool identifier', @@ -257,6 +265,7 @@ def main(): group=args.group, state=args.state, applicable_states=args.applicable_states, + token=args.token, ) except Exception as e: diff --git 
a/trustgraph-cli/trustgraph/cli/show_config.py b/trustgraph-cli/trustgraph/cli/show_config.py index 03b2636a..6f426533 100644 --- a/trustgraph-cli/trustgraph/cli/show_config.py +++ b/trustgraph-cli/trustgraph/cli/show_config.py @@ -8,10 +8,11 @@ from trustgraph.api import Api import json default_url = os.getenv("TRUSTGRAPH_URL", 'http://localhost:8088/') +default_token = os.getenv("TRUSTGRAPH_TOKEN", None) -def show_config(url): +def show_config(url, token=None): - api = Api(url).config() + api = Api(url, token=token).config() config, version = api.all() @@ -31,12 +32,19 @@ def main(): help=f'API URL (default: {default_url})', ) + parser.add_argument( + '-t', '--token', + default=default_token, + help='Authentication token (default: $TRUSTGRAPH_TOKEN)', + ) + args = parser.parse_args() try: show_config( url=args.api_url, + token=args.token, ) except Exception as e: diff --git a/trustgraph-cli/trustgraph/cli/show_flow_classes.py b/trustgraph-cli/trustgraph/cli/show_flow_classes.py index d9ce96a7..123f5380 100644 --- a/trustgraph-cli/trustgraph/cli/show_flow_classes.py +++ b/trustgraph-cli/trustgraph/cli/show_flow_classes.py @@ -9,6 +9,7 @@ from trustgraph.api import Api, ConfigKey import json default_url = os.getenv("TRUSTGRAPH_URL", 'http://localhost:8088/') +default_token = os.getenv("TRUSTGRAPH_TOKEN", None) def format_parameters(params_metadata, config_api): """ @@ -57,9 +58,9 @@ def format_parameters(params_metadata, config_api): return "\n".join(param_list) -def show_flow_classes(url): +def show_flow_classes(url, token=None): - api = Api(url) + api = Api(url, token=token) flow_api = api.flow() config_api = api.config() @@ -106,12 +107,19 @@ def main(): help=f'API URL (default: {default_url})', ) + parser.add_argument( + '-t', '--token', + default=default_token, + help='Authentication token (default: $TRUSTGRAPH_TOKEN)', + ) + args = parser.parse_args() try: show_flow_classes( url=args.api_url, + token=args.token, ) except Exception as e: diff --git 
a/trustgraph-cli/trustgraph/cli/show_flow_state.py b/trustgraph-cli/trustgraph/cli/show_flow_state.py index ca6d2b1d..6ca4df8f 100644 --- a/trustgraph-cli/trustgraph/cli/show_flow_state.py +++ b/trustgraph-cli/trustgraph/cli/show_flow_state.py @@ -9,10 +9,11 @@ import os default_metrics_url = "http://localhost:8088/api/metrics" default_url = os.getenv("TRUSTGRAPH_URL", 'http://localhost:8088/') +default_token = os.getenv("TRUSTGRAPH_TOKEN", None) -def dump_status(metrics_url, api_url, flow_id): +def dump_status(metrics_url, api_url, flow_id, token=None): - api = Api(api_url).flow() + api = Api(api_url, token=token).flow() flow = api.get(flow_id) class_name = flow["class-name"] @@ -77,11 +78,17 @@ def main(): help=f'Metrics URL (default: {default_metrics_url})', ) + parser.add_argument( + '-t', '--token', + default=default_token, + help='Authentication token (default: $TRUSTGRAPH_TOKEN)', + ) + args = parser.parse_args() try: - dump_status(args.metrics_url, args.api_url, args.flow_id) + dump_status(args.metrics_url, args.api_url, args.flow_id, token=args.token) except Exception as e: diff --git a/trustgraph-cli/trustgraph/cli/show_flows.py b/trustgraph-cli/trustgraph/cli/show_flows.py index 18c1234e..b383ff56 100644 --- a/trustgraph-cli/trustgraph/cli/show_flows.py +++ b/trustgraph-cli/trustgraph/cli/show_flows.py @@ -9,6 +9,7 @@ from trustgraph.api import Api, ConfigKey import json default_url = os.getenv("TRUSTGRAPH_URL", 'http://localhost:8088/') +default_token = os.getenv("TRUSTGRAPH_TOKEN", None) def get_interface(config_api, i): @@ -128,9 +129,9 @@ def format_parameters(flow_params, class_params_metadata, config_api): return "\n".join(param_list) if param_list else "None" -def show_flows(url): +def show_flows(url, token=None): - api = Api(url) + api = Api(url, token=token) config_api = api.config() flow_api = api.flow() @@ -199,12 +200,19 @@ def main(): help=f'API URL (default: {default_url})', ) + parser.add_argument( + '-t', '--token', + 
default=default_token, + help='Authentication token (default: $TRUSTGRAPH_TOKEN)', + ) + args = parser.parse_args() try: show_flows( url=args.api_url, + token=args.token, ) except Exception as e: diff --git a/trustgraph-cli/trustgraph/cli/show_graph.py b/trustgraph-cli/trustgraph/cli/show_graph.py index 232ebb34..b5b15e3c 100644 --- a/trustgraph-cli/trustgraph/cli/show_graph.py +++ b/trustgraph-cli/trustgraph/cli/show_graph.py @@ -9,10 +9,11 @@ from trustgraph.api import Api default_url = os.getenv("TRUSTGRAPH_URL", 'http://localhost:8088/') default_user = 'trustgraph' default_collection = 'default' +default_token = os.getenv("TRUSTGRAPH_TOKEN", None) -def show_graph(url, flow_id, user, collection): +def show_graph(url, flow_id, user, collection, token=None): - api = Api(url).flow().id(flow_id) + api = Api(url, token=token).flow().id(flow_id) rows = api.triples_query( user=user, collection=collection, @@ -53,6 +54,12 @@ def main(): help=f'Collection ID (default: {default_collection})' ) + parser.add_argument( + '-t', '--token', + default=default_token, + help='Authentication token (default: $TRUSTGRAPH_TOKEN)', + ) + args = parser.parse_args() try: @@ -62,6 +69,7 @@ def main(): flow_id = args.flow_id, user = args.user, collection = args.collection, + token = args.token, ) except Exception as e: diff --git a/trustgraph-cli/trustgraph/cli/show_kg_cores.py b/trustgraph-cli/trustgraph/cli/show_kg_cores.py index e3cf9eb4..ea295543 100644 --- a/trustgraph-cli/trustgraph/cli/show_kg_cores.py +++ b/trustgraph-cli/trustgraph/cli/show_kg_cores.py @@ -9,10 +9,11 @@ from trustgraph.api import Api, ConfigKey import json default_url = os.getenv("TRUSTGRAPH_URL", 'http://localhost:8088/') +default_token = os.getenv("TRUSTGRAPH_TOKEN", None) -def show_cores(url, user): +def show_cores(url, user, token=None): - api = Api(url).knowledge() + api = Api(url, token=token).knowledge() ids = api.list_kg_cores() @@ -35,6 +36,12 @@ def main(): help=f'API URL (default: {default_url})', ) + 
parser.add_argument( + '-t', '--token', + default=default_token, + help='Authentication token (default: $TRUSTGRAPH_TOKEN)', + ) + parser.add_argument( '-U', '--user', default="trustgraph", @@ -46,7 +53,9 @@ def main(): try: show_cores( - url=args.api_url, user=args.user + url=args.api_url, + user=args.user, + token=args.token, ) except Exception as e: diff --git a/trustgraph-cli/trustgraph/cli/show_library_documents.py b/trustgraph-cli/trustgraph/cli/show_library_documents.py index b086238d..6eeceb70 100644 --- a/trustgraph-cli/trustgraph/cli/show_library_documents.py +++ b/trustgraph-cli/trustgraph/cli/show_library_documents.py @@ -9,11 +9,12 @@ from trustgraph.api import Api, ConfigKey import json default_url = os.getenv("TRUSTGRAPH_URL", 'http://localhost:8088/') +default_token = os.getenv("TRUSTGRAPH_TOKEN", None) default_user = "trustgraph" -def show_docs(url, user): +def show_docs(url, user, token=None): - api = Api(url).library() + api = Api(url, token=token).library() docs = api.get_documents(user=user) @@ -52,6 +53,12 @@ def main(): help=f'API URL (default: {default_url})', ) + parser.add_argument( + '-t', '--token', + default=default_token, + help='Authentication token (default: $TRUSTGRAPH_TOKEN)', + ) + parser.add_argument( '-U', '--user', default=default_user, @@ -63,7 +70,9 @@ def main(): try: show_docs( - url = args.api_url, user = args.user + url = args.api_url, + user = args.user, + token = args.token, ) except Exception as e: diff --git a/trustgraph-cli/trustgraph/cli/show_library_processing.py b/trustgraph-cli/trustgraph/cli/show_library_processing.py index 51dbe865..9ab69355 100644 --- a/trustgraph-cli/trustgraph/cli/show_library_processing.py +++ b/trustgraph-cli/trustgraph/cli/show_library_processing.py @@ -9,10 +9,11 @@ import json default_url = os.getenv("TRUSTGRAPH_URL", 'http://localhost:8088/') default_user = "trustgraph" +default_token = os.getenv("TRUSTGRAPH_TOKEN", None) -def show_procs(url, user): +def show_procs(url, user, 
token=None): - api = Api(url).library() + api = Api(url, token=token).library() procs = api.get_processings(user = user) @@ -57,12 +58,18 @@ def main(): help=f'User ID (default: {default_user})' ) + parser.add_argument( + '-t', '--token', + default=default_token, + help='Authentication token (default: $TRUSTGRAPH_TOKEN)', + ) + args = parser.parse_args() try: show_procs( - url = args.api_url, user = args.user + url = args.api_url, user = args.user, token = args.token ) except Exception as e: diff --git a/trustgraph-cli/trustgraph/cli/show_mcp_tools.py b/trustgraph-cli/trustgraph/cli/show_mcp_tools.py index da0154ed..24cbfcfe 100644 --- a/trustgraph-cli/trustgraph/cli/show_mcp_tools.py +++ b/trustgraph-cli/trustgraph/cli/show_mcp_tools.py @@ -10,10 +10,11 @@ import tabulate import textwrap default_url = os.getenv("TRUSTGRAPH_URL", 'http://localhost:8088/') +default_token = os.getenv("TRUSTGRAPH_TOKEN", None) -def show_config(url): +def show_config(url, token=None): - api = Api(url).config() + api = Api(url, token=token).config() values = api.get_values(type="mcp") @@ -57,12 +58,19 @@ def main(): help=f'API URL (default: {default_url})', ) + parser.add_argument( + '-t', '--token', + default=default_token, + help='Authentication token (default: $TRUSTGRAPH_TOKEN)', + ) + args = parser.parse_args() try: show_config( url=args.api_url, + token=args.token, ) except Exception as e: diff --git a/trustgraph-cli/trustgraph/cli/show_parameter_types.py b/trustgraph-cli/trustgraph/cli/show_parameter_types.py index 606c5016..e5b842b5 100644 --- a/trustgraph-cli/trustgraph/cli/show_parameter_types.py +++ b/trustgraph-cli/trustgraph/cli/show_parameter_types.py @@ -13,6 +13,7 @@ from trustgraph.api import Api, ConfigKey import json default_url = os.getenv("TRUSTGRAPH_URL", 'http://localhost:8088/') +default_token = os.getenv("TRUSTGRAPH_TOKEN", None) def format_enum_values(enum_list): """ @@ -75,11 +76,11 @@ def format_constraints(param_type_def): return ", ".join(constraints) if 
constraints else "None" -def show_parameter_types(url): +def show_parameter_types(url, token=None): """ Show all parameter type definitions """ - api = Api(url) + api = Api(url, token=token) config_api = api.config() # Get list of all parameter types @@ -145,6 +146,12 @@ def main(): help=f'API URL (default: {default_url})', ) + parser.add_argument( + '--token', + default=default_token, + help='Authentication token (default: $TRUSTGRAPH_TOKEN)', + ) + parser.add_argument( '-t', '--type', help='Show only the specified parameter type', @@ -155,19 +162,19 @@ def main(): try: if args.type: # Show specific parameter type - show_specific_parameter_type(args.api_url, args.type) + show_specific_parameter_type(args.api_url, args.type, args.token) else: # Show all parameter types - show_parameter_types(args.api_url) + show_parameter_types(args.api_url, args.token) except Exception as e: print("Exception:", e, flush=True) -def show_specific_parameter_type(url, param_type_name): +def show_specific_parameter_type(url, param_type_name, token=None): """ Show a specific parameter type definition """ - api = Api(url) + api = Api(url, token=token) config_api = api.config() try: diff --git a/trustgraph-cli/trustgraph/cli/show_prompts.py b/trustgraph-cli/trustgraph/cli/show_prompts.py index 4c2ca4d7..0e1cb2ae 100644 --- a/trustgraph-cli/trustgraph/cli/show_prompts.py +++ b/trustgraph-cli/trustgraph/cli/show_prompts.py @@ -10,10 +10,11 @@ import tabulate import textwrap default_url = os.getenv("TRUSTGRAPH_URL", 'http://localhost:8088/') +default_token = os.getenv("TRUSTGRAPH_TOKEN", None) -def show_config(url): +def show_config(url, token=None): - api = Api(url).config() + api = Api(url, token=token).config() values = api.get([ ConfigKey(type="prompt", key="system"), @@ -78,12 +79,19 @@ def main(): help=f'API URL (default: {default_url})', ) + parser.add_argument( + '-t', '--token', + default=default_token, + help='Authentication token (default: $TRUSTGRAPH_TOKEN)', + ) + args = 
parser.parse_args() try: show_config( url=args.api_url, + token=args.token, ) except Exception as e: diff --git a/trustgraph-cli/trustgraph/cli/show_token_costs.py b/trustgraph-cli/trustgraph/cli/show_token_costs.py index 2f889eef..9e7c352a 100644 --- a/trustgraph-cli/trustgraph/cli/show_token_costs.py +++ b/trustgraph-cli/trustgraph/cli/show_token_costs.py @@ -12,10 +12,11 @@ import textwrap tabulate.PRESERVE_WHITESPACE = True default_url = os.getenv("TRUSTGRAPH_URL", 'http://localhost:8088/') +default_token = os.getenv("TRUSTGRAPH_TOKEN", None) -def show_config(url): +def show_config(url, token=None): - api = Api(url).config() + api = Api(url, token=token).config() models = api.list("token-costs") @@ -61,12 +62,19 @@ def main(): help=f'API URL (default: {default_url})', ) + parser.add_argument( + '-t', '--token', + default=default_token, + help='Authentication token (default: $TRUSTGRAPH_TOKEN)', + ) + args = parser.parse_args() try: show_config( url=args.api_url, + token=args.token, ) except Exception as e: diff --git a/trustgraph-cli/trustgraph/cli/show_tools.py b/trustgraph-cli/trustgraph/cli/show_tools.py index ce79fffc..b8c9a012 100644 --- a/trustgraph-cli/trustgraph/cli/show_tools.py +++ b/trustgraph-cli/trustgraph/cli/show_tools.py @@ -17,10 +17,11 @@ import tabulate import textwrap default_url = os.getenv("TRUSTGRAPH_URL", 'http://localhost:8088/') +default_token = os.getenv("TRUSTGRAPH_TOKEN", None) -def show_config(url): +def show_config(url, token=None): - api = Api(url).config() + api = Api(url, token=token).config() values = api.get_values(type="tool") @@ -100,12 +101,19 @@ def main(): help=f'API URL (default: {default_url})', ) + parser.add_argument( + '-t', '--token', + default=default_token, + help='Authentication token (default: $TRUSTGRAPH_TOKEN)', + ) + args = parser.parse_args() try: show_config( url=args.api_url, + token=args.token, ) except Exception as e: diff --git a/trustgraph-cli/trustgraph/cli/start_flow.py 
b/trustgraph-cli/trustgraph/cli/start_flow.py index fa9ce6a8..4f9954b0 100644 --- a/trustgraph-cli/trustgraph/cli/start_flow.py +++ b/trustgraph-cli/trustgraph/cli/start_flow.py @@ -17,10 +17,11 @@ from trustgraph.api import Api import json default_url = os.getenv("TRUSTGRAPH_URL", 'http://localhost:8088/') +default_token = os.getenv("TRUSTGRAPH_TOKEN", None) -def start_flow(url, class_name, flow_id, description, parameters=None): +def start_flow(url, class_name, flow_id, description, parameters=None, token=None): - api = Api(url).flow() + api = Api(url, token=token).flow() api.start( class_name = class_name, @@ -42,6 +43,12 @@ def main(): help=f'API URL (default: {default_url})', ) + parser.add_argument( + '-t', '--token', + default=default_token, + help='Authentication token (default: $TRUSTGRAPH_TOKEN)', + ) + parser.add_argument( '-n', '--class-name', required=True, @@ -112,6 +119,7 @@ def main(): flow_id = args.flow_id, description = args.description, parameters = parameters, + token = args.token, ) except Exception as e: diff --git a/trustgraph-cli/trustgraph/cli/start_library_processing.py b/trustgraph-cli/trustgraph/cli/start_library_processing.py index 3619628c..ff87ea9f 100644 --- a/trustgraph-cli/trustgraph/cli/start_library_processing.py +++ b/trustgraph-cli/trustgraph/cli/start_library_processing.py @@ -9,13 +9,14 @@ from trustgraph.api import Api, ConfigKey import json default_url = os.getenv("TRUSTGRAPH_URL", 'http://localhost:8088/') +default_token = os.getenv("TRUSTGRAPH_TOKEN", None) default_user = "trustgraph" def start_processing( - url, user, document_id, id, flow, collection, tags + url, user, document_id, id, flow, collection, tags, token=None ): - api = Api(url).library() + api = Api(url, token=token).library() if tags: tags = tags.split(",") @@ -44,6 +45,12 @@ def main(): help=f'API URL (default: {default_url})', ) + parser.add_argument( + '-t', '--token', + default=default_token, + help='Authentication token (default: $TRUSTGRAPH_TOKEN)', 
+ ) + parser.add_argument( '-U', '--user', default=default_user, @@ -90,7 +97,8 @@ def main(): id = args.id, flow = args.flow_id, collection = args.collection, - tags = args.tags + tags = args.tags, + token = args.token, ) except Exception as e: diff --git a/trustgraph-cli/trustgraph/cli/stop_flow.py b/trustgraph-cli/trustgraph/cli/stop_flow.py index a5107579..ae3a0415 100644 --- a/trustgraph-cli/trustgraph/cli/stop_flow.py +++ b/trustgraph-cli/trustgraph/cli/stop_flow.py @@ -9,10 +9,11 @@ from trustgraph.api import Api import json default_url = os.getenv("TRUSTGRAPH_URL", 'http://localhost:8088/') +default_token = os.getenv("TRUSTGRAPH_TOKEN", None) -def stop_flow(url, flow_id): +def stop_flow(url, flow_id, token=None): - api = Api(url).flow() + api = Api(url, token=token).flow() api.stop(id = flow_id) @@ -29,6 +30,12 @@ def main(): help=f'API URL (default: {default_url})', ) + parser.add_argument( + '-t', '--token', + default=default_token, + help='Authentication token (default: $TRUSTGRAPH_TOKEN)', + ) + parser.add_argument( '-i', '--flow-id', required=True, @@ -42,6 +49,7 @@ def main(): stop_flow( url=args.api_url, flow_id=args.flow_id, + token=args.token, ) except Exception as e: diff --git a/trustgraph-cli/trustgraph/cli/stop_library_processing.py b/trustgraph-cli/trustgraph/cli/stop_library_processing.py index 638ab71c..3d8a2c56 100644 --- a/trustgraph-cli/trustgraph/cli/stop_library_processing.py +++ b/trustgraph-cli/trustgraph/cli/stop_library_processing.py @@ -10,13 +10,14 @@ from trustgraph.api import Api, ConfigKey import json default_url = os.getenv("TRUSTGRAPH_URL", 'http://localhost:8088/') +default_token = os.getenv("TRUSTGRAPH_TOKEN", None) default_user = "trustgraph" def stop_processing( - url, user, id + url, user, id, token=None ): - api = Api(url).library() + api = Api(url, token=token).library() api.stop_processing(user = user, id = id) @@ -33,6 +34,12 @@ def main(): help=f'API URL (default: {default_url})', ) + parser.add_argument( + '-t', 
'--token', + default=default_token, + help='Authentication token (default: $TRUSTGRAPH_TOKEN)', + ) + parser.add_argument( '-U', '--user', default=default_user, @@ -53,6 +60,7 @@ def main(): url = args.api_url, user = args.user, id = args.id, + token = args.token, ) except Exception as e: diff --git a/trustgraph-cli/trustgraph/cli/unload_kg_core.py b/trustgraph-cli/trustgraph/cli/unload_kg_core.py index 079766d2..47f811f3 100644 --- a/trustgraph-cli/trustgraph/cli/unload_kg_core.py +++ b/trustgraph-cli/trustgraph/cli/unload_kg_core.py @@ -11,12 +11,13 @@ from trustgraph.api import Api import json default_url = os.getenv("TRUSTGRAPH_URL", 'http://localhost:8088/') +default_token = os.getenv("TRUSTGRAPH_TOKEN", None) default_flow = "default" default_collection = "default" -def unload_kg_core(url, user, id, flow): +def unload_kg_core(url, user, id, flow, token=None): - api = Api(url).knowledge() + api = Api(url, token=token).knowledge() class_names = api.unload_kg_core(user = user, id = id, flow=flow) @@ -33,6 +34,12 @@ def main(): help=f'API URL (default: {default_url})', ) + parser.add_argument( + '-t', '--token', + default=default_token, + help='Authentication token (default: $TRUSTGRAPH_TOKEN)', + ) + parser.add_argument( '-U', '--user', default="trustgraph", @@ -60,6 +67,7 @@ def main(): user=args.user, id=args.id, flow=args.flow_id, + token=args.token, ) except Exception as e: