mirror of
https://github.com/trustgraph-ai/trustgraph.git
synced 2026-06-30 17:09:38 +02:00
Merge remote-tracking branch 'origin/master' into ts-port-effect-v4
This commit is contained in:
commit
92dae8c374
117 changed files with 7392 additions and 3410 deletions
2
.github/workflows/pull-request.yaml
vendored
2
.github/workflows/pull-request.yaml
vendored
|
|
@ -22,7 +22,7 @@ jobs:
|
|||
uses: actions/checkout@v3
|
||||
|
||||
- name: Setup packages
|
||||
run: make update-package-versions VERSION=2.4.999
|
||||
run: make update-package-versions VERSION=2.5.999
|
||||
|
||||
- name: Setup environment
|
||||
run: python3 -m venv env
|
||||
|
|
|
|||
5
.gitignore
vendored
5
.gitignore
vendored
|
|
@ -16,4 +16,7 @@ trustgraph-vertexai/trustgraph/vertexai_version.py
|
|||
trustgraph-unstructured/trustgraph/unstructured_version.py
|
||||
trustgraph-mcp/trustgraph/mcp_version.py
|
||||
trustgraph/trustgraph/trustgraph_version.py
|
||||
vertexai/
|
||||
vertexai/
|
||||
venv/
|
||||
.venv/
|
||||
.env
|
||||
|
|
|
|||
|
|
@ -23,7 +23,7 @@ RUN pip3 install --no-cache-dir \
|
|||
langchain==1.2.16 langchain-core==1.3.2 langchain-huggingface==1.2.2 \
|
||||
langchain-community==0.4.1 \
|
||||
sentence-transformers==5.4.1 transformers==5.7.0 \
|
||||
huggingface-hub==1.13.0 \
|
||||
huggingface-hub==1.13.0 click \
|
||||
pulsar-client==3.11.0
|
||||
|
||||
# Most commonly used embeddings model, just build it into the container
|
||||
|
|
|
|||
|
|
@ -25,7 +25,7 @@ BUCKET_URL = "https://storage.googleapis.com/trustgraph-library"
|
|||
INDEX_URL = f"{BUCKET_URL}/index.json"
|
||||
|
||||
default_url = os.getenv("TRUSTGRAPH_URL", "http://localhost:8088/")
|
||||
default_user = "trustgraph"
|
||||
default_workspace = os.getenv("TRUSTGRAPH_WORKSPACE", "default")
|
||||
default_token = os.getenv("TRUSTGRAPH_TOKEN", None)
|
||||
|
||||
|
||||
|
|
@ -113,7 +113,7 @@ def convert_metadata(metadata_json):
|
|||
return triples
|
||||
|
||||
|
||||
def load_document(api, user, doc_entry):
|
||||
def load_document(api, doc_entry):
|
||||
"""Fetch metadata and content for a document, then load into TrustGraph."""
|
||||
doc_id = doc_entry["id"]
|
||||
title = doc_entry["title"]
|
||||
|
|
@ -133,7 +133,6 @@ def load_document(api, user, doc_entry):
|
|||
api.add_document(
|
||||
id=doc["id"],
|
||||
metadata=metadata,
|
||||
user=user,
|
||||
kind=doc["kind"],
|
||||
title=doc["title"],
|
||||
comments=doc["comments"],
|
||||
|
|
@ -144,12 +143,12 @@ def load_document(api, user, doc_entry):
|
|||
print(f" done.")
|
||||
|
||||
|
||||
def load_documents(api, user, docs):
|
||||
def load_documents(api, docs):
|
||||
"""Load a list of documents."""
|
||||
print(f"Loading {len(docs)} document(s)...\n")
|
||||
for doc in docs:
|
||||
try:
|
||||
load_document(api, user, doc)
|
||||
load_document(api, doc)
|
||||
except Exception as e:
|
||||
print(f" FAILED: {e}", file=sys.stderr)
|
||||
print()
|
||||
|
|
@ -166,8 +165,8 @@ def main():
|
|||
help=f"TrustGraph API URL (default: {default_url})",
|
||||
)
|
||||
parser.add_argument(
|
||||
"-U", "--user", default=default_user,
|
||||
help=f"User ID (default: {default_user})",
|
||||
"-w", "--workspace", default=default_workspace,
|
||||
help=f"Workspace (default: {default_workspace})",
|
||||
)
|
||||
parser.add_argument(
|
||||
"-t", "--token", default=default_token,
|
||||
|
|
@ -212,22 +211,22 @@ def main():
|
|||
return
|
||||
|
||||
# Load commands need the API
|
||||
api = Api(args.url, token=args.token).library()
|
||||
api = Api(args.url, token=args.token, workspace=args.workspace).library()
|
||||
|
||||
if args.command == "load-all":
|
||||
load_documents(api, args.user, index)
|
||||
load_documents(api, index)
|
||||
|
||||
elif args.command == "load-doc":
|
||||
matches = [d for d in index if str(d.get("id")) == args.id]
|
||||
if not matches:
|
||||
print(f"No document with ID '{args.id}' found.", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
load_documents(api, args.user, matches)
|
||||
load_documents(api, matches)
|
||||
|
||||
elif args.command == "load-match":
|
||||
results = search_index(index, args.query)
|
||||
if results:
|
||||
load_documents(api, args.user, results)
|
||||
load_documents(api, results)
|
||||
else:
|
||||
print("No matches found.", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
|
|
|
|||
|
|
@ -3,208 +3,278 @@
|
|||
WebSocket Relay Test Harness
|
||||
|
||||
This script creates a relay server with two WebSocket endpoints:
|
||||
- /in - for test clients to connect to
|
||||
- /out - for reverse gateway to connect to
|
||||
- /in - for test clients to connect to (speaks api-gateway protocol)
|
||||
- /out - for reverse gateway to connect to (speaks rev-gateway protocol)
|
||||
|
||||
Messages are bidirectionally relayed between the two connections.
|
||||
Clients on /in authenticate with a first-frame auth message:
|
||||
{"type": "auth", "token": "..."}
|
||||
|
||||
The relay stores the token and injects it into each subsequent message
|
||||
before forwarding to /out. Responses from /out are forwarded back to
|
||||
the originating /in connection unchanged.
|
||||
|
||||
Usage:
|
||||
python websocket_relay.py [--port PORT] [--host HOST]
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
import argparse
|
||||
from aiohttp import web, WSMsgType
|
||||
import weakref
|
||||
from typing import Optional, Set
|
||||
from typing import Dict, Optional
|
||||
|
||||
# Configure logging
|
||||
logging.basicConfig(
|
||||
level=logging.INFO,
|
||||
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
|
||||
)
|
||||
logger = logging.getLogger("websocket_relay")
|
||||
|
||||
|
||||
class InConnection:
|
||||
def __init__(self, ws, conn_id):
|
||||
self.ws = ws
|
||||
self.conn_id = conn_id
|
||||
self.token: Optional[str] = None
|
||||
self.authenticated = False
|
||||
|
||||
|
||||
class WebSocketRelay:
|
||||
"""WebSocket relay that forwards messages between 'in' and 'out' connections"""
|
||||
|
||||
|
||||
def __init__(self):
|
||||
self.in_connections: Set = weakref.WeakSet()
|
||||
self.out_connections: Set = weakref.WeakSet()
|
||||
|
||||
self.in_connections: Dict[str, InConnection] = {}
|
||||
self.out_connections: set = set()
|
||||
self._conn_counter = 0
|
||||
|
||||
def _next_conn_id(self):
|
||||
self._conn_counter += 1
|
||||
return f"conn-{self._conn_counter}"
|
||||
|
||||
async def handle_in_connection(self, request):
|
||||
"""Handle incoming connections on /in endpoint"""
|
||||
ws = web.WebSocketResponse()
|
||||
await ws.prepare(request)
|
||||
|
||||
self.in_connections.add(ws)
|
||||
logger.info(f"New 'in' connection. Total in: {len(self.in_connections)}, out: {len(self.out_connections)}")
|
||||
|
||||
|
||||
conn_id = self._next_conn_id()
|
||||
conn = InConnection(ws, conn_id)
|
||||
self.in_connections[conn_id] = conn
|
||||
logger.info(
|
||||
f"New 'in' connection {conn_id}. "
|
||||
f"Total in: {len(self.in_connections)}, "
|
||||
f"out: {len(self.out_connections)}"
|
||||
)
|
||||
|
||||
try:
|
||||
async for msg in ws:
|
||||
if msg.type == WSMsgType.TEXT:
|
||||
data = msg.data
|
||||
logger.info(f"IN → OUT: {data}")
|
||||
await self._forward_to_out(data)
|
||||
elif msg.type == WSMsgType.BINARY:
|
||||
data = msg.data
|
||||
logger.info(f"IN → OUT: {len(data)} bytes (binary)")
|
||||
await self._forward_to_out(data, binary=True)
|
||||
await self._handle_in_message(conn, msg.data)
|
||||
elif msg.type == WSMsgType.ERROR:
|
||||
logger.error(f"WebSocket error on 'in' connection: {ws.exception()}")
|
||||
logger.error(
|
||||
f"WebSocket error on 'in' connection "
|
||||
f"{conn_id}: {ws.exception()}"
|
||||
)
|
||||
break
|
||||
else:
|
||||
break
|
||||
except Exception as e:
|
||||
logger.error(f"Error in 'in' connection handler: {e}")
|
||||
logger.error(
|
||||
f"Error in 'in' connection {conn_id}: {e}"
|
||||
)
|
||||
finally:
|
||||
logger.info(f"'in' connection closed. Remaining in: {len(self.in_connections)}, out: {len(self.out_connections)}")
|
||||
|
||||
del self.in_connections[conn_id]
|
||||
logger.info(
|
||||
f"'in' connection {conn_id} closed. "
|
||||
f"Remaining in: {len(self.in_connections)}, "
|
||||
f"out: {len(self.out_connections)}"
|
||||
)
|
||||
|
||||
return ws
|
||||
|
||||
async def handle_out_connection(self, request):
|
||||
"""Handle outgoing connections on /out endpoint"""
|
||||
ws = web.WebSocketResponse()
|
||||
await ws.prepare(request)
|
||||
|
||||
self.out_connections.add(ws)
|
||||
logger.info(f"New 'out' connection. Total in: {len(self.in_connections)}, out: {len(self.out_connections)}")
|
||||
|
||||
|
||||
async def _handle_in_message(self, conn, data):
|
||||
try:
|
||||
async for msg in ws:
|
||||
if msg.type == WSMsgType.TEXT:
|
||||
data = msg.data
|
||||
logger.info(f"OUT → IN: {data}")
|
||||
await self._forward_to_in(data)
|
||||
elif msg.type == WSMsgType.BINARY:
|
||||
data = msg.data
|
||||
logger.info(f"OUT → IN: {len(data)} bytes (binary)")
|
||||
await self._forward_to_in(data, binary=True)
|
||||
elif msg.type == WSMsgType.ERROR:
|
||||
logger.error(f"WebSocket error on 'out' connection: {ws.exception()}")
|
||||
break
|
||||
else:
|
||||
break
|
||||
except Exception as e:
|
||||
logger.error(f"Error in 'out' connection handler: {e}")
|
||||
finally:
|
||||
logger.info(f"'out' connection closed. Remaining in: {len(self.in_connections)}, out: {len(self.out_connections)}")
|
||||
|
||||
return ws
|
||||
|
||||
async def _forward_to_out(self, data, binary=False):
|
||||
"""Forward message from 'in' to all 'out' connections"""
|
||||
if not self.out_connections:
|
||||
logger.warning("No 'out' connections available to forward message")
|
||||
message = json.loads(data)
|
||||
except json.JSONDecodeError:
|
||||
logger.warning(
|
||||
f"{conn.conn_id}: received non-JSON message"
|
||||
)
|
||||
return
|
||||
|
||||
closed_connections = []
|
||||
|
||||
if isinstance(message, dict) and message.get("type") == "auth":
|
||||
conn.token = message.get("token", "")
|
||||
conn.authenticated = True
|
||||
logger.info(f"{conn.conn_id}: authenticated")
|
||||
await conn.ws.send_json({
|
||||
"type": "auth-ok",
|
||||
"workspace": "relayed",
|
||||
})
|
||||
return
|
||||
|
||||
if not conn.authenticated:
|
||||
await conn.ws.send_json({
|
||||
"error": {
|
||||
"message": "auth required",
|
||||
"type": "auth-required",
|
||||
},
|
||||
"complete": True,
|
||||
})
|
||||
return
|
||||
|
||||
message["token"] = conn.token
|
||||
message["_relay_conn"] = conn.conn_id
|
||||
|
||||
forwarded = json.dumps(message)
|
||||
logger.info(f"IN {conn.conn_id} → OUT: {forwarded}")
|
||||
await self._forward_to_out(forwarded)
|
||||
|
||||
async def handle_out_connection(self, request):
|
||||
ws = web.WebSocketResponse()
|
||||
await ws.prepare(request)
|
||||
|
||||
self.out_connections.add(ws)
|
||||
logger.info(
|
||||
f"New 'out' connection. "
|
||||
f"Total in: {len(self.in_connections)}, "
|
||||
f"out: {len(self.out_connections)}"
|
||||
)
|
||||
|
||||
try:
|
||||
async for msg in ws:
|
||||
if msg.type == WSMsgType.TEXT:
|
||||
await self._handle_out_message(msg.data)
|
||||
elif msg.type == WSMsgType.ERROR:
|
||||
logger.error(
|
||||
f"WebSocket error on 'out' connection: "
|
||||
f"{ws.exception()}"
|
||||
)
|
||||
break
|
||||
else:
|
||||
break
|
||||
except Exception as e:
|
||||
logger.error(f"Error in 'out' connection: {e}")
|
||||
finally:
|
||||
self.out_connections.discard(ws)
|
||||
logger.info(
|
||||
f"'out' connection closed. "
|
||||
f"Remaining in: {len(self.in_connections)}, "
|
||||
f"out: {len(self.out_connections)}"
|
||||
)
|
||||
|
||||
return ws
|
||||
|
||||
async def _handle_out_message(self, data):
|
||||
try:
|
||||
message = json.loads(data)
|
||||
except json.JSONDecodeError:
|
||||
logger.warning("OUT: received non-JSON message")
|
||||
return
|
||||
|
||||
conn_id = message.pop("_relay_conn", None)
|
||||
|
||||
forwarded = json.dumps(message)
|
||||
logger.info(f"OUT → IN {conn_id or 'broadcast'}: {forwarded}")
|
||||
|
||||
if conn_id and conn_id in self.in_connections:
|
||||
conn = self.in_connections[conn_id]
|
||||
try:
|
||||
if not conn.ws.closed:
|
||||
await conn.ws.send_str(forwarded)
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
f"Error forwarding to 'in' {conn_id}: {e}"
|
||||
)
|
||||
else:
|
||||
await self._broadcast_to_in(forwarded)
|
||||
|
||||
async def _broadcast_to_in(self, data):
|
||||
closed = []
|
||||
for conn_id, conn in list(self.in_connections.items()):
|
||||
try:
|
||||
if conn.ws.closed:
|
||||
closed.append(conn_id)
|
||||
continue
|
||||
await conn.ws.send_str(data)
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
f"Error broadcasting to 'in' {conn_id}: {e}"
|
||||
)
|
||||
closed.append(conn_id)
|
||||
for conn_id in closed:
|
||||
self.in_connections.pop(conn_id, None)
|
||||
|
||||
async def _forward_to_out(self, data):
|
||||
closed = []
|
||||
for ws in list(self.out_connections):
|
||||
try:
|
||||
if ws.closed:
|
||||
closed_connections.append(ws)
|
||||
closed.append(ws)
|
||||
continue
|
||||
|
||||
if binary:
|
||||
await ws.send_bytes(data)
|
||||
else:
|
||||
await ws.send_str(data)
|
||||
await ws.send_str(data)
|
||||
except Exception as e:
|
||||
logger.error(f"Error forwarding to 'out' connection: {e}")
|
||||
closed_connections.append(ws)
|
||||
|
||||
# Clean up closed connections
|
||||
for ws in closed_connections:
|
||||
if ws in self.out_connections:
|
||||
self.out_connections.discard(ws)
|
||||
|
||||
async def _forward_to_in(self, data, binary=False):
|
||||
"""Forward message from 'out' to all 'in' connections"""
|
||||
if not self.in_connections:
|
||||
logger.warning("No 'in' connections available to forward message")
|
||||
return
|
||||
|
||||
closed_connections = []
|
||||
for ws in list(self.in_connections):
|
||||
try:
|
||||
if ws.closed:
|
||||
closed_connections.append(ws)
|
||||
continue
|
||||
|
||||
if binary:
|
||||
await ws.send_bytes(data)
|
||||
else:
|
||||
await ws.send_str(data)
|
||||
except Exception as e:
|
||||
logger.error(f"Error forwarding to 'in' connection: {e}")
|
||||
closed_connections.append(ws)
|
||||
|
||||
# Clean up closed connections
|
||||
for ws in closed_connections:
|
||||
if ws in self.in_connections:
|
||||
self.in_connections.discard(ws)
|
||||
logger.error(f"Error forwarding to 'out': {e}")
|
||||
closed.append(ws)
|
||||
for ws in closed:
|
||||
self.out_connections.discard(ws)
|
||||
|
||||
|
||||
async def create_app(relay):
|
||||
"""Create the web application with routes"""
|
||||
app = web.Application()
|
||||
|
||||
# Add routes
|
||||
app.router.add_get('/in', relay.handle_in_connection)
|
||||
|
||||
app.router.add_get('/in/api/v1/socket', relay.handle_in_connection)
|
||||
app.router.add_get('/out', relay.handle_out_connection)
|
||||
|
||||
# Add a simple status endpoint
|
||||
|
||||
async def status(request):
|
||||
status_info = {
|
||||
return web.json_response({
|
||||
'in_connections': len(relay.in_connections),
|
||||
'out_connections': len(relay.out_connections),
|
||||
'status': 'running'
|
||||
}
|
||||
return web.json_response(status_info)
|
||||
|
||||
'status': 'running',
|
||||
})
|
||||
|
||||
app.router.add_get('/status', status)
|
||||
app.router.add_get('/', status) # Root also shows status
|
||||
|
||||
app.router.add_get('/', status)
|
||||
|
||||
return app
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(
|
||||
description="WebSocket Relay Test Harness"
|
||||
)
|
||||
parser.add_argument(
|
||||
'--host',
|
||||
'--host',
|
||||
default='localhost',
|
||||
help='Host to bind to (default: localhost)'
|
||||
help='Host to bind to (default: localhost)',
|
||||
)
|
||||
parser.add_argument(
|
||||
'--port',
|
||||
type=int,
|
||||
'--port',
|
||||
type=int,
|
||||
default=8080,
|
||||
help='Port to bind to (default: 8080)'
|
||||
help='Port to bind to (default: 8080)',
|
||||
)
|
||||
parser.add_argument(
|
||||
'--verbose', '-v',
|
||||
action='store_true',
|
||||
help='Enable verbose logging'
|
||||
help='Enable verbose logging',
|
||||
)
|
||||
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
|
||||
if args.verbose:
|
||||
logging.getLogger().setLevel(logging.DEBUG)
|
||||
|
||||
|
||||
relay = WebSocketRelay()
|
||||
|
||||
|
||||
print(f"Starting WebSocket Relay on {args.host}:{args.port}")
|
||||
print(f" 'in' endpoint: ws://{args.host}:{args.port}/in")
|
||||
print(f" 'in' endpoint: ws://{args.host}:{args.port}/in/api/v1/socket")
|
||||
print(f" 'out' endpoint: ws://{args.host}:{args.port}/out")
|
||||
print(f" Status: http://{args.host}:{args.port}/status")
|
||||
print()
|
||||
print("Usage:")
|
||||
print(f" Test client connects to: ws://{args.host}:{args.port}/in")
|
||||
print(f" Reverse gateway connects to: ws://{args.host}:{args.port}/out")
|
||||
|
||||
print("Client protocol (same as api-gateway):")
|
||||
print(' 1. Connect to /in/api/v1/socket')
|
||||
print(' 2. Send: {"type": "auth", "token": "tg_..."}')
|
||||
print(' 3. Receive: {"type": "auth-ok", "workspace": "relayed"}')
|
||||
print(' 4. Send requests as normal')
|
||||
|
||||
web.run_app(create_app(relay), host=args.host, port=args.port)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
main()
|
||||
|
|
|
|||
186
docs/tech-specs/no-auth-regime.md
Normal file
186
docs/tech-specs/no-auth-regime.md
Normal file
|
|
@ -0,0 +1,186 @@
|
|||
---
|
||||
layout: default
|
||||
title: "No-Auth IAM Regime"
|
||||
parent: "Tech Specs"
|
||||
---
|
||||
|
||||
# No-Auth IAM Regime
|
||||
|
||||
## Overview
|
||||
|
||||
A minimal IAM regime that permits all access unconditionally.
|
||||
Implements the same Pulsar request/response protocol as `iam-svc`
|
||||
(see [iam-contract.md](iam-contract.md)) so it is a drop-in
|
||||
replacement: swap `iam-svc` for `no-auth-svc` in the deployment
|
||||
and the gateway, bootstrapper, and all other components continue
|
||||
to work without modification.
|
||||
|
||||
Intended for development, testing, single-tenant self-hosted
|
||||
deployments, and evaluation environments where authentication
|
||||
overhead is unwanted.
|
||||
|
||||
## Motivation
|
||||
|
||||
The full IAM regime requires Cassandra tables, a bootstrap
|
||||
sequence, API key management, and signing key rotation. For
|
||||
many deployments this is unnecessary friction:
|
||||
|
||||
- Local development and CI/CD pipelines.
|
||||
- Single-user or small-team self-hosted instances.
|
||||
- Evaluation and demo environments.
|
||||
- Deployments behind an external authentication proxy
|
||||
(e.g. OAuth2 reverse proxy, VPN-gated access).
|
||||
|
||||
Today operators who want no auth must still deploy `iam-svc` and
|
||||
complete the bootstrap ceremony. A purpose-built no-auth regime
|
||||
eliminates that requirement entirely.
|
||||
|
||||
## Design
|
||||
|
||||
### Deployment
|
||||
|
||||
Replace `iam-svc` with `no-auth-svc` in the processor group or
|
||||
container configuration. No other services change. The no-auth
|
||||
service listens on the standard IAM Pulsar topics:
|
||||
|
||||
- Request: `request:<topicspace>:iam`
|
||||
- Response: `response:<topicspace>:iam`
|
||||
|
||||
### Dependencies
|
||||
|
||||
None. No database, no config entries, no signing keys, no
|
||||
bootstrap sequence.
|
||||
|
||||
### Operation responses
|
||||
|
||||
The service implements the IAM contract
|
||||
([iam-contract.md](iam-contract.md)) with the following
|
||||
behaviour for each operation:
|
||||
|
||||
| Operation | Behaviour |
|
||||
|---|---|
|
||||
| `authenticate-anonymous` | Returns a default identity: `user_id="anonymous"`, `workspace="default"`, `roles=["admin"]`. This is the key operation that distinguishes no-auth from the full regime. |
|
||||
| `resolve-api-key` | Accepts any token. Returns the same default identity as `authenticate-anonymous`. |
|
||||
| `authorise` | Always allows. Returns `decision_allow=True`, `decision_ttl_seconds=3600`. |
|
||||
| `authorise-many` | Always allows all checks. |
|
||||
| `get-signing-key-public` | Returns an empty string. The gateway skips JWT validation when no key is available. |
|
||||
| `bootstrap` | No-op. Returns empty admin user/key. |
|
||||
| `bootstrap-status` | Returns `bootstrap_available=False`. |
|
||||
| `whoami` | Returns a stub user record for the actor. |
|
||||
| `login` | Returns empty JWT (not supported under no-auth). |
|
||||
| `create-user`, `list-users`, `get-user`, `update-user`, `delete-user`, `disable-user`, `enable-user` | Return empty/stub responses. User management is meaningless without auth. |
|
||||
| `create-workspace`, `list-workspaces`, `get-workspace`, `update-workspace`, `disable-workspace` | Return empty/stub responses. |
|
||||
| `create-api-key`, `list-api-keys`, `revoke-api-key` | Return empty/stub responses. |
|
||||
| `change-password`, `reset-password` | No-op. |
|
||||
| `rotate-signing-key` | No-op. |
|
||||
| Unknown operation | Returns an error response (same as `iam-svc`). |
|
||||
|
||||
### Workspace resolution
|
||||
|
||||
When `resolve-api-key` is called, the returned workspace
|
||||
determines which workspace the request operates against. The
|
||||
no-auth service defaults to `"default"`.
|
||||
|
||||
A configurable `--default-workspace` flag allows operators to
|
||||
change this without code changes.
|
||||
|
||||
### Anonymous authentication
|
||||
|
||||
A new `authenticate-anonymous` operation is added to the IAM
|
||||
protocol. This is a small, backward-compatible addition to the
|
||||
contract:
|
||||
|
||||
**Gateway change** (`auth.py`): when `authenticate()` receives a
|
||||
request with no `Authorization` header (or an empty bearer
|
||||
token), instead of immediately returning 401, it sends an
|
||||
`authenticate-anonymous` request to the IAM service. If the
|
||||
regime returns a valid identity, the request proceeds. If the
|
||||
regime returns an error, the gateway returns 401 as before.
|
||||
|
||||
**`iam-svc` (full regime)**: returns `auth-failed` for
|
||||
`authenticate-anonymous`. Behaviour is unchanged — unauthenticated
|
||||
requests are rejected exactly as they are today.
|
||||
|
||||
**`no-auth-svc`**: returns the default identity (`anonymous` /
|
||||
`default` workspace). No token required.
|
||||
|
||||
This keeps the policy decision ("is anonymous access allowed?")
|
||||
in the IAM regime, not in the gateway. The gateway is a generic
|
||||
enforcement point that asks and respects the answer.
|
||||
|
||||
**Wire format**: uses the existing `IamRequest` / `IamResponse`
|
||||
schema with `operation="authenticate-anonymous"`. No new fields
|
||||
required — the response uses `resolved_user_id`,
|
||||
`resolved_workspace`, and `resolved_roles`, same as
|
||||
`resolve-api-key`.
|
||||
|
||||
Requests that do carry a bearer token follow the existing
|
||||
`resolve-api-key` / JWT paths unchanged.
|
||||
|
||||
## Implementation
|
||||
|
||||
### Service structure
|
||||
|
||||
The service is a standard `AsyncProcessor` that consumes IAM
|
||||
requests and produces IAM responses, identical in shape to the
|
||||
existing `iam-svc` processor:
|
||||
|
||||
```
|
||||
trustgraph-flow/
|
||||
trustgraph/
|
||||
iam/
|
||||
noauth/
|
||||
__init__.py
|
||||
__main__.py
|
||||
service.py # AsyncProcessor wiring
|
||||
handler.py # Operation dispatch, always-allow logic
|
||||
```
|
||||
|
||||
### Handler
|
||||
|
||||
The handler is a single `handle(request) -> response` function
|
||||
with a dispatch table. Each operation returns a pre-built
|
||||
`IamResponse` with the appropriate fields set. No database
|
||||
access, no crypto, no state.
|
||||
|
||||
### Configuration
|
||||
|
||||
| Flag | Default | Description |
|
||||
|---|---|---|
|
||||
| `--default-workspace` | `"default"` | Workspace returned by `resolve-api-key` |
|
||||
| `--default-user-id` | `"anonymous"` | User ID returned by `resolve-api-key` |
|
||||
|
||||
### Entry point
|
||||
|
||||
```
|
||||
tg-no-auth-svc
|
||||
```
|
||||
|
||||
Or via processor group:
|
||||
|
||||
```yaml
|
||||
- class: trustgraph.iam.noauth.Processor
|
||||
params:
|
||||
<<: *defaults
|
||||
id: no-auth-svc
|
||||
```
|
||||
|
||||
## Security considerations
|
||||
|
||||
This regime provides **no security whatsoever**. Any caller with
|
||||
network access to the API gateway has full admin access to all
|
||||
workspaces.
|
||||
|
||||
Operators must ensure that network-level controls (firewall,
|
||||
VPN, private network) provide adequate protection when deploying
|
||||
this regime. The regime is explicitly not suitable for multi-
|
||||
tenant or internet-facing deployments.
|
||||
|
||||
## Testing
|
||||
|
||||
- Unit: verify each operation returns the expected stub response.
|
||||
- Integration: deploy `no-auth-svc` in place of `iam-svc`, confirm
|
||||
the gateway starts, accepts requests with a dummy bearer token,
|
||||
and routes them to the default workspace.
|
||||
- E2E: run the standard e2e test suite with `no-auth-svc` to
|
||||
confirm no regressions.
|
||||
|
|
@ -278,7 +278,7 @@ The system uses **FAISS (Facebook AI Similarity Search)** with IndexFlatIP for e
|
|||
3. **Similarity Search**:
|
||||
- For each text segment embedding, search the vector store
|
||||
- Retrieve top-k (e.g., 10) most similar ontology elements
|
||||
- Apply similarity threshold (e.g., 0.7) to filter weak matches
|
||||
- Apply similarity threshold (e.g., 0.3) to filter weak matches
|
||||
- Aggregate results across all segments, tracking match frequencies
|
||||
|
||||
4. **Dependency Resolution**:
|
||||
|
|
|
|||
|
|
@ -63,26 +63,26 @@ class TestEndToEndConfigurationFlow:
|
|||
'CASSANDRA_USERNAME': 'obj-user',
|
||||
'CASSANDRA_PASSWORD': 'obj-pass'
|
||||
}
|
||||
|
||||
|
||||
mock_auth_instance = MagicMock()
|
||||
mock_auth_provider.return_value = mock_auth_instance
|
||||
mock_cluster_instance = MagicMock()
|
||||
mock_session = MagicMock()
|
||||
mock_cluster_instance.connect.return_value = mock_session
|
||||
mock_cluster.return_value = mock_cluster_instance
|
||||
|
||||
|
||||
with patch.dict(os.environ, env_vars, clear=True):
|
||||
processor = RowsWriter(taskgroup=MagicMock())
|
||||
|
||||
|
||||
# Trigger Cassandra connection
|
||||
processor.connect_cassandra()
|
||||
|
||||
|
||||
# Verify auth provider was created with env vars
|
||||
mock_auth_provider.assert_called_once_with(
|
||||
username='obj-user',
|
||||
password='obj-pass'
|
||||
)
|
||||
|
||||
|
||||
# Verify cluster was created with hosts from env and auth
|
||||
mock_cluster.assert_called_once()
|
||||
call_args = mock_cluster.call_args
|
||||
|
|
@ -188,37 +188,34 @@ class TestConfigurationPriorityEndToEnd:
|
|||
)
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@patch('trustgraph.direct.cassandra_kg.Cluster')
|
||||
async def test_no_config_defaults_end_to_end(self, mock_cluster):
|
||||
@patch('trustgraph.query.triples.cassandra.service.EntityCentricKnowledgeGraph')
|
||||
async def test_no_config_defaults_end_to_end(self, mock_kg_class):
|
||||
"""Test that defaults are used when no configuration provided end-to-end."""
|
||||
mock_cluster_instance = MagicMock()
|
||||
mock_session = MagicMock()
|
||||
mock_cluster_instance.connect.return_value = mock_session
|
||||
mock_cluster.return_value = mock_cluster_instance
|
||||
|
||||
from unittest.mock import AsyncMock
|
||||
|
||||
mock_tg_instance = MagicMock()
|
||||
mock_tg_instance.async_get_all = AsyncMock(return_value=[])
|
||||
mock_kg_class.return_value = mock_tg_instance
|
||||
|
||||
with patch.dict(os.environ, {}, clear=True):
|
||||
processor = TriplesQuery(taskgroup=MagicMock())
|
||||
|
||||
|
||||
# Mock query to trigger TrustGraph creation
|
||||
mock_query = MagicMock()
|
||||
mock_query.collection = 'default_collection'
|
||||
mock_query.s = None
|
||||
mock_query.p = None
|
||||
mock_query.o = None
|
||||
mock_query.g = None
|
||||
mock_query.limit = 100
|
||||
|
||||
# Mock the get_all method to return empty list
|
||||
mock_tg_instance = MagicMock()
|
||||
mock_tg_instance.get_all.return_value = []
|
||||
processor.tg = mock_tg_instance
|
||||
|
||||
|
||||
await processor.query_triples('default_user', mock_query)
|
||||
|
||||
|
||||
# Should use defaults
|
||||
mock_cluster.assert_called_once()
|
||||
call_args = mock_cluster.call_args
|
||||
assert call_args.args[0] == ['cassandra'] # Default host
|
||||
assert 'auth_provider' not in call_args.kwargs # No auth with default config
|
||||
mock_kg_class.assert_called_once_with(
|
||||
hosts=['cassandra'],
|
||||
keyspace='default_user'
|
||||
)
|
||||
|
||||
|
||||
class TestNoBackwardCompatibilityEndToEnd:
|
||||
|
|
@ -324,16 +321,16 @@ class TestMultipleHostsHandling:
|
|||
env_vars = {
|
||||
'CASSANDRA_HOST': 'host1,host2,host3,host4,host5'
|
||||
}
|
||||
|
||||
|
||||
mock_cluster_instance = MagicMock()
|
||||
mock_session = MagicMock()
|
||||
mock_cluster_instance.connect.return_value = mock_session
|
||||
mock_cluster.return_value = mock_cluster_instance
|
||||
|
||||
|
||||
with patch.dict(os.environ, env_vars, clear=True):
|
||||
processor = RowsWriter(taskgroup=MagicMock())
|
||||
processor.connect_cassandra()
|
||||
|
||||
|
||||
# Verify all hosts were passed to Cluster
|
||||
mock_cluster.assert_called_once()
|
||||
call_args = mock_cluster.call_args
|
||||
|
|
@ -392,27 +389,27 @@ class TestAuthenticationFlow:
|
|||
'CASSANDRA_USERNAME': 'auth-user',
|
||||
'CASSANDRA_PASSWORD': 'auth-secret'
|
||||
}
|
||||
|
||||
|
||||
mock_auth_instance = MagicMock()
|
||||
mock_auth_provider.return_value = mock_auth_instance
|
||||
mock_cluster_instance = MagicMock()
|
||||
mock_cluster.return_value = mock_cluster_instance
|
||||
|
||||
|
||||
with patch.dict(os.environ, env_vars, clear=True):
|
||||
processor = RowsWriter(taskgroup=MagicMock())
|
||||
processor.connect_cassandra()
|
||||
|
||||
|
||||
# Auth provider should be created
|
||||
mock_auth_provider.assert_called_once_with(
|
||||
username='auth-user',
|
||||
password='auth-secret'
|
||||
)
|
||||
|
||||
|
||||
# Cluster should be created with auth provider
|
||||
call_args = mock_cluster.call_args
|
||||
assert 'auth_provider' in call_args.kwargs
|
||||
assert call_args.kwargs['auth_provider'] == mock_auth_instance
|
||||
|
||||
|
||||
@patch('trustgraph.storage.rows.cassandra.write.Cluster')
|
||||
@patch('trustgraph.storage.rows.cassandra.write.PlainTextAuthProvider')
|
||||
def test_no_authentication_when_credentials_missing(self, mock_auth_provider, mock_cluster):
|
||||
|
|
@ -421,21 +418,21 @@ class TestAuthenticationFlow:
|
|||
'CASSANDRA_HOST': 'no-auth-host'
|
||||
# No username/password
|
||||
}
|
||||
|
||||
|
||||
mock_cluster_instance = MagicMock()
|
||||
mock_cluster.return_value = mock_cluster_instance
|
||||
|
||||
|
||||
with patch.dict(os.environ, env_vars, clear=True):
|
||||
processor = RowsWriter(taskgroup=MagicMock())
|
||||
processor.connect_cassandra()
|
||||
|
||||
|
||||
# Auth provider should not be created
|
||||
mock_auth_provider.assert_not_called()
|
||||
|
||||
|
||||
# Cluster should be created without auth provider
|
||||
call_args = mock_cluster.call_args
|
||||
assert 'auth_provider' not in call_args.kwargs
|
||||
|
||||
|
||||
@patch('trustgraph.storage.rows.cassandra.write.Cluster')
|
||||
@patch('trustgraph.storage.rows.cassandra.write.PlainTextAuthProvider')
|
||||
def test_no_authentication_when_only_username_provided(self, mock_auth_provider, mock_cluster):
|
||||
|
|
@ -446,15 +443,15 @@ class TestAuthenticationFlow:
|
|||
cassandra_username='partial-user'
|
||||
# No password
|
||||
)
|
||||
|
||||
|
||||
mock_cluster_instance = MagicMock()
|
||||
mock_cluster.return_value = mock_cluster_instance
|
||||
|
||||
|
||||
processor.connect_cassandra()
|
||||
|
||||
|
||||
# Auth provider should not be created (needs both username AND password)
|
||||
mock_auth_provider.assert_not_called()
|
||||
|
||||
|
||||
# Cluster should be created without auth provider
|
||||
call_args = mock_cluster.call_args
|
||||
assert 'auth_provider' not in call_args.kwargs
|
||||
|
|
@ -101,6 +101,8 @@ class TestRowsCassandraIntegration:
|
|||
processor.session = None
|
||||
|
||||
# Bind actual methods from the new unified table implementation
|
||||
import asyncio
|
||||
processor._setup_lock = asyncio.Lock()
|
||||
processor.connect_cassandra = Processor.connect_cassandra.__get__(processor, Processor)
|
||||
processor.ensure_keyspace = Processor.ensure_keyspace.__get__(processor, Processor)
|
||||
processor.ensure_tables = Processor.ensure_tables.__get__(processor, Processor)
|
||||
|
|
@ -108,6 +110,7 @@ class TestRowsCassandraIntegration:
|
|||
processor.get_index_names = Processor.get_index_names.__get__(processor, Processor)
|
||||
processor.build_index_value = Processor.build_index_value.__get__(processor, Processor)
|
||||
processor.register_partitions = Processor.register_partitions.__get__(processor, Processor)
|
||||
processor._apply_schema_config = Processor._apply_schema_config.__get__(processor, Processor)
|
||||
processor.on_schema_config = Processor.on_schema_config.__get__(processor, Processor)
|
||||
processor.on_object = Processor.on_object.__get__(processor, Processor)
|
||||
processor.collection_exists = MagicMock(return_value=True)
|
||||
|
|
|
|||
|
|
@ -184,7 +184,7 @@ class TestObjectsGraphQLQueryIntegration:
|
|||
await processor.on_schema_config("default", sample_schema_config, version=1)
|
||||
|
||||
# Connect to Cassandra
|
||||
processor.connect_cassandra()
|
||||
await processor.connect_cassandra()
|
||||
assert processor.session is not None
|
||||
|
||||
# Create test keyspace and table
|
||||
|
|
@ -219,7 +219,7 @@ class TestObjectsGraphQLQueryIntegration:
|
|||
"""Test inserting data and querying via GraphQL"""
|
||||
# Load schema and connect
|
||||
await processor.on_schema_config("default", sample_schema_config, version=1)
|
||||
processor.connect_cassandra()
|
||||
await processor.connect_cassandra()
|
||||
|
||||
# Setup test data
|
||||
keyspace = "test_user"
|
||||
|
|
@ -293,7 +293,7 @@ class TestObjectsGraphQLQueryIntegration:
|
|||
"""Test GraphQL queries with filtering on indexed fields"""
|
||||
# Setup (reuse previous setup)
|
||||
await processor.on_schema_config("default", sample_schema_config, version=1)
|
||||
processor.connect_cassandra()
|
||||
await processor.connect_cassandra()
|
||||
|
||||
keyspace = "test_user"
|
||||
collection = "filter_test"
|
||||
|
|
@ -387,7 +387,7 @@ class TestObjectsGraphQLQueryIntegration:
|
|||
"""Test full message processing workflow"""
|
||||
# Setup
|
||||
await processor.on_schema_config("default", sample_schema_config, version=1)
|
||||
processor.connect_cassandra()
|
||||
await processor.connect_cassandra()
|
||||
|
||||
# Create mock message
|
||||
request = RowsQueryRequest(
|
||||
|
|
@ -433,7 +433,7 @@ class TestObjectsGraphQLQueryIntegration:
|
|||
"""Test handling multiple concurrent GraphQL queries"""
|
||||
# Setup
|
||||
await processor.on_schema_config("default", sample_schema_config, version=1)
|
||||
processor.connect_cassandra()
|
||||
await processor.connect_cassandra()
|
||||
|
||||
# Create multiple query tasks
|
||||
queries = [
|
||||
|
|
@ -519,7 +519,7 @@ class TestObjectsGraphQLQueryIntegration:
|
|||
"""Test handling of large query result sets"""
|
||||
# Setup
|
||||
await processor.on_schema_config("default", sample_schema_config, version=1)
|
||||
processor.connect_cassandra()
|
||||
await processor.connect_cassandra()
|
||||
|
||||
keyspace = "large_test_user"
|
||||
collection = "large_collection"
|
||||
|
|
|
|||
|
|
@ -16,4 +16,13 @@ markers =
|
|||
unit: marks tests as unit tests
|
||||
contract: marks tests as contract tests (service interface validation)
|
||||
vertexai: marks tests as vertex ai specific tests
|
||||
asyncio: marks tests that use asyncio
|
||||
asyncio: marks tests that use asyncio
|
||||
# This is helpful if you're bored with deprecationwarnings. I prefer to
|
||||
# keep the warnings for now, it avoids masking problems.
|
||||
#
|
||||
# filterwarnings =
|
||||
# ignore:Core Pydantic V1 functionality isn't compatible with Python 3.14.*:UserWarning
|
||||
# ignore:builtin type SwigPyPacked has no __module__ attribute:DeprecationWarning
|
||||
# ignore:builtin type SwigPyObject has no __module__ attribute:DeprecationWarning
|
||||
# ignore:builtin type swigvarlink has no __module__ attribute:DeprecationWarning
|
||||
# ignore:.*_UnionGenericAlias.*is deprecated and slated for removal in Python 3.17:DeprecationWarning
|
||||
|
|
|
|||
296
tests/unit/test_api/test_library_api.py
Normal file
296
tests/unit/test_api/test_library_api.py
Normal file
|
|
@ -0,0 +1,296 @@
|
|||
"""
|
||||
Tests for the Library API wrapper round-trip behavior.
|
||||
Covers the get_documents → update_document path and edge cases
|
||||
from issue #893.
|
||||
"""
|
||||
|
||||
import datetime
|
||||
import pytest
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
from trustgraph.api.library import Library, to_value, from_value
|
||||
from trustgraph.api.types import DocumentMetadata, Triple
|
||||
from trustgraph.knowledge import Uri, Literal
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _make_library(response=None):
|
||||
api = MagicMock()
|
||||
api.workspace = "default"
|
||||
api.request.return_value = response or {}
|
||||
lib = Library(api)
|
||||
return lib, api
|
||||
|
||||
|
||||
def _wire_triple(s_iri, p_iri, o_val):
|
||||
return {
|
||||
"s": {"t": "i", "i": s_iri},
|
||||
"p": {"t": "i", "i": p_iri},
|
||||
"o": {"t": "l", "v": o_val},
|
||||
}
|
||||
|
||||
|
||||
def _doc_wire(id="doc-1", time=1700000000, title="Test Doc",
|
||||
kind="text/plain", comments="", tags=None,
|
||||
metadata=None, parent_id="", document_type="source",
|
||||
include_title=True):
|
||||
doc = {
|
||||
"id": id,
|
||||
"time": time,
|
||||
"kind": kind,
|
||||
"comments": comments,
|
||||
"metadata": metadata or [],
|
||||
"tags": tags or [],
|
||||
"parent-id": parent_id,
|
||||
"document-type": document_type,
|
||||
}
|
||||
if include_title:
|
||||
doc["title"] = title
|
||||
return doc
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Bug 1: get_documents tolerates missing title
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestGetDocumentsMissingTitle:
|
||||
|
||||
def test_missing_title_defaults_to_empty(self):
|
||||
doc = _doc_wire(include_title=False)
|
||||
lib, api = _make_library({"document-metadatas": [doc]})
|
||||
|
||||
result = lib.get_documents()
|
||||
|
||||
assert len(result) == 1
|
||||
assert result[0].title == ""
|
||||
|
||||
def test_present_title_preserved(self):
|
||||
doc = _doc_wire(title="My Title")
|
||||
lib, api = _make_library({"document-metadatas": [doc]})
|
||||
|
||||
result = lib.get_documents()
|
||||
|
||||
assert result[0].title == "My Title"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Bug 2: update_document handles Triple objects (attribute access)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestUpdateDocumentTripleAccess:
|
||||
|
||||
def test_triple_objects_serialized_correctly(self):
|
||||
lib, api = _make_library({})
|
||||
|
||||
metadata = DocumentMetadata(
|
||||
id="doc-1",
|
||||
time=datetime.datetime.fromtimestamp(1700000000),
|
||||
kind="text/plain",
|
||||
title="Test",
|
||||
comments="",
|
||||
metadata=[
|
||||
Triple(
|
||||
s=Uri("http://example.org/entity/alice"),
|
||||
p=Uri("http://example.org/rel/knows"),
|
||||
o=Literal("Bob"),
|
||||
),
|
||||
],
|
||||
tags=["test"],
|
||||
)
|
||||
|
||||
lib.update_document(id="doc-1", metadata=metadata)
|
||||
|
||||
call_args = api.request.call_args[0][1]
|
||||
triples = call_args["document-metadata"]["metadata"]
|
||||
|
||||
assert len(triples) == 1
|
||||
assert triples[0]["s"]["i"] == "http://example.org/entity/alice"
|
||||
assert triples[0]["p"]["i"] == "http://example.org/rel/knows"
|
||||
assert triples[0]["o"]["v"] == "Bob"
|
||||
|
||||
def test_empty_metadata_list(self):
|
||||
lib, api = _make_library({})
|
||||
|
||||
metadata = DocumentMetadata(
|
||||
id="doc-1",
|
||||
time=datetime.datetime.fromtimestamp(1700000000),
|
||||
kind="text/plain",
|
||||
title="Test",
|
||||
comments="",
|
||||
metadata=[],
|
||||
tags=[],
|
||||
)
|
||||
|
||||
lib.update_document(id="doc-1", metadata=metadata)
|
||||
|
||||
call_args = api.request.call_args[0][1]
|
||||
assert call_args["document-metadata"]["metadata"] == []
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Bug 3: update_document serializes datetime to int seconds
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestUpdateDocumentTimeSerialization:
|
||||
|
||||
def test_datetime_serialized_to_int(self):
|
||||
lib, api = _make_library({})
|
||||
|
||||
ts = 1700000000
|
||||
metadata = DocumentMetadata(
|
||||
id="doc-1",
|
||||
time=datetime.datetime.fromtimestamp(ts),
|
||||
kind="text/plain",
|
||||
title="Test",
|
||||
comments="",
|
||||
metadata=[],
|
||||
tags=[],
|
||||
)
|
||||
|
||||
lib.update_document(id="doc-1", metadata=metadata)
|
||||
|
||||
call_args = api.request.call_args[0][1]
|
||||
wire_time = call_args["document-metadata"]["time"]
|
||||
|
||||
assert isinstance(wire_time, int)
|
||||
assert wire_time == ts
|
||||
|
||||
def test_int_time_passed_through(self):
|
||||
lib, api = _make_library({})
|
||||
|
||||
metadata = DocumentMetadata(
|
||||
id="doc-1",
|
||||
time=1700000000,
|
||||
kind="text/plain",
|
||||
title="Test",
|
||||
comments="",
|
||||
metadata=[],
|
||||
tags=[],
|
||||
)
|
||||
|
||||
lib.update_document(id="doc-1", metadata=metadata)
|
||||
|
||||
call_args = api.request.call_args[0][1]
|
||||
assert call_args["document-metadata"]["time"] == 1700000000
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Bug 4: update_document handles empty server response
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestUpdateDocumentEmptyResponse:
|
||||
|
||||
def test_empty_response_returns_input_metadata(self):
|
||||
lib, api = _make_library({})
|
||||
|
||||
metadata = DocumentMetadata(
|
||||
id="doc-1",
|
||||
time=datetime.datetime.fromtimestamp(1700000000),
|
||||
kind="text/plain",
|
||||
title="Updated Title",
|
||||
comments="notes",
|
||||
metadata=[],
|
||||
tags=["a"],
|
||||
)
|
||||
|
||||
result = lib.update_document(id="doc-1", metadata=metadata)
|
||||
|
||||
assert result is metadata
|
||||
|
||||
def test_full_response_parsed(self):
|
||||
response_doc = _doc_wire(
|
||||
id="doc-1", title="Server Title", tags=["b"],
|
||||
)
|
||||
lib, api = _make_library({"document-metadata": response_doc})
|
||||
|
||||
metadata = DocumentMetadata(
|
||||
id="doc-1",
|
||||
time=datetime.datetime.fromtimestamp(1700000000),
|
||||
kind="text/plain",
|
||||
title="Client Title",
|
||||
comments="",
|
||||
metadata=[],
|
||||
tags=["a"],
|
||||
)
|
||||
|
||||
result = lib.update_document(id="doc-1", metadata=metadata)
|
||||
|
||||
assert result.title == "Server Title"
|
||||
assert result.tags == ["b"]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Bug 5: update_document sends both id and document-id
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestUpdateDocumentIdKeys:
|
||||
|
||||
def test_both_id_keys_sent(self):
|
||||
lib, api = _make_library({})
|
||||
|
||||
metadata = DocumentMetadata(
|
||||
id="doc-1",
|
||||
time=datetime.datetime.fromtimestamp(1700000000),
|
||||
kind="text/plain",
|
||||
title="Test",
|
||||
comments="",
|
||||
metadata=[],
|
||||
tags=[],
|
||||
)
|
||||
|
||||
lib.update_document(id="doc-1", metadata=metadata)
|
||||
|
||||
call_args = api.request.call_args[0][1]
|
||||
doc_meta = call_args["document-metadata"]
|
||||
|
||||
assert doc_meta["id"] == "doc-1"
|
||||
assert doc_meta["document-id"] == "doc-1"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Round-trip: get_documents → update_document
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestGetUpdateRoundTrip:
|
||||
|
||||
def test_full_round_trip(self):
|
||||
wire_doc = _doc_wire(
|
||||
id="doc-42",
|
||||
title="Original",
|
||||
tags=["v1"],
|
||||
metadata=[_wire_triple(
|
||||
"http://example.org/e/1",
|
||||
"http://example.org/r/type",
|
||||
"report",
|
||||
)],
|
||||
)
|
||||
|
||||
lib, api = _make_library({"document-metadatas": [wire_doc]})
|
||||
|
||||
docs = lib.get_documents()
|
||||
assert len(docs) == 1
|
||||
|
||||
doc = docs[0]
|
||||
doc.title = "Updated"
|
||||
doc.tags.append("v2")
|
||||
|
||||
# Server returns empty on update
|
||||
api.request.return_value = {}
|
||||
result = lib.update_document(id=doc.id, metadata=doc)
|
||||
|
||||
# Should not raise, should return the input metadata
|
||||
assert result.title == "Updated"
|
||||
assert "v2" in result.tags
|
||||
|
||||
# Verify the wire format sent
|
||||
call_args = api.request.call_args[0][1]
|
||||
doc_meta = call_args["document-metadata"]
|
||||
|
||||
assert doc_meta["id"] == "doc-42"
|
||||
assert doc_meta["title"] == "Updated"
|
||||
assert isinstance(doc_meta["time"], int)
|
||||
assert len(doc_meta["metadata"]) == 1
|
||||
assert doc_meta["metadata"][0]["o"]["v"] == "report"
|
||||
|
|
@ -272,23 +272,22 @@ class TestMetricsIntegration:
|
|||
class TestPollTimeout:
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_poll_timeout_is_100ms(self):
|
||||
"""Consumer receive timeout should be 100ms, not the original 2000ms.
|
||||
async def test_poll_timeout_is_2000ms(self):
|
||||
"""Consumer receive timeout should be 2000ms.
|
||||
|
||||
A 2000ms poll timeout means every service adds up to 2s of idle
|
||||
blocking between message bursts. With many sequential hops in a
|
||||
query pipeline, this compounds into seconds of unnecessary latency.
|
||||
100ms keeps responsiveness high without significant CPU overhead.
|
||||
receive() is a blocking call that returns immediately when a
|
||||
message arrives — the timeout only governs how often the loop
|
||||
checks the shutdown flag during idle periods. Lower values
|
||||
(e.g. 100ms) generate excessive C++ client WARN logging with
|
||||
no latency benefit.
|
||||
"""
|
||||
consumer = _make_consumer()
|
||||
|
||||
# Wire up a mock Pulsar consumer that records the receive kwargs
|
||||
mock_pulsar_consumer = MagicMock()
|
||||
received_kwargs = {}
|
||||
|
||||
def capture_receive(**kwargs):
|
||||
received_kwargs.update(kwargs)
|
||||
# Stop after one call
|
||||
consumer.running = False
|
||||
raise type('Timeout', (Exception,), {})("timeout")
|
||||
|
||||
|
|
@ -296,7 +295,7 @@ class TestPollTimeout:
|
|||
|
||||
await consumer.consume_from_queue(mock_pulsar_consumer)
|
||||
|
||||
assert received_kwargs.get("timeout_millis") == 100
|
||||
assert received_kwargs.get("timeout_millis") == 2000
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
|
|
|
|||
|
|
@ -25,16 +25,17 @@ class TestSemaphoreEnforcement:
|
|||
max_concurrent = 0
|
||||
processing_event = asyncio.Event()
|
||||
|
||||
async def slow_process(message):
|
||||
async def slow_process(message, sender):
|
||||
nonlocal concurrent_count, max_concurrent
|
||||
concurrent_count += 1
|
||||
max_concurrent = max(max_concurrent, concurrent_count)
|
||||
await asyncio.sleep(0.05)
|
||||
concurrent_count -= 1
|
||||
return {"id": message.get("id"), "response": {"ok": True}}
|
||||
|
||||
dispatcher._process_message = slow_process
|
||||
|
||||
sender = AsyncMock()
|
||||
|
||||
# Launch more tasks than max_workers
|
||||
messages = [
|
||||
{"id": f"msg-{i}", "service": "test", "request": {}}
|
||||
|
|
@ -42,7 +43,7 @@ class TestSemaphoreEnforcement:
|
|||
]
|
||||
|
||||
tasks = [
|
||||
asyncio.create_task(dispatcher.handle_message(m))
|
||||
asyncio.create_task(dispatcher.handle_message(m, sender))
|
||||
for m in messages
|
||||
]
|
||||
|
||||
|
|
@ -66,17 +67,17 @@ class TestSemaphoreEnforcement:
|
|||
|
||||
original_process = dispatcher._process_message
|
||||
|
||||
async def tracking_process(message):
|
||||
async def tracking_process(message, sender):
|
||||
nonlocal task_was_tracked
|
||||
# During processing, our task should be in active_tasks
|
||||
if len(dispatcher.active_tasks) > 0:
|
||||
task_was_tracked = True
|
||||
return {"id": message.get("id"), "response": {"ok": True}}
|
||||
|
||||
dispatcher._process_message = tracking_process
|
||||
|
||||
await dispatcher.handle_message(
|
||||
{"id": "test", "service": "test", "request": {}}
|
||||
{"id": "test", "service": "test", "request": {}},
|
||||
AsyncMock(),
|
||||
)
|
||||
|
||||
assert task_was_tracked
|
||||
|
|
@ -88,7 +89,7 @@ class TestSemaphoreEnforcement:
|
|||
"""Semaphore should be released even if processing raises."""
|
||||
dispatcher = MessageDispatcher(max_workers=2)
|
||||
|
||||
async def failing_process(message):
|
||||
async def failing_process(message, sender):
|
||||
raise RuntimeError("process failed")
|
||||
|
||||
dispatcher._process_message = failing_process
|
||||
|
|
@ -96,7 +97,8 @@ class TestSemaphoreEnforcement:
|
|||
# Should not deadlock — semaphore must be released on error
|
||||
with pytest.raises(RuntimeError):
|
||||
await dispatcher.handle_message(
|
||||
{"id": "test", "service": "test", "request": {}}
|
||||
{"id": "test", "service": "test", "request": {}},
|
||||
AsyncMock(),
|
||||
)
|
||||
|
||||
# Semaphore should be back at max
|
||||
|
|
@ -109,17 +111,18 @@ class TestSemaphoreEnforcement:
|
|||
|
||||
order = []
|
||||
|
||||
async def ordered_process(message):
|
||||
async def ordered_process(message, sender):
|
||||
msg_id = message["id"]
|
||||
order.append(f"start-{msg_id}")
|
||||
await asyncio.sleep(0.02)
|
||||
order.append(f"end-{msg_id}")
|
||||
return {"id": msg_id, "response": {"ok": True}}
|
||||
|
||||
dispatcher._process_message = ordered_process
|
||||
|
||||
sender = AsyncMock()
|
||||
|
||||
messages = [{"id": str(i), "service": "t", "request": {}} for i in range(3)]
|
||||
tasks = [asyncio.create_task(dispatcher.handle_message(m)) for m in messages]
|
||||
tasks = [asyncio.create_task(dispatcher.handle_message(m, sender)) for m in messages]
|
||||
await asyncio.gather(*tasks)
|
||||
|
||||
# With semaphore=1, each message should complete before next starts
|
||||
|
|
|
|||
|
|
@ -0,0 +1,389 @@
|
|||
"""
|
||||
Tests for TripleConverter domain/range enforcement and
|
||||
OntologySelector bypass for small ontologies.
|
||||
|
||||
Covers fixes for #908 (bypass_selector_below) and #920 (domain/range validation).
|
||||
"""
|
||||
|
||||
import pytest
|
||||
from unittest.mock import Mock, AsyncMock
|
||||
|
||||
from trustgraph.extract.kg.ontology.triple_converter import TripleConverter
|
||||
from trustgraph.extract.kg.ontology.ontology_selector import (
|
||||
OntologySelector,
|
||||
OntologySubset,
|
||||
)
|
||||
from trustgraph.extract.kg.ontology.ontology_loader import (
|
||||
Ontology,
|
||||
OntologyClass,
|
||||
OntologyProperty,
|
||||
)
|
||||
from trustgraph.extract.kg.ontology.simplified_parser import (
|
||||
Relationship,
|
||||
Attribute,
|
||||
)
|
||||
from trustgraph.extract.kg.ontology.text_processor import TextSegment
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Fixtures
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@pytest.fixture
|
||||
def ontology_subset():
|
||||
"""Ontology subset with classes, hierarchy, and constrained properties."""
|
||||
return OntologySubset(
|
||||
ontology_id="test",
|
||||
classes={
|
||||
"Person": {
|
||||
"uri": "http://example.org/Person",
|
||||
"type": "owl:Class",
|
||||
"labels": [{"value": "Person"}],
|
||||
"subclass_of": None,
|
||||
},
|
||||
"Employee": {
|
||||
"uri": "http://example.org/Employee",
|
||||
"type": "owl:Class",
|
||||
"labels": [{"value": "Employee"}],
|
||||
"subclass_of": "Person",
|
||||
},
|
||||
"Manager": {
|
||||
"uri": "http://example.org/Manager",
|
||||
"type": "owl:Class",
|
||||
"labels": [{"value": "Manager"}],
|
||||
"subclass_of": "Employee",
|
||||
},
|
||||
"Company": {
|
||||
"uri": "http://example.org/Company",
|
||||
"type": "owl:Class",
|
||||
"labels": [{"value": "Company"}],
|
||||
"subclass_of": None,
|
||||
},
|
||||
"Product": {
|
||||
"uri": "http://example.org/Product",
|
||||
"type": "owl:Class",
|
||||
"labels": [{"value": "Product"}],
|
||||
"subclass_of": None,
|
||||
},
|
||||
},
|
||||
object_properties={
|
||||
"worksFor": {
|
||||
"uri": "http://example.org/worksFor",
|
||||
"type": "owl:ObjectProperty",
|
||||
"labels": [{"value": "works for"}],
|
||||
"domain": "Person",
|
||||
"range": "Company",
|
||||
},
|
||||
"manages": {
|
||||
"uri": "http://example.org/manages",
|
||||
"type": "owl:ObjectProperty",
|
||||
"labels": [{"value": "manages"}],
|
||||
"domain": "Manager",
|
||||
"range": "Employee",
|
||||
},
|
||||
"relatedTo": {
|
||||
"uri": "http://example.org/relatedTo",
|
||||
"type": "owl:ObjectProperty",
|
||||
"labels": [{"value": "related to"}],
|
||||
"domain": None,
|
||||
"range": None,
|
||||
},
|
||||
},
|
||||
datatype_properties={
|
||||
"employeeId": {
|
||||
"uri": "http://example.org/employeeId",
|
||||
"type": "owl:DatatypeProperty",
|
||||
"labels": [{"value": "employee ID"}],
|
||||
"domain": "Employee",
|
||||
},
|
||||
"description": {
|
||||
"uri": "http://example.org/description",
|
||||
"type": "owl:DatatypeProperty",
|
||||
"labels": [{"value": "description"}],
|
||||
"domain": None,
|
||||
},
|
||||
},
|
||||
metadata={"name": "Test Ontology"},
|
||||
)
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def converter(ontology_subset):
|
||||
return TripleConverter(ontology_subset=ontology_subset, ontology_id="test")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Domain/range enforcement — relationships
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestRelationshipDomainRange:
|
||||
|
||||
def test_valid_domain_and_range(self, converter):
|
||||
rel = Relationship(
|
||||
subject="Alice", subject_type="Person",
|
||||
relation="worksFor",
|
||||
object="Acme Corp", object_type="Company",
|
||||
)
|
||||
triple = converter.convert_relationship(rel)
|
||||
assert triple is not None
|
||||
|
||||
def test_domain_violation_rejected(self, converter):
|
||||
rel = Relationship(
|
||||
subject="Widget", subject_type="Product",
|
||||
relation="worksFor",
|
||||
object="Acme Corp", object_type="Company",
|
||||
)
|
||||
assert converter.convert_relationship(rel) is None
|
||||
|
||||
def test_range_violation_rejected(self, converter):
|
||||
rel = Relationship(
|
||||
subject="Alice", subject_type="Person",
|
||||
relation="worksFor",
|
||||
object="Widget", object_type="Product",
|
||||
)
|
||||
assert converter.convert_relationship(rel) is None
|
||||
|
||||
def test_both_domain_and_range_violated(self, converter):
|
||||
rel = Relationship(
|
||||
subject="Widget", subject_type="Product",
|
||||
relation="worksFor",
|
||||
object="Gadget", object_type="Product",
|
||||
)
|
||||
assert converter.convert_relationship(rel) is None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Subclass acceptance
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestSubclassAcceptance:
|
||||
|
||||
def test_direct_subclass_matches_domain(self, converter):
|
||||
"""Employee is subclass of Person; worksFor domain is Person."""
|
||||
rel = Relationship(
|
||||
subject="Bob", subject_type="Employee",
|
||||
relation="worksFor",
|
||||
object="Acme Corp", object_type="Company",
|
||||
)
|
||||
assert converter.convert_relationship(rel) is not None
|
||||
|
||||
def test_transitive_subclass_matches_domain(self, converter):
|
||||
"""Manager → Employee → Person; worksFor domain is Person."""
|
||||
rel = Relationship(
|
||||
subject="Carol", subject_type="Manager",
|
||||
relation="worksFor",
|
||||
object="Acme Corp", object_type="Company",
|
||||
)
|
||||
assert converter.convert_relationship(rel) is not None
|
||||
|
||||
def test_subclass_matches_range(self, converter):
|
||||
"""manages range is Employee; Manager is subclass of Employee."""
|
||||
rel = Relationship(
|
||||
subject="Carol", subject_type="Manager",
|
||||
relation="manages",
|
||||
object="Dave", object_type="Manager",
|
||||
)
|
||||
assert converter.convert_relationship(rel) is not None
|
||||
|
||||
def test_superclass_does_not_match_subclass_constraint(self, converter):
|
||||
"""manages domain is Manager; Person is NOT a subclass of Manager."""
|
||||
rel = Relationship(
|
||||
subject="Alice", subject_type="Person",
|
||||
relation="manages",
|
||||
object="Bob", object_type="Employee",
|
||||
)
|
||||
assert converter.convert_relationship(rel) is None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Polymorphic properties (no domain/range)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestPolymorphicProperties:
|
||||
|
||||
def test_no_domain_no_range_allows_anything(self, converter):
|
||||
rel = Relationship(
|
||||
subject="Alice", subject_type="Person",
|
||||
relation="relatedTo",
|
||||
object="Acme Corp", object_type="Company",
|
||||
)
|
||||
assert converter.convert_relationship(rel) is not None
|
||||
|
||||
def test_polymorphic_with_unrelated_types(self, converter):
|
||||
rel = Relationship(
|
||||
subject="Widget", subject_type="Product",
|
||||
relation="relatedTo",
|
||||
object="Bob", object_type="Employee",
|
||||
)
|
||||
assert converter.convert_relationship(rel) is not None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Datatype property domain enforcement
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestAttributeDomainValidation:
|
||||
|
||||
def test_valid_domain(self, converter):
|
||||
attr = Attribute(
|
||||
entity="Bob", entity_type="Employee",
|
||||
attribute="employeeId", value="E-1234",
|
||||
)
|
||||
assert converter.convert_attribute(attr) is not None
|
||||
|
||||
def test_subclass_matches_domain(self, converter):
|
||||
"""Manager is subclass of Employee; employeeId domain is Employee."""
|
||||
attr = Attribute(
|
||||
entity="Carol", entity_type="Manager",
|
||||
attribute="employeeId", value="M-5678",
|
||||
)
|
||||
assert converter.convert_attribute(attr) is not None
|
||||
|
||||
def test_domain_violation_rejected(self, converter):
|
||||
attr = Attribute(
|
||||
entity="Acme Corp", entity_type="Company",
|
||||
attribute="employeeId", value="E-0000",
|
||||
)
|
||||
assert converter.convert_attribute(attr) is None
|
||||
|
||||
def test_no_domain_allows_anything(self, converter):
|
||||
attr = Attribute(
|
||||
entity="Widget", entity_type="Product",
|
||||
attribute="description", value="A useful widget",
|
||||
)
|
||||
assert converter.convert_attribute(attr) is not None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# OntologySelector bypass for small ontologies (#908)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _make_ontology(n_classes, n_obj_props=0, n_dt_props=0):
|
||||
classes = {
|
||||
f"C{i}": OntologyClass(uri=f"http://example.org/C{i}")
|
||||
for i in range(n_classes)
|
||||
}
|
||||
obj_props = {
|
||||
f"op{i}": OntologyProperty(
|
||||
uri=f"http://example.org/op{i}", type="owl:ObjectProperty"
|
||||
)
|
||||
for i in range(n_obj_props)
|
||||
}
|
||||
dt_props = {
|
||||
f"dp{i}": OntologyProperty(
|
||||
uri=f"http://example.org/dp{i}", type="owl:DatatypeProperty"
|
||||
)
|
||||
for i in range(n_dt_props)
|
||||
}
|
||||
return Ontology(
|
||||
id="tiny",
|
||||
metadata={"name": "Tiny"},
|
||||
classes=classes,
|
||||
object_properties=obj_props,
|
||||
datatype_properties=dt_props,
|
||||
)
|
||||
|
||||
|
||||
def _make_loader(ontology):
|
||||
loader = Mock()
|
||||
loader.get_ontology.return_value = ontology
|
||||
loader.get_all_ontologies.return_value = {"tiny": ontology}
|
||||
return loader
|
||||
|
||||
|
||||
class TestBypassSelectorBelow:
|
||||
|
||||
async def test_bypass_returns_full_ontology(self):
|
||||
"""With 3 elements and bypass_selector_below=5, selector is bypassed."""
|
||||
ont = _make_ontology(2, 1, 0)
|
||||
loader = _make_loader(ont)
|
||||
embedder = Mock()
|
||||
|
||||
selector = OntologySelector(
|
||||
ontology_embedder=embedder,
|
||||
ontology_loader=loader,
|
||||
bypass_selector_below=5,
|
||||
)
|
||||
|
||||
segments = [TextSegment(text="some text", type="sentence", position=0)]
|
||||
subsets = await selector.select_ontology_subset(segments)
|
||||
|
||||
assert len(subsets) == 1
|
||||
assert subsets[0].ontology_id == "tiny"
|
||||
assert len(subsets[0].classes) == 2
|
||||
assert len(subsets[0].object_properties) == 1
|
||||
assert subsets[0].relevance_score == 1.0
|
||||
# Embedder should never be called
|
||||
embedder.embed_text.assert_not_called()
|
||||
|
||||
async def test_no_bypass_when_above_threshold(self):
|
||||
"""With 10 elements and bypass_selector_below=5, selector runs normally."""
|
||||
ont = _make_ontology(6, 3, 1)
|
||||
loader = _make_loader(ont)
|
||||
|
||||
embedder = Mock()
|
||||
embedder.embed_text = AsyncMock(return_value=[0.1, 0.2])
|
||||
vector_store = Mock()
|
||||
vector_store.size.return_value = 10
|
||||
vector_store.search.return_value = []
|
||||
embedder.get_vector_store.return_value = vector_store
|
||||
|
||||
selector = OntologySelector(
|
||||
ontology_embedder=embedder,
|
||||
ontology_loader=loader,
|
||||
bypass_selector_below=5,
|
||||
)
|
||||
|
||||
segments = [TextSegment(text="some text", type="sentence", position=0)]
|
||||
subsets = await selector.select_ontology_subset(segments)
|
||||
|
||||
# Vector store was consulted (selector ran normally)
|
||||
vector_store.size.assert_called_once()
|
||||
|
||||
async def test_bypass_at_exact_threshold_not_triggered(self):
|
||||
"""With exactly 5 elements and bypass_selector_below=5, selector runs (< not <=)."""
|
||||
ont = _make_ontology(3, 1, 1) # total = 5
|
||||
loader = _make_loader(ont)
|
||||
|
||||
embedder = Mock()
|
||||
embedder.embed_text = AsyncMock(return_value=[0.1, 0.2])
|
||||
vector_store = Mock()
|
||||
vector_store.size.return_value = 5
|
||||
vector_store.search.return_value = []
|
||||
embedder.get_vector_store.return_value = vector_store
|
||||
|
||||
selector = OntologySelector(
|
||||
ontology_embedder=embedder,
|
||||
ontology_loader=loader,
|
||||
bypass_selector_below=5,
|
||||
)
|
||||
|
||||
segments = [TextSegment(text="some text", type="sentence", position=0)]
|
||||
subsets = await selector.select_ontology_subset(segments)
|
||||
|
||||
# Should NOT bypass — 5 is not < 5
|
||||
vector_store.size.assert_called_once()
|
||||
|
||||
async def test_bypass_zero_disables(self):
|
||||
"""bypass_selector_below=0 means bypass never triggers."""
|
||||
ont = _make_ontology(0, 0, 0) # empty ontology
|
||||
loader = _make_loader(ont)
|
||||
|
||||
embedder = Mock()
|
||||
embedder.embed_text = AsyncMock(return_value=[0.1])
|
||||
vector_store = Mock()
|
||||
vector_store.size.return_value = 0
|
||||
vector_store.search.return_value = []
|
||||
embedder.get_vector_store.return_value = vector_store
|
||||
|
||||
selector = OntologySelector(
|
||||
ontology_embedder=embedder,
|
||||
ontology_loader=loader,
|
||||
bypass_selector_below=0,
|
||||
)
|
||||
|
||||
segments = [TextSegment(text="some text", type="sentence", position=0)]
|
||||
subsets = await selector.select_ontology_subset(segments)
|
||||
|
||||
# 0 is not < 0, so bypass doesn't trigger
|
||||
vector_store.size.assert_called_once()
|
||||
|
|
@ -165,22 +165,37 @@ class TestIamAuthDispatch:
|
|||
by shape of the bearer."""
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_no_authorization_header_raises_401(self):
|
||||
async def test_no_authorization_header_tries_anonymous(self):
|
||||
auth = IamAuth(backend=Mock())
|
||||
with pytest.raises(web.HTTPUnauthorized):
|
||||
await auth.authenticate(make_request(None))
|
||||
|
||||
async def fake_with_client(op):
|
||||
raise RuntimeError("auth-failed: anonymous access not permitted")
|
||||
|
||||
with patch.object(auth, "_with_client", side_effect=fake_with_client):
|
||||
with pytest.raises(web.HTTPUnauthorized):
|
||||
await auth.authenticate(make_request(None))
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_non_bearer_header_raises_401(self):
|
||||
async def test_non_bearer_header_tries_anonymous(self):
|
||||
auth = IamAuth(backend=Mock())
|
||||
with pytest.raises(web.HTTPUnauthorized):
|
||||
await auth.authenticate(make_request("Basic whatever"))
|
||||
|
||||
async def fake_with_client(op):
|
||||
raise RuntimeError("auth-failed: anonymous access not permitted")
|
||||
|
||||
with patch.object(auth, "_with_client", side_effect=fake_with_client):
|
||||
with pytest.raises(web.HTTPUnauthorized):
|
||||
await auth.authenticate(make_request("Basic whatever"))
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_empty_bearer_raises_401(self):
|
||||
async def test_empty_bearer_tries_anonymous(self):
|
||||
auth = IamAuth(backend=Mock())
|
||||
with pytest.raises(web.HTTPUnauthorized):
|
||||
await auth.authenticate(make_request("Bearer "))
|
||||
|
||||
async def fake_with_client(op):
|
||||
raise RuntimeError("auth-failed: anonymous access not permitted")
|
||||
|
||||
with patch.object(auth, "_with_client", side_effect=fake_with_client):
|
||||
with pytest.raises(web.HTTPUnauthorized):
|
||||
await auth.authenticate(make_request("Bearer "))
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_unknown_format_raises_401(self):
|
||||
|
|
@ -445,3 +460,121 @@ class TestAuthorise:
|
|||
|
||||
# Different resource → different cache key → two IAM calls.
|
||||
assert calls["n"] == 2
|
||||
|
||||
|
||||
# -- Anonymous authentication boundary ------------------------------------
|
||||
|
||||
|
||||
class TestAnonymousAuthBoundary:
|
||||
"""The gateway must only attempt anonymous auth when no credential
|
||||
is presented. A malformed token must NOT fall through to the
|
||||
anonymous path — that would let an attacker bypass a broken token
|
||||
by simply sending garbage."""
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_no_header_attempts_anonymous(self):
|
||||
auth = IamAuth(backend=Mock())
|
||||
|
||||
async def fake_with_client(op):
|
||||
return await op(Mock(
|
||||
authenticate_anonymous=AsyncMock(
|
||||
return_value=("anon", "default", ["reader"]),
|
||||
)
|
||||
))
|
||||
|
||||
with patch.object(auth, "_with_client", side_effect=fake_with_client):
|
||||
ident = await auth.authenticate(make_request(None))
|
||||
assert ident.handle == "anon"
|
||||
assert ident.source == "anonymous"
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_empty_bearer_attempts_anonymous(self):
|
||||
auth = IamAuth(backend=Mock())
|
||||
|
||||
async def fake_with_client(op):
|
||||
return await op(Mock(
|
||||
authenticate_anonymous=AsyncMock(
|
||||
return_value=("anon", "default", ["reader"]),
|
||||
)
|
||||
))
|
||||
|
||||
with patch.object(auth, "_with_client", side_effect=fake_with_client):
|
||||
ident = await auth.authenticate(make_request("Bearer "))
|
||||
assert ident.handle == "anon"
|
||||
assert ident.source == "anonymous"
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_malformed_token_does_not_fall_through_to_anonymous(self):
|
||||
auth = IamAuth(backend=Mock())
|
||||
called = {"anonymous": False}
|
||||
|
||||
original = auth._authenticate_anonymous
|
||||
|
||||
async def spy_anonymous():
|
||||
called["anonymous"] = True
|
||||
return await original()
|
||||
|
||||
auth._authenticate_anonymous = spy_anonymous
|
||||
|
||||
with pytest.raises(web.HTTPUnauthorized):
|
||||
await auth.authenticate(make_request("Bearer garbage"))
|
||||
assert not called["anonymous"]
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_bad_api_key_does_not_fall_through_to_anonymous(self):
|
||||
auth = IamAuth(backend=Mock())
|
||||
called = {"anonymous": False}
|
||||
|
||||
async def spy_anonymous():
|
||||
called["anonymous"] = True
|
||||
|
||||
auth._authenticate_anonymous = spy_anonymous
|
||||
|
||||
async def fake_with_client(op):
|
||||
raise RuntimeError("auth-failed: unknown key")
|
||||
|
||||
with patch.object(auth, "_with_client", side_effect=fake_with_client):
|
||||
with pytest.raises(web.HTTPUnauthorized):
|
||||
await auth.authenticate(make_request("Bearer tg_bad"))
|
||||
assert not called["anonymous"]
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_bad_jwt_does_not_fall_through_to_anonymous(self):
|
||||
auth = IamAuth(backend=Mock())
|
||||
auth._signing_public_pem = "not-a-real-pem"
|
||||
called = {"anonymous": False}
|
||||
|
||||
async def spy_anonymous():
|
||||
called["anonymous"] = True
|
||||
|
||||
auth._authenticate_anonymous = spy_anonymous
|
||||
|
||||
with pytest.raises(web.HTTPUnauthorized):
|
||||
await auth.authenticate(make_request("Bearer a.b.c"))
|
||||
assert not called["anonymous"]
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_anonymous_rejected_by_iam_raises_401(self):
|
||||
auth = IamAuth(backend=Mock())
|
||||
|
||||
async def fake_with_client(op):
|
||||
raise RuntimeError("auth-failed: anonymous access not permitted")
|
||||
|
||||
with patch.object(auth, "_with_client", side_effect=fake_with_client):
|
||||
with pytest.raises(web.HTTPUnauthorized):
|
||||
await auth.authenticate(make_request(None))
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_anonymous_with_empty_user_id_raises_401(self):
|
||||
auth = IamAuth(backend=Mock())
|
||||
|
||||
async def fake_with_client(op):
|
||||
return await op(Mock(
|
||||
authenticate_anonymous=AsyncMock(
|
||||
return_value=("", "default", []),
|
||||
)
|
||||
))
|
||||
|
||||
with patch.object(auth, "_with_client", side_effect=fake_with_client):
|
||||
with pytest.raises(web.HTTPUnauthorized):
|
||||
await auth.authenticate(make_request(None))
|
||||
|
|
|
|||
0
tests/unit/test_iam/__init__.py
Normal file
0
tests/unit/test_iam/__init__.py
Normal file
44
tests/unit/test_iam/test_iam_rejects_anonymous.py
Normal file
44
tests/unit/test_iam/test_iam_rejects_anonymous.py
Normal file
|
|
@ -0,0 +1,44 @@
|
|||
"""
|
||||
Contract test: the full iam-svc MUST reject authenticate-anonymous.
|
||||
|
||||
This is a safety pin — if someone accidentally adds anonymous access
|
||||
to the production IAM handler, this test catches it.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
from unittest.mock import Mock, AsyncMock
|
||||
|
||||
import pytest
|
||||
|
||||
from trustgraph.iam.service.iam import IamService
|
||||
|
||||
|
||||
def _make_request(**kwargs):
|
||||
req = Mock()
|
||||
for k, v in kwargs.items():
|
||||
setattr(req, k, v)
|
||||
return req
|
||||
|
||||
|
||||
class TestIamRejectsAnonymous:
|
||||
|
||||
@pytest.fixture
|
||||
def handler(self):
|
||||
svc = object.__new__(IamService)
|
||||
svc.table_store = Mock(spec=[])
|
||||
svc.bootstrap_mode = "token"
|
||||
svc.bootstrap_token = "tok"
|
||||
svc._on_workspace_created = None
|
||||
svc._on_workspace_deleted = None
|
||||
svc._signing_key = None
|
||||
svc._signing_key_lock = asyncio.Lock()
|
||||
return svc
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_authenticate_anonymous_returns_auth_failed(self, handler):
|
||||
resp = await handler.handle(
|
||||
_make_request(operation="authenticate-anonymous")
|
||||
)
|
||||
assert resp.error is not None
|
||||
assert resp.error.type == "auth-failed"
|
||||
assert "anonymous" in resp.error.message.lower()
|
||||
138
tests/unit/test_iam/test_noauth_handler.py
Normal file
138
tests/unit/test_iam/test_noauth_handler.py
Normal file
|
|
@ -0,0 +1,138 @@
|
|||
"""
|
||||
Tests for the no-auth IAM handler.
|
||||
|
||||
Verifies that NoAuthHandler returns the expected permissive responses
|
||||
and that the always-allow authorise path returns the correct shape.
|
||||
"""
|
||||
|
||||
import json
|
||||
from unittest.mock import Mock
|
||||
|
||||
import pytest
|
||||
|
||||
from trustgraph.iam.noauth.handler import NoAuthHandler
|
||||
|
||||
|
||||
def _make_request(**kwargs):
|
||||
req = Mock()
|
||||
for k, v in kwargs.items():
|
||||
setattr(req, k, v)
|
||||
return req
|
||||
|
||||
|
||||
class TestAuthenticateAnonymous:
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_returns_default_identity(self):
|
||||
h = NoAuthHandler(
|
||||
default_user_id="anon", default_workspace="ws",
|
||||
)
|
||||
resp = await h.handle(
|
||||
_make_request(operation="authenticate-anonymous")
|
||||
)
|
||||
assert resp.error is None
|
||||
assert resp.resolved_user_id == "anon"
|
||||
assert resp.resolved_workspace == "ws"
|
||||
assert "admin" in list(resp.resolved_roles)
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_custom_defaults_propagate(self):
|
||||
h = NoAuthHandler(
|
||||
default_user_id="dev-user", default_workspace="dev-ws",
|
||||
)
|
||||
resp = await h.handle(
|
||||
_make_request(operation="authenticate-anonymous")
|
||||
)
|
||||
assert resp.resolved_user_id == "dev-user"
|
||||
assert resp.resolved_workspace == "dev-ws"
|
||||
|
||||
|
||||
class TestResolveApiKey:
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_any_key_resolves_to_default_identity(self):
|
||||
h = NoAuthHandler()
|
||||
resp = await h.handle(
|
||||
_make_request(operation="resolve-api-key", api_key="tg_bogus")
|
||||
)
|
||||
assert resp.error is None
|
||||
assert resp.resolved_user_id == "anonymous"
|
||||
assert resp.resolved_workspace == "default"
|
||||
|
||||
|
||||
class TestAuthorise:
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_always_allows(self):
|
||||
h = NoAuthHandler()
|
||||
resp = await h.handle(
|
||||
_make_request(
|
||||
operation="authorise",
|
||||
user_id="anyone",
|
||||
capability="anything",
|
||||
resource_json="{}",
|
||||
parameters_json="{}",
|
||||
)
|
||||
)
|
||||
assert resp.error is None
|
||||
assert resp.decision_allow is True
|
||||
assert resp.decision_ttl_seconds > 0
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_authorise_many_returns_matching_count(self):
|
||||
h = NoAuthHandler()
|
||||
checks = [
|
||||
{"capability": "a", "resource": {}, "parameters": {}},
|
||||
{"capability": "b", "resource": {}, "parameters": {}},
|
||||
{"capability": "c", "resource": {}, "parameters": {}},
|
||||
]
|
||||
resp = await h.handle(
|
||||
_make_request(
|
||||
operation="authorise-many",
|
||||
user_id="u",
|
||||
authorise_checks=json.dumps(checks),
|
||||
)
|
||||
)
|
||||
assert resp.error is None
|
||||
decisions = json.loads(resp.decisions_json)
|
||||
assert len(decisions) == 3
|
||||
assert all(d["allow"] is True for d in decisions)
|
||||
|
||||
|
||||
class TestCreateWorkspaceCallback:
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_create_workspace_calls_callback(self):
|
||||
called_with = []
|
||||
|
||||
async def on_created(ws_id):
|
||||
called_with.append(ws_id)
|
||||
|
||||
h = NoAuthHandler(on_workspace_created=on_created)
|
||||
req = _make_request(operation="create-workspace")
|
||||
req.workspace_record = Mock()
|
||||
req.workspace_record.id = "test-ws"
|
||||
resp = await h.handle(req)
|
||||
assert resp.error is None
|
||||
assert called_with == ["test-ws"]
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_create_workspace_without_callback_still_succeeds(self):
|
||||
h = NoAuthHandler()
|
||||
req = _make_request(operation="create-workspace")
|
||||
req.workspace_record = Mock()
|
||||
req.workspace_record.id = "test-ws"
|
||||
resp = await h.handle(req)
|
||||
assert resp.error is None
|
||||
|
||||
|
||||
class TestUnknownOperation:
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_unknown_op_returns_error(self):
|
||||
h = NoAuthHandler()
|
||||
resp = await h.handle(
|
||||
_make_request(operation="not-a-real-op")
|
||||
)
|
||||
assert resp.error is not None
|
||||
assert resp.error.type == "invalid-argument"
|
||||
|
|
@ -7,7 +7,7 @@ including template rendering, term merging, JSON validation, and error handling.
|
|||
|
||||
import pytest
|
||||
import json
|
||||
from unittest.mock import AsyncMock, MagicMock, patch
|
||||
from unittest.mock import AsyncMock
|
||||
|
||||
from trustgraph.template.prompt_manager import PromptManager, PromptConfiguration, Prompt
|
||||
|
||||
|
|
@ -344,6 +344,42 @@ class TestPromptManager:
|
|||
assert pm.terms == {} # Default empty terms
|
||||
assert len(pm.prompts) == 0
|
||||
|
||||
def test_load_config_does_not_swallow_keyboard_interrupt(self, monkeypatch):
|
||||
"""KeyboardInterrupt should propagate out of config parsing."""
|
||||
pm = PromptManager()
|
||||
|
||||
def interrupt(_value):
|
||||
raise KeyboardInterrupt
|
||||
|
||||
monkeypatch.setattr("trustgraph.template.prompt_manager.json.loads", interrupt)
|
||||
|
||||
with pytest.raises(KeyboardInterrupt):
|
||||
pm.load_config({"system": json.dumps("Test")})
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_json_parse_does_not_swallow_system_exit(self):
|
||||
"""SystemExit should propagate out of JSON response parsing."""
|
||||
pm = PromptManager()
|
||||
config = {
|
||||
"system": json.dumps("Test"),
|
||||
"template-index": json.dumps(["json_response"]),
|
||||
"template.json_response": json.dumps({
|
||||
"prompt": "Generate JSON",
|
||||
"response-type": "json"
|
||||
})
|
||||
}
|
||||
pm.load_config(config)
|
||||
|
||||
def exit_parse(_text):
|
||||
raise SystemExit(2)
|
||||
|
||||
pm.parse_json = exit_parse
|
||||
mock_llm = AsyncMock()
|
||||
mock_llm.return_value = "{}"
|
||||
|
||||
with pytest.raises(SystemExit):
|
||||
await pm.invoke("json_response", {}, mock_llm)
|
||||
|
||||
|
||||
@pytest.mark.unit
|
||||
class TestPromptManagerJsonl:
|
||||
|
|
@ -585,4 +621,4 @@ not json at all
|
|||
|
||||
assert len(result) == 2
|
||||
assert result[0] == {"any": "structure"}
|
||||
assert result[1] == {"completely": "different"}
|
||||
assert result[1] == {"completely": "different"}
|
||||
|
|
|
|||
|
|
@ -8,6 +8,7 @@ import pytest
|
|||
from unittest.mock import Mock, patch, MagicMock, call
|
||||
import json
|
||||
|
||||
from trustgraph.api.socket_client import SocketClient
|
||||
from trustgraph.api import (
|
||||
Api,
|
||||
Triple,
|
||||
|
|
@ -222,6 +223,82 @@ class TestSocketClient:
|
|||
for method in expected_methods:
|
||||
assert hasattr(flow_instance, method), f"Missing method: {method}"
|
||||
|
||||
def test_socket_client_close_does_not_swallow_base_exceptions(self):
|
||||
"""Test close cleanup does not suppress process-level interrupts."""
|
||||
|
||||
class InterruptingLoop:
|
||||
def is_closed(self):
|
||||
return False
|
||||
|
||||
def run_until_complete(self, awaitable):
|
||||
if hasattr(awaitable, "close"):
|
||||
awaitable.close()
|
||||
raise SystemExit("stop")
|
||||
|
||||
socket = SocketClient(url="http://test/", timeout=60, token=None)
|
||||
socket._loop = InterruptingLoop()
|
||||
|
||||
with pytest.raises(SystemExit):
|
||||
socket.close()
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
("generator_method", "async_method"),
|
||||
[
|
||||
("_streaming_generator", "_send_request_async_streaming"),
|
||||
("_streaming_generator_raw", "_send_request_async_streaming_raw"),
|
||||
],
|
||||
)
|
||||
def test_socket_client_streaming_cleanup_does_not_swallow_base_exceptions(
|
||||
self, generator_method, async_method
|
||||
):
|
||||
"""Test streaming cleanup does not suppress process-level interrupts."""
|
||||
|
||||
class FakeAsyncGenerator:
|
||||
def __anext__(self):
|
||||
return "next"
|
||||
|
||||
def aclose(self):
|
||||
return "close"
|
||||
|
||||
class InterruptingLoop:
|
||||
def run_until_complete(self, awaitable):
|
||||
if awaitable == "next":
|
||||
raise StopAsyncIteration
|
||||
if awaitable == "close":
|
||||
raise SystemExit("stop")
|
||||
raise AssertionError(f"unexpected awaitable: {awaitable!r}")
|
||||
|
||||
socket = SocketClient(url="http://test/", timeout=60, token=None)
|
||||
setattr(socket, async_method, lambda *args, **kwargs: FakeAsyncGenerator())
|
||||
generator = getattr(socket, generator_method)(
|
||||
"agent", "default", {}, InterruptingLoop()
|
||||
)
|
||||
|
||||
with pytest.raises(SystemExit):
|
||||
next(generator)
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_socket_client_reader_does_not_swallow_base_exceptions(self):
|
||||
"""Test reader error fanout does not suppress process-level interrupts."""
|
||||
|
||||
class FailingSocket:
|
||||
def __aiter__(self):
|
||||
return self
|
||||
|
||||
async def __anext__(self):
|
||||
raise ValueError("reader failed")
|
||||
|
||||
class InterruptingQueue:
|
||||
async def put(self, message):
|
||||
raise SystemExit("stop")
|
||||
|
||||
socket = SocketClient(url="http://test/", timeout=60, token=None)
|
||||
socket._socket = FailingSocket()
|
||||
socket._pending = {"req-1": InterruptingQueue()}
|
||||
|
||||
with pytest.raises(SystemExit):
|
||||
await socket._reader()
|
||||
|
||||
|
||||
class TestBulkClient:
|
||||
"""Test bulk operations client"""
|
||||
|
|
|
|||
56
tests/unit/test_query/test_ontology_monitoring.py
Normal file
56
tests/unit/test_query/test_ontology_monitoring.py
Normal file
|
|
@ -0,0 +1,56 @@
|
|||
"""
|
||||
Tests for ontology monitoring metrics.
|
||||
"""
|
||||
|
||||
from trustgraph.query.ontology.monitoring import (
|
||||
PerformanceMonitor,
|
||||
_extract_metric_label,
|
||||
)
|
||||
|
||||
|
||||
def test_extract_metric_label_reads_unquoted_label_value():
|
||||
metric_name = "cache_requests_total{cache_type=entity,component=ontology}"
|
||||
|
||||
assert _extract_metric_label(metric_name, "cache_type") == "entity"
|
||||
|
||||
|
||||
def test_extract_metric_label_reads_quoted_label_value():
|
||||
metric_name = 'cache_requests_total{cache_type="entity",component="ontology"}'
|
||||
|
||||
assert _extract_metric_label(metric_name, "cache_type") == "entity"
|
||||
|
||||
|
||||
def test_extract_metric_label_returns_none_when_label_missing():
|
||||
metric_name = "cache_requests_total{component=ontology}"
|
||||
|
||||
assert _extract_metric_label(metric_name, "cache_type") is None
|
||||
|
||||
|
||||
def test_performance_report_ignores_counters_without_cache_type_label():
|
||||
monitor = PerformanceMonitor({"enabled": False})
|
||||
monitor.metrics_collector.increment(
|
||||
"cache_requests_total",
|
||||
labels={"component": "ontology"},
|
||||
)
|
||||
monitor.metrics_collector.increment(
|
||||
"cache_type=not_a_label",
|
||||
labels={"component": "ontology"},
|
||||
)
|
||||
monitor.metrics_collector.increment(
|
||||
"cache_requests_total",
|
||||
labels={"cache_type": "entity"},
|
||||
)
|
||||
monitor.metrics_collector.increment(
|
||||
"cache_hits_total",
|
||||
labels={"cache_type": "entity"},
|
||||
)
|
||||
|
||||
report = monitor.get_performance_report()
|
||||
|
||||
assert report["cache_performance"] == {
|
||||
"entity": {
|
||||
"hit_rate": 1.0,
|
||||
"total_requests": 1.0,
|
||||
"total_hits": 1.0,
|
||||
}
|
||||
}
|
||||
|
|
@ -89,12 +89,15 @@ class TestRowsGraphQLQueryLogic:
|
|||
@pytest.mark.asyncio
|
||||
async def test_schema_config_parsing(self):
|
||||
"""Test parsing of schema configuration"""
|
||||
import asyncio
|
||||
processor = MagicMock()
|
||||
processor.schemas = {}
|
||||
processor.schema_builders = {}
|
||||
processor.graphql_schemas = {}
|
||||
processor.config_key = "schema"
|
||||
processor.query_cassandra = MagicMock()
|
||||
processor._setup_lock = asyncio.Lock()
|
||||
processor._apply_schema_config = Processor._apply_schema_config.__get__(processor, Processor)
|
||||
processor.on_schema_config = Processor.on_schema_config.__get__(processor, Processor)
|
||||
|
||||
# Create test config
|
||||
|
|
@ -335,7 +338,7 @@ class TestUnifiedTableQueries:
|
|||
"""Test query execution with matching index"""
|
||||
processor = MagicMock()
|
||||
processor.session = MagicMock()
|
||||
processor.connect_cassandra = MagicMock()
|
||||
processor.connect_cassandra = AsyncMock()
|
||||
processor.sanitize_name = Processor.sanitize_name.__get__(processor, Processor)
|
||||
processor.get_index_names = Processor.get_index_names.__get__(processor, Processor)
|
||||
processor.find_matching_index = Processor.find_matching_index.__get__(processor, Processor)
|
||||
|
|
@ -396,7 +399,7 @@ class TestUnifiedTableQueries:
|
|||
"""Test query execution without matching index (scan mode)"""
|
||||
processor = MagicMock()
|
||||
processor.session = MagicMock()
|
||||
processor.connect_cassandra = MagicMock()
|
||||
processor.connect_cassandra = AsyncMock()
|
||||
processor.sanitize_name = Processor.sanitize_name.__get__(processor, Processor)
|
||||
processor.get_index_names = Processor.get_index_names.__get__(processor, Processor)
|
||||
processor.find_matching_index = Processor.find_matching_index.__get__(processor, Processor)
|
||||
|
|
|
|||
580
tests/unit/test_query/test_sparql_algebra.py
Normal file
580
tests/unit/test_query/test_sparql_algebra.py
Normal file
|
|
@ -0,0 +1,580 @@
|
|||
"""
|
||||
Tests for the SPARQL algebra evaluator.
|
||||
|
||||
Verifies that evaluate() and _query_pattern() call TriplesClient.query()
|
||||
with the correct arguments, and in particular that workspace is never
|
||||
passed — workspace isolation is handled by pub/sub topic routing.
|
||||
"""
|
||||
|
||||
import pytest
|
||||
from unittest.mock import AsyncMock, MagicMock, call
|
||||
|
||||
from rdflib.term import Variable, URIRef, Literal
|
||||
from rdflib.plugins.sparql.parserutils import CompValue
|
||||
|
||||
from trustgraph.schema import Term, IRI, LITERAL
|
||||
from trustgraph.query.sparql.algebra import (
|
||||
evaluate, materialise, _query_pattern, _eval_bgp,
|
||||
)
|
||||
|
||||
|
||||
# --- Helpers ---
|
||||
|
||||
def iri(v):
|
||||
return Term(type=IRI, iri=v)
|
||||
|
||||
|
||||
def lit(v):
|
||||
return Term(type=LITERAL, value=v)
|
||||
|
||||
|
||||
def make_tc(query_return=None, query_side_effect=None):
|
||||
"""Create a mock TriplesClient with both query() and query_gen() support."""
|
||||
tc = AsyncMock()
|
||||
|
||||
if query_side_effect is not None:
|
||||
tc.query.side_effect = query_side_effect
|
||||
|
||||
async def gen_side_effect(**kwargs):
|
||||
results = await query_side_effect(**kwargs)
|
||||
for r in results:
|
||||
yield r
|
||||
|
||||
tc.query_gen = gen_side_effect
|
||||
else:
|
||||
items = query_return or []
|
||||
tc.query.return_value = items
|
||||
|
||||
async def gen(**kwargs):
|
||||
for item in items:
|
||||
yield item
|
||||
|
||||
tc.query_gen = gen
|
||||
|
||||
return tc
|
||||
|
||||
|
||||
def make_triple(s, p, o):
|
||||
t = MagicMock()
|
||||
t.s = s
|
||||
t.p = p
|
||||
t.o = o
|
||||
return t
|
||||
|
||||
|
||||
def make_bgp(*patterns):
|
||||
"""Build a CompValue BGP node from (s, p, o) tuples of rdflib terms."""
|
||||
node = CompValue("BGP")
|
||||
node.triples = list(patterns)
|
||||
return node
|
||||
|
||||
|
||||
def make_project(inner, variables):
|
||||
node = CompValue("Project")
|
||||
node.p = inner
|
||||
node.PV = [Variable(v) for v in variables]
|
||||
return node
|
||||
|
||||
|
||||
def make_select(inner):
|
||||
node = CompValue("SelectQuery")
|
||||
node.p = inner
|
||||
return node
|
||||
|
||||
|
||||
def make_join(left, right):
|
||||
node = CompValue("Join")
|
||||
node.p1 = left
|
||||
node.p2 = right
|
||||
return node
|
||||
|
||||
|
||||
def make_union(left, right):
|
||||
node = CompValue("Union")
|
||||
node.p1 = left
|
||||
node.p2 = right
|
||||
return node
|
||||
|
||||
|
||||
def make_slice(inner, start, length):
|
||||
node = CompValue("Slice")
|
||||
node.p = inner
|
||||
node.start = start
|
||||
node.length = length
|
||||
return node
|
||||
|
||||
|
||||
def make_distinct(inner):
|
||||
node = CompValue("Distinct")
|
||||
node.p = inner
|
||||
return node
|
||||
|
||||
|
||||
def make_filter(inner, expr):
|
||||
node = CompValue("Filter")
|
||||
node.p = inner
|
||||
node.expr = expr
|
||||
return node
|
||||
|
||||
|
||||
def make_minus(left, right):
|
||||
node = CompValue("Minus")
|
||||
node.p1 = left
|
||||
node.p2 = right
|
||||
return node
|
||||
|
||||
|
||||
class TestQueryPattern:
|
||||
"""Tests for _query_pattern — the leaf that calls TriplesClient."""
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_passes_correct_args(self):
|
||||
tc = AsyncMock()
|
||||
tc.query.return_value = []
|
||||
|
||||
await _query_pattern(
|
||||
tc,
|
||||
s=iri("http://example.com/s"),
|
||||
p=iri("http://example.com/p"),
|
||||
o=None,
|
||||
collection="my-collection",
|
||||
limit=100,
|
||||
)
|
||||
|
||||
tc.query.assert_called_once_with(
|
||||
s=iri("http://example.com/s"),
|
||||
p=iri("http://example.com/p"),
|
||||
o=None,
|
||||
limit=100,
|
||||
collection="my-collection",
|
||||
)
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_workspace_not_passed(self):
|
||||
tc = AsyncMock()
|
||||
tc.query.return_value = []
|
||||
|
||||
await _query_pattern(tc, None, None, None, "default", 10)
|
||||
|
||||
kwargs = tc.query.call_args.kwargs
|
||||
assert "workspace" not in kwargs
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_returns_query_results(self):
|
||||
tc = AsyncMock()
|
||||
triple = make_triple(iri("http://a"), iri("http://b"), lit("c"))
|
||||
tc.query.return_value = [triple]
|
||||
|
||||
results = await _query_pattern(tc, None, None, None, "default", 10)
|
||||
|
||||
assert len(results) == 1
|
||||
assert results[0].s.iri == "http://a"
|
||||
|
||||
|
||||
class TestEvalBgp:
|
||||
"""Tests for BGP evaluation — triple pattern queries."""
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_single_pattern_all_variables(self):
|
||||
triple = make_triple(iri("http://s"), iri("http://p"), lit("o"))
|
||||
tc = make_tc(query_return=[triple])
|
||||
|
||||
bgp = make_bgp(
|
||||
(Variable("s"), Variable("p"), Variable("o")),
|
||||
)
|
||||
|
||||
solutions = await materialise(bgp, tc, collection="default", limit=100)
|
||||
|
||||
assert len(solutions) == 1
|
||||
assert solutions[0]["s"].iri == "http://s"
|
||||
assert solutions[0]["p"].iri == "http://p"
|
||||
assert solutions[0]["o"].value == "o"
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_single_pattern_bound_subject(self):
|
||||
tc = make_tc(query_return=[
|
||||
make_triple(iri("http://s"), iri("http://p"), lit("val")),
|
||||
])
|
||||
|
||||
bgp = make_bgp(
|
||||
(URIRef("http://s"), Variable("p"), Variable("o")),
|
||||
)
|
||||
|
||||
solutions = await materialise(bgp, tc, collection="default")
|
||||
|
||||
assert len(solutions) == 1
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_empty_bgp_returns_empty_solution(self):
|
||||
tc = make_tc()
|
||||
|
||||
bgp = make_bgp()
|
||||
|
||||
solutions = await materialise(bgp, tc, collection="default")
|
||||
|
||||
assert solutions == [{}]
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_no_results_returns_empty(self):
|
||||
tc = make_tc(query_return=[])
|
||||
|
||||
bgp = make_bgp(
|
||||
(Variable("s"), Variable("p"), Variable("o")),
|
||||
)
|
||||
|
||||
solutions = await materialise(bgp, tc, collection="default")
|
||||
|
||||
assert solutions == []
|
||||
|
||||
|
||||
class TestEvaluate:
|
||||
"""Tests for the top-level evaluate() dispatcher."""
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_select_query_node(self):
|
||||
tc = make_tc(query_return=[
|
||||
make_triple(iri("http://s"), iri("http://p"), lit("o")),
|
||||
])
|
||||
|
||||
bgp = make_bgp(
|
||||
(Variable("s"), Variable("p"), Variable("o")),
|
||||
)
|
||||
select = make_select(make_project(bgp, ["s", "p"]))
|
||||
|
||||
solutions = await materialise(select, tc, collection="default")
|
||||
|
||||
assert len(solutions) == 1
|
||||
assert "s" in solutions[0]
|
||||
assert "p" in solutions[0]
|
||||
assert "o" not in solutions[0]
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_workspace_never_in_query_calls(self):
|
||||
"""Verify that no matter the algebra structure, workspace is never
|
||||
passed to TriplesClient.query()."""
|
||||
tc = make_tc(query_return=[
|
||||
make_triple(iri("http://s"), iri("http://p"), lit("o")),
|
||||
])
|
||||
|
||||
bgp1 = make_bgp((Variable("s"), Variable("p"), Variable("o")))
|
||||
bgp2 = make_bgp((Variable("a"), Variable("b"), Variable("c")))
|
||||
tree = make_select(make_project(
|
||||
make_union(bgp1, bgp2), ["s", "p", "o"]
|
||||
))
|
||||
|
||||
await materialise(tree, tc, collection="test-coll")
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_join(self):
|
||||
call_count = 0
|
||||
|
||||
async def mock_query(**kwargs):
|
||||
nonlocal call_count
|
||||
call_count += 1
|
||||
if call_count == 1:
|
||||
return [make_triple(iri("http://a"), iri("http://p"), lit("v"))]
|
||||
else:
|
||||
return [make_triple(iri("http://a"), iri("http://q"), lit("w"))]
|
||||
|
||||
tc = make_tc(query_side_effect=mock_query)
|
||||
|
||||
bgp1 = make_bgp((Variable("s"), URIRef("http://p"), Variable("v1")))
|
||||
bgp2 = make_bgp((Variable("s"), URIRef("http://q"), Variable("v2")))
|
||||
tree = make_join(bgp1, bgp2)
|
||||
|
||||
solutions = await materialise(tree, tc, collection="default")
|
||||
|
||||
assert len(solutions) == 1
|
||||
assert solutions[0]["s"].iri == "http://a"
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_slice(self):
|
||||
triples = [
|
||||
make_triple(iri(f"http://s{i}"), iri("http://p"), lit(f"o{i}"))
|
||||
for i in range(5)
|
||||
]
|
||||
tc = make_tc(query_return=triples)
|
||||
|
||||
bgp = make_bgp((Variable("s"), Variable("p"), Variable("o")))
|
||||
tree = make_slice(bgp, start=1, length=2)
|
||||
|
||||
solutions = await materialise(tree, tc, collection="default")
|
||||
|
||||
assert len(solutions) == 2
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_distinct(self):
|
||||
triple = make_triple(iri("http://s"), iri("http://p"), lit("o"))
|
||||
tc = make_tc(query_return=[triple, triple])
|
||||
|
||||
bgp = make_bgp((Variable("s"), Variable("p"), Variable("o")))
|
||||
tree = make_distinct(bgp)
|
||||
|
||||
solutions = await materialise(tree, tc, collection="default")
|
||||
|
||||
assert len(solutions) == 1
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_minus_removes_matching(self):
|
||||
alice = iri("http://example.com/alice")
|
||||
bob = iri("http://example.com/bob")
|
||||
knows = iri("http://example.com/knows")
|
||||
hates = iri("http://example.com/hates")
|
||||
charlie = iri("http://example.com/charlie")
|
||||
|
||||
left_triple = make_triple(alice, knows, bob)
|
||||
right_triple2 = make_triple(alice, hates, charlie)
|
||||
|
||||
async def mock_query(**kwargs):
|
||||
pred = kwargs.get("p")
|
||||
if pred and pred.iri == "http://example.com/knows":
|
||||
return [left_triple]
|
||||
elif pred and pred.iri == "http://example.com/hates":
|
||||
return [right_triple2]
|
||||
return []
|
||||
|
||||
tc = make_tc(query_side_effect=mock_query)
|
||||
|
||||
left_bgp = make_bgp(
|
||||
(Variable("s"), URIRef("http://example.com/knows"), Variable("o"))
|
||||
)
|
||||
right_bgp = make_bgp(
|
||||
(Variable("s"), URIRef("http://example.com/hates"), Variable("r"))
|
||||
)
|
||||
|
||||
tree = make_select(
|
||||
make_project(
|
||||
make_minus(left_bgp, right_bgp),
|
||||
["s", "o"]
|
||||
)
|
||||
)
|
||||
|
||||
solutions = await materialise(tree, tc, collection="default")
|
||||
|
||||
assert len(solutions) == 0
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_minus_no_shared_vars_preserves_all(self):
|
||||
alice = iri("http://example.com/alice")
|
||||
bob = iri("http://example.com/bob")
|
||||
|
||||
left_triple = make_triple(alice, iri("http://example.com/p"), bob)
|
||||
|
||||
async def mock_query(**kwargs):
|
||||
pred = kwargs.get("p")
|
||||
if pred and pred.iri == "http://example.com/p":
|
||||
return [left_triple]
|
||||
return []
|
||||
|
||||
tc = make_tc(query_side_effect=mock_query)
|
||||
|
||||
left_bgp = make_bgp(
|
||||
(Variable("s"), URIRef("http://example.com/p"), Variable("o"))
|
||||
)
|
||||
right_bgp = make_bgp(
|
||||
(Variable("x"), URIRef("http://example.com/q"), Variable("y"))
|
||||
)
|
||||
|
||||
tree = make_select(
|
||||
make_project(
|
||||
make_minus(left_bgp, right_bgp),
|
||||
["s", "o"]
|
||||
)
|
||||
)
|
||||
|
||||
solutions = await materialise(tree, tc, collection="default")
|
||||
|
||||
assert len(solutions) == 1
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_filter_exists_keeps_matching(self):
|
||||
alice = iri("http://example.com/alice")
|
||||
bob = iri("http://example.com/bob")
|
||||
charlie = iri("http://example.com/charlie")
|
||||
|
||||
left_triple1 = make_triple(alice, iri("http://example.com/knows"), bob)
|
||||
left_triple2 = make_triple(alice, iri("http://example.com/knows"), charlie)
|
||||
exists_triple = make_triple(bob, iri("http://example.com/likes"), alice)
|
||||
|
||||
async def mock_query(**kwargs):
|
||||
pred = kwargs.get("p")
|
||||
if pred and pred.iri == "http://example.com/knows":
|
||||
return [left_triple1, left_triple2]
|
||||
elif pred and pred.iri == "http://example.com/likes":
|
||||
return [exists_triple]
|
||||
return []
|
||||
|
||||
tc = make_tc(query_side_effect=mock_query)
|
||||
|
||||
left_bgp = make_bgp(
|
||||
(Variable("s"), URIRef("http://example.com/knows"), Variable("o"))
|
||||
)
|
||||
exists_bgp = make_bgp(
|
||||
(Variable("o"), URIRef("http://example.com/likes"), Variable("_any"))
|
||||
)
|
||||
|
||||
exists_expr = CompValue("Builtin_EXISTS")
|
||||
exists_expr.graph = exists_bgp
|
||||
|
||||
tree = make_select(
|
||||
make_project(
|
||||
make_filter(left_bgp, exists_expr),
|
||||
["s", "o"]
|
||||
)
|
||||
)
|
||||
|
||||
solutions = await materialise(tree, tc, collection="default")
|
||||
|
||||
result_objects = [s["o"].iri for s in solutions]
|
||||
assert "http://example.com/bob" in result_objects
|
||||
assert "http://example.com/charlie" not in result_objects
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_filter_not_exists_removes_matching(self):
|
||||
alice = iri("http://example.com/alice")
|
||||
bob = iri("http://example.com/bob")
|
||||
charlie = iri("http://example.com/charlie")
|
||||
|
||||
left_triple1 = make_triple(alice, iri("http://example.com/knows"), bob)
|
||||
left_triple2 = make_triple(alice, iri("http://example.com/knows"), charlie)
|
||||
exists_triple = make_triple(bob, iri("http://example.com/likes"), alice)
|
||||
|
||||
async def mock_query(**kwargs):
|
||||
pred = kwargs.get("p")
|
||||
if pred and pred.iri == "http://example.com/knows":
|
||||
return [left_triple1, left_triple2]
|
||||
elif pred and pred.iri == "http://example.com/likes":
|
||||
return [exists_triple]
|
||||
return []
|
||||
|
||||
tc = make_tc(query_side_effect=mock_query)
|
||||
|
||||
left_bgp = make_bgp(
|
||||
(Variable("s"), URIRef("http://example.com/knows"), Variable("o"))
|
||||
)
|
||||
exists_bgp = make_bgp(
|
||||
(Variable("o"), URIRef("http://example.com/likes"), Variable("_any"))
|
||||
)
|
||||
|
||||
not_exists_expr = CompValue("Builtin_NOTEXISTS")
|
||||
not_exists_expr.graph = exists_bgp
|
||||
|
||||
tree = make_select(
|
||||
make_project(
|
||||
make_filter(left_bgp, not_exists_expr),
|
||||
["s", "o"]
|
||||
)
|
||||
)
|
||||
|
||||
solutions = await materialise(tree, tc, collection="default")
|
||||
|
||||
result_objects = [s["o"].iri for s in solutions]
|
||||
assert "http://example.com/charlie" in result_objects
|
||||
assert "http://example.com/bob" not in result_objects
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_join_values_uses_bind_join(self):
|
||||
"""When VALUES is joined with a BGP, the bind join should pass
|
||||
the VALUES bindings into the BGP evaluation so the triple store
|
||||
query is selective (not a wildcard)."""
|
||||
alice = iri("http://example.com/alice")
|
||||
bob = iri("http://example.com/bob")
|
||||
knows = iri("http://example.com/knows")
|
||||
|
||||
queries_issued = []
|
||||
|
||||
async def mock_query(**kwargs):
|
||||
queries_issued.append(kwargs)
|
||||
s, p = kwargs.get("s"), kwargs.get("p")
|
||||
if s and s.iri == "http://example.com/alice" and p and p.iri == "http://example.com/knows":
|
||||
return [make_triple(alice, knows, bob)]
|
||||
return []
|
||||
|
||||
tc = make_tc(query_side_effect=mock_query)
|
||||
|
||||
# VALUES ?s { <alice> }
|
||||
values_node = CompValue("values")
|
||||
values_node.var = [Variable("s")]
|
||||
values_node.value = [[URIRef("http://example.com/alice")]]
|
||||
values_node.res = None
|
||||
|
||||
to_multiset = CompValue("ToMultiSet")
|
||||
to_multiset.p = values_node
|
||||
|
||||
bgp = make_bgp(
|
||||
(Variable("s"), URIRef("http://example.com/knows"), Variable("o")),
|
||||
)
|
||||
|
||||
tree = make_join(to_multiset, bgp)
|
||||
solutions = await materialise(tree, tc, collection="default")
|
||||
|
||||
assert len(solutions) == 1
|
||||
assert solutions[0]["s"].iri == "http://example.com/alice"
|
||||
assert solutions[0]["o"].iri == "http://example.com/bob"
|
||||
|
||||
# The key assertion: the BGP query should have received
|
||||
# s=alice (bound from VALUES), NOT s=None (wildcard)
|
||||
assert len(queries_issued) == 1
|
||||
assert queries_issued[0]["s"] is not None
|
||||
assert queries_issued[0]["s"].iri == "http://example.com/alice"
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_join_values_multiple_bindings(self):
|
||||
"""Bind join with multiple VALUES bindings."""
|
||||
alice = iri("http://example.com/alice")
|
||||
bob = iri("http://example.com/bob")
|
||||
knows = iri("http://example.com/knows")
|
||||
charlie = iri("http://example.com/charlie")
|
||||
|
||||
async def mock_query(**kwargs):
|
||||
s = kwargs.get("s")
|
||||
if s and s.iri == "http://example.com/alice":
|
||||
return [make_triple(alice, knows, bob)]
|
||||
elif s and s.iri == "http://example.com/bob":
|
||||
return [make_triple(bob, knows, charlie)]
|
||||
return []
|
||||
|
||||
tc = make_tc(query_side_effect=mock_query)
|
||||
|
||||
values_node = CompValue("values")
|
||||
values_node.var = [Variable("s")]
|
||||
values_node.value = [
|
||||
[URIRef("http://example.com/alice")],
|
||||
[URIRef("http://example.com/bob")],
|
||||
]
|
||||
values_node.res = None
|
||||
|
||||
to_multiset = CompValue("ToMultiSet")
|
||||
to_multiset.p = values_node
|
||||
|
||||
bgp = make_bgp(
|
||||
(Variable("s"), URIRef("http://example.com/knows"), Variable("o")),
|
||||
)
|
||||
|
||||
tree = make_join(to_multiset, bgp)
|
||||
solutions = await materialise(tree, tc, collection="default")
|
||||
|
||||
assert len(solutions) == 2
|
||||
subjects = {s["s"].iri for s in solutions}
|
||||
assert subjects == {
|
||||
"http://example.com/alice",
|
||||
"http://example.com/bob",
|
||||
}
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_unsupported_node_returns_empty_solution(self):
|
||||
tc = make_tc()
|
||||
|
||||
node = CompValue("SomethingUnknown")
|
||||
|
||||
solutions = await materialise(node, tc, collection="default")
|
||||
|
||||
assert solutions == [{}]
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_non_compvalue_returns_empty_solution(self):
|
||||
tc = make_tc()
|
||||
|
||||
solutions = await materialise("not a node", tc, collection="default")
|
||||
|
||||
assert solutions == [{}]
|
||||
|
|
@ -300,6 +300,438 @@ class TestBuiltinFunctions:
|
|||
flags=None)
|
||||
assert evaluate_expression(expr, {"x": lit("hello")}) is False
|
||||
|
||||
def test_substr_three_args(self):
|
||||
from rdflib.term import Variable
|
||||
from rdflib import Literal
|
||||
expr = self._make_builtin("SUBSTR",
|
||||
arg=Variable("x"),
|
||||
start=Literal(1),
|
||||
length=Literal(4))
|
||||
result = evaluate_expression(expr, {"x": lit("2024-03-15")})
|
||||
assert result.type == LITERAL
|
||||
assert result.value == "2024"
|
||||
|
||||
def test_substr_two_args(self):
|
||||
from rdflib.term import Variable
|
||||
from rdflib import Literal
|
||||
expr = self._make_builtin("SUBSTR",
|
||||
arg=Variable("x"),
|
||||
start=Literal(6),
|
||||
length=None)
|
||||
result = evaluate_expression(expr, {"x": lit("2024-03-15")})
|
||||
assert result.type == LITERAL
|
||||
assert result.value == "03-15"
|
||||
|
||||
def test_substr_middle(self):
|
||||
from rdflib.term import Variable
|
||||
from rdflib import Literal
|
||||
expr = self._make_builtin("SUBSTR",
|
||||
arg=Variable("x"),
|
||||
start=Literal(6),
|
||||
length=Literal(2))
|
||||
result = evaluate_expression(expr, {"x": lit("2024-03-15")})
|
||||
assert result.type == LITERAL
|
||||
assert result.value == "03"
|
||||
|
||||
def test_substr_null_start(self):
|
||||
from rdflib.term import Variable
|
||||
from rdflib import Literal
|
||||
expr = self._make_builtin("SUBSTR",
|
||||
arg=Variable("x"),
|
||||
start=Variable("missing"),
|
||||
length=None)
|
||||
result = evaluate_expression(expr, {"x": lit("hello")})
|
||||
assert result is None
|
||||
|
||||
def test_year(self):
|
||||
from rdflib.term import Variable
|
||||
expr = self._make_builtin("YEAR", arg=Variable("x"))
|
||||
result = evaluate_expression(
|
||||
expr, {"x": lit("2024-03-15", datatype=XSD + "date")}
|
||||
)
|
||||
assert result == 2024
|
||||
|
||||
def test_month(self):
|
||||
from rdflib.term import Variable
|
||||
expr = self._make_builtin("MONTH", arg=Variable("x"))
|
||||
result = evaluate_expression(
|
||||
expr, {"x": lit("2024-03-15", datatype=XSD + "date")}
|
||||
)
|
||||
assert result == 3
|
||||
|
||||
def test_day(self):
|
||||
from rdflib.term import Variable
|
||||
expr = self._make_builtin("DAY", arg=Variable("x"))
|
||||
result = evaluate_expression(
|
||||
expr, {"x": lit("2024-03-15", datatype=XSD + "date")}
|
||||
)
|
||||
assert result == 15
|
||||
|
||||
def test_hours(self):
|
||||
from rdflib.term import Variable
|
||||
expr = self._make_builtin("HOURS", arg=Variable("x"))
|
||||
result = evaluate_expression(
|
||||
expr, {"x": lit("2024-03-15T10:30:45", datatype=XSD + "dateTime")}
|
||||
)
|
||||
assert result == 10
|
||||
|
||||
def test_minutes(self):
|
||||
from rdflib.term import Variable
|
||||
expr = self._make_builtin("MINUTES", arg=Variable("x"))
|
||||
result = evaluate_expression(
|
||||
expr, {"x": lit("2024-03-15T10:30:45", datatype=XSD + "dateTime")}
|
||||
)
|
||||
assert result == 30
|
||||
|
||||
def test_seconds(self):
|
||||
from rdflib.term import Variable
|
||||
expr = self._make_builtin("SECONDS", arg=Variable("x"))
|
||||
result = evaluate_expression(
|
||||
expr, {"x": lit("2024-03-15T10:30:45", datatype=XSD + "dateTime")}
|
||||
)
|
||||
assert result == 45
|
||||
|
||||
def test_year_from_datetime(self):
|
||||
from rdflib.term import Variable
|
||||
expr = self._make_builtin("YEAR", arg=Variable("x"))
|
||||
result = evaluate_expression(
|
||||
expr, {"x": lit("2024-03-15T10:30:45", datatype=XSD + "dateTime")}
|
||||
)
|
||||
assert result == 2024
|
||||
|
||||
def test_hours_from_date_returns_zero(self):
|
||||
from rdflib.term import Variable
|
||||
expr = self._make_builtin("HOURS", arg=Variable("x"))
|
||||
result = evaluate_expression(
|
||||
expr, {"x": lit("2024-03-15", datatype=XSD + "date")}
|
||||
)
|
||||
assert result == 0
|
||||
|
||||
def test_year_invalid_date(self):
|
||||
from rdflib.term import Variable
|
||||
expr = self._make_builtin("YEAR", arg=Variable("x"))
|
||||
result = evaluate_expression(
|
||||
expr, {"x": lit("not-a-date")}
|
||||
)
|
||||
assert result is None
|
||||
|
||||
def test_floor(self):
|
||||
from rdflib.term import Variable
|
||||
expr = self._make_builtin("FLOOR", arg=Variable("x"))
|
||||
assert evaluate_expression(expr, {"x": lit("3.7")}) == 3
|
||||
|
||||
def test_floor_negative(self):
|
||||
from rdflib.term import Variable
|
||||
expr = self._make_builtin("FLOOR", arg=Variable("x"))
|
||||
assert evaluate_expression(expr, {"x": lit("-2.3")}) == -3
|
||||
|
||||
def test_floor_none(self):
|
||||
from rdflib.term import Variable
|
||||
expr = self._make_builtin("FLOOR", arg=Variable("x"))
|
||||
assert evaluate_expression(expr, {"x": lit("abc")}) is None
|
||||
|
||||
def test_ceil(self):
|
||||
from rdflib.term import Variable
|
||||
expr = self._make_builtin("CEIL", arg=Variable("x"))
|
||||
assert evaluate_expression(expr, {"x": lit("3.2")}) == 4
|
||||
|
||||
def test_ceil_negative(self):
|
||||
from rdflib.term import Variable
|
||||
expr = self._make_builtin("CEIL", arg=Variable("x"))
|
||||
assert evaluate_expression(expr, {"x": lit("-2.7")}) == -2
|
||||
|
||||
def test_abs_positive(self):
|
||||
from rdflib.term import Variable
|
||||
expr = self._make_builtin("ABS", arg=Variable("x"))
|
||||
assert evaluate_expression(expr, {"x": lit("42")}) == 42
|
||||
|
||||
def test_abs_negative(self):
|
||||
from rdflib.term import Variable
|
||||
expr = self._make_builtin("ABS", arg=Variable("x"))
|
||||
assert evaluate_expression(expr, {"x": lit("-42")}) == 42
|
||||
|
||||
def test_abs_none(self):
|
||||
from rdflib.term import Variable
|
||||
expr = self._make_builtin("ABS", arg=Variable("x"))
|
||||
assert evaluate_expression(expr, {"x": lit("abc")}) is None
|
||||
|
||||
def test_replace_simple(self):
|
||||
from rdflib.term import Variable
|
||||
from rdflib import Literal
|
||||
expr = self._make_builtin("REPLACE",
|
||||
arg=Variable("x"),
|
||||
pattern=Literal(" BC"),
|
||||
replacement=Literal(""),
|
||||
flags=None)
|
||||
result = evaluate_expression(expr, {"x": lit("500 BC")})
|
||||
assert result.type == LITERAL
|
||||
assert result.value == "500"
|
||||
|
||||
def test_replace_regex(self):
|
||||
from rdflib.term import Variable
|
||||
from rdflib import Literal
|
||||
expr = self._make_builtin("REPLACE",
|
||||
arg=Variable("x"),
|
||||
pattern=Literal("[0-9]+"),
|
||||
replacement=Literal("X"),
|
||||
flags=None)
|
||||
result = evaluate_expression(expr, {"x": lit("abc123def456")})
|
||||
assert result.value == "abcXdefX"
|
||||
|
||||
def test_replace_case_insensitive(self):
|
||||
from rdflib.term import Variable
|
||||
from rdflib import Literal
|
||||
expr = self._make_builtin("REPLACE",
|
||||
arg=Variable("x"),
|
||||
pattern=Literal("hello"),
|
||||
replacement=Literal("world"),
|
||||
flags=Literal("i"))
|
||||
result = evaluate_expression(expr, {"x": lit("HELLO there")})
|
||||
assert result.value == "world there"
|
||||
|
||||
def test_round_up(self):
|
||||
from rdflib.term import Variable
|
||||
expr = self._make_builtin("ROUND", arg=Variable("x"))
|
||||
assert evaluate_expression(expr, {"x": lit("3.7")}) == 4
|
||||
|
||||
def test_round_down(self):
|
||||
from rdflib.term import Variable
|
||||
expr = self._make_builtin("ROUND", arg=Variable("x"))
|
||||
assert evaluate_expression(expr, {"x": lit("3.2")}) == 3
|
||||
|
||||
def test_round_none(self):
|
||||
from rdflib.term import Variable
|
||||
expr = self._make_builtin("ROUND", arg=Variable("x"))
|
||||
assert evaluate_expression(expr, {"x": lit("abc")}) is None
|
||||
|
||||
def test_strbefore(self):
|
||||
from rdflib.term import Variable
|
||||
from rdflib import Literal
|
||||
expr = self._make_builtin("STRBEFORE",
|
||||
arg1=Variable("x"), arg2=Literal("-"))
|
||||
result = evaluate_expression(expr, {"x": lit("2024-03-15")})
|
||||
assert result.value == "2024"
|
||||
|
||||
def test_strbefore_not_found(self):
|
||||
from rdflib.term import Variable
|
||||
from rdflib import Literal
|
||||
expr = self._make_builtin("STRBEFORE",
|
||||
arg1=Variable("x"), arg2=Literal("/"))
|
||||
result = evaluate_expression(expr, {"x": lit("hello")})
|
||||
assert result.value == ""
|
||||
|
||||
def test_strafter(self):
|
||||
from rdflib.term import Variable
|
||||
from rdflib import Literal
|
||||
expr = self._make_builtin("STRAFTER",
|
||||
arg1=Variable("x"), arg2=Literal("-"))
|
||||
result = evaluate_expression(expr, {"x": lit("2024-03-15")})
|
||||
assert result.value == "03-15"
|
||||
|
||||
def test_strafter_not_found(self):
|
||||
from rdflib.term import Variable
|
||||
from rdflib import Literal
|
||||
expr = self._make_builtin("STRAFTER",
|
||||
arg1=Variable("x"), arg2=Literal("/"))
|
||||
result = evaluate_expression(expr, {"x": lit("hello")})
|
||||
assert result.value == ""
|
||||
|
||||
def test_encode_for_uri(self):
|
||||
from rdflib.term import Variable
|
||||
expr = self._make_builtin("ENCODE_FOR_URI", arg=Variable("x"))
|
||||
result = evaluate_expression(expr, {"x": lit("hello world")})
|
||||
assert result.value == "hello%20world"
|
||||
|
||||
def test_encode_for_uri_special_chars(self):
|
||||
from rdflib.term import Variable
|
||||
expr = self._make_builtin("ENCODE_FOR_URI", arg=Variable("x"))
|
||||
result = evaluate_expression(expr, {"x": lit("a/b?c=d&e")})
|
||||
assert result.value == "a%2Fb%3Fc%3Dd%26e"
|
||||
|
||||
def test_langmatches_basic(self):
|
||||
from rdflib.term import Variable
|
||||
from rdflib import Literal
|
||||
expr = self._make_builtin("LANGMATCHES",
|
||||
arg1=Literal("en"), arg2=Literal("en"))
|
||||
assert evaluate_expression(expr, {}) is True
|
||||
|
||||
def test_langmatches_subtag(self):
|
||||
from rdflib.term import Variable
|
||||
from rdflib import Literal
|
||||
expr = self._make_builtin("LANGMATCHES",
|
||||
arg1=Literal("en-US"), arg2=Literal("en"))
|
||||
assert evaluate_expression(expr, {}) is True
|
||||
|
||||
def test_langmatches_wildcard(self):
|
||||
from rdflib.term import Variable
|
||||
from rdflib import Literal
|
||||
expr = self._make_builtin("LANGMATCHES",
|
||||
arg1=Literal("fr"), arg2=Literal("*"))
|
||||
assert evaluate_expression(expr, {}) is True
|
||||
|
||||
def test_langmatches_wildcard_empty(self):
|
||||
from rdflib.term import Variable
|
||||
from rdflib import Literal
|
||||
expr = self._make_builtin("LANGMATCHES",
|
||||
arg1=Literal(""), arg2=Literal("*"))
|
||||
assert evaluate_expression(expr, {}) is False
|
||||
|
||||
def test_langmatches_no_match(self):
|
||||
from rdflib.term import Variable
|
||||
from rdflib import Literal
|
||||
expr = self._make_builtin("LANGMATCHES",
|
||||
arg1=Literal("fr"), arg2=Literal("en"))
|
||||
assert evaluate_expression(expr, {}) is False
|
||||
|
||||
def test_iri_constructor(self):
|
||||
from rdflib.term import Variable
|
||||
expr = self._make_builtin("IRI", arg=Variable("x"))
|
||||
result = evaluate_expression(
|
||||
expr, {"x": lit("http://example.com/test")}
|
||||
)
|
||||
assert result.type == IRI
|
||||
assert result.iri == "http://example.com/test"
|
||||
|
||||
def test_uri_constructor(self):
|
||||
from rdflib.term import Variable
|
||||
expr = self._make_builtin("URI", arg=Variable("x"))
|
||||
result = evaluate_expression(
|
||||
expr, {"x": lit("http://example.com/test")}
|
||||
)
|
||||
assert result.type == IRI
|
||||
assert result.iri == "http://example.com/test"
|
||||
|
||||
def test_bnode_no_arg(self):
|
||||
expr = self._make_builtin("BNODE")
|
||||
result = evaluate_expression(expr, {})
|
||||
assert result.type == BLANK
|
||||
assert len(result.id) > 0
|
||||
|
||||
def test_bnode_with_label(self):
|
||||
from rdflib import Literal
|
||||
expr = self._make_builtin("BNODE", arg=Literal("mynode"))
|
||||
result = evaluate_expression(expr, {})
|
||||
assert result.type == BLANK
|
||||
assert result.id == "mynode"
|
||||
|
||||
def test_now(self):
|
||||
import re as re_mod
|
||||
expr = self._make_builtin("NOW")
|
||||
result = evaluate_expression(expr, {})
|
||||
assert result.type == LITERAL
|
||||
assert result.datatype == XSD + "dateTime"
|
||||
assert re_mod.match(r"\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}", result.value)
|
||||
|
||||
def test_tz_with_utc(self):
|
||||
from rdflib.term import Variable
|
||||
expr = self._make_builtin("TZ", arg=Variable("x"))
|
||||
result = evaluate_expression(
|
||||
expr, {"x": lit("2024-03-15T10:30:45+0000",
|
||||
datatype=XSD + "dateTime")}
|
||||
)
|
||||
assert result.type == LITERAL
|
||||
assert result.value == "+00:00"
|
||||
|
||||
def test_tz_no_timezone(self):
|
||||
from rdflib.term import Variable
|
||||
expr = self._make_builtin("TZ", arg=Variable("x"))
|
||||
result = evaluate_expression(
|
||||
expr, {"x": lit("2024-03-15T10:30:45",
|
||||
datatype=XSD + "dateTime")}
|
||||
)
|
||||
assert result.value == ""
|
||||
|
||||
def test_rand(self):
|
||||
expr = self._make_builtin("RAND")
|
||||
result = evaluate_expression(expr, {})
|
||||
assert isinstance(result, float)
|
||||
assert 0.0 <= result < 1.0
|
||||
|
||||
def test_uuid(self):
|
||||
import re as re_mod
|
||||
expr = self._make_builtin("UUID")
|
||||
result = evaluate_expression(expr, {})
|
||||
assert result.type == IRI
|
||||
assert result.iri.startswith("urn:uuid:")
|
||||
uuid_part = result.iri[len("urn:uuid:"):]
|
||||
assert re_mod.match(
|
||||
r"[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}",
|
||||
uuid_part
|
||||
)
|
||||
|
||||
def test_struuid(self):
|
||||
import re as re_mod
|
||||
expr = self._make_builtin("STRUUID")
|
||||
result = evaluate_expression(expr, {})
|
||||
assert result.type == LITERAL
|
||||
assert re_mod.match(
|
||||
r"[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}",
|
||||
result.value
|
||||
)
|
||||
|
||||
def test_md5(self):
|
||||
from rdflib.term import Variable
|
||||
expr = self._make_builtin("MD5", arg=Variable("x"))
|
||||
result = evaluate_expression(expr, {"x": lit("hello")})
|
||||
assert result.type == LITERAL
|
||||
assert result.value == "5d41402abc4b2a76b9719d911017c592"
|
||||
|
||||
def test_sha1(self):
|
||||
from rdflib.term import Variable
|
||||
expr = self._make_builtin("SHA1", arg=Variable("x"))
|
||||
result = evaluate_expression(expr, {"x": lit("hello")})
|
||||
assert result.type == LITERAL
|
||||
assert result.value == "aaf4c61ddcc5e8a2dabede0f3b482cd9aea9434d"
|
||||
|
||||
def test_sha256(self):
|
||||
from rdflib.term import Variable
|
||||
expr = self._make_builtin("SHA256", arg=Variable("x"))
|
||||
result = evaluate_expression(expr, {"x": lit("hello")})
|
||||
assert result.type == LITERAL
|
||||
assert result.value == (
|
||||
"2cf24dba5fb0a30e26e83b2ac5b9e29e"
|
||||
"1b161e5c1fa7425e73043362938b9824"
|
||||
)
|
||||
|
||||
def test_sha512(self):
|
||||
from rdflib.term import Variable
|
||||
expr = self._make_builtin("SHA512", arg=Variable("x"))
|
||||
result = evaluate_expression(expr, {"x": lit("hello")})
|
||||
assert result.type == LITERAL
|
||||
assert len(result.value) == 128
|
||||
|
||||
def test_exists_with_callback(self):
|
||||
from rdflib.plugins.sparql.parserutils import CompValue
|
||||
graph = CompValue("BGP")
|
||||
expr = self._make_builtin("EXISTS", graph=graph)
|
||||
cb = lambda g, s: True
|
||||
result = evaluate_expression(expr, {}, exists_cb=cb)
|
||||
assert result is True
|
||||
|
||||
def test_exists_callback_false(self):
|
||||
from rdflib.plugins.sparql.parserutils import CompValue
|
||||
graph = CompValue("BGP")
|
||||
expr = self._make_builtin("EXISTS", graph=graph)
|
||||
cb = lambda g, s: False
|
||||
result = evaluate_expression(expr, {}, exists_cb=cb)
|
||||
assert result is False
|
||||
|
||||
def test_notexists_with_callback(self):
|
||||
from rdflib.plugins.sparql.parserutils import CompValue
|
||||
graph = CompValue("BGP")
|
||||
expr = self._make_builtin("NOTEXISTS", graph=graph)
|
||||
cb = lambda g, s: True
|
||||
result = evaluate_expression(expr, {}, exists_cb=cb)
|
||||
assert result is False
|
||||
|
||||
def test_notexists_callback_false(self):
|
||||
from rdflib.plugins.sparql.parserutils import CompValue
|
||||
graph = CompValue("BGP")
|
||||
expr = self._make_builtin("NOTEXISTS", graph=graph)
|
||||
cb = lambda g, s: False
|
||||
result = evaluate_expression(expr, {}, exists_cb=cb)
|
||||
assert result is True
|
||||
|
||||
|
||||
class TestEffectiveBoolean:
|
||||
|
||||
|
|
|
|||
|
|
@ -5,7 +5,7 @@ Tests for SPARQL solution sequence operations.
|
|||
import pytest
|
||||
from trustgraph.schema import Term, IRI, LITERAL
|
||||
from trustgraph.query.sparql.solutions import (
|
||||
hash_join, left_join, union, project, distinct,
|
||||
hash_join, left_join, minus, union, project, distinct,
|
||||
order_by, slice_solutions, _terms_equal, _compatible,
|
||||
)
|
||||
|
||||
|
|
@ -311,6 +311,30 @@ class TestOrderBy:
|
|||
result = order_by(solutions, [])
|
||||
assert len(result) == 1
|
||||
|
||||
def test_order_by_numeric_literals(self):
|
||||
solutions = [
|
||||
{"year": lit("1950")},
|
||||
{"year": lit("700")},
|
||||
{"year": lit("2000")},
|
||||
{"year": lit("450")},
|
||||
{"year": lit("1200")},
|
||||
]
|
||||
key_fns = [(lambda sol: sol.get("year"), True)]
|
||||
result = order_by(solutions, key_fns)
|
||||
values = [s["year"].value for s in result]
|
||||
assert values == ["450", "700", "1200", "1950", "2000"]
|
||||
|
||||
def test_order_by_numeric_descending(self):
|
||||
solutions = [
|
||||
{"year": lit("1950")},
|
||||
{"year": lit("700")},
|
||||
{"year": lit("2000")},
|
||||
]
|
||||
key_fns = [(lambda sol: sol.get("year"), False)]
|
||||
result = order_by(solutions, key_fns)
|
||||
values = [s["year"].value for s in result]
|
||||
assert values == ["2000", "1950", "700"]
|
||||
|
||||
|
||||
class TestSlice:
|
||||
|
||||
|
|
@ -343,3 +367,37 @@ class TestSlice:
|
|||
solutions = [{"s": alice}, {"s": bob}]
|
||||
result = slice_solutions(solutions)
|
||||
assert len(result) == 2
|
||||
|
||||
|
||||
class TestMinus:
|
||||
|
||||
def test_removes_compatible(self, alice, bob):
|
||||
left = [{"s": alice}, {"s": bob}]
|
||||
right = [{"s": alice}]
|
||||
result = minus(left, right)
|
||||
assert len(result) == 1
|
||||
assert result[0]["s"].iri == "http://example.com/bob"
|
||||
|
||||
def test_empty_right_preserves_all(self, alice, bob):
|
||||
left = [{"s": alice}, {"s": bob}]
|
||||
result = minus(left, [])
|
||||
assert len(result) == 2
|
||||
|
||||
def test_no_shared_variables_preserves_all(self, alice, bob):
|
||||
left = [{"s": alice}]
|
||||
right = [{"t": bob}]
|
||||
result = minus(left, right)
|
||||
assert len(result) == 1
|
||||
|
||||
def test_all_removed(self, alice):
|
||||
left = [{"s": alice}]
|
||||
right = [{"s": alice}]
|
||||
result = minus(left, right)
|
||||
assert len(result) == 0
|
||||
|
||||
def test_partial_shared_variables(self, alice, bob):
|
||||
left = [{"s": alice, "p": lit("x")}, {"s": bob, "p": lit("y")}]
|
||||
right = [{"s": alice}]
|
||||
result = minus(left, right)
|
||||
assert len(result) == 1
|
||||
assert result[0]["s"].iri == "http://example.com/bob"
|
||||
|
|
|
|||
|
|
@ -2,8 +2,10 @@
|
|||
Tests for Cassandra triples query service
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
|
||||
import pytest
|
||||
from unittest.mock import MagicMock, patch
|
||||
from unittest.mock import MagicMock, patch, AsyncMock
|
||||
|
||||
from trustgraph.query.triples.cassandra.service import Processor, create_term
|
||||
from trustgraph.schema import Term, IRI, LITERAL
|
||||
|
|
@ -18,7 +20,7 @@ class TestCassandraQueryProcessor:
|
|||
return Processor(
|
||||
taskgroup=MagicMock(),
|
||||
id='test-cassandra-query',
|
||||
graph_host='localhost'
|
||||
cassandra_host='localhost'
|
||||
)
|
||||
|
||||
def test_create_term_with_http_uri(self, processor):
|
||||
|
|
@ -85,7 +87,7 @@ class TestCassandraQueryProcessor:
|
|||
mock_result.dtype = None
|
||||
mock_result.lang = None
|
||||
mock_result.o = 'test_object'
|
||||
mock_tg_instance.get_spo.return_value = [mock_result]
|
||||
mock_tg_instance.async_get_spo = AsyncMock(return_value=[mock_result])
|
||||
|
||||
processor = Processor(
|
||||
taskgroup=MagicMock(),
|
||||
|
|
@ -110,8 +112,8 @@ class TestCassandraQueryProcessor:
|
|||
keyspace='test_user'
|
||||
)
|
||||
|
||||
# Verify get_spo was called with correct parameters
|
||||
mock_tg_instance.get_spo.assert_called_once_with(
|
||||
# Verify async_get_spo was called with correct parameters
|
||||
mock_tg_instance.async_get_spo.assert_called_once_with(
|
||||
'test_collection', 'test_subject', 'test_predicate', 'test_object', g=None, limit=100
|
||||
)
|
||||
|
||||
|
|
@ -130,23 +132,25 @@ class TestCassandraQueryProcessor:
|
|||
assert processor.cassandra_host == ['cassandra'] # Updated default
|
||||
assert processor.cassandra_username is None
|
||||
assert processor.cassandra_password is None
|
||||
assert processor.table is None
|
||||
assert processor._connections == {}
|
||||
assert isinstance(processor._conn_lock, asyncio.Lock)
|
||||
|
||||
def test_processor_initialization_with_custom_params(self):
|
||||
"""Test processor initialization with custom parameters"""
|
||||
taskgroup_mock = MagicMock()
|
||||
|
||||
|
||||
processor = Processor(
|
||||
taskgroup=taskgroup_mock,
|
||||
cassandra_host='cassandra.example.com',
|
||||
cassandra_username='queryuser',
|
||||
cassandra_password='querypass'
|
||||
)
|
||||
|
||||
|
||||
assert processor.cassandra_host == ['cassandra.example.com']
|
||||
assert processor.cassandra_username == 'queryuser'
|
||||
assert processor.cassandra_password == 'querypass'
|
||||
assert processor.table is None
|
||||
assert processor._connections == {}
|
||||
assert isinstance(processor._conn_lock, asyncio.Lock)
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@patch('trustgraph.query.triples.cassandra.service.EntityCentricKnowledgeGraph')
|
||||
|
|
@ -164,7 +168,7 @@ class TestCassandraQueryProcessor:
|
|||
mock_result.otype = None
|
||||
mock_result.dtype = None
|
||||
mock_result.lang = None
|
||||
mock_tg_instance.get_sp.return_value = [mock_result]
|
||||
mock_tg_instance.async_get_sp = AsyncMock(return_value=[mock_result])
|
||||
|
||||
processor = Processor(taskgroup=MagicMock())
|
||||
|
||||
|
|
@ -178,7 +182,7 @@ class TestCassandraQueryProcessor:
|
|||
|
||||
result = await processor.query_triples('test_user', query)
|
||||
|
||||
mock_tg_instance.get_sp.assert_called_once_with('test_collection', 'test_subject', 'test_predicate', g=None, limit=50)
|
||||
mock_tg_instance.async_get_sp.assert_called_once_with('test_collection', 'test_subject', 'test_predicate', g=None, limit=50)
|
||||
assert len(result) == 1
|
||||
assert result[0].s.iri == 'test_subject'
|
||||
assert result[0].p.iri == 'test_predicate'
|
||||
|
|
@ -200,7 +204,7 @@ class TestCassandraQueryProcessor:
|
|||
mock_result.otype = None
|
||||
mock_result.dtype = None
|
||||
mock_result.lang = None
|
||||
mock_tg_instance.get_s.return_value = [mock_result]
|
||||
mock_tg_instance.async_get_s = AsyncMock(return_value=[mock_result])
|
||||
|
||||
processor = Processor(taskgroup=MagicMock())
|
||||
|
||||
|
|
@ -214,7 +218,7 @@ class TestCassandraQueryProcessor:
|
|||
|
||||
result = await processor.query_triples('test_user', query)
|
||||
|
||||
mock_tg_instance.get_s.assert_called_once_with('test_collection', 'test_subject', g=None, limit=25)
|
||||
mock_tg_instance.async_get_s.assert_called_once_with('test_collection', 'test_subject', g=None, limit=25)
|
||||
assert len(result) == 1
|
||||
assert result[0].s.iri == 'test_subject'
|
||||
assert result[0].p.iri == 'result_predicate'
|
||||
|
|
@ -236,7 +240,7 @@ class TestCassandraQueryProcessor:
|
|||
mock_result.otype = None
|
||||
mock_result.dtype = None
|
||||
mock_result.lang = None
|
||||
mock_tg_instance.get_p.return_value = [mock_result]
|
||||
mock_tg_instance.async_get_p = AsyncMock(return_value=[mock_result])
|
||||
|
||||
processor = Processor(taskgroup=MagicMock())
|
||||
|
||||
|
|
@ -250,7 +254,7 @@ class TestCassandraQueryProcessor:
|
|||
|
||||
result = await processor.query_triples('test_user', query)
|
||||
|
||||
mock_tg_instance.get_p.assert_called_once_with('test_collection', 'test_predicate', g=None, limit=10)
|
||||
mock_tg_instance.async_get_p.assert_called_once_with('test_collection', 'test_predicate', g=None, limit=10)
|
||||
assert len(result) == 1
|
||||
assert result[0].s.iri == 'result_subject'
|
||||
assert result[0].p.iri == 'test_predicate'
|
||||
|
|
@ -272,7 +276,7 @@ class TestCassandraQueryProcessor:
|
|||
mock_result.otype = None
|
||||
mock_result.dtype = None
|
||||
mock_result.lang = None
|
||||
mock_tg_instance.get_o.return_value = [mock_result]
|
||||
mock_tg_instance.async_get_o = AsyncMock(return_value=[mock_result])
|
||||
|
||||
processor = Processor(taskgroup=MagicMock())
|
||||
|
||||
|
|
@ -286,7 +290,7 @@ class TestCassandraQueryProcessor:
|
|||
|
||||
result = await processor.query_triples('test_user', query)
|
||||
|
||||
mock_tg_instance.get_o.assert_called_once_with('test_collection', 'test_object', g=None, limit=75)
|
||||
mock_tg_instance.async_get_o.assert_called_once_with('test_collection', 'test_object', g=None, limit=75)
|
||||
assert len(result) == 1
|
||||
assert result[0].s.iri == 'result_subject'
|
||||
assert result[0].p.iri == 'result_predicate'
|
||||
|
|
@ -305,11 +309,11 @@ class TestCassandraQueryProcessor:
|
|||
mock_result.s = 'all_subject'
|
||||
mock_result.p = 'all_predicate'
|
||||
mock_result.o = 'all_object'
|
||||
mock_result.g = ''
|
||||
mock_result.d = ''
|
||||
mock_result.otype = None
|
||||
mock_result.dtype = None
|
||||
mock_result.lang = None
|
||||
mock_tg_instance.get_all.return_value = [mock_result]
|
||||
mock_tg_instance.async_get_all = AsyncMock(return_value=[mock_result])
|
||||
|
||||
processor = Processor(taskgroup=MagicMock())
|
||||
|
||||
|
|
@ -323,7 +327,7 @@ class TestCassandraQueryProcessor:
|
|||
|
||||
result = await processor.query_triples('test_user', query)
|
||||
|
||||
mock_tg_instance.get_all.assert_called_once_with('test_collection', limit=1000)
|
||||
mock_tg_instance.async_get_all.assert_called_once_with('test_collection', limit=1000)
|
||||
assert len(result) == 1
|
||||
assert result[0].s.iri == 'all_subject'
|
||||
assert result[0].p.iri == 'all_predicate'
|
||||
|
|
@ -410,7 +414,7 @@ class TestCassandraQueryProcessor:
|
|||
mock_result.dtype = None
|
||||
mock_result.lang = None
|
||||
mock_result.o = 'test_object'
|
||||
mock_tg_instance.get_spo.return_value = [mock_result]
|
||||
mock_tg_instance.async_get_spo = AsyncMock(return_value=[mock_result])
|
||||
|
||||
processor = Processor(
|
||||
taskgroup=MagicMock(),
|
||||
|
|
@ -451,7 +455,7 @@ class TestCassandraQueryProcessor:
|
|||
mock_result.dtype = None
|
||||
mock_result.lang = None
|
||||
mock_result.o = 'test_object'
|
||||
mock_tg_instance.get_spo.return_value = [mock_result]
|
||||
mock_tg_instance.async_get_spo = AsyncMock(return_value=[mock_result])
|
||||
|
||||
processor = Processor(taskgroup=MagicMock())
|
||||
|
||||
|
|
@ -489,8 +493,8 @@ class TestCassandraQueryProcessor:
|
|||
mock_result.lang = None
|
||||
mock_result.p = 'p'
|
||||
mock_result.o = 'o'
|
||||
mock_tg_instance1.get_s.return_value = [mock_result]
|
||||
mock_tg_instance2.get_s.return_value = [mock_result]
|
||||
mock_tg_instance1.async_get_s = AsyncMock(return_value=[mock_result])
|
||||
mock_tg_instance2.async_get_s = AsyncMock(return_value=[mock_result])
|
||||
|
||||
processor = Processor(taskgroup=MagicMock())
|
||||
|
||||
|
|
@ -504,7 +508,6 @@ class TestCassandraQueryProcessor:
|
|||
)
|
||||
|
||||
await processor.query_triples('user1', query1)
|
||||
assert processor.table == 'user1'
|
||||
|
||||
# Second query with different table
|
||||
query2 = TriplesQueryRequest(
|
||||
|
|
@ -516,10 +519,11 @@ class TestCassandraQueryProcessor:
|
|||
)
|
||||
|
||||
await processor.query_triples('user2', query2)
|
||||
assert processor.table == 'user2'
|
||||
|
||||
# Verify TrustGraph was created twice
|
||||
# Verify TrustGraph was created twice for different workspaces
|
||||
assert mock_kg_class.call_count == 2
|
||||
mock_kg_class.assert_any_call(hosts=['cassandra'], keyspace='user1')
|
||||
mock_kg_class.assert_any_call(hosts=['cassandra'], keyspace='user2')
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@patch('trustgraph.query.triples.cassandra.service.EntityCentricKnowledgeGraph')
|
||||
|
|
@ -529,7 +533,7 @@ class TestCassandraQueryProcessor:
|
|||
|
||||
mock_tg_instance = MagicMock()
|
||||
mock_kg_class.return_value = mock_tg_instance
|
||||
mock_tg_instance.get_spo.side_effect = Exception("Query failed")
|
||||
mock_tg_instance.async_get_spo = AsyncMock(side_effect=Exception("Query failed"))
|
||||
|
||||
processor = Processor(taskgroup=MagicMock())
|
||||
|
||||
|
|
@ -566,7 +570,7 @@ class TestCassandraQueryProcessor:
|
|||
mock_result2.otype = None
|
||||
mock_result2.dtype = None
|
||||
mock_result2.lang = None
|
||||
mock_tg_instance.get_sp.return_value = [mock_result1, mock_result2]
|
||||
mock_tg_instance.async_get_sp = AsyncMock(return_value=[mock_result1, mock_result2])
|
||||
|
||||
processor = Processor(taskgroup=MagicMock())
|
||||
|
||||
|
|
@ -603,7 +607,7 @@ class TestCassandraQueryPerformanceOptimizations:
|
|||
mock_result.otype = None
|
||||
mock_result.dtype = None
|
||||
mock_result.lang = None
|
||||
mock_tg_instance.get_po.return_value = [mock_result]
|
||||
mock_tg_instance.async_get_po = AsyncMock(return_value=[mock_result])
|
||||
|
||||
processor = Processor(taskgroup=MagicMock())
|
||||
|
||||
|
|
@ -618,8 +622,8 @@ class TestCassandraQueryPerformanceOptimizations:
|
|||
|
||||
result = await processor.query_triples('test_user', query)
|
||||
|
||||
# Verify get_po was called (should use optimized po_table)
|
||||
mock_tg_instance.get_po.assert_called_once_with(
|
||||
# Verify async_get_po was called (should use optimized po_table)
|
||||
mock_tg_instance.async_get_po.assert_called_once_with(
|
||||
'test_collection', 'test_predicate', 'test_object', g=None, limit=50
|
||||
)
|
||||
|
||||
|
|
@ -643,7 +647,7 @@ class TestCassandraQueryPerformanceOptimizations:
|
|||
mock_result.otype = None
|
||||
mock_result.dtype = None
|
||||
mock_result.lang = None
|
||||
mock_tg_instance.get_os.return_value = [mock_result]
|
||||
mock_tg_instance.async_get_os = AsyncMock(return_value=[mock_result])
|
||||
|
||||
processor = Processor(taskgroup=MagicMock())
|
||||
|
||||
|
|
@ -658,8 +662,8 @@ class TestCassandraQueryPerformanceOptimizations:
|
|||
|
||||
result = await processor.query_triples('test_user', query)
|
||||
|
||||
# Verify get_os was called (should use optimized subject_table with clustering)
|
||||
mock_tg_instance.get_os.assert_called_once_with(
|
||||
# Verify async_get_os was called (should use optimized subject_table with clustering)
|
||||
mock_tg_instance.async_get_os.assert_called_once_with(
|
||||
'test_collection', 'test_object', 'test_subject', g=None, limit=25
|
||||
)
|
||||
|
||||
|
|
@ -678,28 +682,28 @@ class TestCassandraQueryPerformanceOptimizations:
|
|||
mock_kg_class.return_value = mock_tg_instance
|
||||
|
||||
# Mock empty results for all queries
|
||||
mock_tg_instance.get_all.return_value = []
|
||||
mock_tg_instance.get_s.return_value = []
|
||||
mock_tg_instance.get_p.return_value = []
|
||||
mock_tg_instance.get_o.return_value = []
|
||||
mock_tg_instance.get_sp.return_value = []
|
||||
mock_tg_instance.get_po.return_value = []
|
||||
mock_tg_instance.get_os.return_value = []
|
||||
mock_tg_instance.get_spo.return_value = []
|
||||
mock_tg_instance.async_get_all = AsyncMock(return_value=[])
|
||||
mock_tg_instance.async_get_s = AsyncMock(return_value=[])
|
||||
mock_tg_instance.async_get_p = AsyncMock(return_value=[])
|
||||
mock_tg_instance.async_get_o = AsyncMock(return_value=[])
|
||||
mock_tg_instance.async_get_sp = AsyncMock(return_value=[])
|
||||
mock_tg_instance.async_get_po = AsyncMock(return_value=[])
|
||||
mock_tg_instance.async_get_os = AsyncMock(return_value=[])
|
||||
mock_tg_instance.async_get_spo = AsyncMock(return_value=[])
|
||||
|
||||
processor = Processor(taskgroup=MagicMock())
|
||||
|
||||
# Test each query pattern
|
||||
test_patterns = [
|
||||
# (s, p, o, expected_method)
|
||||
(None, None, None, 'get_all'), # All triples
|
||||
('s1', None, None, 'get_s'), # Subject only
|
||||
(None, 'p1', None, 'get_p'), # Predicate only
|
||||
(None, None, 'o1', 'get_o'), # Object only
|
||||
('s1', 'p1', None, 'get_sp'), # Subject + Predicate
|
||||
(None, 'p1', 'o1', 'get_po'), # Predicate + Object (CRITICAL OPTIMIZATION)
|
||||
('s1', None, 'o1', 'get_os'), # Object + Subject
|
||||
('s1', 'p1', 'o1', 'get_spo'), # All three
|
||||
(None, None, None, 'async_get_all'), # All triples
|
||||
('s1', None, None, 'async_get_s'), # Subject only
|
||||
(None, 'p1', None, 'async_get_p'), # Predicate only
|
||||
(None, None, 'o1', 'async_get_o'), # Object only
|
||||
('s1', 'p1', None, 'async_get_sp'), # Subject + Predicate
|
||||
(None, 'p1', 'o1', 'async_get_po'), # Predicate + Object (CRITICAL OPTIMIZATION)
|
||||
('s1', None, 'o1', 'async_get_os'), # Object + Subject
|
||||
('s1', 'p1', 'o1', 'async_get_spo'), # All three
|
||||
]
|
||||
|
||||
for s, p, o, expected_method in test_patterns:
|
||||
|
|
@ -759,7 +763,7 @@ class TestCassandraQueryPerformanceOptimizations:
|
|||
mock_result.lang = None
|
||||
mock_results.append(mock_result)
|
||||
|
||||
mock_tg_instance.get_po.return_value = mock_results
|
||||
mock_tg_instance.async_get_po = AsyncMock(return_value=mock_results)
|
||||
|
||||
processor = Processor(taskgroup=MagicMock())
|
||||
|
||||
|
|
@ -774,8 +778,8 @@ class TestCassandraQueryPerformanceOptimizations:
|
|||
|
||||
result = await processor.query_triples('large_dataset_user', query)
|
||||
|
||||
# Verify optimized get_po was used (no ALLOW FILTERING needed!)
|
||||
mock_tg_instance.get_po.assert_called_once_with(
|
||||
# Verify optimized async_get_po was used (no ALLOW FILTERING needed!)
|
||||
mock_tg_instance.async_get_po.assert_called_once_with(
|
||||
'massive_collection',
|
||||
'http://www.w3.org/1999/02/22-rdf-syntax-ns#type',
|
||||
'http://example.com/Person',
|
||||
|
|
|
|||
|
|
@ -113,12 +113,15 @@ class TestDocEmbeddingsNullProtection:
|
|||
|
||||
@pytest.mark.asyncio
|
||||
async def test_valid_embedding_upserted(self):
|
||||
import asyncio
|
||||
from trustgraph.storage.doc_embeddings.qdrant.write import Processor
|
||||
|
||||
proc = Processor.__new__(Processor)
|
||||
proc.qdrant = MagicMock()
|
||||
proc.qdrant.collection_exists.return_value = True
|
||||
proc.collection_exists = MagicMock(return_value=True)
|
||||
proc._cache_lock = asyncio.Lock()
|
||||
proc._known_collections = set()
|
||||
|
||||
msg = MagicMock()
|
||||
msg.metadata.collection = "col1"
|
||||
|
|
@ -134,12 +137,15 @@ class TestDocEmbeddingsNullProtection:
|
|||
@pytest.mark.asyncio
|
||||
async def test_dimension_in_collection_name(self):
|
||||
"""Collection name should include vector dimension."""
|
||||
import asyncio
|
||||
from trustgraph.storage.doc_embeddings.qdrant.write import Processor
|
||||
|
||||
proc = Processor.__new__(Processor)
|
||||
proc.qdrant = MagicMock()
|
||||
proc.qdrant.collection_exists.return_value = True
|
||||
proc.collection_exists = MagicMock(return_value=True)
|
||||
proc._cache_lock = asyncio.Lock()
|
||||
proc._known_collections = set()
|
||||
|
||||
msg = MagicMock()
|
||||
msg.metadata.collection = "docs"
|
||||
|
|
@ -220,12 +226,15 @@ class TestGraphEmbeddingsNullProtection:
|
|||
|
||||
@pytest.mark.asyncio
|
||||
async def test_valid_entity_and_vector_upserted(self):
|
||||
import asyncio
|
||||
from trustgraph.storage.graph_embeddings.qdrant.write import Processor
|
||||
|
||||
proc = Processor.__new__(Processor)
|
||||
proc.qdrant = MagicMock()
|
||||
proc.qdrant.collection_exists.return_value = True
|
||||
proc.collection_exists = MagicMock(return_value=True)
|
||||
proc._cache_lock = asyncio.Lock()
|
||||
proc._known_collections = set()
|
||||
|
||||
msg = MagicMock()
|
||||
msg.metadata.collection = "col1"
|
||||
|
|
@ -241,12 +250,15 @@ class TestGraphEmbeddingsNullProtection:
|
|||
|
||||
@pytest.mark.asyncio
|
||||
async def test_lazy_collection_creation_on_new_dimension(self):
|
||||
import asyncio
|
||||
from trustgraph.storage.graph_embeddings.qdrant.write import Processor
|
||||
|
||||
proc = Processor.__new__(Processor)
|
||||
proc.qdrant = MagicMock()
|
||||
proc.qdrant.collection_exists.return_value = False
|
||||
proc.collection_exists = MagicMock(return_value=True)
|
||||
proc._cache_lock = asyncio.Lock()
|
||||
proc._known_collections = set()
|
||||
|
||||
msg = MagicMock()
|
||||
msg.metadata.collection = "graphs"
|
||||
|
|
|
|||
|
|
@ -337,6 +337,57 @@ class TestQuery:
|
|||
cache_key = "test_collection:unlabeled_entity"
|
||||
mock_cache.put.assert_called_once_with(cache_key, "unlabeled_entity")
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_triples_query_never_passes_workspace(self):
|
||||
"""Workspace isolation is handled by pub/sub topic routing, not
|
||||
by passing workspace to TriplesClient.query(). Verify that
|
||||
GraphRAG never passes workspace as a keyword argument."""
|
||||
mock_rag = MagicMock()
|
||||
mock_cache = MagicMock()
|
||||
mock_cache.get.return_value = None
|
||||
mock_rag.label_cache = mock_cache
|
||||
mock_triples_client = AsyncMock()
|
||||
mock_rag.triples_client = mock_triples_client
|
||||
|
||||
mock_triple = MagicMock()
|
||||
mock_triple.o = "Label"
|
||||
mock_triples_client.query.return_value = [mock_triple]
|
||||
|
||||
query = Query(
|
||||
rag=mock_rag,
|
||||
collection="test_collection",
|
||||
verbose=False
|
||||
)
|
||||
|
||||
await query.maybe_label("http://example.com/entity")
|
||||
|
||||
for c in mock_triples_client.query.call_args_list:
|
||||
assert "workspace" not in c.kwargs
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_follow_edges_never_passes_workspace(self):
|
||||
"""Verify follow_edges never passes workspace to query_stream."""
|
||||
mock_rag = MagicMock()
|
||||
mock_triples_client = AsyncMock()
|
||||
mock_rag.triples_client = mock_triples_client
|
||||
|
||||
mock_triple = MagicMock()
|
||||
mock_triple.s, mock_triple.p, mock_triple.o = "e1", "p1", "o1"
|
||||
mock_triples_client.query_stream.return_value = [mock_triple]
|
||||
|
||||
query = Query(
|
||||
rag=mock_rag,
|
||||
collection="test_collection",
|
||||
verbose=False,
|
||||
triple_limit=10
|
||||
)
|
||||
|
||||
subgraph = set()
|
||||
await query.follow_edges("e1", subgraph, path_length=1)
|
||||
|
||||
for c in mock_triples_client.query_stream.call_args_list:
|
||||
assert "workspace" not in c.kwargs
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_follow_edges_basic_functionality(self):
|
||||
"""Test Query.follow_edges method basic triple discovery"""
|
||||
|
|
|
|||
0
tests/unit/test_rev_gateway/__init__.py
Normal file
0
tests/unit/test_rev_gateway/__init__.py
Normal file
|
|
@ -3,275 +3,279 @@ Tests for Reverse Gateway Dispatcher
|
|||
"""
|
||||
|
||||
import pytest
|
||||
from unittest.mock import MagicMock, AsyncMock, patch
|
||||
import asyncio
|
||||
from unittest.mock import MagicMock, AsyncMock, patch, ANY
|
||||
|
||||
from trustgraph.rev_gateway.dispatcher import WebSocketResponder, MessageDispatcher
|
||||
|
||||
|
||||
class TestWebSocketResponder:
|
||||
"""Test cases for WebSocketResponder class"""
|
||||
|
||||
def test_websocket_responder_initialization(self):
|
||||
"""Test WebSocketResponder initialization"""
|
||||
responder = WebSocketResponder()
|
||||
|
||||
assert responder.response is None
|
||||
assert responder.completed is False
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_websocket_responder_send_method(self):
|
||||
"""Test WebSocketResponder send method"""
|
||||
responder = WebSocketResponder()
|
||||
|
||||
test_response = {"data": "test response"}
|
||||
|
||||
# Call send method
|
||||
await responder.send(test_response)
|
||||
|
||||
# Verify response was stored
|
||||
assert responder.response == test_response
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_websocket_responder_call_method(self):
|
||||
"""Test WebSocketResponder __call__ method"""
|
||||
responder = WebSocketResponder()
|
||||
|
||||
test_response = {"result": "success"}
|
||||
test_completed = True
|
||||
|
||||
# Call the responder
|
||||
await responder(test_response, test_completed)
|
||||
|
||||
# Verify response and completed status were set
|
||||
assert responder.response == test_response
|
||||
assert responder.completed == test_completed
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_websocket_responder_call_method_with_false_completion(self):
|
||||
"""Test WebSocketResponder __call__ method with incomplete response"""
|
||||
responder = WebSocketResponder()
|
||||
|
||||
test_response = {"partial": "data"}
|
||||
test_completed = False
|
||||
|
||||
# Call the responder
|
||||
await responder(test_response, test_completed)
|
||||
|
||||
# Verify response was set and completed is True (since send() always sets completed=True)
|
||||
assert responder.response == test_response
|
||||
assert responder.completed is True
|
||||
from trustgraph.rev_gateway.dispatcher import MessageDispatcher
|
||||
|
||||
|
||||
class TestMessageDispatcher:
|
||||
"""Test cases for MessageDispatcher class"""
|
||||
|
||||
def test_message_dispatcher_initialization_with_defaults(self):
|
||||
"""Test MessageDispatcher initialization with default parameters"""
|
||||
dispatcher = MessageDispatcher()
|
||||
|
||||
|
||||
assert dispatcher.max_workers == 10
|
||||
assert dispatcher.semaphore._value == 10
|
||||
assert dispatcher.active_tasks == set()
|
||||
assert dispatcher.backend is None
|
||||
assert dispatcher.auth is None
|
||||
assert dispatcher.dispatcher_manager is None
|
||||
assert len(dispatcher.service_mapping) > 0
|
||||
|
||||
def test_message_dispatcher_initialization_with_custom_workers(self):
|
||||
"""Test MessageDispatcher initialization with custom max_workers"""
|
||||
dispatcher = MessageDispatcher(max_workers=5)
|
||||
|
||||
|
||||
assert dispatcher.max_workers == 5
|
||||
assert dispatcher.semaphore._value == 5
|
||||
|
||||
@patch('trustgraph.rev_gateway.dispatcher.DispatcherManager')
|
||||
def test_message_dispatcher_initialization_with_pulsar_client(self, mock_dispatcher_manager):
|
||||
"""Test MessageDispatcher initialization with pulsar_client and config_receiver"""
|
||||
def test_message_dispatcher_initialization_with_backend(
|
||||
self, mock_dispatcher_manager,
|
||||
):
|
||||
mock_backend = MagicMock()
|
||||
mock_config_receiver = MagicMock()
|
||||
mock_auth = MagicMock()
|
||||
mock_dispatcher_instance = MagicMock()
|
||||
mock_dispatcher_manager.return_value = mock_dispatcher_instance
|
||||
|
||||
|
||||
dispatcher = MessageDispatcher(
|
||||
max_workers=8,
|
||||
config_receiver=mock_config_receiver,
|
||||
backend=mock_backend
|
||||
backend=mock_backend,
|
||||
auth=mock_auth,
|
||||
timeout=300,
|
||||
)
|
||||
|
||||
|
||||
assert dispatcher.max_workers == 8
|
||||
assert dispatcher.backend == mock_backend
|
||||
assert dispatcher.auth == mock_auth
|
||||
assert dispatcher.dispatcher_manager == mock_dispatcher_instance
|
||||
mock_dispatcher_manager.assert_called_once_with(
|
||||
mock_backend, mock_config_receiver, prefix="rev-gateway"
|
||||
mock_backend, mock_config_receiver,
|
||||
auth=mock_auth, prefix="rev-gateway", timeout=300,
|
||||
)
|
||||
|
||||
def test_message_dispatcher_service_mapping(self):
|
||||
"""Test MessageDispatcher service mapping contains expected services"""
|
||||
dispatcher = MessageDispatcher()
|
||||
|
||||
|
||||
expected_services = [
|
||||
"text-completion", "graph-rag", "agent", "embeddings",
|
||||
"graph-embeddings", "triples", "document-load", "text-load",
|
||||
"flow", "knowledge", "config", "librarian", "document-rag"
|
||||
"flow", "knowledge", "config", "librarian", "document-rag",
|
||||
]
|
||||
|
||||
|
||||
for service in expected_services:
|
||||
assert service in dispatcher.service_mapping
|
||||
|
||||
# Test specific mappings
|
||||
assert dispatcher.service_mapping["text-completion"] == "text-completion"
|
||||
|
||||
assert dispatcher.service_mapping["document-load"] == "document"
|
||||
assert dispatcher.service_mapping["text-load"] == "text-document"
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_message_dispatcher_handle_message_without_dispatcher_manager(self):
|
||||
"""Test MessageDispatcher handle_message without dispatcher manager"""
|
||||
async def test_handle_message_without_dispatcher_manager(self):
|
||||
dispatcher = MessageDispatcher()
|
||||
|
||||
test_message = {
|
||||
"id": "test-123",
|
||||
"service": "test-service",
|
||||
"request": {"data": "test"}
|
||||
}
|
||||
|
||||
result = await dispatcher.handle_message(test_message)
|
||||
|
||||
assert result["id"] == "test-123"
|
||||
assert "error" in result["response"]
|
||||
assert "DispatcherManager not available" in result["response"]["error"]
|
||||
dispatcher.auth = MagicMock()
|
||||
dispatcher.auth.authenticate = AsyncMock(
|
||||
return_value=MagicMock(workspace="default")
|
||||
)
|
||||
|
||||
sender = AsyncMock()
|
||||
|
||||
await dispatcher.handle_message(
|
||||
{"id": "test-1", "service": "test", "request": {}},
|
||||
sender,
|
||||
)
|
||||
|
||||
sender.assert_called_once()
|
||||
sent = sender.call_args[0][0]
|
||||
assert sent["id"] == "test-1"
|
||||
assert sent["error"]["message"] == "DispatcherManager not available"
|
||||
assert sent["error"]["type"] == "error"
|
||||
assert sent["complete"] is True
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_message_dispatcher_handle_message_with_exception(self):
|
||||
"""Test MessageDispatcher handle_message with exception during processing"""
|
||||
mock_dispatcher_manager = MagicMock()
|
||||
mock_dispatcher_manager.invoke_global_service = AsyncMock(side_effect=Exception("Test error"))
|
||||
|
||||
async def test_handle_message_auth_failure(self):
|
||||
dispatcher = MessageDispatcher()
|
||||
dispatcher.dispatcher_manager = mock_dispatcher_manager
|
||||
|
||||
test_message = {
|
||||
"id": "test-456",
|
||||
"service": "text-completion",
|
||||
"request": {"prompt": "test"}
|
||||
}
|
||||
|
||||
with patch('trustgraph.gateway.dispatch.manager.global_dispatchers', {"text-completion": True}):
|
||||
result = await dispatcher.handle_message(test_message)
|
||||
|
||||
assert result["id"] == "test-456"
|
||||
assert "error" in result["response"]
|
||||
assert "Test error" in result["response"]["error"]
|
||||
dispatcher.auth = MagicMock()
|
||||
dispatcher.auth.authenticate = AsyncMock(
|
||||
side_effect=Exception("auth failure")
|
||||
)
|
||||
dispatcher.dispatcher_manager = MagicMock()
|
||||
|
||||
sender = AsyncMock()
|
||||
|
||||
await dispatcher.handle_message(
|
||||
{"id": "test-2", "token": "bad", "service": "test", "request": {}},
|
||||
sender,
|
||||
)
|
||||
|
||||
sender.assert_called_once()
|
||||
sent = sender.call_args[0][0]
|
||||
assert sent["id"] == "test-2"
|
||||
assert "auth failure" in sent["error"]["message"]
|
||||
assert sent["complete"] is True
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_message_dispatcher_handle_message_global_service(self):
|
||||
"""Test MessageDispatcher handle_message with global service"""
|
||||
mock_dispatcher_manager = MagicMock()
|
||||
mock_dispatcher_manager.invoke_global_service = AsyncMock()
|
||||
mock_responder = MagicMock()
|
||||
mock_responder.completed = True
|
||||
mock_responder.response = {"result": "success"}
|
||||
|
||||
async def test_handle_message_global_service(self):
|
||||
mock_dm = MagicMock()
|
||||
mock_dm.invoke_global_service = AsyncMock()
|
||||
|
||||
dispatcher = MessageDispatcher()
|
||||
dispatcher.dispatcher_manager = mock_dispatcher_manager
|
||||
|
||||
test_message = {
|
||||
"id": "test-789",
|
||||
"service": "text-completion",
|
||||
"request": {"prompt": "hello"}
|
||||
}
|
||||
|
||||
with patch('trustgraph.gateway.dispatch.manager.global_dispatchers', {"text-completion": True}):
|
||||
with patch('trustgraph.rev_gateway.dispatcher.WebSocketResponder', return_value=mock_responder):
|
||||
result = await dispatcher.handle_message(test_message)
|
||||
|
||||
assert result["id"] == "test-789"
|
||||
assert result["response"] == {"result": "success"}
|
||||
mock_dispatcher_manager.invoke_global_service.assert_called_once()
|
||||
dispatcher.dispatcher_manager = mock_dm
|
||||
dispatcher.auth = MagicMock()
|
||||
dispatcher.auth.authenticate = AsyncMock(
|
||||
return_value=MagicMock(workspace="ws1")
|
||||
)
|
||||
|
||||
sender = AsyncMock()
|
||||
|
||||
with patch(
|
||||
'trustgraph.gateway.dispatch.manager.global_dispatchers',
|
||||
{"text-completion": True},
|
||||
):
|
||||
await dispatcher.handle_message(
|
||||
{
|
||||
"id": "test-3",
|
||||
"token": "tg_key",
|
||||
"service": "text-completion",
|
||||
"request": {"prompt": "hello"},
|
||||
},
|
||||
sender,
|
||||
)
|
||||
|
||||
mock_dm.invoke_global_service.assert_called_once()
|
||||
args, kwargs = mock_dm.invoke_global_service.call_args
|
||||
assert args[0] == {"prompt": "hello"}
|
||||
assert args[2] == "text-completion"
|
||||
assert kwargs["workspace"] == "ws1"
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_message_dispatcher_handle_message_flow_service(self):
|
||||
"""Test MessageDispatcher handle_message with flow service"""
|
||||
mock_dispatcher_manager = MagicMock()
|
||||
mock_dispatcher_manager.invoke_flow_service = AsyncMock()
|
||||
mock_responder = MagicMock()
|
||||
mock_responder.completed = True
|
||||
mock_responder.response = {"data": "flow_result"}
|
||||
|
||||
async def test_handle_message_flow_service(self):
|
||||
mock_dm = MagicMock()
|
||||
mock_dm.invoke_flow_service = AsyncMock()
|
||||
|
||||
dispatcher = MessageDispatcher()
|
||||
dispatcher.dispatcher_manager = mock_dispatcher_manager
|
||||
|
||||
test_message = {
|
||||
"id": "test-flow-123",
|
||||
"service": "document-rag",
|
||||
"request": {"query": "test"},
|
||||
"flow": "custom-flow"
|
||||
}
|
||||
|
||||
with patch('trustgraph.gateway.dispatch.manager.global_dispatchers', {}):
|
||||
with patch('trustgraph.rev_gateway.dispatcher.WebSocketResponder', return_value=mock_responder):
|
||||
result = await dispatcher.handle_message(test_message)
|
||||
|
||||
assert result["id"] == "test-flow-123"
|
||||
assert result["response"] == {"data": "flow_result"}
|
||||
mock_dispatcher_manager.invoke_flow_service.assert_called_once_with(
|
||||
{"query": "test"}, mock_responder, "custom-flow", "document-rag"
|
||||
dispatcher.dispatcher_manager = mock_dm
|
||||
dispatcher.auth = MagicMock()
|
||||
dispatcher.auth.authenticate = AsyncMock(
|
||||
return_value=MagicMock(workspace="ws2")
|
||||
)
|
||||
|
||||
sender = AsyncMock()
|
||||
|
||||
with patch(
|
||||
'trustgraph.gateway.dispatch.manager.global_dispatchers', {},
|
||||
):
|
||||
await dispatcher.handle_message(
|
||||
{
|
||||
"id": "test-4",
|
||||
"token": "tg_key",
|
||||
"service": "document-rag",
|
||||
"request": {"query": "test"},
|
||||
"flow": "my-flow",
|
||||
},
|
||||
sender,
|
||||
)
|
||||
|
||||
mock_dm.invoke_flow_service.assert_called_once_with(
|
||||
{"query": "test"}, ANY, "ws2", "my-flow", "document-rag",
|
||||
)
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_message_dispatcher_handle_message_incomplete_response(self):
|
||||
"""Test MessageDispatcher handle_message with incomplete response"""
|
||||
mock_dispatcher_manager = MagicMock()
|
||||
mock_dispatcher_manager.invoke_flow_service = AsyncMock()
|
||||
mock_responder = MagicMock()
|
||||
mock_responder.completed = False
|
||||
mock_responder.response = None
|
||||
|
||||
async def test_handle_message_responder_sends_frames(self):
|
||||
mock_dm = MagicMock()
|
||||
|
||||
async def fake_invoke(data, responder, svc, workspace=None):
|
||||
await responder({"partial": 1}, False)
|
||||
await responder({"partial": 2}, True)
|
||||
|
||||
mock_dm.invoke_global_service = AsyncMock(side_effect=fake_invoke)
|
||||
|
||||
dispatcher = MessageDispatcher()
|
||||
dispatcher.dispatcher_manager = mock_dispatcher_manager
|
||||
|
||||
test_message = {
|
||||
"id": "test-incomplete",
|
||||
"service": "agent",
|
||||
"request": {"input": "test"}
|
||||
dispatcher.dispatcher_manager = mock_dm
|
||||
dispatcher.auth = MagicMock()
|
||||
dispatcher.auth.authenticate = AsyncMock(
|
||||
return_value=MagicMock(workspace="ws1")
|
||||
)
|
||||
|
||||
sender = AsyncMock()
|
||||
|
||||
with patch(
|
||||
'trustgraph.gateway.dispatch.manager.global_dispatchers',
|
||||
{"text-completion": True},
|
||||
):
|
||||
await dispatcher.handle_message(
|
||||
{
|
||||
"id": "test-5",
|
||||
"token": "tg_key",
|
||||
"service": "text-completion",
|
||||
"request": {"prompt": "hi"},
|
||||
},
|
||||
sender,
|
||||
)
|
||||
|
||||
assert sender.call_count == 2
|
||||
first = sender.call_args_list[0][0][0]
|
||||
second = sender.call_args_list[1][0][0]
|
||||
|
||||
assert first == {
|
||||
"id": "test-5", "response": {"partial": 1}, "complete": False,
|
||||
}
|
||||
assert second == {
|
||||
"id": "test-5", "response": {"partial": 2}, "complete": True,
|
||||
}
|
||||
|
||||
with patch('trustgraph.gateway.dispatch.manager.global_dispatchers', {}):
|
||||
with patch('trustgraph.rev_gateway.dispatcher.WebSocketResponder', return_value=mock_responder):
|
||||
result = await dispatcher.handle_message(test_message)
|
||||
|
||||
assert result["id"] == "test-incomplete"
|
||||
assert result["response"] == {"error": "No response received"}
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_message_dispatcher_shutdown(self):
|
||||
"""Test MessageDispatcher shutdown method"""
|
||||
import asyncio
|
||||
|
||||
async def test_handle_message_workspace_from_identity(self):
|
||||
mock_dm = MagicMock()
|
||||
mock_dm.invoke_flow_service = AsyncMock()
|
||||
|
||||
dispatcher = MessageDispatcher()
|
||||
|
||||
# Create actual async tasks
|
||||
dispatcher.dispatcher_manager = mock_dm
|
||||
dispatcher.auth = MagicMock()
|
||||
dispatcher.auth.authenticate = AsyncMock(
|
||||
return_value=MagicMock(workspace="derived-ws")
|
||||
)
|
||||
|
||||
sender = AsyncMock()
|
||||
|
||||
with patch(
|
||||
'trustgraph.gateway.dispatch.manager.global_dispatchers', {},
|
||||
):
|
||||
await dispatcher.handle_message(
|
||||
{
|
||||
"id": "test-6",
|
||||
"token": "tg_key",
|
||||
"service": "agent",
|
||||
"request": {"question": "test"},
|
||||
"flow": "default",
|
||||
},
|
||||
sender,
|
||||
)
|
||||
|
||||
args = mock_dm.invoke_flow_service.call_args[0]
|
||||
assert args[2] == "derived-ws"
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_shutdown(self):
|
||||
dispatcher = MessageDispatcher()
|
||||
|
||||
async def dummy_task():
|
||||
await asyncio.sleep(0.01)
|
||||
return "done"
|
||||
|
||||
|
||||
task1 = asyncio.create_task(dummy_task())
|
||||
task2 = asyncio.create_task(dummy_task())
|
||||
dispatcher.active_tasks = {task1, task2}
|
||||
|
||||
# Call shutdown
|
||||
|
||||
await dispatcher.shutdown()
|
||||
|
||||
# Verify tasks were completed
|
||||
|
||||
assert task1.done()
|
||||
assert task2.done()
|
||||
assert len(dispatcher.active_tasks) == 2 # Tasks remain in set but are completed
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_message_dispatcher_shutdown_with_no_tasks(self):
|
||||
"""Test MessageDispatcher shutdown with no active tasks"""
|
||||
async def test_shutdown_with_no_tasks(self):
|
||||
dispatcher = MessageDispatcher()
|
||||
|
||||
# Call shutdown with no active tasks
|
||||
|
||||
await dispatcher.shutdown()
|
||||
|
||||
# Should complete without error
|
||||
assert dispatcher.active_tasks == set()
|
||||
|
||||
assert dispatcher.active_tasks == set()
|
||||
|
|
|
|||
|
|
@ -8,22 +8,38 @@ from unittest.mock import MagicMock, AsyncMock, patch, Mock
|
|||
from aiohttp import WSMsgType, ClientWebSocketResponse
|
||||
import json
|
||||
|
||||
from trustgraph.rev_gateway.service import ReverseGateway, parse_args, run
|
||||
from trustgraph.rev_gateway.service import ReverseGateway, run
|
||||
|
||||
|
||||
MOCK_PATCHES = [
|
||||
'trustgraph.rev_gateway.service.IamAuth',
|
||||
'trustgraph.rev_gateway.service.ConfigReceiver',
|
||||
'trustgraph.rev_gateway.service.MessageDispatcher',
|
||||
'trustgraph.rev_gateway.service.get_pubsub',
|
||||
]
|
||||
|
||||
|
||||
def make_gateway(**overrides):
|
||||
config = {"websocket_uri": "ws://localhost:7650/out"}
|
||||
config.update(overrides)
|
||||
return ReverseGateway(**config)
|
||||
|
||||
|
||||
class TestReverseGateway:
|
||||
"""Test cases for ReverseGateway class"""
|
||||
|
||||
@patch('trustgraph.rev_gateway.service.ConfigReceiver')
|
||||
@patch('trustgraph.rev_gateway.service.MessageDispatcher')
|
||||
@patch('trustgraph.rev_gateway.service.get_pubsub')
|
||||
def test_reverse_gateway_initialization_defaults(self, mock_get_pubsub, mock_dispatcher, mock_config_receiver):
|
||||
"""Test ReverseGateway initialization with default parameters"""
|
||||
mock_backend = MagicMock()
|
||||
mock_get_pubsub.return_value = mock_backend
|
||||
|
||||
gateway = ReverseGateway()
|
||||
|
||||
@patch(*MOCK_PATCHES[0:1])
|
||||
@patch(*MOCK_PATCHES[1:2])
|
||||
@patch(*MOCK_PATCHES[2:3])
|
||||
@patch(*MOCK_PATCHES[3:4])
|
||||
def test_reverse_gateway_initialization_defaults(
|
||||
self, mock_get_pubsub, mock_dispatcher,
|
||||
mock_config_receiver, mock_iam_auth,
|
||||
):
|
||||
mock_get_pubsub.return_value = MagicMock()
|
||||
|
||||
gateway = make_gateway()
|
||||
|
||||
assert gateway.websocket_uri == "ws://localhost:7650/out"
|
||||
assert gateway.host == "localhost"
|
||||
assert gateway.port == 7650
|
||||
|
|
@ -33,25 +49,22 @@ class TestReverseGateway:
|
|||
assert gateway.max_workers == 10
|
||||
assert gateway.running is False
|
||||
assert gateway.reconnect_delay == 3.0
|
||||
assert gateway.pulsar_host == "pulsar://pulsar:6650"
|
||||
assert gateway.pulsar_api_key is None
|
||||
|
||||
@patch('trustgraph.rev_gateway.service.ConfigReceiver')
|
||||
@patch('trustgraph.rev_gateway.service.MessageDispatcher')
|
||||
@patch('trustgraph.rev_gateway.service.get_pubsub')
|
||||
def test_reverse_gateway_initialization_custom_params(self, mock_get_pubsub, mock_dispatcher, mock_config_receiver):
|
||||
"""Test ReverseGateway initialization with custom parameters"""
|
||||
mock_backend = MagicMock()
|
||||
mock_get_pubsub.return_value = mock_backend
|
||||
|
||||
gateway = ReverseGateway(
|
||||
@patch(*MOCK_PATCHES[0:1])
|
||||
@patch(*MOCK_PATCHES[1:2])
|
||||
@patch(*MOCK_PATCHES[2:3])
|
||||
@patch(*MOCK_PATCHES[3:4])
|
||||
def test_reverse_gateway_initialization_custom_params(
|
||||
self, mock_get_pubsub, mock_dispatcher,
|
||||
mock_config_receiver, mock_iam_auth,
|
||||
):
|
||||
mock_get_pubsub.return_value = MagicMock()
|
||||
|
||||
gateway = make_gateway(
|
||||
websocket_uri="wss://example.com:8080/websocket",
|
||||
max_workers=20,
|
||||
pulsar_host="pulsar://custom:6650",
|
||||
pulsar_api_key="test-key",
|
||||
pulsar_listener="test-listener"
|
||||
)
|
||||
|
||||
|
||||
assert gateway.websocket_uri == "wss://example.com:8080/websocket"
|
||||
assert gateway.host == "example.com"
|
||||
assert gateway.port == 8080
|
||||
|
|
@ -59,340 +72,360 @@ class TestReverseGateway:
|
|||
assert gateway.path == "/websocket"
|
||||
assert gateway.url == "wss://example.com:8080/websocket"
|
||||
assert gateway.max_workers == 20
|
||||
assert gateway.pulsar_host == "pulsar://custom:6650"
|
||||
assert gateway.pulsar_api_key == "test-key"
|
||||
assert gateway.pulsar_listener == "test-listener"
|
||||
|
||||
@patch('trustgraph.rev_gateway.service.ConfigReceiver')
|
||||
@patch('trustgraph.rev_gateway.service.MessageDispatcher')
|
||||
@patch('trustgraph.rev_gateway.service.get_pubsub')
|
||||
def test_reverse_gateway_initialization_with_missing_path(self, mock_get_pubsub, mock_dispatcher, mock_config_receiver):
|
||||
"""Test ReverseGateway initialization with WebSocket URI missing path"""
|
||||
mock_backend = MagicMock()
|
||||
mock_get_pubsub.return_value = mock_backend
|
||||
|
||||
gateway = ReverseGateway(websocket_uri="ws://example.com")
|
||||
|
||||
@patch(*MOCK_PATCHES[0:1])
|
||||
@patch(*MOCK_PATCHES[1:2])
|
||||
@patch(*MOCK_PATCHES[2:3])
|
||||
@patch(*MOCK_PATCHES[3:4])
|
||||
def test_reverse_gateway_initialization_with_missing_path(
|
||||
self, mock_get_pubsub, mock_dispatcher,
|
||||
mock_config_receiver, mock_iam_auth,
|
||||
):
|
||||
mock_get_pubsub.return_value = MagicMock()
|
||||
|
||||
gateway = make_gateway(websocket_uri="ws://example.com")
|
||||
|
||||
assert gateway.path == "/ws"
|
||||
assert gateway.url == "ws://example.com/ws"
|
||||
|
||||
@patch('trustgraph.rev_gateway.service.ConfigReceiver')
|
||||
@patch('trustgraph.rev_gateway.service.MessageDispatcher')
|
||||
@patch('trustgraph.rev_gateway.service.get_pubsub')
|
||||
def test_reverse_gateway_initialization_invalid_scheme(self, mock_get_pubsub, mock_dispatcher, mock_config_receiver):
|
||||
"""Test ReverseGateway initialization with invalid WebSocket scheme"""
|
||||
@patch(*MOCK_PATCHES[0:1])
|
||||
@patch(*MOCK_PATCHES[1:2])
|
||||
@patch(*MOCK_PATCHES[2:3])
|
||||
@patch(*MOCK_PATCHES[3:4])
|
||||
def test_reverse_gateway_initialization_invalid_scheme(
|
||||
self, mock_get_pubsub, mock_dispatcher,
|
||||
mock_config_receiver, mock_iam_auth,
|
||||
):
|
||||
with pytest.raises(ValueError, match="WebSocket URI must use ws:// or wss:// scheme"):
|
||||
ReverseGateway(websocket_uri="http://example.com")
|
||||
make_gateway(websocket_uri="http://example.com")
|
||||
|
||||
@patch('trustgraph.rev_gateway.service.ConfigReceiver')
|
||||
@patch('trustgraph.rev_gateway.service.MessageDispatcher')
|
||||
@patch('trustgraph.rev_gateway.service.get_pubsub')
|
||||
def test_reverse_gateway_initialization_missing_hostname(self, mock_get_pubsub, mock_dispatcher, mock_config_receiver):
|
||||
"""Test ReverseGateway initialization with missing hostname"""
|
||||
@patch(*MOCK_PATCHES[0:1])
|
||||
@patch(*MOCK_PATCHES[1:2])
|
||||
@patch(*MOCK_PATCHES[2:3])
|
||||
@patch(*MOCK_PATCHES[3:4])
|
||||
def test_reverse_gateway_initialization_missing_hostname(
|
||||
self, mock_get_pubsub, mock_dispatcher,
|
||||
mock_config_receiver, mock_iam_auth,
|
||||
):
|
||||
with pytest.raises(ValueError, match="WebSocket URI must include hostname"):
|
||||
ReverseGateway(websocket_uri="ws://")
|
||||
make_gateway(websocket_uri="ws://")
|
||||
|
||||
@patch('trustgraph.rev_gateway.service.ConfigReceiver')
|
||||
@patch('trustgraph.rev_gateway.service.MessageDispatcher')
|
||||
@patch('trustgraph.rev_gateway.service.get_pubsub')
|
||||
def test_reverse_gateway_pulsar_client_with_auth(self, mock_get_pubsub, mock_dispatcher, mock_config_receiver):
|
||||
"""Test ReverseGateway creates backend with authentication"""
|
||||
@patch(*MOCK_PATCHES[0:1])
|
||||
@patch(*MOCK_PATCHES[1:2])
|
||||
@patch(*MOCK_PATCHES[2:3])
|
||||
@patch(*MOCK_PATCHES[3:4])
|
||||
def test_reverse_gateway_iam_auth_created(
|
||||
self, mock_get_pubsub, mock_dispatcher,
|
||||
mock_config_receiver, mock_iam_auth,
|
||||
):
|
||||
mock_backend = MagicMock()
|
||||
mock_get_pubsub.return_value = mock_backend
|
||||
|
||||
gateway = ReverseGateway(
|
||||
pulsar_api_key="test-key",
|
||||
pulsar_listener="test-listener"
|
||||
gateway = make_gateway(id="test-rev-gw")
|
||||
|
||||
mock_iam_auth.assert_called_once_with(
|
||||
backend=mock_backend,
|
||||
id="test-rev-gw",
|
||||
)
|
||||
|
||||
# Verify get_pubsub was called with the correct parameters
|
||||
mock_get_pubsub.assert_called_once_with(
|
||||
pulsar_host="pulsar://pulsar:6650",
|
||||
pulsar_api_key="test-key",
|
||||
pulsar_listener="test-listener"
|
||||
@patch(*MOCK_PATCHES[0:1])
|
||||
@patch(*MOCK_PATCHES[1:2])
|
||||
@patch(*MOCK_PATCHES[2:3])
|
||||
@patch(*MOCK_PATCHES[3:4])
|
||||
def test_reverse_gateway_config_receiver_gets_auth(
|
||||
self, mock_get_pubsub, mock_dispatcher,
|
||||
mock_config_receiver, mock_iam_auth,
|
||||
):
|
||||
mock_backend = MagicMock()
|
||||
mock_get_pubsub.return_value = mock_backend
|
||||
mock_auth_instance = MagicMock()
|
||||
mock_iam_auth.return_value = mock_auth_instance
|
||||
|
||||
gateway = make_gateway()
|
||||
|
||||
mock_config_receiver.assert_called_once_with(
|
||||
mock_backend, auth=mock_auth_instance,
|
||||
)
|
||||
|
||||
@patch('trustgraph.rev_gateway.service.ConfigReceiver')
|
||||
@patch('trustgraph.rev_gateway.service.MessageDispatcher')
|
||||
@patch('trustgraph.rev_gateway.service.get_pubsub')
|
||||
@patch(*MOCK_PATCHES[0:1])
|
||||
@patch(*MOCK_PATCHES[1:2])
|
||||
@patch(*MOCK_PATCHES[2:3])
|
||||
@patch(*MOCK_PATCHES[3:4])
|
||||
@patch('trustgraph.rev_gateway.service.ClientSession')
|
||||
@pytest.mark.asyncio
|
||||
async def test_reverse_gateway_connect_success(self, mock_session_class, mock_get_pubsub, mock_dispatcher, mock_config_receiver):
|
||||
"""Test ReverseGateway successful connection"""
|
||||
mock_backend = MagicMock()
|
||||
mock_get_pubsub.return_value = mock_backend
|
||||
|
||||
async def test_reverse_gateway_connect_success(
|
||||
self, mock_session_class, mock_get_pubsub,
|
||||
mock_dispatcher, mock_config_receiver, mock_iam_auth,
|
||||
):
|
||||
mock_get_pubsub.return_value = MagicMock()
|
||||
|
||||
mock_session = AsyncMock()
|
||||
mock_ws = AsyncMock()
|
||||
mock_session.ws_connect.return_value = mock_ws
|
||||
mock_session_class.return_value = mock_session
|
||||
|
||||
gateway = ReverseGateway()
|
||||
|
||||
|
||||
gateway = make_gateway()
|
||||
|
||||
result = await gateway.connect()
|
||||
|
||||
|
||||
assert result is True
|
||||
assert gateway.session == mock_session
|
||||
assert gateway.ws == mock_ws
|
||||
mock_session.ws_connect.assert_called_once_with(gateway.url)
|
||||
|
||||
@patch('trustgraph.rev_gateway.service.ConfigReceiver')
|
||||
@patch('trustgraph.rev_gateway.service.MessageDispatcher')
|
||||
@patch('trustgraph.rev_gateway.service.get_pubsub')
|
||||
@patch(*MOCK_PATCHES[0:1])
|
||||
@patch(*MOCK_PATCHES[1:2])
|
||||
@patch(*MOCK_PATCHES[2:3])
|
||||
@patch(*MOCK_PATCHES[3:4])
|
||||
@patch('trustgraph.rev_gateway.service.ClientSession')
|
||||
@pytest.mark.asyncio
|
||||
async def test_reverse_gateway_connect_failure(self, mock_session_class, mock_get_pubsub, mock_dispatcher, mock_config_receiver):
|
||||
"""Test ReverseGateway connection failure"""
|
||||
mock_backend = MagicMock()
|
||||
mock_get_pubsub.return_value = mock_backend
|
||||
|
||||
async def test_reverse_gateway_connect_failure(
|
||||
self, mock_session_class, mock_get_pubsub,
|
||||
mock_dispatcher, mock_config_receiver, mock_iam_auth,
|
||||
):
|
||||
mock_get_pubsub.return_value = MagicMock()
|
||||
|
||||
mock_session = AsyncMock()
|
||||
mock_session.ws_connect.side_effect = Exception("Connection failed")
|
||||
mock_session_class.return_value = mock_session
|
||||
|
||||
gateway = ReverseGateway()
|
||||
|
||||
|
||||
gateway = make_gateway()
|
||||
|
||||
result = await gateway.connect()
|
||||
|
||||
|
||||
assert result is False
|
||||
|
||||
@patch('trustgraph.rev_gateway.service.ConfigReceiver')
|
||||
@patch('trustgraph.rev_gateway.service.MessageDispatcher')
|
||||
@patch('trustgraph.rev_gateway.service.get_pubsub')
|
||||
@patch(*MOCK_PATCHES[0:1])
|
||||
@patch(*MOCK_PATCHES[1:2])
|
||||
@patch(*MOCK_PATCHES[2:3])
|
||||
@patch(*MOCK_PATCHES[3:4])
|
||||
@pytest.mark.asyncio
|
||||
async def test_reverse_gateway_disconnect(self, mock_get_pubsub, mock_dispatcher, mock_config_receiver):
|
||||
"""Test ReverseGateway disconnect"""
|
||||
mock_backend = MagicMock()
|
||||
mock_get_pubsub.return_value = mock_backend
|
||||
|
||||
gateway = ReverseGateway()
|
||||
|
||||
# Mock websocket and session
|
||||
async def test_reverse_gateway_disconnect(
|
||||
self, mock_get_pubsub, mock_dispatcher,
|
||||
mock_config_receiver, mock_iam_auth,
|
||||
):
|
||||
mock_get_pubsub.return_value = MagicMock()
|
||||
|
||||
gateway = make_gateway()
|
||||
|
||||
mock_ws = AsyncMock()
|
||||
mock_ws.closed = False
|
||||
mock_session = AsyncMock()
|
||||
mock_session.closed = False
|
||||
|
||||
|
||||
gateway.ws = mock_ws
|
||||
gateway.session = mock_session
|
||||
|
||||
|
||||
await gateway.disconnect()
|
||||
|
||||
|
||||
mock_ws.close.assert_called_once()
|
||||
mock_session.close.assert_called_once()
|
||||
assert gateway.ws is None
|
||||
assert gateway.session is None
|
||||
|
||||
@patch('trustgraph.rev_gateway.service.ConfigReceiver')
|
||||
@patch('trustgraph.rev_gateway.service.MessageDispatcher')
|
||||
@patch('trustgraph.rev_gateway.service.get_pubsub')
|
||||
@patch(*MOCK_PATCHES[0:1])
|
||||
@patch(*MOCK_PATCHES[1:2])
|
||||
@patch(*MOCK_PATCHES[2:3])
|
||||
@patch(*MOCK_PATCHES[3:4])
|
||||
@pytest.mark.asyncio
|
||||
async def test_reverse_gateway_send_message(self, mock_get_pubsub, mock_dispatcher, mock_config_receiver):
|
||||
"""Test ReverseGateway send message"""
|
||||
mock_backend = MagicMock()
|
||||
mock_get_pubsub.return_value = mock_backend
|
||||
|
||||
gateway = ReverseGateway()
|
||||
|
||||
# Mock websocket
|
||||
async def test_reverse_gateway_send_message(
|
||||
self, mock_get_pubsub, mock_dispatcher,
|
||||
mock_config_receiver, mock_iam_auth,
|
||||
):
|
||||
mock_get_pubsub.return_value = MagicMock()
|
||||
|
||||
gateway = make_gateway()
|
||||
|
||||
mock_ws = AsyncMock()
|
||||
mock_ws.closed = False
|
||||
gateway.ws = mock_ws
|
||||
|
||||
|
||||
test_message = {"id": "test", "data": "hello"}
|
||||
|
||||
|
||||
await gateway.send_message(test_message)
|
||||
|
||||
|
||||
mock_ws.send_str.assert_called_once_with(json.dumps(test_message))
|
||||
|
||||
@patch('trustgraph.rev_gateway.service.ConfigReceiver')
|
||||
@patch('trustgraph.rev_gateway.service.MessageDispatcher')
|
||||
@patch('trustgraph.rev_gateway.service.get_pubsub')
|
||||
@patch(*MOCK_PATCHES[0:1])
|
||||
@patch(*MOCK_PATCHES[1:2])
|
||||
@patch(*MOCK_PATCHES[2:3])
|
||||
@patch(*MOCK_PATCHES[3:4])
|
||||
@pytest.mark.asyncio
|
||||
async def test_reverse_gateway_send_message_closed_connection(self, mock_get_pubsub, mock_dispatcher, mock_config_receiver):
|
||||
"""Test ReverseGateway send message with closed connection"""
|
||||
mock_backend = MagicMock()
|
||||
mock_get_pubsub.return_value = mock_backend
|
||||
|
||||
gateway = ReverseGateway()
|
||||
|
||||
# Mock closed websocket
|
||||
async def test_reverse_gateway_send_message_closed_connection(
|
||||
self, mock_get_pubsub, mock_dispatcher,
|
||||
mock_config_receiver, mock_iam_auth,
|
||||
):
|
||||
mock_get_pubsub.return_value = MagicMock()
|
||||
|
||||
gateway = make_gateway()
|
||||
|
||||
mock_ws = AsyncMock()
|
||||
mock_ws.closed = True
|
||||
gateway.ws = mock_ws
|
||||
|
||||
|
||||
test_message = {"id": "test", "data": "hello"}
|
||||
|
||||
|
||||
await gateway.send_message(test_message)
|
||||
|
||||
# Should not call send_str on closed connection
|
||||
|
||||
mock_ws.send_str.assert_not_called()
|
||||
|
||||
@patch('trustgraph.rev_gateway.service.ConfigReceiver')
|
||||
@patch('trustgraph.rev_gateway.service.MessageDispatcher')
|
||||
@patch('trustgraph.rev_gateway.service.get_pubsub')
|
||||
@patch(*MOCK_PATCHES[0:1])
|
||||
@patch(*MOCK_PATCHES[1:2])
|
||||
@patch(*MOCK_PATCHES[2:3])
|
||||
@patch(*MOCK_PATCHES[3:4])
|
||||
@pytest.mark.asyncio
|
||||
async def test_reverse_gateway_handle_message(self, mock_get_pubsub, mock_dispatcher, mock_config_receiver):
|
||||
"""Test ReverseGateway handle message"""
|
||||
mock_backend = MagicMock()
|
||||
mock_get_pubsub.return_value = mock_backend
|
||||
|
||||
mock_dispatcher_instance = AsyncMock()
|
||||
mock_dispatcher_instance.handle_message.return_value = {"response": "success"}
|
||||
mock_dispatcher.return_value = mock_dispatcher_instance
|
||||
|
||||
gateway = ReverseGateway()
|
||||
|
||||
# Mock send_message
|
||||
gateway.send_message = AsyncMock()
|
||||
|
||||
test_message = '{"id": "test", "service": "test-service", "request": {"data": "test"}}'
|
||||
|
||||
await gateway.handle_message(test_message)
|
||||
|
||||
mock_dispatcher_instance.handle_message.assert_called_once_with({
|
||||
"id": "test",
|
||||
"service": "test-service",
|
||||
"request": {"data": "test"}
|
||||
})
|
||||
gateway.send_message.assert_called_once_with({"response": "success"})
|
||||
async def test_reverse_gateway_handle_message(
|
||||
self, mock_get_pubsub, mock_dispatcher,
|
||||
mock_config_receiver, mock_iam_auth,
|
||||
):
|
||||
mock_get_pubsub.return_value = MagicMock()
|
||||
|
||||
mock_dispatcher_instance = AsyncMock()
|
||||
mock_dispatcher.return_value = mock_dispatcher_instance
|
||||
|
||||
gateway = make_gateway()
|
||||
|
||||
@patch('trustgraph.rev_gateway.service.ConfigReceiver')
|
||||
@patch('trustgraph.rev_gateway.service.MessageDispatcher')
|
||||
@patch('trustgraph.rev_gateway.service.get_pubsub')
|
||||
@pytest.mark.asyncio
|
||||
async def test_reverse_gateway_handle_message_invalid_json(self, mock_get_pubsub, mock_dispatcher, mock_config_receiver):
|
||||
"""Test ReverseGateway handle message with invalid JSON"""
|
||||
mock_backend = MagicMock()
|
||||
mock_get_pubsub.return_value = mock_backend
|
||||
|
||||
gateway = ReverseGateway()
|
||||
|
||||
# Mock send_message
|
||||
gateway.send_message = AsyncMock()
|
||||
|
||||
test_message = 'invalid json'
|
||||
|
||||
# Should not raise exception
|
||||
|
||||
test_message = '{"id": "test", "service": "test-service", "request": {"data": "test"}}'
|
||||
|
||||
await gateway.handle_message(test_message)
|
||||
|
||||
# Should not call send_message due to error
|
||||
|
||||
mock_dispatcher_instance.handle_message.assert_called_once_with(
|
||||
{
|
||||
"id": "test",
|
||||
"service": "test-service",
|
||||
"request": {"data": "test"},
|
||||
},
|
||||
gateway.send_message,
|
||||
)
|
||||
|
||||
@patch(*MOCK_PATCHES[0:1])
|
||||
@patch(*MOCK_PATCHES[1:2])
|
||||
@patch(*MOCK_PATCHES[2:3])
|
||||
@patch(*MOCK_PATCHES[3:4])
|
||||
@pytest.mark.asyncio
|
||||
async def test_reverse_gateway_handle_message_invalid_json(
|
||||
self, mock_get_pubsub, mock_dispatcher,
|
||||
mock_config_receiver, mock_iam_auth,
|
||||
):
|
||||
mock_get_pubsub.return_value = MagicMock()
|
||||
|
||||
gateway = make_gateway()
|
||||
|
||||
gateway.send_message = AsyncMock()
|
||||
|
||||
await gateway.handle_message('invalid json')
|
||||
|
||||
gateway.send_message.assert_not_called()
|
||||
|
||||
@patch('trustgraph.rev_gateway.service.ConfigReceiver')
|
||||
@patch('trustgraph.rev_gateway.service.MessageDispatcher')
|
||||
@patch('trustgraph.rev_gateway.service.get_pubsub')
|
||||
@patch(*MOCK_PATCHES[0:1])
|
||||
@patch(*MOCK_PATCHES[1:2])
|
||||
@patch(*MOCK_PATCHES[2:3])
|
||||
@patch(*MOCK_PATCHES[3:4])
|
||||
@pytest.mark.asyncio
|
||||
async def test_reverse_gateway_listen_text_message(self, mock_get_pubsub, mock_dispatcher, mock_config_receiver):
|
||||
"""Test ReverseGateway listen with text message"""
|
||||
mock_backend = MagicMock()
|
||||
mock_get_pubsub.return_value = mock_backend
|
||||
|
||||
gateway = ReverseGateway()
|
||||
async def test_reverse_gateway_listen_text_message(
|
||||
self, mock_get_pubsub, mock_dispatcher,
|
||||
mock_config_receiver, mock_iam_auth,
|
||||
):
|
||||
mock_get_pubsub.return_value = MagicMock()
|
||||
|
||||
gateway = make_gateway()
|
||||
gateway.running = True
|
||||
|
||||
# Mock websocket
|
||||
|
||||
mock_ws = AsyncMock()
|
||||
mock_ws.closed = False
|
||||
gateway.ws = mock_ws
|
||||
|
||||
# Mock handle_message
|
||||
|
||||
gateway.handle_message = AsyncMock()
|
||||
|
||||
# Mock message
|
||||
|
||||
mock_msg = MagicMock()
|
||||
mock_msg.type = WSMsgType.TEXT
|
||||
mock_msg.data = '{"test": "message"}'
|
||||
|
||||
# Mock receive to return message once, then raise exception to stop loop
|
||||
|
||||
mock_ws.receive.side_effect = [mock_msg, Exception("Test stop")]
|
||||
|
||||
# listen() catches exceptions and breaks, so no exception should be raised
|
||||
|
||||
await gateway.listen()
|
||||
|
||||
|
||||
gateway.handle_message.assert_called_once_with('{"test": "message"}')
|
||||
|
||||
@patch('trustgraph.rev_gateway.service.ConfigReceiver')
|
||||
@patch('trustgraph.rev_gateway.service.MessageDispatcher')
|
||||
@patch('trustgraph.rev_gateway.service.get_pubsub')
|
||||
@patch(*MOCK_PATCHES[0:1])
|
||||
@patch(*MOCK_PATCHES[1:2])
|
||||
@patch(*MOCK_PATCHES[2:3])
|
||||
@patch(*MOCK_PATCHES[3:4])
|
||||
@pytest.mark.asyncio
|
||||
async def test_reverse_gateway_listen_binary_message(self, mock_get_pubsub, mock_dispatcher, mock_config_receiver):
|
||||
"""Test ReverseGateway listen with binary message"""
|
||||
mock_backend = MagicMock()
|
||||
mock_get_pubsub.return_value = mock_backend
|
||||
|
||||
gateway = ReverseGateway()
|
||||
async def test_reverse_gateway_listen_binary_message(
|
||||
self, mock_get_pubsub, mock_dispatcher,
|
||||
mock_config_receiver, mock_iam_auth,
|
||||
):
|
||||
mock_get_pubsub.return_value = MagicMock()
|
||||
|
||||
gateway = make_gateway()
|
||||
gateway.running = True
|
||||
|
||||
# Mock websocket
|
||||
|
||||
mock_ws = AsyncMock()
|
||||
mock_ws.closed = False
|
||||
gateway.ws = mock_ws
|
||||
|
||||
# Mock handle_message
|
||||
|
||||
gateway.handle_message = AsyncMock()
|
||||
|
||||
# Mock message
|
||||
|
||||
mock_msg = MagicMock()
|
||||
mock_msg.type = WSMsgType.BINARY
|
||||
mock_msg.data = b'{"test": "binary"}'
|
||||
|
||||
# Mock receive to return message once, then raise exception to stop loop
|
||||
|
||||
mock_ws.receive.side_effect = [mock_msg, Exception("Test stop")]
|
||||
|
||||
# listen() catches exceptions and breaks, so no exception should be raised
|
||||
|
||||
await gateway.listen()
|
||||
|
||||
|
||||
gateway.handle_message.assert_called_once_with('{"test": "binary"}')
|
||||
|
||||
@patch('trustgraph.rev_gateway.service.ConfigReceiver')
|
||||
@patch('trustgraph.rev_gateway.service.MessageDispatcher')
|
||||
@patch('trustgraph.rev_gateway.service.get_pubsub')
|
||||
@patch(*MOCK_PATCHES[0:1])
|
||||
@patch(*MOCK_PATCHES[1:2])
|
||||
@patch(*MOCK_PATCHES[2:3])
|
||||
@patch(*MOCK_PATCHES[3:4])
|
||||
@pytest.mark.asyncio
|
||||
async def test_reverse_gateway_listen_close_message(self, mock_get_pubsub, mock_dispatcher, mock_config_receiver):
|
||||
"""Test ReverseGateway listen with close message"""
|
||||
mock_backend = MagicMock()
|
||||
mock_get_pubsub.return_value = mock_backend
|
||||
|
||||
gateway = ReverseGateway()
|
||||
async def test_reverse_gateway_listen_close_message(
|
||||
self, mock_get_pubsub, mock_dispatcher,
|
||||
mock_config_receiver, mock_iam_auth,
|
||||
):
|
||||
mock_get_pubsub.return_value = MagicMock()
|
||||
|
||||
gateway = make_gateway()
|
||||
gateway.running = True
|
||||
|
||||
# Mock websocket
|
||||
|
||||
mock_ws = AsyncMock()
|
||||
mock_ws.closed = False
|
||||
gateway.ws = mock_ws
|
||||
|
||||
# Mock handle_message
|
||||
|
||||
gateway.handle_message = AsyncMock()
|
||||
|
||||
# Mock message
|
||||
|
||||
mock_msg = MagicMock()
|
||||
mock_msg.type = WSMsgType.CLOSE
|
||||
|
||||
# Mock receive to return close message
|
||||
|
||||
mock_ws.receive.return_value = mock_msg
|
||||
|
||||
|
||||
await gateway.listen()
|
||||
|
||||
# Should not call handle_message for close message
|
||||
|
||||
gateway.handle_message.assert_not_called()
|
||||
|
||||
@patch('trustgraph.rev_gateway.service.ConfigReceiver')
|
||||
@patch('trustgraph.rev_gateway.service.MessageDispatcher')
|
||||
@patch('trustgraph.rev_gateway.service.get_pubsub')
|
||||
@patch(*MOCK_PATCHES[0:1])
|
||||
@patch(*MOCK_PATCHES[1:2])
|
||||
@patch(*MOCK_PATCHES[2:3])
|
||||
@patch(*MOCK_PATCHES[3:4])
|
||||
@pytest.mark.asyncio
|
||||
async def test_reverse_gateway_shutdown(self, mock_get_pubsub, mock_dispatcher, mock_config_receiver):
|
||||
"""Test ReverseGateway shutdown"""
|
||||
async def test_reverse_gateway_shutdown(
|
||||
self, mock_get_pubsub, mock_dispatcher,
|
||||
mock_config_receiver, mock_iam_auth,
|
||||
):
|
||||
mock_backend = MagicMock()
|
||||
mock_get_pubsub.return_value = mock_backend
|
||||
|
||||
mock_dispatcher_instance = AsyncMock()
|
||||
mock_dispatcher.return_value = mock_dispatcher_instance
|
||||
|
||||
gateway = ReverseGateway()
|
||||
gateway = make_gateway()
|
||||
gateway.running = True
|
||||
|
||||
# Mock disconnect
|
||||
gateway.disconnect = AsyncMock()
|
||||
|
||||
await gateway.shutdown()
|
||||
|
|
@ -402,46 +435,50 @@ class TestReverseGateway:
|
|||
gateway.disconnect.assert_called_once()
|
||||
mock_backend.close.assert_called_once()
|
||||
|
||||
@patch('trustgraph.rev_gateway.service.ConfigReceiver')
|
||||
@patch('trustgraph.rev_gateway.service.MessageDispatcher')
|
||||
@patch('trustgraph.rev_gateway.service.get_pubsub')
|
||||
def test_reverse_gateway_stop(self, mock_get_pubsub, mock_dispatcher, mock_config_receiver):
|
||||
"""Test ReverseGateway stop"""
|
||||
mock_backend = MagicMock()
|
||||
mock_get_pubsub.return_value = mock_backend
|
||||
|
||||
gateway = ReverseGateway()
|
||||
@patch(*MOCK_PATCHES[0:1])
|
||||
@patch(*MOCK_PATCHES[1:2])
|
||||
@patch(*MOCK_PATCHES[2:3])
|
||||
@patch(*MOCK_PATCHES[3:4])
|
||||
def test_reverse_gateway_stop(
|
||||
self, mock_get_pubsub, mock_dispatcher,
|
||||
mock_config_receiver, mock_iam_auth,
|
||||
):
|
||||
mock_get_pubsub.return_value = MagicMock()
|
||||
|
||||
gateway = make_gateway()
|
||||
gateway.running = True
|
||||
|
||||
|
||||
gateway.stop()
|
||||
|
||||
|
||||
assert gateway.running is False
|
||||
|
||||
|
||||
class TestReverseGatewayRun:
|
||||
"""Test cases for ReverseGateway run method"""
|
||||
|
||||
@patch('trustgraph.rev_gateway.service.ConfigReceiver')
|
||||
@patch('trustgraph.rev_gateway.service.MessageDispatcher')
|
||||
@patch('trustgraph.rev_gateway.service.get_pubsub')
|
||||
@patch(*MOCK_PATCHES[0:1])
|
||||
@patch(*MOCK_PATCHES[1:2])
|
||||
@patch(*MOCK_PATCHES[2:3])
|
||||
@patch(*MOCK_PATCHES[3:4])
|
||||
@pytest.mark.asyncio
|
||||
async def test_reverse_gateway_run_successful_cycle(self, mock_get_pubsub, mock_dispatcher, mock_config_receiver):
|
||||
"""Test ReverseGateway run method with successful connect/listen cycle"""
|
||||
mock_backend = MagicMock()
|
||||
mock_get_pubsub.return_value = mock_backend
|
||||
|
||||
async def test_reverse_gateway_run_successful_cycle(
|
||||
self, mock_get_pubsub, mock_dispatcher,
|
||||
mock_config_receiver, mock_iam_auth,
|
||||
):
|
||||
mock_get_pubsub.return_value = MagicMock()
|
||||
|
||||
mock_auth_instance = AsyncMock()
|
||||
mock_iam_auth.return_value = mock_auth_instance
|
||||
|
||||
mock_config_receiver_instance = AsyncMock()
|
||||
mock_config_receiver.return_value = mock_config_receiver_instance
|
||||
|
||||
gateway = ReverseGateway()
|
||||
|
||||
# Mock methods
|
||||
gateway.connect = AsyncMock(return_value=True)
|
||||
|
||||
gateway = make_gateway()
|
||||
|
||||
gateway.listen = AsyncMock()
|
||||
gateway.disconnect = AsyncMock()
|
||||
gateway.shutdown = AsyncMock()
|
||||
|
||||
# Stop after one iteration
|
||||
|
||||
call_count = 0
|
||||
async def mock_connect():
|
||||
nonlocal call_count
|
||||
|
|
@ -451,91 +488,13 @@ class TestReverseGatewayRun:
|
|||
else:
|
||||
gateway.running = False
|
||||
return False
|
||||
|
||||
|
||||
gateway.connect = mock_connect
|
||||
|
||||
|
||||
await gateway.run()
|
||||
|
||||
|
||||
mock_auth_instance.start.assert_called_once()
|
||||
mock_config_receiver_instance.start.assert_called_once()
|
||||
gateway.listen.assert_called_once()
|
||||
# disconnect is called twice: once in the main loop, once in shutdown
|
||||
assert gateway.disconnect.call_count == 2
|
||||
gateway.shutdown.assert_called_once()
|
||||
|
||||
|
||||
class TestReverseGatewayArgs:
|
||||
"""Test cases for argument parsing and run function"""
|
||||
|
||||
def test_parse_args_defaults(self):
|
||||
"""Test parse_args with default values"""
|
||||
import sys
|
||||
|
||||
# Mock sys.argv
|
||||
original_argv = sys.argv
|
||||
sys.argv = ['reverse-gateway']
|
||||
|
||||
try:
|
||||
args = parse_args()
|
||||
|
||||
assert args.websocket_uri is None
|
||||
assert args.max_workers == 10
|
||||
assert args.pulsar_host is None
|
||||
assert args.pulsar_api_key is None
|
||||
assert args.pulsar_listener is None
|
||||
finally:
|
||||
sys.argv = original_argv
|
||||
|
||||
def test_parse_args_custom_values(self):
|
||||
"""Test parse_args with custom values"""
|
||||
import sys
|
||||
|
||||
# Mock sys.argv
|
||||
original_argv = sys.argv
|
||||
sys.argv = [
|
||||
'reverse-gateway',
|
||||
'--websocket-uri', 'ws://custom:8080/ws',
|
||||
'--max-workers', '20',
|
||||
'--pulsar-host', 'pulsar://custom:6650',
|
||||
'--pulsar-api-key', 'test-key',
|
||||
'--pulsar-listener', 'test-listener'
|
||||
]
|
||||
|
||||
try:
|
||||
args = parse_args()
|
||||
|
||||
assert args.websocket_uri == 'ws://custom:8080/ws'
|
||||
assert args.max_workers == 20
|
||||
assert args.pulsar_host == 'pulsar://custom:6650'
|
||||
assert args.pulsar_api_key == 'test-key'
|
||||
assert args.pulsar_listener == 'test-listener'
|
||||
finally:
|
||||
sys.argv = original_argv
|
||||
|
||||
@patch('trustgraph.rev_gateway.service.ReverseGateway')
|
||||
@patch('asyncio.run')
|
||||
def test_run_function(self, mock_asyncio_run, mock_gateway_class):
|
||||
"""Test run function"""
|
||||
import sys
|
||||
|
||||
# Mock sys.argv
|
||||
original_argv = sys.argv
|
||||
sys.argv = ['reverse-gateway', '--max-workers', '15']
|
||||
|
||||
try:
|
||||
mock_gateway_instance = MagicMock()
|
||||
mock_gateway_instance.url = "ws://localhost:7650/out"
|
||||
mock_gateway_instance.pulsar_host = "pulsar://pulsar:6650"
|
||||
mock_gateway_class.return_value = mock_gateway_instance
|
||||
|
||||
run()
|
||||
|
||||
mock_gateway_class.assert_called_once_with(
|
||||
websocket_uri=None,
|
||||
max_workers=15,
|
||||
pulsar_host=None,
|
||||
pulsar_api_key=None,
|
||||
pulsar_listener=None
|
||||
)
|
||||
mock_asyncio_run.assert_called_once_with(mock_gateway_instance.run())
|
||||
finally:
|
||||
sys.argv = original_argv
|
||||
|
|
@ -413,8 +413,8 @@ class TestQdrantDocEmbeddingsStorage(IsolatedAsyncioTestCase):
|
|||
# Assert
|
||||
expected_collection = 'd_cache_user_cache_collection_3' # 3 dimensions
|
||||
|
||||
# Verify collection existence is checked on each write
|
||||
mock_qdrant_instance.collection_exists.assert_called_once_with(expected_collection)
|
||||
# Second write uses cached collection state — no collection_exists check
|
||||
mock_qdrant_instance.collection_exists.assert_not_called()
|
||||
|
||||
# But upsert should still be called
|
||||
mock_qdrant_instance.upsert.assert_called_once()
|
||||
|
|
|
|||
|
|
@ -125,13 +125,13 @@ class TestQdrantRowEmbeddingsStorage(IsolatedAsyncioTestCase):
|
|||
|
||||
processor = Processor(**config)
|
||||
|
||||
processor.ensure_collection("test_collection", 384)
|
||||
await processor.ensure_collection("test_collection", 384)
|
||||
|
||||
mock_qdrant_instance.collection_exists.assert_called_once_with("test_collection")
|
||||
mock_qdrant_instance.create_collection.assert_called_once()
|
||||
|
||||
# Verify the collection is cached
|
||||
assert "test_collection" in processor.created_collections
|
||||
assert "test_collection" in processor._known_collections
|
||||
|
||||
@patch('trustgraph.storage.row_embeddings.qdrant.write.QdrantClient')
|
||||
async def test_ensure_collection_skips_existing(self, mock_qdrant_client):
|
||||
|
|
@ -149,7 +149,7 @@ class TestQdrantRowEmbeddingsStorage(IsolatedAsyncioTestCase):
|
|||
|
||||
processor = Processor(**config)
|
||||
|
||||
processor.ensure_collection("existing_collection", 384)
|
||||
await processor.ensure_collection("existing_collection", 384)
|
||||
|
||||
mock_qdrant_instance.collection_exists.assert_called_once()
|
||||
mock_qdrant_instance.create_collection.assert_not_called()
|
||||
|
|
@ -168,9 +168,9 @@ class TestQdrantRowEmbeddingsStorage(IsolatedAsyncioTestCase):
|
|||
}
|
||||
|
||||
processor = Processor(**config)
|
||||
processor.created_collections.add("cached_collection")
|
||||
processor._known_collections.add("cached_collection")
|
||||
|
||||
processor.ensure_collection("cached_collection", 384)
|
||||
await processor.ensure_collection("cached_collection", 384)
|
||||
|
||||
# Should not check or create - just return
|
||||
mock_qdrant_instance.collection_exists.assert_not_called()
|
||||
|
|
@ -391,7 +391,7 @@ class TestQdrantRowEmbeddingsStorage(IsolatedAsyncioTestCase):
|
|||
}
|
||||
|
||||
processor = Processor(**config)
|
||||
processor.created_collections.add('rows_test_workspace_test_collection_schema1_384')
|
||||
processor._known_collections.add('rows_test_workspace_test_collection_schema1_384')
|
||||
|
||||
await processor.delete_collection('test_workspace', 'test_collection')
|
||||
|
||||
|
|
@ -399,7 +399,7 @@ class TestQdrantRowEmbeddingsStorage(IsolatedAsyncioTestCase):
|
|||
assert mock_qdrant_instance.delete_collection.call_count == 2
|
||||
|
||||
# Verify the cached collection was removed
|
||||
assert 'rows_test_workspace_test_collection_schema1_384' not in processor.created_collections
|
||||
assert 'rows_test_workspace_test_collection_schema1_384' not in processor._known_collections
|
||||
|
||||
@patch('trustgraph.storage.row_embeddings.qdrant.write.QdrantClient')
|
||||
async def test_delete_collection_schema(self, mock_qdrant_client):
|
||||
|
|
|
|||
|
|
@ -121,10 +121,13 @@ class TestRowsCassandraStorageLogic:
|
|||
@pytest.mark.asyncio
|
||||
async def test_schema_config_parsing(self):
|
||||
"""Test parsing of schema configurations"""
|
||||
import asyncio
|
||||
processor = MagicMock()
|
||||
processor.schemas = {}
|
||||
processor.config_key = "schema"
|
||||
processor.registered_partitions = set()
|
||||
processor._setup_lock = asyncio.Lock()
|
||||
processor._apply_schema_config = Processor._apply_schema_config.__get__(processor, Processor)
|
||||
processor.on_schema_config = Processor.on_schema_config.__get__(processor, Processor)
|
||||
|
||||
# Create test configuration
|
||||
|
|
|
|||
|
|
@ -2,6 +2,8 @@
|
|||
Tests for Cassandra triples storage service
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
|
||||
import pytest
|
||||
from unittest.mock import MagicMock, patch, AsyncMock
|
||||
|
||||
|
|
@ -24,12 +26,13 @@ class TestCassandraStorageProcessor:
|
|||
assert processor.cassandra_host == ['cassandra'] # Updated default
|
||||
assert processor.cassandra_username is None
|
||||
assert processor.cassandra_password is None
|
||||
assert processor.table is None
|
||||
assert processor._connections == {}
|
||||
assert isinstance(processor._conn_lock, asyncio.Lock)
|
||||
|
||||
def test_processor_initialization_with_custom_params(self):
|
||||
"""Test processor initialization with custom parameters (new cassandra_* names)"""
|
||||
taskgroup_mock = MagicMock()
|
||||
|
||||
|
||||
processor = Processor(
|
||||
taskgroup=taskgroup_mock,
|
||||
id='custom-storage',
|
||||
|
|
@ -37,11 +40,12 @@ class TestCassandraStorageProcessor:
|
|||
cassandra_username='testuser',
|
||||
cassandra_password='testpass'
|
||||
)
|
||||
|
||||
|
||||
assert processor.cassandra_host == ['cassandra.example.com']
|
||||
assert processor.cassandra_username == 'testuser'
|
||||
assert processor.cassandra_password == 'testpass'
|
||||
assert processor.table is None
|
||||
assert processor._connections == {}
|
||||
assert isinstance(processor._conn_lock, asyncio.Lock)
|
||||
|
||||
def test_processor_initialization_with_partial_auth(self):
|
||||
"""Test processor initialization with only username (no password)"""
|
||||
|
|
@ -92,6 +96,7 @@ class TestCassandraStorageProcessor:
|
|||
"""Test table switching logic when authentication is provided"""
|
||||
taskgroup_mock = MagicMock()
|
||||
mock_tg_instance = MagicMock()
|
||||
mock_tg_instance.async_insert = AsyncMock()
|
||||
mock_kg_class.return_value = mock_tg_instance
|
||||
|
||||
processor = Processor(
|
||||
|
|
@ -114,7 +119,6 @@ class TestCassandraStorageProcessor:
|
|||
username='testuser',
|
||||
password='testpass'
|
||||
)
|
||||
assert processor.table == 'user1'
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@patch('trustgraph.storage.triples.cassandra.write.EntityCentricKnowledgeGraph')
|
||||
|
|
@ -122,6 +126,7 @@ class TestCassandraStorageProcessor:
|
|||
"""Test table switching logic when no authentication is provided"""
|
||||
taskgroup_mock = MagicMock()
|
||||
mock_tg_instance = MagicMock()
|
||||
mock_tg_instance.async_insert = AsyncMock()
|
||||
mock_kg_class.return_value = mock_tg_instance
|
||||
|
||||
processor = Processor(taskgroup=taskgroup_mock)
|
||||
|
|
@ -138,7 +143,6 @@ class TestCassandraStorageProcessor:
|
|||
hosts=['cassandra'], # Updated default
|
||||
keyspace='user2'
|
||||
)
|
||||
assert processor.table == 'user2'
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@patch('trustgraph.storage.triples.cassandra.write.EntityCentricKnowledgeGraph')
|
||||
|
|
@ -146,6 +150,7 @@ class TestCassandraStorageProcessor:
|
|||
"""Test that TrustGraph is not recreated when table hasn't changed"""
|
||||
taskgroup_mock = MagicMock()
|
||||
mock_tg_instance = MagicMock()
|
||||
mock_tg_instance.async_insert = AsyncMock()
|
||||
mock_kg_class.return_value = mock_tg_instance
|
||||
|
||||
processor = Processor(taskgroup=taskgroup_mock)
|
||||
|
|
@ -169,6 +174,7 @@ class TestCassandraStorageProcessor:
|
|||
"""Test that triples are properly inserted into Cassandra"""
|
||||
taskgroup_mock = MagicMock()
|
||||
mock_tg_instance = MagicMock()
|
||||
mock_tg_instance.async_insert = AsyncMock()
|
||||
mock_kg_class.return_value = mock_tg_instance
|
||||
|
||||
processor = Processor(taskgroup=taskgroup_mock)
|
||||
|
|
@ -208,12 +214,12 @@ class TestCassandraStorageProcessor:
|
|||
await processor.store_triples('user1', mock_message)
|
||||
|
||||
# Verify both triples were inserted (with g=, otype=, dtype=, lang= parameters)
|
||||
assert mock_tg_instance.insert.call_count == 2
|
||||
mock_tg_instance.insert.assert_any_call(
|
||||
assert mock_tg_instance.async_insert.call_count == 2
|
||||
mock_tg_instance.async_insert.assert_any_call(
|
||||
'collection1', 'subject1', 'predicate1', 'object1',
|
||||
g=DEFAULT_GRAPH, otype='l', dtype='', lang=''
|
||||
)
|
||||
mock_tg_instance.insert.assert_any_call(
|
||||
mock_tg_instance.async_insert.assert_any_call(
|
||||
'collection1', 'subject2', 'predicate2', 'object2',
|
||||
g=DEFAULT_GRAPH, otype='l', dtype='', lang=''
|
||||
)
|
||||
|
|
@ -224,6 +230,7 @@ class TestCassandraStorageProcessor:
|
|||
"""Test behavior when message has no triples"""
|
||||
taskgroup_mock = MagicMock()
|
||||
mock_tg_instance = MagicMock()
|
||||
mock_tg_instance.async_insert = AsyncMock()
|
||||
mock_kg_class.return_value = mock_tg_instance
|
||||
|
||||
processor = Processor(taskgroup=taskgroup_mock)
|
||||
|
|
@ -236,19 +243,17 @@ class TestCassandraStorageProcessor:
|
|||
await processor.store_triples('user1', mock_message)
|
||||
|
||||
# Verify no triples were inserted
|
||||
mock_tg_instance.insert.assert_not_called()
|
||||
mock_tg_instance.async_insert.assert_not_called()
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@patch('trustgraph.storage.triples.cassandra.write.EntityCentricKnowledgeGraph')
|
||||
@patch('trustgraph.storage.triples.cassandra.write.time.sleep')
|
||||
async def test_exception_handling_with_retry(self, mock_sleep, mock_kg_class):
|
||||
async def test_exception_handling_on_connection_failure(self, mock_kg_class):
|
||||
"""Test exception handling during TrustGraph creation"""
|
||||
taskgroup_mock = MagicMock()
|
||||
mock_kg_class.side_effect = Exception("Connection failed")
|
||||
|
||||
processor = Processor(taskgroup=taskgroup_mock)
|
||||
|
||||
# Create mock message
|
||||
mock_message = MagicMock()
|
||||
mock_message.metadata.collection = 'collection1'
|
||||
mock_message.triples = []
|
||||
|
|
@ -256,9 +261,6 @@ class TestCassandraStorageProcessor:
|
|||
with pytest.raises(Exception, match="Connection failed"):
|
||||
await processor.store_triples('user1', mock_message)
|
||||
|
||||
# Verify sleep was called before re-raising
|
||||
mock_sleep.assert_called_once_with(1)
|
||||
|
||||
def test_add_args_method(self):
|
||||
"""Test that add_args properly configures argument parser"""
|
||||
from argparse import ArgumentParser
|
||||
|
|
@ -359,8 +361,6 @@ class TestCassandraStorageProcessor:
|
|||
mock_message1.triples = []
|
||||
|
||||
await processor.store_triples('user1', mock_message1)
|
||||
assert processor.table == 'user1'
|
||||
assert processor.tg == mock_tg_instance1
|
||||
|
||||
# Second message with different table
|
||||
mock_message2 = MagicMock()
|
||||
|
|
@ -368,11 +368,11 @@ class TestCassandraStorageProcessor:
|
|||
mock_message2.triples = []
|
||||
|
||||
await processor.store_triples('user2', mock_message2)
|
||||
assert processor.table == 'user2'
|
||||
assert processor.tg == mock_tg_instance2
|
||||
|
||||
# Verify TrustGraph was created twice for different tables
|
||||
# Verify TrustGraph was created twice for different workspaces
|
||||
assert mock_kg_class.call_count == 2
|
||||
mock_kg_class.assert_any_call(hosts=['cassandra'], keyspace='user1')
|
||||
mock_kg_class.assert_any_call(hosts=['cassandra'], keyspace='user2')
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@patch('trustgraph.storage.triples.cassandra.write.EntityCentricKnowledgeGraph')
|
||||
|
|
@ -380,6 +380,7 @@ class TestCassandraStorageProcessor:
|
|||
"""Test storing triples with special characters and unicode"""
|
||||
taskgroup_mock = MagicMock()
|
||||
mock_tg_instance = MagicMock()
|
||||
mock_tg_instance.async_insert = AsyncMock()
|
||||
mock_kg_class.return_value = mock_tg_instance
|
||||
|
||||
processor = Processor(taskgroup=taskgroup_mock)
|
||||
|
|
@ -405,7 +406,7 @@ class TestCassandraStorageProcessor:
|
|||
await processor.store_triples('test_workspace', mock_message)
|
||||
|
||||
# Verify the triple was inserted with special characters preserved
|
||||
mock_tg_instance.insert.assert_called_once_with(
|
||||
mock_tg_instance.async_insert.assert_called_once_with(
|
||||
'test_collection',
|
||||
'subject with spaces & symbols',
|
||||
'predicate:with/colons',
|
||||
|
|
@ -418,29 +419,29 @@ class TestCassandraStorageProcessor:
|
|||
|
||||
@pytest.mark.asyncio
|
||||
@patch('trustgraph.storage.triples.cassandra.write.EntityCentricKnowledgeGraph')
|
||||
async def test_store_triples_preserves_old_table_on_exception(self, mock_kg_class):
|
||||
"""Test that table remains unchanged when TrustGraph creation fails"""
|
||||
async def test_connection_failure_does_not_cache_stale_state(self, mock_kg_class):
|
||||
"""Test that a failed connection doesn't leave stale cached state"""
|
||||
taskgroup_mock = MagicMock()
|
||||
mock_good_instance = MagicMock()
|
||||
|
||||
processor = Processor(taskgroup=taskgroup_mock)
|
||||
|
||||
# Set an initial table
|
||||
processor.table = ('old_user', 'old_collection')
|
||||
|
||||
# Mock TrustGraph to raise exception
|
||||
mock_kg_class.side_effect = Exception("Connection failed")
|
||||
|
||||
mock_message = MagicMock()
|
||||
mock_message.metadata.collection = 'new_collection'
|
||||
mock_message.metadata.collection = 'collection1'
|
||||
mock_message.triples = []
|
||||
|
||||
# First call fails
|
||||
mock_kg_class.side_effect = Exception("Connection failed")
|
||||
with pytest.raises(Exception, match="Connection failed"):
|
||||
await processor.store_triples('new_user', mock_message)
|
||||
await processor.store_triples('user1', mock_message)
|
||||
|
||||
# Table should remain unchanged since self.table = table happens after try/except
|
||||
assert processor.table == ('old_user', 'old_collection')
|
||||
# TrustGraph should be set to None though
|
||||
assert processor.tg is None
|
||||
# Second call succeeds — should retry connection, not use stale state
|
||||
mock_kg_class.side_effect = None
|
||||
mock_kg_class.return_value = mock_good_instance
|
||||
await processor.store_triples('user1', mock_message)
|
||||
|
||||
# Connection was attempted twice (failed + succeeded)
|
||||
assert mock_kg_class.call_count == 2
|
||||
|
||||
|
||||
class TestCassandraPerformanceOptimizations:
|
||||
|
|
@ -452,6 +453,7 @@ class TestCassandraPerformanceOptimizations:
|
|||
"""Test that legacy mode still works with single table"""
|
||||
taskgroup_mock = MagicMock()
|
||||
mock_tg_instance = MagicMock()
|
||||
mock_tg_instance.async_insert = AsyncMock()
|
||||
mock_kg_class.return_value = mock_tg_instance
|
||||
|
||||
with patch.dict('os.environ', {'CASSANDRA_USE_LEGACY': 'true'}):
|
||||
|
|
@ -472,6 +474,7 @@ class TestCassandraPerformanceOptimizations:
|
|||
"""Test that optimized mode uses multi-table schema"""
|
||||
taskgroup_mock = MagicMock()
|
||||
mock_tg_instance = MagicMock()
|
||||
mock_tg_instance.async_insert = AsyncMock()
|
||||
mock_kg_class.return_value = mock_tg_instance
|
||||
|
||||
with patch.dict('os.environ', {'CASSANDRA_USE_LEGACY': 'false'}):
|
||||
|
|
@ -492,6 +495,7 @@ class TestCassandraPerformanceOptimizations:
|
|||
"""Test that all tables stay consistent during batch writes"""
|
||||
taskgroup_mock = MagicMock()
|
||||
mock_tg_instance = MagicMock()
|
||||
mock_tg_instance.async_insert = AsyncMock()
|
||||
mock_kg_class.return_value = mock_tg_instance
|
||||
|
||||
processor = Processor(taskgroup=taskgroup_mock)
|
||||
|
|
@ -517,7 +521,7 @@ class TestCassandraPerformanceOptimizations:
|
|||
await processor.store_triples('user1', mock_message)
|
||||
|
||||
# Verify insert was called for the triple (implementation details tested in KnowledgeGraph)
|
||||
mock_tg_instance.insert.assert_called_once_with(
|
||||
mock_tg_instance.async_insert.assert_called_once_with(
|
||||
'collection1', 'test_subject', 'test_predicate', 'test_object',
|
||||
g=DEFAULT_GRAPH, otype='l', dtype='', lang=''
|
||||
)
|
||||
|
|
|
|||
|
|
@ -89,7 +89,8 @@ class TestSanitizeName:
|
|||
|
||||
class TestFindCollection:
|
||||
|
||||
def test_finds_matching_collection(self):
|
||||
@pytest.mark.asyncio
|
||||
async def test_finds_matching_collection(self):
|
||||
proc = _make_processor()
|
||||
mock_coll = MagicMock()
|
||||
mock_coll.name = "rows_test_workspace_test_col_customers_384"
|
||||
|
|
@ -98,11 +99,12 @@ class TestFindCollection:
|
|||
mock_collections.collections = [mock_coll]
|
||||
proc.qdrant.get_collections.return_value = mock_collections
|
||||
|
||||
result = proc.find_collection("test-workspace", "test-col", "customers")
|
||||
result = await proc.find_collection("test-workspace", "test-col", "customers")
|
||||
|
||||
assert result == "rows_test_workspace_test_col_customers_384"
|
||||
|
||||
def test_returns_none_when_no_match(self):
|
||||
@pytest.mark.asyncio
|
||||
async def test_returns_none_when_no_match(self):
|
||||
proc = _make_processor()
|
||||
mock_coll = MagicMock()
|
||||
mock_coll.name = "rows_other_workspace_other_col_schema_768"
|
||||
|
|
@ -111,14 +113,15 @@ class TestFindCollection:
|
|||
mock_collections.collections = [mock_coll]
|
||||
proc.qdrant.get_collections.return_value = mock_collections
|
||||
|
||||
result = proc.find_collection("test-workspace", "test-col", "customers")
|
||||
result = await proc.find_collection("test-workspace", "test-col", "customers")
|
||||
assert result is None
|
||||
|
||||
def test_returns_none_on_error(self):
|
||||
@pytest.mark.asyncio
|
||||
async def test_returns_none_on_error(self):
|
||||
proc = _make_processor()
|
||||
proc.qdrant.get_collections.side_effect = Exception("connection error")
|
||||
|
||||
result = proc.find_collection("workspace", "col", "schema")
|
||||
result = await proc.find_collection("workspace", "col", "schema")
|
||||
assert result is None
|
||||
|
||||
|
||||
|
|
@ -139,7 +142,7 @@ class TestQueryRowEmbeddings:
|
|||
@pytest.mark.asyncio
|
||||
async def test_no_collection_returns_empty(self):
|
||||
proc = _make_processor()
|
||||
proc.find_collection = MagicMock(return_value=None)
|
||||
proc.find_collection = AsyncMock(return_value=None)
|
||||
request = _make_request()
|
||||
|
||||
result = await proc.query_row_embeddings("test-workspace", request)
|
||||
|
|
@ -148,7 +151,7 @@ class TestQueryRowEmbeddings:
|
|||
@pytest.mark.asyncio
|
||||
async def test_successful_query_returns_matches(self):
|
||||
proc = _make_processor()
|
||||
proc.find_collection = MagicMock(return_value="rows_w_c_s_384")
|
||||
proc.find_collection = AsyncMock(return_value="rows_w_c_s_384")
|
||||
|
||||
points = [
|
||||
_make_search_point("name", ["Alice Smith"], "Alice Smith", 0.95),
|
||||
|
|
@ -172,7 +175,7 @@ class TestQueryRowEmbeddings:
|
|||
async def test_index_name_filter_applied(self):
|
||||
"""When index_name is specified, a Qdrant filter should be used."""
|
||||
proc = _make_processor()
|
||||
proc.find_collection = MagicMock(return_value="rows_w_c_s_384")
|
||||
proc.find_collection = AsyncMock(return_value="rows_w_c_s_384")
|
||||
|
||||
mock_result = MagicMock()
|
||||
mock_result.points = []
|
||||
|
|
@ -188,7 +191,7 @@ class TestQueryRowEmbeddings:
|
|||
async def test_no_index_name_no_filter(self):
|
||||
"""When index_name is empty, no filter should be applied."""
|
||||
proc = _make_processor()
|
||||
proc.find_collection = MagicMock(return_value="rows_w_c_s_384")
|
||||
proc.find_collection = AsyncMock(return_value="rows_w_c_s_384")
|
||||
|
||||
mock_result = MagicMock()
|
||||
mock_result.points = []
|
||||
|
|
@ -204,7 +207,7 @@ class TestQueryRowEmbeddings:
|
|||
async def test_missing_payload_fields_default(self):
|
||||
"""Points with missing payload fields should use defaults."""
|
||||
proc = _make_processor()
|
||||
proc.find_collection = MagicMock(return_value="rows_w_c_s_384")
|
||||
proc.find_collection = AsyncMock(return_value="rows_w_c_s_384")
|
||||
|
||||
point = MagicMock()
|
||||
point.payload = {} # Empty payload
|
||||
|
|
@ -225,7 +228,7 @@ class TestQueryRowEmbeddings:
|
|||
@pytest.mark.asyncio
|
||||
async def test_qdrant_error_propagates(self):
|
||||
proc = _make_processor()
|
||||
proc.find_collection = MagicMock(return_value="rows_w_c_s_384")
|
||||
proc.find_collection = AsyncMock(return_value="rows_w_c_s_384")
|
||||
proc.qdrant.query_points.side_effect = Exception("qdrant down")
|
||||
|
||||
request = _make_request()
|
||||
|
|
|
|||
|
|
@ -62,12 +62,6 @@ class AsyncSocketClient:
|
|||
if self._connected:
|
||||
return
|
||||
|
||||
if not self.token:
|
||||
raise ProtocolException(
|
||||
"AsyncSocketClient requires a token for first-frame "
|
||||
"auth against /api/v1/socket"
|
||||
)
|
||||
|
||||
ws_url = self._build_ws_url()
|
||||
self._connect_cm = websockets.connect(
|
||||
ws_url, ping_interval=20, ping_timeout=self.timeout
|
||||
|
|
@ -79,7 +73,7 @@ class AsyncSocketClient:
|
|||
# reader task so the response isn't consumed by the reader's
|
||||
# id-based routing.
|
||||
await self._socket.send(json.dumps({
|
||||
"type": "auth", "token": self.token,
|
||||
"type": "auth", "token": self.token or "",
|
||||
}))
|
||||
try:
|
||||
raw = await asyncio.wait_for(
|
||||
|
|
|
|||
|
|
@ -132,3 +132,34 @@ class Knowledge:
|
|||
|
||||
self.request(request = input)
|
||||
|
||||
def list_de_cores(self):
|
||||
|
||||
input = {
|
||||
"operation": "list-de-cores",
|
||||
"workspace": self.api.workspace,
|
||||
}
|
||||
|
||||
return self.request(request = input)["ids"]
|
||||
|
||||
def delete_de_core(self, id):
|
||||
|
||||
input = {
|
||||
"operation": "delete-de-core",
|
||||
"workspace": self.api.workspace,
|
||||
"id": id,
|
||||
}
|
||||
|
||||
self.request(request = input)
|
||||
|
||||
def load_de_core(self, id, flow="default", collection="default"):
|
||||
|
||||
input = {
|
||||
"operation": "load-de-core",
|
||||
"workspace": self.api.workspace,
|
||||
"id": id,
|
||||
"flow": flow,
|
||||
"collection": collection,
|
||||
}
|
||||
|
||||
self.request(request = input)
|
||||
|
||||
|
|
|
|||
|
|
@ -365,7 +365,7 @@ class Library:
|
|||
id = v["id"],
|
||||
time = datetime.datetime.fromtimestamp(v["time"]),
|
||||
kind = v["kind"],
|
||||
title = v["title"],
|
||||
title = v.get("title", ""),
|
||||
comments = v.get("comments", ""),
|
||||
metadata = [
|
||||
Triple(
|
||||
|
|
@ -482,14 +482,15 @@ class Library:
|
|||
"workspace": self.api.workspace,
|
||||
"document-metadata": {
|
||||
"document-id": id,
|
||||
"time": metadata.time,
|
||||
"id": id,
|
||||
"time": int(metadata.time.timestamp()) if hasattr(metadata.time, 'timestamp') else metadata.time,
|
||||
"title": metadata.title,
|
||||
"comments": metadata.comments,
|
||||
"metadata": [
|
||||
{
|
||||
"s": from_value(t["s"]),
|
||||
"p": from_value(t["p"]),
|
||||
"o": from_value(t["o"]),
|
||||
"s": from_value(t.s),
|
||||
"p": from_value(t.p),
|
||||
"o": from_value(t.o),
|
||||
}
|
||||
for t in metadata.metadata
|
||||
],
|
||||
|
|
@ -498,14 +499,17 @@ class Library:
|
|||
}
|
||||
|
||||
object = self.request(input)
|
||||
doc = object["document-metadata"]
|
||||
doc = object.get("document-metadata") if isinstance(object, dict) else None
|
||||
|
||||
if not doc:
|
||||
return metadata
|
||||
|
||||
try:
|
||||
DocumentMetadata(
|
||||
return DocumentMetadata(
|
||||
id = doc["id"],
|
||||
time = datetime.datetime.fromtimestamp(doc["time"]),
|
||||
kind = doc["kind"],
|
||||
title = doc["title"],
|
||||
title = doc.get("title", ""),
|
||||
comments = doc.get("comments", ""),
|
||||
metadata = [
|
||||
Triple(
|
||||
|
|
@ -513,10 +517,11 @@ class Library:
|
|||
p = to_value(w["p"]),
|
||||
o = to_value(w["o"])
|
||||
)
|
||||
for w in doc["metadata"]
|
||||
for w in doc.get("metadata", [])
|
||||
],
|
||||
workspace = doc.get("workspace", ""),
|
||||
tags = doc["tags"]
|
||||
tags = doc.get("tags", []),
|
||||
parent_id = doc.get("parent-id", ""),
|
||||
document_type = doc.get("document-type", "source"),
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error("Failed to parse document update response", exc_info=True)
|
||||
|
|
|
|||
|
|
@ -11,6 +11,7 @@ multiplexes requests by ID.
|
|||
import json
|
||||
import asyncio
|
||||
import websockets
|
||||
from websockets.exceptions import ConnectionClosed
|
||||
from typing import Optional, Dict, Any, Iterator, Union, List
|
||||
from threading import Lock
|
||||
|
||||
|
|
@ -137,12 +138,6 @@ class SocketClient:
|
|||
if self._connected:
|
||||
return
|
||||
|
||||
if not self.token:
|
||||
raise ProtocolException(
|
||||
"SocketClient requires a token for first-frame auth "
|
||||
"against /api/v1/socket"
|
||||
)
|
||||
|
||||
ws_url = self._build_ws_url()
|
||||
self._connect_cm = websockets.connect(
|
||||
ws_url, ping_interval=20, ping_timeout=self.timeout
|
||||
|
|
@ -153,7 +148,7 @@ class SocketClient:
|
|||
# auth-ok / auth-failed response isn't consumed by the reader
|
||||
# loop's id-based routing.
|
||||
await self._socket.send(json.dumps({
|
||||
"type": "auth", "token": self.token,
|
||||
"type": "auth", "token": self.token or "",
|
||||
}))
|
||||
try:
|
||||
raw = await asyncio.wait_for(
|
||||
|
|
@ -197,13 +192,13 @@ class SocketClient:
|
|||
if request_id and request_id in self._pending:
|
||||
await self._pending[request_id].put(response)
|
||||
|
||||
except websockets.exceptions.ConnectionClosed:
|
||||
except ConnectionClosed:
|
||||
pass
|
||||
except Exception as e:
|
||||
for queue in self._pending.values():
|
||||
try:
|
||||
await queue.put({"error": str(e)})
|
||||
except:
|
||||
except Exception:
|
||||
pass
|
||||
finally:
|
||||
self._connected = False
|
||||
|
|
@ -256,7 +251,7 @@ class SocketClient:
|
|||
finally:
|
||||
try:
|
||||
loop.run_until_complete(async_gen.aclose())
|
||||
except:
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
def _streaming_generator_raw(
|
||||
|
|
@ -279,7 +274,7 @@ class SocketClient:
|
|||
finally:
|
||||
try:
|
||||
loop.run_until_complete(async_gen.aclose())
|
||||
except:
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
async def _send_request_async_streaming_raw(
|
||||
|
|
@ -491,12 +486,64 @@ class SocketClient:
|
|||
triples=raw_triples,
|
||||
)
|
||||
|
||||
def get_kg_core(self, id: str) -> Iterator[Dict[str, Any]]:
|
||||
request = {
|
||||
"operation": "get-kg-core",
|
||||
"workspace": self.workspace,
|
||||
"id": id,
|
||||
}
|
||||
for response in self._send_request_sync(
|
||||
"knowledge", None, request, streaming_raw=True,
|
||||
):
|
||||
if response.get("eos"):
|
||||
break
|
||||
yield response
|
||||
|
||||
def put_kg_core(
|
||||
self, id: str, triples=None, graph_embeddings=None,
|
||||
) -> Dict[str, Any]:
|
||||
request = {
|
||||
"operation": "put-kg-core",
|
||||
"workspace": self.workspace,
|
||||
"id": id,
|
||||
}
|
||||
if triples is not None:
|
||||
request["triples"] = triples
|
||||
if graph_embeddings is not None:
|
||||
request["graph-embeddings"] = graph_embeddings
|
||||
return self._send_request_sync("knowledge", None, request)
|
||||
|
||||
def get_de_core(self, id: str) -> Iterator[Dict[str, Any]]:
|
||||
request = {
|
||||
"operation": "get-de-core",
|
||||
"workspace": self.workspace,
|
||||
"id": id,
|
||||
}
|
||||
for response in self._send_request_sync(
|
||||
"knowledge", None, request, streaming_raw=True,
|
||||
):
|
||||
if response.get("eos"):
|
||||
break
|
||||
yield response
|
||||
|
||||
def put_de_core(
|
||||
self, id: str, document_embeddings=None,
|
||||
) -> Dict[str, Any]:
|
||||
request = {
|
||||
"operation": "put-de-core",
|
||||
"workspace": self.workspace,
|
||||
"id": id,
|
||||
}
|
||||
if document_embeddings is not None:
|
||||
request["document-embeddings"] = document_embeddings
|
||||
return self._send_request_sync("knowledge", None, request)
|
||||
|
||||
def close(self) -> None:
|
||||
"""Close the persistent WebSocket connection."""
|
||||
if self._loop and not self._loop.is_closed():
|
||||
try:
|
||||
self._loop.run_until_complete(self._close_async())
|
||||
except:
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
async def _close_async(self):
|
||||
|
|
|
|||
|
|
@ -76,8 +76,10 @@ class Consumer:
|
|||
|
||||
if hasattr(self, "consumer"):
|
||||
if self.consumer:
|
||||
self.consumer.unsubscribe()
|
||||
self.consumer.close()
|
||||
try:
|
||||
self.consumer.close()
|
||||
except Exception:
|
||||
pass
|
||||
self.consumer = None
|
||||
|
||||
async def stop(self):
|
||||
|
|
@ -157,12 +159,14 @@ class Consumer:
|
|||
except Exception as e:
|
||||
|
||||
logger.error(f"Consumer loop exception: {e}", exc_info=True)
|
||||
for c in consumers:
|
||||
for i, c in enumerate(consumers):
|
||||
try:
|
||||
c.unsubscribe()
|
||||
c.close()
|
||||
except Exception:
|
||||
pass
|
||||
except Exception as ce:
|
||||
logger.warning(
|
||||
f"Consumer {i} close failed (error path): "
|
||||
f"{type(ce).__name__}: {ce}"
|
||||
)
|
||||
for ex in executors:
|
||||
ex.shutdown(wait=False)
|
||||
consumers = []
|
||||
|
|
@ -171,12 +175,14 @@ class Consumer:
|
|||
continue
|
||||
|
||||
finally:
|
||||
for c in consumers:
|
||||
for i, c in enumerate(consumers):
|
||||
try:
|
||||
c.unsubscribe()
|
||||
c.close()
|
||||
except Exception:
|
||||
pass
|
||||
except Exception as ce:
|
||||
logger.warning(
|
||||
f"Consumer {i} close failed: "
|
||||
f"{type(ce).__name__}: {ce}"
|
||||
)
|
||||
for ex in executors:
|
||||
ex.shutdown(wait=False)
|
||||
|
||||
|
|
@ -188,7 +194,7 @@ class Consumer:
|
|||
try:
|
||||
msg = await loop.run_in_executor(
|
||||
executor,
|
||||
lambda: consumer.receive(timeout_millis=100),
|
||||
lambda: consumer.receive(timeout_millis=2000),
|
||||
)
|
||||
except Exception as e:
|
||||
# Handle timeout from any backend
|
||||
|
|
|
|||
|
|
@ -34,6 +34,8 @@ class Flow:
|
|||
async def stop(self):
|
||||
for c in self.consumer.values():
|
||||
await c.stop()
|
||||
for p in self.producer.values():
|
||||
await p.stop()
|
||||
if self.librarian:
|
||||
await self.librarian.stop()
|
||||
|
||||
|
|
|
|||
|
|
@ -62,6 +62,22 @@ class IamClient(RequestResponse):
|
|||
)
|
||||
return resp.user
|
||||
|
||||
async def authenticate_anonymous(self, timeout=IAM_TIMEOUT):
|
||||
"""Request anonymous access from the IAM regime.
|
||||
|
||||
Returns ``(user_id, workspace, roles)`` if the regime permits
|
||||
anonymous access, or raises ``RuntimeError`` with error type
|
||||
``auth-failed`` if it does not."""
|
||||
resp = await self._request(
|
||||
operation="authenticate-anonymous",
|
||||
timeout=timeout,
|
||||
)
|
||||
return (
|
||||
resp.resolved_user_id,
|
||||
resp.resolved_workspace,
|
||||
list(resp.resolved_roles),
|
||||
)
|
||||
|
||||
async def resolve_api_key(self, api_key, timeout=IAM_TIMEOUT):
|
||||
"""Resolve a plaintext API key to its identity triple.
|
||||
|
||||
|
|
|
|||
|
|
@ -34,6 +34,9 @@ class Producer:
|
|||
|
||||
async def stop(self):
|
||||
self.running = False
|
||||
if self.producer:
|
||||
self.producer.close()
|
||||
self.producer = None
|
||||
|
||||
async def send(self, msg, properties={}):
|
||||
|
||||
|
|
|
|||
|
|
@ -10,6 +10,7 @@ logger = logging.getLogger(__name__)
|
|||
# Default connection settings from environment
|
||||
DEFAULT_PULSAR_HOST = os.getenv("PULSAR_HOST", 'pulsar://pulsar:6650')
|
||||
DEFAULT_PULSAR_API_KEY = os.getenv("PULSAR_API_KEY", None)
|
||||
DEFAULT_PULSAR_ADMIN_URL = os.getenv("PULSAR_ADMIN_URL", 'http://pulsar:8080')
|
||||
|
||||
DEFAULT_RABBITMQ_HOST = os.getenv("RABBITMQ_HOST", 'rabbitmq')
|
||||
DEFAULT_RABBITMQ_PORT = int(os.getenv("RABBITMQ_PORT", '5672'))
|
||||
|
|
@ -43,6 +44,7 @@ def get_pubsub(**config: Any) -> Any:
|
|||
host=config.get('pulsar_host', DEFAULT_PULSAR_HOST),
|
||||
api_key=config.get('pulsar_api_key', DEFAULT_PULSAR_API_KEY),
|
||||
listener=config.get('pulsar_listener'),
|
||||
admin_url=config.get('pulsar_admin_url', DEFAULT_PULSAR_ADMIN_URL),
|
||||
)
|
||||
elif backend_type == 'rabbitmq':
|
||||
from .rabbitmq_backend import RabbitMQBackend
|
||||
|
|
@ -77,6 +79,7 @@ def get_pubsub(**config: Any) -> Any:
|
|||
|
||||
|
||||
STANDALONE_PULSAR_HOST = 'pulsar://localhost:6650'
|
||||
STANDALONE_PULSAR_ADMIN_URL = 'http://localhost:8080'
|
||||
|
||||
|
||||
def add_pubsub_args(parser: ArgumentParser, standalone: bool = False) -> None:
|
||||
|
|
@ -88,6 +91,7 @@ def add_pubsub_args(parser: ArgumentParser, standalone: bool = False) -> None:
|
|||
that run outside containers)
|
||||
"""
|
||||
pulsar_host = STANDALONE_PULSAR_HOST if standalone else DEFAULT_PULSAR_HOST
|
||||
pulsar_admin_url = STANDALONE_PULSAR_ADMIN_URL if standalone else DEFAULT_PULSAR_ADMIN_URL
|
||||
pulsar_listener = 'localhost' if standalone else None
|
||||
rabbitmq_host = 'localhost' if standalone else DEFAULT_RABBITMQ_HOST
|
||||
kafka_bootstrap = 'localhost:9092' if standalone else DEFAULT_KAFKA_BOOTSTRAP
|
||||
|
|
@ -105,6 +109,12 @@ def add_pubsub_args(parser: ArgumentParser, standalone: bool = False) -> None:
|
|||
help=f'Pulsar host (default: {pulsar_host})',
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
'--pulsar-admin-url',
|
||||
default=pulsar_admin_url,
|
||||
help=f'Pulsar admin REST API URL (default: {pulsar_admin_url})',
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
'--pulsar-api-key',
|
||||
default=DEFAULT_PULSAR_API_KEY,
|
||||
|
|
|
|||
|
|
@ -7,8 +7,12 @@ handling topic mapping, serialization, and Pulsar client management.
|
|||
|
||||
import pulsar
|
||||
import _pulsar
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
import urllib.request
|
||||
import urllib.error
|
||||
import urllib.parse
|
||||
from typing import Any
|
||||
|
||||
from .backend import PubSubBackend, BackendProducer, BackendConsumer, Message
|
||||
|
|
@ -117,7 +121,10 @@ class PulsarBackend:
|
|||
producers and consumers.
|
||||
"""
|
||||
|
||||
def __init__(self, host: str, api_key: str = None, listener: str = None):
|
||||
def __init__(
|
||||
self, host: str, api_key: str = None, listener: str = None,
|
||||
admin_url: str = None,
|
||||
):
|
||||
"""
|
||||
Initialize Pulsar backend.
|
||||
|
||||
|
|
@ -125,10 +132,12 @@ class PulsarBackend:
|
|||
host: Pulsar broker URL (e.g., pulsar://localhost:6650)
|
||||
api_key: Optional API key for authentication
|
||||
listener: Optional listener name for multi-homed setups
|
||||
admin_url: Pulsar admin REST API URL (e.g., http://pulsar:8080)
|
||||
"""
|
||||
self.host = host
|
||||
self.api_key = api_key
|
||||
self.listener = listener
|
||||
self.admin_url = admin_url
|
||||
|
||||
# Create Pulsar client
|
||||
client_args = {'service_url': host}
|
||||
|
|
@ -139,6 +148,10 @@ class PulsarBackend:
|
|||
if api_key:
|
||||
client_args['authentication'] = pulsar.AuthenticationToken(api_key)
|
||||
|
||||
client_args['logger'] = pulsar.ConsoleLogger(
|
||||
_pulsar.LoggerLevel.Error
|
||||
)
|
||||
|
||||
self.client = pulsar.Client(**client_args)
|
||||
logger.info(f"Pulsar client connected to {host}")
|
||||
|
||||
|
|
@ -266,24 +279,129 @@ class PulsarBackend:
|
|||
|
||||
return PulsarBackendConsumer(pulsar_consumer, schema)
|
||||
|
||||
def _admin_api_path(self, pulsar_uri: str) -> str:
|
||||
"""
|
||||
Convert a Pulsar topic URI to an admin REST API path.
|
||||
|
||||
persistent://tg/flow/triples-store:default:explain-flow
|
||||
-> /admin/v2/persistent/tg/flow/triples-store%3Adefault%3Aexplain-flow
|
||||
"""
|
||||
scheme, rest = pulsar_uri.split('://', 1)
|
||||
tenant, namespace, topic = rest.split('/', 2)
|
||||
encoded_topic = urllib.parse.quote(topic, safe='')
|
||||
return f"/admin/v2/{scheme}/{tenant}/{namespace}/{encoded_topic}"
|
||||
|
||||
def _admin_request(self, method, path):
|
||||
"""
|
||||
Make a synchronous admin REST API request.
|
||||
|
||||
Returns parsed JSON for GET, None for DELETE/PUT.
|
||||
Raises urllib.error.HTTPError for non-404 errors.
|
||||
404 is treated as success (idempotent deletion).
|
||||
"""
|
||||
url = f"{self.admin_url}{path}"
|
||||
req = urllib.request.Request(url, method=method)
|
||||
|
||||
try:
|
||||
with urllib.request.urlopen(req) as resp:
|
||||
if method == 'GET':
|
||||
return json.loads(resp.read().decode('utf-8'))
|
||||
return None
|
||||
except urllib.error.HTTPError as e:
|
||||
if e.code == 404:
|
||||
return None
|
||||
raise
|
||||
|
||||
def _delete_topic_sync(self, topic: str):
|
||||
"""
|
||||
Delete a persistent topic and all its subscriptions.
|
||||
|
||||
Subscriptions must be removed first — Pulsar rejects topic
|
||||
deletion while subscriptions exist. Force-deletes each
|
||||
subscription to disconnect any lingering consumers.
|
||||
"""
|
||||
pulsar_uri = self.map_topic(topic)
|
||||
|
||||
if pulsar_uri.startswith('non-persistent://'):
|
||||
return
|
||||
|
||||
api_path = self._admin_api_path(pulsar_uri)
|
||||
|
||||
try:
|
||||
subs = self._admin_request('GET', f"{api_path}/subscriptions")
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to list subscriptions for {topic}: {e}")
|
||||
return
|
||||
|
||||
if subs:
|
||||
for sub in subs:
|
||||
encoded_sub = urllib.parse.quote(sub, safe='')
|
||||
try:
|
||||
self._admin_request(
|
||||
'DELETE',
|
||||
f"{api_path}/subscription/{encoded_sub}"
|
||||
f"?force=true"
|
||||
)
|
||||
logger.info(
|
||||
f"Deleted subscription {sub} from {topic}"
|
||||
)
|
||||
except Exception as e:
|
||||
logger.warning(
|
||||
f"Failed to delete subscription {sub} "
|
||||
f"from {topic}: {e}"
|
||||
)
|
||||
|
||||
try:
|
||||
self._admin_request('DELETE', api_path)
|
||||
logger.info(f"Deleted topic: {topic}")
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to delete topic {topic}: {e}")
|
||||
|
||||
def _topic_exists_sync(self, topic: str) -> bool:
|
||||
"""Check topic existence via admin API."""
|
||||
pulsar_uri = self.map_topic(topic)
|
||||
|
||||
if pulsar_uri.startswith('non-persistent://'):
|
||||
return False
|
||||
|
||||
api_path = self._admin_api_path(pulsar_uri)
|
||||
|
||||
try:
|
||||
result = self._admin_request('GET', f"{api_path}/stats")
|
||||
return result is not None
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
async def create_topic(self, topic: str) -> None:
|
||||
"""No-op — Pulsar auto-creates topics on first use.
|
||||
TODO: Use admin REST API for explicit persistent topic creation."""
|
||||
"""No-op — Pulsar auto-creates topics on first use."""
|
||||
pass
|
||||
|
||||
async def delete_topic(self, topic: str) -> None:
|
||||
"""No-op — to be replaced with admin REST API calls.
|
||||
TODO: Delete persistent topic via admin API."""
|
||||
pass
|
||||
"""
|
||||
Delete a persistent topic and all its subscriptions via
|
||||
the admin REST API.
|
||||
|
||||
Called by the flow controller during deliberate flow deletion.
|
||||
Non-persistent topics are skipped. Idempotent.
|
||||
"""
|
||||
if not self.admin_url:
|
||||
logger.warning(
|
||||
f"Cannot delete topic {topic}: "
|
||||
f"no admin URL configured"
|
||||
)
|
||||
return
|
||||
|
||||
await asyncio.to_thread(self._delete_topic_sync, topic)
|
||||
|
||||
async def topic_exists(self, topic: str) -> bool:
|
||||
"""Returns True — Pulsar auto-creates on subscribe.
|
||||
TODO: Use admin REST API for actual existence check."""
|
||||
return True
|
||||
"""Check whether a persistent topic exists via the admin API."""
|
||||
if not self.admin_url:
|
||||
return True
|
||||
|
||||
return await asyncio.to_thread(self._topic_exists_sync, topic)
|
||||
|
||||
async def ensure_topic(self, topic: str) -> None:
|
||||
"""No-op — Pulsar auto-creates topics on first use.
|
||||
TODO: Use admin REST API for explicit creation."""
|
||||
"""No-op — Pulsar auto-creates topics on first use."""
|
||||
pass
|
||||
|
||||
def close(self) -> None:
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
from typing import Any
|
||||
|
||||
from . request_response_spec import RequestResponse, RequestResponseSpec
|
||||
|
|
@ -44,6 +45,60 @@ def from_value(x: Any) -> Any:
|
|||
return Term(type=LITERAL, value=str(x))
|
||||
|
||||
class TriplesClient(RequestResponse):
|
||||
|
||||
async def query_gen(self, s=None, p=None, o=None, limit=20,
|
||||
collection="default",
|
||||
batch_size=20, timeout=30, g=None):
|
||||
"""Async generator yielding Triple objects as batches arrive."""
|
||||
queue = asyncio.Queue()
|
||||
done = False
|
||||
|
||||
async def recipient(resp):
|
||||
if resp.error:
|
||||
raise RuntimeError(resp.error.message)
|
||||
|
||||
batch = [
|
||||
Triple(to_value(v.s), to_value(v.p), to_value(v.o))
|
||||
for v in resp.triples
|
||||
]
|
||||
await queue.put(batch)
|
||||
|
||||
if resp.is_final:
|
||||
await queue.put(None)
|
||||
|
||||
return resp.is_final
|
||||
|
||||
# Launch the streaming request as a background task
|
||||
task = asyncio.ensure_future(self.request(
|
||||
TriplesQueryRequest(
|
||||
s=from_value(s),
|
||||
p=from_value(p),
|
||||
o=from_value(o),
|
||||
limit=limit,
|
||||
collection=collection,
|
||||
streaming=True,
|
||||
batch_size=batch_size,
|
||||
g=g,
|
||||
),
|
||||
timeout=timeout,
|
||||
recipient=recipient,
|
||||
))
|
||||
|
||||
try:
|
||||
while True:
|
||||
batch = await queue.get()
|
||||
if batch is None:
|
||||
break
|
||||
for triple in batch:
|
||||
yield triple
|
||||
finally:
|
||||
if not task.done():
|
||||
task.cancel()
|
||||
try:
|
||||
await task
|
||||
except (asyncio.CancelledError, Exception):
|
||||
pass
|
||||
|
||||
async def query(self, s=None, p=None, o=None, limit=20,
|
||||
collection="default",
|
||||
timeout=30, g=None):
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
from typing import Dict, Any, Tuple, Optional
|
||||
from ...schema import (
|
||||
KnowledgeRequest, KnowledgeResponse, Triples, GraphEmbeddings,
|
||||
DocumentEmbeddings, ChunkEmbeddings,
|
||||
Metadata, EntityEmbeddings
|
||||
)
|
||||
from .base import MessageTranslator
|
||||
|
|
@ -43,6 +44,23 @@ class KnowledgeRequestTranslator(MessageTranslator):
|
|||
]
|
||||
)
|
||||
|
||||
document_embeddings = None
|
||||
if "document-embeddings" in data:
|
||||
document_embeddings = DocumentEmbeddings(
|
||||
metadata=Metadata(
|
||||
id=data["document-embeddings"]["metadata"]["id"],
|
||||
root=data["document-embeddings"]["metadata"].get("root", ""),
|
||||
collection=data["document-embeddings"]["metadata"]["collection"]
|
||||
),
|
||||
chunks=[
|
||||
ChunkEmbeddings(
|
||||
chunk_id=ch["chunk_id"],
|
||||
vector=ch["vector"],
|
||||
)
|
||||
for ch in data["document-embeddings"]["chunks"]
|
||||
]
|
||||
)
|
||||
|
||||
return KnowledgeRequest(
|
||||
operation=data.get("operation"),
|
||||
id=data.get("id"),
|
||||
|
|
@ -50,6 +68,7 @@ class KnowledgeRequestTranslator(MessageTranslator):
|
|||
collection=data.get("collection"),
|
||||
triples=triples,
|
||||
graph_embeddings=graph_embeddings,
|
||||
document_embeddings=document_embeddings,
|
||||
)
|
||||
|
||||
def encode(self, obj: KnowledgeRequest) -> Dict[str, Any]:
|
||||
|
|
@ -90,6 +109,22 @@ class KnowledgeRequestTranslator(MessageTranslator):
|
|||
],
|
||||
}
|
||||
|
||||
if obj.document_embeddings:
|
||||
result["document-embeddings"] = {
|
||||
"metadata": {
|
||||
"id": obj.document_embeddings.metadata.id,
|
||||
"root": obj.document_embeddings.metadata.root,
|
||||
"collection": obj.document_embeddings.metadata.collection,
|
||||
},
|
||||
"chunks": [
|
||||
{
|
||||
"chunk_id": ch.chunk_id,
|
||||
"vector": ch.vector,
|
||||
}
|
||||
for ch in obj.document_embeddings.chunks
|
||||
],
|
||||
}
|
||||
|
||||
return result
|
||||
|
||||
|
||||
|
|
@ -140,6 +175,25 @@ class KnowledgeResponseTranslator(MessageTranslator):
|
|||
}
|
||||
}
|
||||
|
||||
# Streaming document embeddings response
|
||||
if obj.document_embeddings:
|
||||
return {
|
||||
"document-embeddings": {
|
||||
"metadata": {
|
||||
"id": obj.document_embeddings.metadata.id,
|
||||
"root": obj.document_embeddings.metadata.root,
|
||||
"collection": obj.document_embeddings.metadata.collection,
|
||||
},
|
||||
"chunks": [
|
||||
{
|
||||
"chunk_id": ch.chunk_id,
|
||||
"vector": ch.vector,
|
||||
}
|
||||
for ch in obj.document_embeddings.chunks
|
||||
],
|
||||
}
|
||||
}
|
||||
|
||||
# End of stream marker
|
||||
if obj.eos is True:
|
||||
return {"eos": True}
|
||||
|
|
@ -155,7 +209,7 @@ class KnowledgeResponseTranslator(MessageTranslator):
|
|||
is_final = (
|
||||
obj.ids is not None or # List response
|
||||
obj.eos is True or # End of stream
|
||||
(not obj.triples and not obj.graph_embeddings) # Empty response
|
||||
(not obj.triples and not obj.graph_embeddings and not obj.document_embeddings) # Empty response
|
||||
)
|
||||
|
||||
return response, is_final
|
||||
|
|
@ -4,7 +4,7 @@ from ..core.topic import queue
|
|||
from ..core.metadata import Metadata
|
||||
from .document import Document, TextDocument
|
||||
from .graph import Triples
|
||||
from .embeddings import GraphEmbeddings
|
||||
from .embeddings import GraphEmbeddings, DocumentEmbeddings
|
||||
|
||||
# get-kg-core
|
||||
# -> (???)
|
||||
|
|
@ -41,6 +41,9 @@ class KnowledgeRequest:
|
|||
triples: Triples | None = None
|
||||
graph_embeddings: GraphEmbeddings | None = None
|
||||
|
||||
# put-de-core
|
||||
document_embeddings: DocumentEmbeddings | None = None
|
||||
|
||||
@dataclass
|
||||
class KnowledgeResponse:
|
||||
error: Error | None = None
|
||||
|
|
@ -48,6 +51,7 @@ class KnowledgeResponse:
|
|||
eos: bool = False # Indicates end of knowledge core stream
|
||||
triples: Triples | None = None
|
||||
graph_embeddings: GraphEmbeddings | None = None
|
||||
document_embeddings: DocumentEmbeddings | None = None
|
||||
|
||||
knowledge_request_queue = queue('knowledge', cls='request')
|
||||
knowledge_response_queue = queue('knowledge', cls='response')
|
||||
|
|
|
|||
|
|
@ -10,7 +10,7 @@ description = "TrustGraph provides a means to run a pipeline of flexible AI proc
|
|||
readme = "README.md"
|
||||
requires-python = ">=3.8"
|
||||
dependencies = [
|
||||
"trustgraph-base>=2.4,<2.5",
|
||||
"trustgraph-base>=2.5,<2.6",
|
||||
"pulsar-client",
|
||||
"prometheus-client",
|
||||
"boto3",
|
||||
|
|
|
|||
|
|
@ -10,7 +10,7 @@ description = "TrustGraph provides a means to run a pipeline of flexible AI proc
|
|||
readme = "README.md"
|
||||
requires-python = ">=3.8"
|
||||
dependencies = [
|
||||
"trustgraph-base>=2.4,<2.5",
|
||||
"trustgraph-base>=2.5,<2.6",
|
||||
"requests",
|
||||
"pulsar-client",
|
||||
"aiohttp",
|
||||
|
|
@ -37,6 +37,7 @@ tg-dump-msgpack = "trustgraph.cli.dump_msgpack:main"
|
|||
tg-dump-queues = "trustgraph.cli.dump_queues:main"
|
||||
tg-monitor-prompts = "trustgraph.cli.monitor_prompts:main"
|
||||
tg-get-flow-blueprint = "trustgraph.cli.get_flow_blueprint:main"
|
||||
tg-get-de-core = "trustgraph.cli.get_de_core:main"
|
||||
tg-get-kg-core = "trustgraph.cli.get_kg_core:main"
|
||||
tg-get-document-content = "trustgraph.cli.get_document_content:main"
|
||||
tg-graph-to-turtle = "trustgraph.cli.graph_to_turtle:main"
|
||||
|
|
@ -77,6 +78,7 @@ tg-load-turtle = "trustgraph.cli.load_turtle:main"
|
|||
tg-load-knowledge = "trustgraph.cli.load_knowledge:main"
|
||||
tg-load-structured-data = "trustgraph.cli.load_structured_data:main"
|
||||
tg-put-flow-blueprint = "trustgraph.cli.put_flow_blueprint:main"
|
||||
tg-put-de-core = "trustgraph.cli.put_de_core:main"
|
||||
tg-put-kg-core = "trustgraph.cli.put_kg_core:main"
|
||||
tg-remove-library-document = "trustgraph.cli.remove_library_document:main"
|
||||
tg-save-doc-embeds = "trustgraph.cli.save_doc_embeds:main"
|
||||
|
|
@ -119,6 +121,9 @@ tg-show-extraction-provenance = "trustgraph.cli.show_extraction_provenance:main"
|
|||
tg-list-explain-traces = "trustgraph.cli.list_explain_traces:main"
|
||||
tg-show-explain-trace = "trustgraph.cli.show_explain_trace:main"
|
||||
|
||||
[tool.setuptools.package-data]
|
||||
"trustgraph.cli.sample_documents" = ["*.md", "*.pdf", "*.json"]
|
||||
|
||||
[tool.setuptools.packages.find]
|
||||
include = ["trustgraph*"]
|
||||
|
||||
|
|
|
|||
111
trustgraph-cli/trustgraph/cli/get_de_core.py
Normal file
111
trustgraph-cli/trustgraph/cli/get_de_core.py
Normal file
|
|
@ -0,0 +1,111 @@
|
|||
"""
|
||||
Uses the knowledge service to fetch a document embeddings core which is
|
||||
saved to a local file in msgpack format.
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import os
|
||||
import msgpack
|
||||
|
||||
from trustgraph.api import Api
|
||||
|
||||
default_url = os.getenv("TRUSTGRAPH_URL", 'http://localhost:8088/')
|
||||
default_token = os.getenv("TRUSTGRAPH_TOKEN", None)
|
||||
default_workspace = os.getenv("TRUSTGRAPH_WORKSPACE", "default")
|
||||
|
||||
def write_de(f, data):
|
||||
msg = (
|
||||
"de",
|
||||
{
|
||||
"m": {
|
||||
"i": data["metadata"]["id"],
|
||||
"m": data["metadata"]["root"],
|
||||
"c": data["metadata"]["collection"],
|
||||
},
|
||||
"c": [
|
||||
{
|
||||
"i": ch["chunk_id"],
|
||||
"v": ch["vector"],
|
||||
}
|
||||
for ch in data["chunks"]
|
||||
]
|
||||
}
|
||||
)
|
||||
f.write(msgpack.packb(msg, use_bin_type=True))
|
||||
|
||||
def fetch(url, workspace, id, output, token=None):
|
||||
|
||||
api = Api(url=url, token=token, workspace=workspace)
|
||||
socket = api.socket()
|
||||
|
||||
try:
|
||||
de = 0
|
||||
|
||||
with open(output, "wb") as f:
|
||||
|
||||
for response in socket.get_de_core(id):
|
||||
|
||||
if "document-embeddings" in response:
|
||||
de += 1
|
||||
write_de(f, response["document-embeddings"])
|
||||
|
||||
print(f"Got: {de} document embeddings messages.")
|
||||
|
||||
finally:
|
||||
socket.close()
|
||||
|
||||
def main():
|
||||
|
||||
parser = argparse.ArgumentParser(
|
||||
prog='tg-get-de-core',
|
||||
description=__doc__,
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
'-u', '--url',
|
||||
default=default_url,
|
||||
help=f'API URL (default: {default_url})',
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
'-w', '--workspace',
|
||||
default=default_workspace,
|
||||
help=f'Workspace (default: {default_workspace})',
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
'--id', '--identifier',
|
||||
required=True,
|
||||
help=f'Document embeddings core ID',
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
'-o', '--output',
|
||||
required=True,
|
||||
help=f'Output file'
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
'-t', '--token',
|
||||
default=default_token,
|
||||
help='Authentication token (default: $TRUSTGRAPH_TOKEN)',
|
||||
)
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
try:
|
||||
|
||||
fetch(
|
||||
url=args.url,
|
||||
workspace=args.workspace,
|
||||
id=args.id,
|
||||
output=args.output,
|
||||
token=args.token,
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
|
||||
print("Exception:", e, flush=True)
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
|
@ -5,13 +5,11 @@ to a local file in msgpack format.
|
|||
|
||||
import argparse
|
||||
import os
|
||||
import uuid
|
||||
import asyncio
|
||||
import json
|
||||
from websockets.asyncio.client import connect
|
||||
import msgpack
|
||||
|
||||
default_url = os.getenv("TRUSTGRAPH_URL", 'ws://localhost:8088/')
|
||||
from trustgraph.api import Api
|
||||
|
||||
default_url = os.getenv("TRUSTGRAPH_URL", 'http://localhost:8088/')
|
||||
default_token = os.getenv("TRUSTGRAPH_TOKEN", None)
|
||||
default_workspace = os.getenv("TRUSTGRAPH_WORKSPACE", "default")
|
||||
|
||||
|
|
@ -21,7 +19,7 @@ def write_triple(f, data):
|
|||
{
|
||||
"m": {
|
||||
"i": data["metadata"]["id"],
|
||||
"m": data["metadata"]["metadata"],
|
||||
"m": data["metadata"]["root"],
|
||||
"c": data["metadata"]["collection"],
|
||||
},
|
||||
"t": data["triples"],
|
||||
|
|
@ -35,13 +33,13 @@ def write_ge(f, data):
|
|||
{
|
||||
"m": {
|
||||
"i": data["metadata"]["id"],
|
||||
"m": data["metadata"]["metadata"],
|
||||
"m": data["metadata"]["root"],
|
||||
"c": data["metadata"]["collection"],
|
||||
},
|
||||
"e": [
|
||||
{
|
||||
"e": ent["entity"],
|
||||
"v": ent["vectors"],
|
||||
"v": ent["vector"],
|
||||
}
|
||||
for ent in data["entities"]
|
||||
]
|
||||
|
|
@ -49,54 +47,18 @@ def write_ge(f, data):
|
|||
)
|
||||
f.write(msgpack.packb(msg, use_bin_type=True))
|
||||
|
||||
async def fetch(url, workspace, id, output, token=None):
|
||||
def fetch(url, workspace, id, output, token=None):
|
||||
|
||||
if not url.endswith("/"):
|
||||
url += "/"
|
||||
|
||||
url = url + "api/v1/socket"
|
||||
|
||||
if token:
|
||||
url = f"{url}?token={token}"
|
||||
|
||||
mid = str(uuid.uuid4())
|
||||
|
||||
async with connect(url) as ws:
|
||||
|
||||
req = json.dumps({
|
||||
"id": mid,
|
||||
"workspace": workspace,
|
||||
"service": "knowledge",
|
||||
"request": {
|
||||
"operation": "get-kg-core",
|
||||
"workspace": workspace,
|
||||
"id": id,
|
||||
}
|
||||
})
|
||||
|
||||
await ws.send(req)
|
||||
api = Api(url=url, token=token, workspace=workspace)
|
||||
socket = api.socket()
|
||||
|
||||
try:
|
||||
ge = 0
|
||||
t = 0
|
||||
|
||||
with open(output, "wb") as f:
|
||||
|
||||
while True:
|
||||
|
||||
msg = await ws.recv()
|
||||
|
||||
obj = json.loads(msg)
|
||||
|
||||
if "response" not in obj:
|
||||
raise RuntimeError("No response?")
|
||||
|
||||
response = obj["response"]
|
||||
|
||||
if "error" in response:
|
||||
raise RuntimeError(obj["error"])
|
||||
|
||||
if "eos" in response:
|
||||
if response["eos"]: break
|
||||
for response in socket.get_kg_core(id):
|
||||
|
||||
if "triples" in response:
|
||||
t += 1
|
||||
|
|
@ -108,7 +70,8 @@ async def fetch(url, workspace, id, output, token=None):
|
|||
|
||||
print(f"Got: {t} triple, {ge} GE messages.")
|
||||
|
||||
await ws.close()
|
||||
finally:
|
||||
socket.close()
|
||||
|
||||
def main():
|
||||
|
||||
|
|
@ -151,14 +114,12 @@ def main():
|
|||
|
||||
try:
|
||||
|
||||
asyncio.run(
|
||||
fetch(
|
||||
url=args.url,
|
||||
workspace=args.workspace,
|
||||
id=args.id,
|
||||
output=args.output,
|
||||
token=args.token,
|
||||
)
|
||||
fetch(
|
||||
url=args.url,
|
||||
workspace=args.workspace,
|
||||
id=args.id,
|
||||
output=args.output,
|
||||
token=args.token,
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
|
|
|
|||
|
|
@ -3,11 +3,8 @@ Uses the GraphRAG service to answer a question
|
|||
"""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
import websockets
|
||||
import asyncio
|
||||
from trustgraph.api import (
|
||||
Api,
|
||||
ExplainabilityClient,
|
||||
|
|
@ -31,607 +28,6 @@ default_max_path_length = 2
|
|||
default_edge_score_limit = 30
|
||||
default_edge_limit = 25
|
||||
|
||||
# Provenance predicates
|
||||
TG = "https://trustgraph.ai/ns/"
|
||||
TG_QUERY = TG + "query"
|
||||
TG_CONCEPT = TG + "concept"
|
||||
TG_ENTITY = TG + "entity"
|
||||
TG_EDGE_COUNT = TG + "edgeCount"
|
||||
TG_SELECTED_EDGE = TG + "selectedEdge"
|
||||
TG_EDGE = TG + "edge"
|
||||
TG_REASONING = TG + "reasoning"
|
||||
TG_DOCUMENT = TG + "document"
|
||||
TG_CONTAINS = TG + "contains"
|
||||
PROV = "http://www.w3.org/ns/prov#"
|
||||
PROV_STARTED_AT_TIME = PROV + "startedAtTime"
|
||||
PROV_WAS_DERIVED_FROM = PROV + "wasDerivedFrom"
|
||||
RDFS_LABEL = "http://www.w3.org/2000/01/rdf-schema#label"
|
||||
|
||||
|
||||
def _get_event_type(prov_id):
|
||||
"""Extract event type from provenance_id"""
|
||||
if "question" in prov_id:
|
||||
return "question"
|
||||
elif "grounding" in prov_id:
|
||||
return "grounding"
|
||||
elif "exploration" in prov_id:
|
||||
return "exploration"
|
||||
elif "focus" in prov_id:
|
||||
return "focus"
|
||||
elif "synthesis" in prov_id:
|
||||
return "synthesis"
|
||||
return "provenance"
|
||||
|
||||
|
||||
def _format_provenance_details(event_type, triples):
|
||||
"""Format provenance details based on event type and triples"""
|
||||
lines = []
|
||||
|
||||
if event_type == "question":
|
||||
# Show query and timestamp
|
||||
for s, p, o in triples:
|
||||
if p == TG_QUERY:
|
||||
lines.append(f" Query: {o}")
|
||||
elif p == PROV_STARTED_AT_TIME:
|
||||
lines.append(f" Time: {o}")
|
||||
|
||||
elif event_type == "grounding":
|
||||
# Show extracted concepts
|
||||
concepts = [o for s, p, o in triples if p == TG_CONCEPT]
|
||||
if concepts:
|
||||
lines.append(f" Concepts: {len(concepts)}")
|
||||
for concept in concepts:
|
||||
lines.append(f" - {concept}")
|
||||
|
||||
elif event_type == "exploration":
|
||||
# Show edge count (seed entities resolved separately with labels)
|
||||
for s, p, o in triples:
|
||||
if p == TG_EDGE_COUNT:
|
||||
lines.append(f" Edges explored: {o}")
|
||||
|
||||
elif event_type == "focus":
|
||||
# For focus, just count edge selection URIs
|
||||
# The actual edge details are fetched separately via edge_selections parameter
|
||||
edge_sel_uris = []
|
||||
for s, p, o in triples:
|
||||
if p == TG_SELECTED_EDGE:
|
||||
edge_sel_uris.append(o)
|
||||
if edge_sel_uris:
|
||||
lines.append(f" Focused on {len(edge_sel_uris)} edge(s)")
|
||||
|
||||
elif event_type == "synthesis":
|
||||
# Show document reference (content already streamed)
|
||||
for s, p, o in triples:
|
||||
if p == TG_DOCUMENT:
|
||||
lines.append(f" Document: {o}")
|
||||
|
||||
return lines
|
||||
|
||||
|
||||
async def _query_triples_once(ws_url, flow_id, prov_id, collection, graph=None, debug=False):
|
||||
"""Query triples for a provenance node (single attempt)"""
|
||||
request = {
|
||||
"id": "triples-request",
|
||||
"service": "triples",
|
||||
"flow": flow_id,
|
||||
"request": {
|
||||
"s": {"t": "i", "i": prov_id},
|
||||
"collection": collection,
|
||||
"limit": 100
|
||||
}
|
||||
}
|
||||
# Add graph filter if specified (for named graph queries)
|
||||
if graph is not None:
|
||||
request["request"]["g"] = graph
|
||||
|
||||
if debug:
|
||||
print(f" [debug] querying triples for s={prov_id}", file=sys.stderr)
|
||||
|
||||
triples = []
|
||||
try:
|
||||
async with websockets.connect(ws_url, ping_interval=20, ping_timeout=30) as websocket:
|
||||
await websocket.send(json.dumps(request))
|
||||
|
||||
async for raw_message in websocket:
|
||||
response = json.loads(raw_message)
|
||||
|
||||
if debug:
|
||||
print(f" [debug] response: {json.dumps(response)[:200]}", file=sys.stderr)
|
||||
|
||||
if response.get("id") != "triples-request":
|
||||
continue
|
||||
|
||||
if "error" in response:
|
||||
if debug:
|
||||
print(f" [debug] error: {response['error']}", file=sys.stderr)
|
||||
break
|
||||
|
||||
if "response" in response:
|
||||
resp = response["response"]
|
||||
# Handle triples response
|
||||
# Response format: {"response": [triples...]}
|
||||
# Each triple uses compact keys: "i" for iri, "v" for value, "t" for type
|
||||
triple_list = resp.get("response", [])
|
||||
for t in triple_list:
|
||||
s = t.get("s", {}).get("i", t.get("s", {}).get("v", ""))
|
||||
p = t.get("p", {}).get("i", t.get("p", {}).get("v", ""))
|
||||
# Handle quoted triples (type "t") and regular values
|
||||
o_term = t.get("o", {})
|
||||
if o_term.get("t") == "t":
|
||||
# Quoted triple - extract s, p, o from nested structure
|
||||
tr = o_term.get("tr", {})
|
||||
o = {
|
||||
"s": tr.get("s", {}).get("i", ""),
|
||||
"p": tr.get("p", {}).get("i", ""),
|
||||
"o": tr.get("o", {}).get("i", tr.get("o", {}).get("v", "")),
|
||||
}
|
||||
else:
|
||||
o = o_term.get("i", o_term.get("v", ""))
|
||||
triples.append((s, p, o))
|
||||
|
||||
if resp.get("complete") or response.get("complete"):
|
||||
break
|
||||
except Exception as e:
|
||||
if debug:
|
||||
print(f" [debug] exception: {e}", file=sys.stderr)
|
||||
|
||||
if debug:
|
||||
print(f" [debug] got {len(triples)} triples", file=sys.stderr)
|
||||
|
||||
return triples
|
||||
|
||||
|
||||
async def _query_triples(ws_url, flow_id, prov_id, collection, graph=None, max_retries=5, retry_delay=0.2, debug=False):
|
||||
"""Query triples for a provenance node with retries for race condition"""
|
||||
for attempt in range(max_retries):
|
||||
triples = await _query_triples_once(ws_url, flow_id, prov_id, collection, graph=graph, debug=debug)
|
||||
if triples:
|
||||
return triples
|
||||
# Wait before retry if empty (triples may not be stored yet)
|
||||
if attempt < max_retries - 1:
|
||||
if debug:
|
||||
print(f" [debug] retry {attempt + 1}/{max_retries}...", file=sys.stderr)
|
||||
await asyncio.sleep(retry_delay)
|
||||
return []
|
||||
|
||||
|
||||
async def _query_edge_provenance(ws_url, flow_id, edge_s, edge_p, edge_o, collection, debug=False):
|
||||
"""
|
||||
Query for provenance of an edge (s, p, o) in the knowledge graph.
|
||||
|
||||
Finds subgraphs that contain the edge via tg:contains, then follows
|
||||
prov:wasDerivedFrom to find source documents.
|
||||
|
||||
Returns list of source URIs (chunks, pages, documents).
|
||||
"""
|
||||
# Query for subgraphs that contain this edge: ?subgraph tg:contains <<s p o>>
|
||||
request = {
|
||||
"id": "edge-prov-request",
|
||||
"service": "triples",
|
||||
"flow": flow_id,
|
||||
"request": {
|
||||
"p": {"t": "i", "i": TG_CONTAINS},
|
||||
"o": {
|
||||
"t": "t", # Quoted triple type
|
||||
"tr": {
|
||||
"s": {"t": "i", "i": edge_s},
|
||||
"p": {"t": "i", "i": edge_p},
|
||||
"o": {"t": "i", "i": edge_o} if edge_o.startswith("http") or edge_o.startswith("urn:") else {"t": "l", "v": edge_o},
|
||||
}
|
||||
},
|
||||
"collection": collection,
|
||||
"limit": 10
|
||||
}
|
||||
}
|
||||
|
||||
if debug:
|
||||
print(f" [debug] querying edge provenance for ({edge_s}, {edge_p}, {edge_o})", file=sys.stderr)
|
||||
|
||||
stmt_uris = []
|
||||
try:
|
||||
async with websockets.connect(ws_url, ping_interval=20, ping_timeout=30) as websocket:
|
||||
await websocket.send(json.dumps(request))
|
||||
|
||||
async for raw_message in websocket:
|
||||
response = json.loads(raw_message)
|
||||
|
||||
if response.get("id") != "edge-prov-request":
|
||||
continue
|
||||
|
||||
if "error" in response:
|
||||
if debug:
|
||||
print(f" [debug] error: {response['error']}", file=sys.stderr)
|
||||
break
|
||||
|
||||
if "response" in response:
|
||||
resp = response["response"]
|
||||
triple_list = resp.get("response", [])
|
||||
for t in triple_list:
|
||||
s = t.get("s", {}).get("i", "")
|
||||
if s:
|
||||
stmt_uris.append(s)
|
||||
|
||||
if resp.get("complete") or response.get("complete"):
|
||||
break
|
||||
except Exception as e:
|
||||
if debug:
|
||||
print(f" [debug] exception querying edge provenance: {e}", file=sys.stderr)
|
||||
|
||||
if debug:
|
||||
print(f" [debug] found {len(stmt_uris)} reifying statements", file=sys.stderr)
|
||||
|
||||
# For each statement, query wasDerivedFrom to find sources
|
||||
sources = []
|
||||
for stmt_uri in stmt_uris:
|
||||
# Query: stmt_uri prov:wasDerivedFrom ?source
|
||||
request = {
|
||||
"id": "derived-from-request",
|
||||
"service": "triples",
|
||||
"flow": flow_id,
|
||||
"request": {
|
||||
"s": {"t": "i", "i": stmt_uri},
|
||||
"p": {"t": "i", "i": PROV_WAS_DERIVED_FROM},
|
||||
"collection": collection,
|
||||
"limit": 10
|
||||
}
|
||||
}
|
||||
|
||||
try:
|
||||
async with websockets.connect(ws_url, ping_interval=20, ping_timeout=30) as websocket:
|
||||
await websocket.send(json.dumps(request))
|
||||
|
||||
async for raw_message in websocket:
|
||||
response = json.loads(raw_message)
|
||||
|
||||
if response.get("id") != "derived-from-request":
|
||||
continue
|
||||
|
||||
if "error" in response:
|
||||
break
|
||||
|
||||
if "response" in response:
|
||||
resp = response["response"]
|
||||
triple_list = resp.get("response", [])
|
||||
for t in triple_list:
|
||||
o = t.get("o", {}).get("i", "")
|
||||
if o:
|
||||
sources.append(o)
|
||||
|
||||
if resp.get("complete") or response.get("complete"):
|
||||
break
|
||||
except Exception as e:
|
||||
if debug:
|
||||
print(f" [debug] exception querying wasDerivedFrom: {e}", file=sys.stderr)
|
||||
|
||||
if debug:
|
||||
print(f" [debug] found {len(sources)} source(s): {sources}", file=sys.stderr)
|
||||
|
||||
return sources
|
||||
|
||||
|
||||
async def _query_derived_from(ws_url, flow_id, uri, collection, debug=False):
|
||||
"""Query for the prov:wasDerivedFrom parent of a URI. Returns None if no parent."""
|
||||
request = {
|
||||
"id": "parent-request",
|
||||
"service": "triples",
|
||||
"flow": flow_id,
|
||||
"request": {
|
||||
"s": {"t": "i", "i": uri},
|
||||
"p": {"t": "i", "i": PROV_WAS_DERIVED_FROM},
|
||||
"collection": collection,
|
||||
"limit": 1
|
||||
}
|
||||
}
|
||||
|
||||
try:
|
||||
async with websockets.connect(ws_url, ping_interval=20, ping_timeout=30) as websocket:
|
||||
await websocket.send(json.dumps(request))
|
||||
|
||||
async for raw_message in websocket:
|
||||
response = json.loads(raw_message)
|
||||
|
||||
if response.get("id") != "parent-request":
|
||||
continue
|
||||
|
||||
if "error" in response:
|
||||
break
|
||||
|
||||
if "response" in response:
|
||||
resp = response["response"]
|
||||
triple_list = resp.get("response", [])
|
||||
if triple_list:
|
||||
return triple_list[0].get("o", {}).get("i", None)
|
||||
|
||||
if resp.get("complete") or response.get("complete"):
|
||||
break
|
||||
except Exception as e:
|
||||
if debug:
|
||||
print(f" [debug] exception querying parent: {e}", file=sys.stderr)
|
||||
|
||||
return None
|
||||
|
||||
|
||||
async def _trace_provenance_chain(ws_url, flow_id, source_uri, collection, label_cache, debug=False):
|
||||
"""
|
||||
Trace the full provenance chain from a source URI up to the root document.
|
||||
Returns a list of (uri, label) tuples from leaf to root.
|
||||
"""
|
||||
chain = []
|
||||
current = source_uri
|
||||
max_depth = 10 # Prevent infinite loops
|
||||
|
||||
for _ in range(max_depth):
|
||||
if not current:
|
||||
break
|
||||
|
||||
# Get label for current entity
|
||||
label = await _query_label(ws_url, flow_id, current, collection, label_cache, debug)
|
||||
chain.append((current, label))
|
||||
|
||||
# Get parent
|
||||
parent = await _query_derived_from(ws_url, flow_id, current, collection, debug)
|
||||
if not parent or parent == current:
|
||||
break
|
||||
current = parent
|
||||
|
||||
return chain
|
||||
|
||||
|
||||
def _format_provenance_chain(chain):
|
||||
"""
|
||||
Format a provenance chain as a human-readable string.
|
||||
Chain is [(uri, label), ...] from leaf to root.
|
||||
"""
|
||||
if not chain:
|
||||
return ""
|
||||
|
||||
# Show labels, from leaf to root
|
||||
labels = [label for uri, label in chain]
|
||||
return " → ".join(labels)
|
||||
|
||||
|
||||
def _is_iri(value):
|
||||
"""Check if a value looks like an IRI."""
|
||||
if not isinstance(value, str):
|
||||
return False
|
||||
return value.startswith("http://") or value.startswith("https://") or value.startswith("urn:")
|
||||
|
||||
|
||||
async def _query_label(ws_url, flow_id, iri, collection, label_cache, debug=False):
|
||||
"""
|
||||
Query for the rdfs:label of an IRI.
|
||||
Uses label_cache to avoid repeated queries.
|
||||
Returns the label if found, otherwise returns the IRI.
|
||||
"""
|
||||
if not _is_iri(iri):
|
||||
return iri
|
||||
|
||||
# Check cache first
|
||||
if iri in label_cache:
|
||||
return label_cache[iri]
|
||||
|
||||
request = {
|
||||
"id": "label-request",
|
||||
"service": "triples",
|
||||
"flow": flow_id,
|
||||
"request": {
|
||||
"s": {"t": "i", "i": iri},
|
||||
"p": {"t": "i", "i": RDFS_LABEL},
|
||||
"collection": collection,
|
||||
"limit": 1
|
||||
}
|
||||
}
|
||||
|
||||
label = iri # Default to IRI if no label found
|
||||
try:
|
||||
async with websockets.connect(ws_url, ping_interval=20, ping_timeout=30) as websocket:
|
||||
await websocket.send(json.dumps(request))
|
||||
|
||||
async for raw_message in websocket:
|
||||
response = json.loads(raw_message)
|
||||
|
||||
if response.get("id") != "label-request":
|
||||
continue
|
||||
|
||||
if "error" in response:
|
||||
break
|
||||
|
||||
if "response" in response:
|
||||
resp = response["response"]
|
||||
triple_list = resp.get("response", [])
|
||||
if triple_list:
|
||||
# Get the label value
|
||||
o = triple_list[0].get("o", {})
|
||||
label = o.get("v", o.get("i", iri))
|
||||
|
||||
if resp.get("complete") or response.get("complete"):
|
||||
break
|
||||
except Exception as e:
|
||||
if debug:
|
||||
print(f" [debug] exception querying label for {iri}: {e}", file=sys.stderr)
|
||||
|
||||
# Cache the result
|
||||
label_cache[iri] = label
|
||||
return label
|
||||
|
||||
|
||||
async def _resolve_edge_labels(ws_url, flow_id, edge_triple, collection, label_cache, debug=False):
|
||||
"""
|
||||
Resolve labels for all IRI components of an edge triple.
|
||||
Returns (s_label, p_label, o_label).
|
||||
"""
|
||||
s = edge_triple.get("s", "?")
|
||||
p = edge_triple.get("p", "?")
|
||||
o = edge_triple.get("o", "?")
|
||||
|
||||
s_label = await _query_label(ws_url, flow_id, s, collection, label_cache, debug)
|
||||
p_label = await _query_label(ws_url, flow_id, p, collection, label_cache, debug)
|
||||
o_label = await _query_label(ws_url, flow_id, o, collection, label_cache, debug)
|
||||
|
||||
return s_label, p_label, o_label
|
||||
|
||||
|
||||
async def _question_explainable(
|
||||
url, flow_id, question, collection, entity_limit, triple_limit,
|
||||
max_subgraph_size, max_path_length, token=None, debug=False
|
||||
):
|
||||
"""Execute graph RAG with explainability - shows provenance events with details"""
|
||||
# Convert HTTP URL to WebSocket URL
|
||||
if url.startswith("http://"):
|
||||
ws_url = url.replace("http://", "ws://", 1)
|
||||
elif url.startswith("https://"):
|
||||
ws_url = url.replace("https://", "wss://", 1)
|
||||
else:
|
||||
ws_url = f"ws://{url}"
|
||||
|
||||
ws_url = f"{ws_url.rstrip('/')}/api/v1/socket"
|
||||
if token:
|
||||
ws_url = f"{ws_url}?token={token}"
|
||||
|
||||
# Cache for label lookups to avoid repeated queries
|
||||
label_cache = {}
|
||||
|
||||
request = {
|
||||
"id": "cli-request",
|
||||
"service": "graph-rag",
|
||||
"flow": flow_id,
|
||||
"request": {
|
||||
"query": question,
|
||||
"collection": collection,
|
||||
"entity-limit": entity_limit,
|
||||
"triple-limit": triple_limit,
|
||||
"max-subgraph-size": max_subgraph_size,
|
||||
"max-path-length": max_path_length,
|
||||
"streaming": True
|
||||
}
|
||||
}
|
||||
|
||||
async with websockets.connect(ws_url, ping_interval=20, ping_timeout=300) as websocket:
|
||||
await websocket.send(json.dumps(request))
|
||||
|
||||
async for raw_message in websocket:
|
||||
response = json.loads(raw_message)
|
||||
|
||||
if response.get("id") != "cli-request":
|
||||
continue
|
||||
|
||||
if "error" in response:
|
||||
print(f"\nError: {response['error']}", file=sys.stderr)
|
||||
break
|
||||
|
||||
if "response" in response:
|
||||
resp = response["response"]
|
||||
|
||||
# Check for errors in response
|
||||
if "error" in resp and resp["error"]:
|
||||
err = resp["error"]
|
||||
print(f"\nError: {err.get('message', 'Unknown error')}", file=sys.stderr)
|
||||
break
|
||||
|
||||
message_type = resp.get("message_type", "")
|
||||
|
||||
if debug:
|
||||
print(f" [debug] message_type={message_type}, keys={list(resp.keys())}", file=sys.stderr)
|
||||
|
||||
if message_type == "explain":
|
||||
# Display explain event with details
|
||||
explain_id = resp.get("explain_id", "")
|
||||
explain_graph = resp.get("explain_graph") # Named graph (e.g., urn:graph:retrieval)
|
||||
if explain_id:
|
||||
event_type = _get_event_type(explain_id)
|
||||
print(f"\n [{event_type}] {explain_id}", file=sys.stderr)
|
||||
|
||||
# Query triples for this explain node (using named graph filter)
|
||||
triples = await _query_triples(
|
||||
ws_url, flow_id, explain_id, collection, graph=explain_graph, debug=debug
|
||||
)
|
||||
|
||||
# Format and display details
|
||||
details = _format_provenance_details(event_type, triples)
|
||||
for line in details:
|
||||
print(line, file=sys.stderr)
|
||||
|
||||
# For exploration events, resolve entity labels
|
||||
if event_type == "exploration":
|
||||
entity_iris = [o for s, p, o in triples if p == TG_ENTITY]
|
||||
if entity_iris:
|
||||
print(f" Seed entities: {len(entity_iris)}", file=sys.stderr)
|
||||
for iri in entity_iris:
|
||||
label = await _query_label(
|
||||
ws_url, flow_id, iri, collection,
|
||||
label_cache, debug=debug
|
||||
)
|
||||
print(f" - {label}", file=sys.stderr)
|
||||
|
||||
# For focus events, query each edge selection for details
|
||||
if event_type == "focus":
|
||||
for s, p, o in triples:
|
||||
if debug:
|
||||
print(f" [debug] triple: p={p}, o={o}, o_type={type(o).__name__}", file=sys.stderr)
|
||||
if p == TG_SELECTED_EDGE and isinstance(o, str):
|
||||
if debug:
|
||||
print(f" [debug] querying edge selection: {o}", file=sys.stderr)
|
||||
# Query the edge selection entity (using named graph filter)
|
||||
edge_triples = await _query_triples(
|
||||
ws_url, flow_id, o, collection, graph=explain_graph, debug=debug
|
||||
)
|
||||
if debug:
|
||||
print(f" [debug] got {len(edge_triples)} edge triples", file=sys.stderr)
|
||||
# Extract edge and reasoning
|
||||
edge_triple = None # Store the actual triple for provenance lookup
|
||||
reasoning = None
|
||||
for es, ep, eo in edge_triples:
|
||||
if debug:
|
||||
print(f" [debug] edge triple: ep={ep}, eo={eo}", file=sys.stderr)
|
||||
if ep == TG_EDGE and isinstance(eo, dict):
|
||||
# eo is a quoted triple dict
|
||||
edge_triple = eo
|
||||
elif ep == TG_REASONING:
|
||||
reasoning = eo
|
||||
if edge_triple:
|
||||
# Resolve labels for edge components
|
||||
s_label, p_label, o_label = await _resolve_edge_labels(
|
||||
ws_url, flow_id, edge_triple, collection,
|
||||
label_cache, debug=debug
|
||||
)
|
||||
print(f" Edge: ({s_label}, {p_label}, {o_label})", file=sys.stderr)
|
||||
if reasoning:
|
||||
r_short = reasoning[:100] + "..." if len(reasoning) > 100 else reasoning
|
||||
print(f" Reason: {r_short}", file=sys.stderr)
|
||||
|
||||
# Trace edge provenance in the workspace collection (not explainability)
|
||||
if edge_triple:
|
||||
sources = await _query_edge_provenance(
|
||||
ws_url, flow_id,
|
||||
edge_triple.get("s", ""),
|
||||
edge_triple.get("p", ""),
|
||||
edge_triple.get("o", ""),
|
||||
collection, # Use the query collection, not explainability
|
||||
debug=debug
|
||||
)
|
||||
if sources:
|
||||
for src in sources:
|
||||
# Trace full chain from source to root document
|
||||
chain = await _trace_provenance_chain(
|
||||
ws_url, flow_id, src, collection,
|
||||
label_cache, debug=debug
|
||||
)
|
||||
chain_str = _format_provenance_chain(chain)
|
||||
print(f" Source: {chain_str}", file=sys.stderr)
|
||||
|
||||
elif message_type == "chunk" or not message_type:
|
||||
# Display response chunk
|
||||
chunk = resp.get("response", "")
|
||||
if chunk:
|
||||
print(chunk, end="", flush=True)
|
||||
|
||||
# Check if session is complete
|
||||
if resp.get("end_of_session"):
|
||||
break
|
||||
|
||||
print() # Final newline
|
||||
|
||||
|
||||
def _question_explainable_api(
|
||||
url, flow_id, question_text, collection, entity_limit, triple_limit,
|
||||
max_subgraph_size, max_path_length, edge_score_limit=30,
|
||||
|
|
|
|||
|
|
@ -1,709 +1,82 @@
|
|||
"""
|
||||
Loads a PDF document into the library
|
||||
Loads sample documents into the TrustGraph library from bundled package data.
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
import uuid
|
||||
import datetime
|
||||
import requests
|
||||
from importlib import resources
|
||||
|
||||
from trustgraph.api import Api
|
||||
from trustgraph.api.types import hash, Uri, Literal, Triple
|
||||
from trustgraph.api.types import Uri, Literal, Triple
|
||||
|
||||
default_url = os.getenv("TRUSTGRAPH_URL", 'http://localhost:8088/')
|
||||
default_token = os.getenv("TRUSTGRAPH_TOKEN", None)
|
||||
default_workspace = os.getenv("TRUSTGRAPH_WORKSPACE", "default")
|
||||
|
||||
|
||||
from requests.adapters import HTTPAdapter
|
||||
from urllib3.response import HTTPResponse
|
||||
|
||||
class FileAdapter(HTTPAdapter):
|
||||
def send(self, request, *args, **kwargs):
|
||||
resp = HTTPResponse(body=open(request.url[7:], 'rb'), status=200, preload_content=False)
|
||||
return self.build_response(request, resp)
|
||||
|
||||
session = requests.session()
|
||||
|
||||
session.mount('file://', FileAdapter())
|
||||
|
||||
try:
|
||||
os.mkdir("doc-cache")
|
||||
except:
|
||||
pass
|
||||
|
||||
documents = [
|
||||
|
||||
{
|
||||
"id": "https://trustgraph.ai/doc/challenger-report-vol-1",
|
||||
"title": "Report of the Presidential Commission on the Space Shuttle Challenger Accident, Volume 1",
|
||||
"comments": "The findings of the Commission regarding the circumstances surrounding the Challenger accident are reported and recommendations for corrective action are outlined",
|
||||
"url": "https://ntrs.nasa.gov/api/citations/19860015255/downloads/19860015255.pdf",
|
||||
"kind": "application/pdf",
|
||||
"date": datetime.datetime.now().date(),
|
||||
"tags": ["nasa", "safety-engineering", "space-shuttle"],
|
||||
"metadata": [
|
||||
Triple(
|
||||
s = Uri("https://trustgraph.ai/doc/challenger-report-vol-1"),
|
||||
p = Uri("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"),
|
||||
o = Uri("https://schema.org/DigitalDocument")
|
||||
),
|
||||
Triple(
|
||||
s = Uri("https://trustgraph.ai/doc/challenger-report-vol-1"),
|
||||
p = Uri("http://www.w3.org/2000/01/rdf-schema#label"),
|
||||
o = Literal("Report of the Presidential Commission on the Space Shuttle Challenger Accident, Volume 1")
|
||||
),
|
||||
Triple(
|
||||
s = Uri("https://trustgraph.ai/doc/challenger-report-vol-1"),
|
||||
p = Uri("https://schema.org/name"),
|
||||
o = Literal("Report of the Presidential Commission on the Space Shuttle Challenger Accident, Volume 1")
|
||||
),
|
||||
Triple(
|
||||
s = Uri("https://trustgraph.ai/doc/challenger-report-vol-1"),
|
||||
p = Uri("https://schema.org/description"),
|
||||
o = Literal("The findings of the Commission regarding the circumstances surrounding the Challenger accident are reported and recommendations for corrective action are outlined")
|
||||
),
|
||||
Triple(
|
||||
s = Uri("https://trustgraph.ai/doc/challenger-report-vol-1"),
|
||||
p = Uri("https://schema.org/copyrightNotice"),
|
||||
o = Literal("Work of the US Gov. Public Use Permitted")
|
||||
),
|
||||
Triple(
|
||||
s = Uri("https://trustgraph.ai/doc/challenger-report-vol-1"),
|
||||
p = Uri("https://schema.org/copyrightHolder"),
|
||||
o = Literal("US Gov.")
|
||||
),
|
||||
Triple(
|
||||
s = Uri("https://trustgraph.ai/doc/challenger-report-vol-1"),
|
||||
p = Uri("https://schema.org/copyrightYear"),
|
||||
o = Literal("1986")
|
||||
),
|
||||
Triple(
|
||||
s = Uri("https://trustgraph.ai/doc/challenger-report-vol-1"),
|
||||
p = Uri("https://schema.org/keywords"),
|
||||
o = Literal("nasa")
|
||||
),
|
||||
Triple(
|
||||
s = Uri("https://trustgraph.ai/doc/challenger-report-vol-1"),
|
||||
p = Uri("https://schema.org/keywords"),
|
||||
o = Literal("space-shuttle")
|
||||
),
|
||||
Triple(
|
||||
s = Uri("https://trustgraph.ai/doc/challenger-report-vol-1"),
|
||||
p = Uri("https://schema.org/keywords"),
|
||||
o = Literal("safety-engineering")
|
||||
),
|
||||
Triple(
|
||||
s = Uri("https://trustgraph.ai/doc/challenger-report-vol-1"),
|
||||
p = Uri("https://schema.org/keywords"),
|
||||
o = Literal("challenger")
|
||||
),
|
||||
Triple(
|
||||
s = Uri("https://trustgraph.ai/doc/challenger-report-vol-1"),
|
||||
p = Uri("https://schema.org/keywords"),
|
||||
o = Literal("space-transportation")
|
||||
),
|
||||
Triple(
|
||||
s = Uri("https://trustgraph.ai/doc/challenger-report-vol-1"),
|
||||
p = Uri("https://schema.org/publication"),
|
||||
o = Uri("https://trustgraph.ai/pubev/d946c320-0432-48c8-a015-26b0af3cedae")
|
||||
),
|
||||
Triple(
|
||||
s = Uri("https://trustgraph.ai/pubev/d946c320-0432-48c8-a015-26b0af3cedae"),
|
||||
p = Uri("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"),
|
||||
o = Uri("https://schema.org/PublicationEvent")
|
||||
),
|
||||
Triple(
|
||||
s = Uri("https://trustgraph.ai/pubev/d946c320-0432-48c8-a015-26b0af3cedae"),
|
||||
p = Uri("https://schema.org/description"),
|
||||
o = Literal("The findings of the Commission regarding the circumstances surrounding the Challenger accident are reported and recommendations for corrective action are outlined")
|
||||
),
|
||||
Triple(
|
||||
s = Uri("https://trustgraph.ai/pubev/d946c320-0432-48c8-a015-26b0af3cedae"),
|
||||
p = Uri("https://schema.org/publishedBy"),
|
||||
o = Uri("https://trustgraph.ai/org/nasa")
|
||||
),
|
||||
Triple(
|
||||
s = Uri("https://trustgraph.ai/org/nasa"),
|
||||
p = Uri("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"),
|
||||
o = Uri("https://schema.org/Organization")
|
||||
),
|
||||
Triple(
|
||||
s = Uri("https://trustgraph.ai/org/nasa"),
|
||||
p = Uri("http://www.w3.org/2000/01/rdf-schema#label"),
|
||||
o = Literal("NASA")
|
||||
),
|
||||
Triple(
|
||||
s = Uri("https://trustgraph.ai/org/nasa"),
|
||||
p = Uri("https://schema.org/name"),
|
||||
o = Literal("NASA")
|
||||
),
|
||||
Triple(
|
||||
s = Uri("https://trustgraph.ai/pubev/d946c320-0432-48c8-a015-26b0af3cedae"),
|
||||
p = Uri("https://schema.org/startDate"),
|
||||
o = Literal("1986-06-06")
|
||||
),
|
||||
Triple(
|
||||
s = Uri("https://trustgraph.ai/pubev/d946c320-0432-48c8-a015-26b0af3cedae"),
|
||||
p = Uri("https://schema.org/endDate"),
|
||||
o = Literal("1986-06-06")
|
||||
),
|
||||
Triple(
|
||||
s = Uri("https://trustgraph.ai/doc/challenger-report-vol-1"),
|
||||
p = Uri("https://schema.org/url"),
|
||||
o = Uri("https://ntrs.nasa.gov/api/citations/19860015255/downloads/19860015255.pdf")
|
||||
)
|
||||
]
|
||||
},
|
||||
|
||||
{
|
||||
"id": "https://trustgraph.ai/doc/icelandic-dictionary",
|
||||
"title": "A Concise Dictionary of Old Icelandic",
|
||||
"comments": "A Concise Dictionary of Old Icelandic, published in 1910, is a 551-page dictionary that offers a comprehensive overview of the Old Norse language, particularly Old Icelandic.",
|
||||
"url": "https://css4.pub/2015/icelandic/dictionary.pdf",
|
||||
"kind": "application/pdf",
|
||||
"date": datetime.datetime.now().date(),
|
||||
"tags": ["old-icelandic", "dictionary", "language", "grammar", "old-norse", "icelandic"],
|
||||
"metadata": [
|
||||
Triple(
|
||||
s = Uri("https://trustgraph.ai/doc/icelandic-dictionary"),
|
||||
p = Uri("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"),
|
||||
o = Uri("https://schema.org/DigitalDocument")
|
||||
),
|
||||
Triple(
|
||||
s = Uri("https://trustgraph.ai/doc/icelandic-dictionary"),
|
||||
p = Uri("http://www.w3.org/2000/01/rdf-schema#label"),
|
||||
o = Literal("A Concise Dictionary of Old Icelandic"),
|
||||
),
|
||||
Triple(
|
||||
s = Uri("https://trustgraph.ai/doc/icelandic-dictionary"),
|
||||
p = Uri("https://schema.org/name"),
|
||||
o = Literal("A Concise Dictionary of Old Icelandic"),
|
||||
),
|
||||
Triple(
|
||||
s = Uri("https://trustgraph.ai/doc/icelandic-dictionary"),
|
||||
p = Uri("https://schema.org/description"),
|
||||
o = Literal("A Concise Dictionary of Old Icelandic, published in 1910, is a 551-page dictionary that offers a comprehensive overview of the Old Norse language, particularly Old Icelandic."),
|
||||
),
|
||||
Triple(
|
||||
s = Uri("https://trustgraph.ai/doc/icelandic-dictionary"),
|
||||
p = Uri("https://schema.org/copyrightNotice"),
|
||||
o = Literal("Copyright expired, public domain")
|
||||
),
|
||||
Triple(
|
||||
s = Uri("https://trustgraph.ai/doc/icelandic-dictionary"),
|
||||
p = Uri("https://schema.org/copyrightHolder"),
|
||||
o = Literal("Geir Zoëga, Clarendon Press")
|
||||
),
|
||||
Triple(
|
||||
s = Uri("https://trustgraph.ai/doc/icelandic-dictionary"),
|
||||
p = Uri("https://schema.org/copyrightYear"),
|
||||
o = Literal("1910")
|
||||
),
|
||||
Triple(
|
||||
s = Uri("https://trustgraph.ai/doc/icelandic-dictionary"),
|
||||
p = Uri("https://schema.org/keywords"),
|
||||
o = Literal("icelandic")
|
||||
),
|
||||
Triple(
|
||||
s = Uri("https://trustgraph.ai/doc/icelandic-dictionary"),
|
||||
p = Uri("https://schema.org/keywords"),
|
||||
o = Literal("old-norse")
|
||||
),
|
||||
Triple(
|
||||
s = Uri("https://trustgraph.ai/doc/icelandic-dictionary"),
|
||||
p = Uri("https://schema.org/keywords"),
|
||||
o = Literal("dictionary")
|
||||
),
|
||||
Triple(
|
||||
s = Uri("https://trustgraph.ai/doc/icelandic-dictionary"),
|
||||
p = Uri("https://schema.org/keywords"),
|
||||
o = Literal("grammar")
|
||||
),
|
||||
Triple(
|
||||
s = Uri("https://trustgraph.ai/doc/icelandic-dictionary"),
|
||||
p = Uri("https://schema.org/keywords"),
|
||||
o = Literal("old-icelandic")
|
||||
),
|
||||
Triple(
|
||||
s = Uri("https://trustgraph.ai/doc/icelandic-dictionary"),
|
||||
p = Uri("https://schema.org/publication"),
|
||||
o = Uri("https://trustgraph.ai/pubev/11a78156-3aea-4263-9f1b-0c63cbde69d7")
|
||||
),
|
||||
Triple(
|
||||
s = Uri("https://trustgraph.ai/pubev/11a78156-3aea-4263-9f1b-0c63cbde69d7"),
|
||||
p = Uri("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"),
|
||||
o = Uri("https://schema.org/PublicationEvent")
|
||||
),
|
||||
Triple(
|
||||
s = Uri("https://trustgraph.ai/pubev/11a78156-3aea-4263-9f1b-0c63cbde69d7"),
|
||||
p = Uri("https://schema.org/description"),
|
||||
o = Literal("Published by Clarendon Press in 1910"),
|
||||
),
|
||||
Triple(
|
||||
s = Uri("https://trustgraph.ai/pubev/11a78156-3aea-4263-9f1b-0c63cbde69d7"),
|
||||
p = Uri("https://schema.org/publishedBy"),
|
||||
o = Uri("https://trustgraph.ai/org/clarendon-press")
|
||||
),
|
||||
Triple(
|
||||
s = Uri("https://trustgraph.ai/org/clarendon-press"),
|
||||
p = Uri("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"),
|
||||
o = Uri("https://schema.org/Organization")
|
||||
),
|
||||
Triple(
|
||||
s = Uri("https://trustgraph.ai/org/clarendon-press"),
|
||||
p = Uri("http://www.w3.org/2000/01/rdf-schema#label"),
|
||||
o = Literal("NASA")
|
||||
),
|
||||
Triple(
|
||||
s = Uri("https://trustgraph.ai/org/clarendon-press"),
|
||||
p = Uri("https://schema.org/name"),
|
||||
o = Literal("Clarendon Press")
|
||||
),
|
||||
Triple(
|
||||
s = Uri("https://trustgraph.ai/pubev/11a78156-3aea-4263-9f1b-0c63cbde69d7"),
|
||||
p = Uri("https://schema.org/startDate"),
|
||||
o = Literal("1910-01-01")
|
||||
),
|
||||
Triple(
|
||||
s = Uri("https://trustgraph.ai/pubev/11a78156-3aea-4263-9f1b-0c63cbde69d7"),
|
||||
p = Uri("https://schema.org/endDate"),
|
||||
o = Literal("1910-01-01")
|
||||
),
|
||||
Triple(
|
||||
s = Uri("https://trustgraph.ai/doc/icelandic-dictionary"),
|
||||
p = Uri("https://schema.org/url"),
|
||||
o = Uri("https://digital-research-books-beta.nypl.org/edition/10476341")
|
||||
)
|
||||
]
|
||||
},
|
||||
SAMPLE_DOCS_PACKAGE = "trustgraph.cli.sample_documents"
|
||||
|
||||
|
||||
{
|
||||
"id": "https://trustgraph.ai/doc/annual-threat-assessment-us-dni-march-2025",
|
||||
"title": "Annual threat assessment of the U.S. intelligence community - March 2025",
|
||||
"comments": "The report reflects the collective insights of the Intelligence Community (IC), which is committed to providing the nuanced, independent, and unvarnished intelligence that policymakers, warfighters, and domestic law enforcement personnel need to protect American lives and America’s interests anywhere in the world.",
|
||||
"url": "https://www.intelligence.senate.gov/sites/default/files/2025%20Annual%20Threat%20Assessment%20of%20the%20U.S.%20Intelligence%20Community.pdf",
|
||||
"kind": "application/pdf",
|
||||
"date": datetime.datetime.now().date(),
|
||||
"tags": ["adversary-cooperation", "cyberthreats", "supply-chain-vulnerabilities", "economic-competition", "national-security", "data-privacy"],
|
||||
"metadata": [
|
||||
Triple(
|
||||
s = Uri("https://trustgraph.ai/doc/annual-threat-assessment-us-dni-march-2025"),
|
||||
p = Uri("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"),
|
||||
o = Uri("https://schema.org/DigitalDocument")
|
||||
),
|
||||
Triple(
|
||||
s = Uri("https://trustgraph.ai/doc/annual-threat-assessment-us-dni-march-2025"),
|
||||
p = Uri("http://www.w3.org/2000/01/rdf-schema#label"),
|
||||
o = Literal("Annual threat assessment of the U.S. intelligence community - March 2025"),
|
||||
),
|
||||
Triple(
|
||||
s = Uri("https://trustgraph.ai/doc/annual-threat-assessment-us-dni-march-2025"),
|
||||
p = Uri("https://schema.org/name"),
|
||||
o = Literal("Annual threat assessment of the U.S. intelligence community - March 2025"),
|
||||
),
|
||||
Triple(
|
||||
s = Uri("https://trustgraph.ai/doc/annual-threat-assessment-us-dni-march-2025"),
|
||||
p = Uri("https://schema.org/description"),
|
||||
o = Literal("The report reflects the collective insights of the Intelligence Community (IC), which is committed to providing the nuanced, independent, and unvarnished intelligence that policymakers, warfighters, and domestic law enforcement personnel need to protect American lives and America’s interests anywhere in the world."),
|
||||
),
|
||||
Triple(
|
||||
s = Uri("https://trustgraph.ai/doc/annual-threat-assessment-us-dni-march-2025"),
|
||||
p = Uri("https://schema.org/copyrightNotice"),
|
||||
o = Literal("Not copyright")
|
||||
),
|
||||
Triple(
|
||||
s = Uri("https://trustgraph.ai/doc/annual-threat-assessment-us-dni-march-2025"),
|
||||
p = Uri("https://schema.org/copyrightHolder"),
|
||||
o = Literal("US Government")
|
||||
),
|
||||
Triple(
|
||||
s = Uri("https://trustgraph.ai/doc/annual-threat-assessment-us-dni-march-2025"),
|
||||
p = Uri("https://schema.org/copyrightYear"),
|
||||
o = Literal("2025")
|
||||
),
|
||||
Triple(
|
||||
s = Uri("https://trustgraph.ai/doc/annual-threat-assessment-us-dni-march-2025"),
|
||||
p = Uri("https://schema.org/keywords"),
|
||||
o = Literal("adversary-cooperation")
|
||||
),
|
||||
Triple(
|
||||
s = Uri("https://trustgraph.ai/doc/annual-threat-assessment-us-dni-march-2025"),
|
||||
p = Uri("https://schema.org/keywords"),
|
||||
o = Literal("cyberthreats")
|
||||
),
|
||||
Triple(
|
||||
s = Uri("https://trustgraph.ai/doc/annual-threat-assessment-us-dni-march-2025"),
|
||||
p = Uri("https://schema.org/keywords"),
|
||||
o = Literal("supply-chain-vulnerabilities")
|
||||
),
|
||||
Triple(
|
||||
s = Uri("https://trustgraph.ai/doc/annual-threat-assessment-us-dni-march-2025"),
|
||||
p = Uri("https://schema.org/keywords"),
|
||||
o = Literal("economic-competition")
|
||||
),
|
||||
Triple(
|
||||
s = Uri("https://trustgraph.ai/doc/annual-threat-assessment-us-dni-march-2025"),
|
||||
p = Uri("https://schema.org/keywords"),
|
||||
o = Literal("national-security")
|
||||
),
|
||||
Triple(
|
||||
s = Uri("https://trustgraph.ai/doc/annual-threat-assessment-us-dni-march-2025"),
|
||||
p = Uri("https://schema.org/publication"),
|
||||
o = Uri("https://trustgraph.ai/pubev/0f1cfbe2-ce64-403b-8327-799aa8ba3cec")
|
||||
),
|
||||
Triple(
|
||||
s = Uri("https://trustgraph.ai/pubev/0f1cfbe2-ce64-403b-8327-799aa8ba3cec"),
|
||||
p = Uri("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"),
|
||||
o = Uri("https://schema.org/PublicationEvent")
|
||||
),
|
||||
Triple(
|
||||
s = Uri("https://trustgraph.ai/pubev/0f1cfbe2-ce64-403b-8327-799aa8ba3cec"),
|
||||
p = Uri("https://schema.org/description"),
|
||||
o = Literal("Published by the Director of National Intelligence (DNI)"),
|
||||
),
|
||||
Triple(
|
||||
s = Uri("https://trustgraph.ai/pubev/0f1cfbe2-ce64-403b-8327-799aa8ba3cec"),
|
||||
p = Uri("https://schema.org/publishedBy"),
|
||||
o = Uri("https://trustgraph.ai/org/us-gov-dni")
|
||||
),
|
||||
Triple(
|
||||
s = Uri("https://trustgraph.ai/org/us-gov-dni"),
|
||||
p = Uri("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"),
|
||||
o = Uri("https://schema.org/Organization")
|
||||
),
|
||||
Triple(
|
||||
s = Uri("https://trustgraph.ai/org/us-gov-dni"),
|
||||
p = Uri("http://www.w3.org/2000/01/rdf-schema#label"),
|
||||
o = Literal("The Director of National Intelligence")
|
||||
),
|
||||
Triple(
|
||||
s = Uri("https://trustgraph.ai/org/us-gov-dni"),
|
||||
p = Uri("https://schema.org/name"),
|
||||
o = Literal("The Director of National Intelligence")
|
||||
),
|
||||
Triple(
|
||||
s = Uri("https://trustgraph.ai/pubev/0f1cfbe2-ce64-403b-8327-799aa8ba3cec"),
|
||||
p = Uri("https://schema.org/startDate"),
|
||||
o = Literal("2025-03-18")
|
||||
),
|
||||
Triple(
|
||||
s = Uri("https://trustgraph.ai/pubev/0f1cfbe2-ce64-403b-8327-799aa8ba3cec"),
|
||||
p = Uri("https://schema.org/endDate"),
|
||||
o = Literal("2025-03-18")
|
||||
),
|
||||
Triple(
|
||||
s = Uri("https://trustgraph.ai/doc/annual-threat-assessment-us-dni-march-2025"),
|
||||
p = Uri("https://schema.org/url"),
|
||||
o = Uri("https://www.dni.gov/index.php/newsroom/reports-publications/reports-publications-2025/4058-2025-annual-threat-assessment")
|
||||
)
|
||||
]
|
||||
},
|
||||
def get_data_path():
|
||||
return resources.files(SAMPLE_DOCS_PACKAGE)
|
||||
|
||||
{
|
||||
"id": "https://trustgraph.ai/doc/intelligence-and-state",
|
||||
"title": "The Role of Intelligence and State Policies in International Security",
|
||||
"comments": "A volume by Mehmet Emin Erendor, published by Cambridge Scholars Publishing (2021). It is well-known that the understanding of security has changed since the end of the Cold War. This, in turn, has impacted the characteristics of intelligence, as states have needed to improve their security policies with new intelligence tactics. This volume investigates this new state of play in the international arena.",
|
||||
"url": "https://www.cambridgescholars.com/resources/pdfs/978-1-5275-7604-9-sample.pdf",
|
||||
"kind": "application/pdf",
|
||||
"date": "2025-05-06",
|
||||
"tags": ["intelligence", "state-policy", "international-security", "national-security", "geopolitics", "foreign-policy", "security-studies", "military", "crime"],
|
||||
"metadata": [
|
||||
Triple(
|
||||
s = Uri("https://trustgraph.ai/doc/intelligence-and-state"),
|
||||
p = Uri("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"),
|
||||
o = Uri("https://schema.org/Book")
|
||||
),
|
||||
Triple(
|
||||
s = Uri("https://trustgraph.ai/doc/intelligence-and-state"),
|
||||
p = Uri("http://www.w3.org/2000/01/rdf-schema#label"),
|
||||
o = Literal("The Role of Intelligence and State Policies in International Security")
|
||||
),
|
||||
Triple(
|
||||
s = Uri("https://trustgraph.ai/doc/intelligence-and-state"),
|
||||
p = Uri("https://schema.org/name"),
|
||||
o = Literal("The Role of Intelligence and State Policies in International Security")
|
||||
),
|
||||
Triple(
|
||||
s = Uri("https://trustgraph.ai/doc/intelligence-and-state"),
|
||||
p = Uri("https://schema.org/description"),
|
||||
o = Literal("A volume by Mehmet Emin Erendor. It is well-known that the understanding of security has changed since the end of the Cold War. This, in turn, has impacted the characteristics of intelligence, as states have needed to improve their security policies with new intelligence tactics. This volume investigates this new state of play in the international arena.")
|
||||
),
|
||||
Triple(
|
||||
s = Uri("https://trustgraph.ai/doc/intelligence-and-state"),
|
||||
p = Uri("https://schema.org/author"),
|
||||
o = Literal("Mehmet Emin Erendor")
|
||||
),
|
||||
Triple(
|
||||
s = Uri("https://trustgraph.ai/doc/intelligence-and-state"),
|
||||
p = Uri("https://schema.org/isbn"),
|
||||
o = Literal("9781527576049")
|
||||
),
|
||||
Triple(
|
||||
s = Uri("https://trustgraph.ai/doc/intelligence-and-state"),
|
||||
p = Uri("https://schema.org/numberOfPages"),
|
||||
o = Literal("220")
|
||||
),
|
||||
Triple(
|
||||
s = Uri("https://trustgraph.ai/doc/intelligence-and-state"),
|
||||
p = Uri("https://schema.org/keywords"),
|
||||
o = Literal("intelligence")
|
||||
),
|
||||
Triple(
|
||||
s = Uri("https://trustgraph.ai/doc/intelligence-and-state"),
|
||||
p = Uri("https://schema.org/keywords"),
|
||||
o = Literal("state policy")
|
||||
),
|
||||
Triple(
|
||||
s = Uri("https://trustgraph.ai/doc/intelligence-and-state"),
|
||||
p = Uri("https://schema.org/keywords"),
|
||||
o = Literal("international security")
|
||||
),
|
||||
Triple(
|
||||
s = Uri("https://trustgraph.ai/doc/intelligence-and-state"),
|
||||
p = Uri("https://schema.org/keywords"),
|
||||
o = Literal("national security")
|
||||
),
|
||||
Triple(
|
||||
s = Uri("https://trustgraph.ai/doc/intelligence-and-state"),
|
||||
p = Uri("https://schema.org/keywords"),
|
||||
o = Literal("geopolitics")
|
||||
),
|
||||
Triple(
|
||||
s = Uri("https://trustgraph.ai/doc/intelligence-and-state"),
|
||||
p = Uri("https://schema.org/publication"),
|
||||
o = Uri("https://trustgraph.ai/pubev/b4352222-5da0-480d-a00f-f7342fe77862")
|
||||
),
|
||||
Triple(
|
||||
s = Uri("https://trustgraph.ai/pubev/b4352222-5da0-480d-a00f-f7342fe77862"),
|
||||
p = Uri("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"),
|
||||
o = Uri("https://schema.org/PublicationEvent")
|
||||
),
|
||||
Triple(
|
||||
s = Uri("https://trustgraph.ai/pubev/b4352222-5da0-480d-a00f-f7342fe77862"),
|
||||
p = Uri("https://schema.org/description"),
|
||||
o = Literal("Published by Cambridge Scholars Publishing on October 28, 2021.")
|
||||
),
|
||||
Triple(
|
||||
s = Uri("https://trustgraph.ai/pubev/b4352222-5da0-480d-a00f-f7342fe77862"),
|
||||
p = Uri("https://schema.org/publishedBy"),
|
||||
o = Uri("https://trustgraph.ai/org/cambridge-scholars-publishing")
|
||||
),
|
||||
Triple(
|
||||
s = Uri("https://trustgraph.ai/org/cambridge-scholars-publishing"),
|
||||
p = Uri("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"),
|
||||
o = Uri("https://schema.org/Organization")
|
||||
),
|
||||
Triple(
|
||||
s = Uri("https://trustgraph.ai/org/cambridge-scholars-publishing"),
|
||||
p = Uri("http://www.w3.org/2000/01/rdf-schema#label"),
|
||||
o = Literal("Cambridge Scholars Publishing")
|
||||
),
|
||||
Triple(
|
||||
s = Uri("https://trustgraph.ai/org/cambridge-scholars-publishing"),
|
||||
p = Uri("https://schema.org/name"),
|
||||
o = Literal("Cambridge Scholars Publishing")
|
||||
),
|
||||
Triple(
|
||||
s = Uri("https://trustgraph.ai/pubev/b4352222-5da0-480d-a00f-f7342fe77862"),
|
||||
p = Uri("https://schema.org/startDate"),
|
||||
o = Literal("2021-10-28")
|
||||
),
|
||||
Triple(
|
||||
s = Uri("https://trustgraph.ai/doc/intelligence-and-state"),
|
||||
p = Uri("https://schema.org/url"),
|
||||
o = Uri("https://www.cambridgescholars.com/resources/pdfs/978-1-5275-7604-9-sample.pdf")
|
||||
)
|
||||
]
|
||||
},
|
||||
|
||||
{
|
||||
"id": "https://trustgraph.ai/doc/beyond-vigilant-state",
|
||||
"title": "Beyond the vigilant state: globalisation and intelligence",
|
||||
"comments": "This academic paper by Richard J. Aldrich examines the relationship between globalization and intelligence agencies, discussing how intelligence services have adapted to global changes in the post-Cold War era.",
|
||||
"url": "https://warwick.ac.uk/fac/soc/pais/people/aldrich/publications/beyond.pdf",
|
||||
"kind": "application/pdf",
|
||||
"date": datetime.datetime.now().date(),
|
||||
"tags": ["intelligence", "globalization", "security-studies", "surveillance", "international-relations", "post-cold-war"],
|
||||
"metadata": [
|
||||
Triple(
|
||||
s = Uri("https://trustgraph.ai/doc/beyond-vigilant-state"),
|
||||
p = Uri("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"),
|
||||
o = Uri("https://schema.org/ScholarlyArticle")
|
||||
),
|
||||
Triple(
|
||||
s = Uri("https://trustgraph.ai/doc/beyond-vigilant-state"),
|
||||
p = Uri("http://www.w3.org/2000/01/rdf-schema#label"),
|
||||
o = Literal("Beyond the vigilant state: globalisation and intelligence"),
|
||||
),
|
||||
Triple(
|
||||
s = Uri("https://trustgraph.ai/doc/beyond-vigilant-state"),
|
||||
p = Uri("https://schema.org/name"),
|
||||
o = Literal("Beyond the vigilant state: globalisation and intelligence"),
|
||||
),
|
||||
Triple(
|
||||
s = Uri("https://trustgraph.ai/doc/beyond-vigilant-state"),
|
||||
p = Uri("https://schema.org/description"),
|
||||
o = Literal("This academic paper by Richard J. Aldrich examines the relationship between globalization and intelligence agencies, discussing how intelligence services have adapted to global changes in the post-Cold War era."),
|
||||
),
|
||||
Triple(
|
||||
s = Uri("https://trustgraph.ai/doc/beyond-vigilant-state"),
|
||||
p = Uri("https://schema.org/copyrightNotice"),
|
||||
o = Literal("(c) British International Studies Association")
|
||||
),
|
||||
Triple(
|
||||
s = Uri("https://trustgraph.ai/doc/beyond-vigilant-state"),
|
||||
p = Uri("https://schema.org/copyrightHolder"),
|
||||
o = Literal("British International Studies Association")
|
||||
),
|
||||
Triple(
|
||||
s = Uri("https://trustgraph.ai/doc/beyond-vigilant-state"),
|
||||
p = Uri("https://schema.org/author"),
|
||||
o = Uri("https://trustgraph.ai/person/3a45f8c9-b7d1-42e5-8631-d9f82c4a0e22")
|
||||
),
|
||||
Triple(
|
||||
s = Uri("https://trustgraph.ai/person/3a45f8c9-b7d1-42e5-8631-d9f82c4a0e22"),
|
||||
p = Uri("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"),
|
||||
o = Uri("https://schema.org/Person")
|
||||
),
|
||||
Triple(
|
||||
s = Uri("https://trustgraph.ai/person/3a45f8c9-b7d1-42e5-8631-d9f82c4a0e22"),
|
||||
p = Uri("http://www.w3.org/2000/01/rdf-schema#label"),
|
||||
o = Literal("Richard J. Aldrich")
|
||||
),
|
||||
Triple(
|
||||
s = Uri("https://trustgraph.ai/person/3a45f8c9-b7d1-42e5-8631-d9f82c4a0e22"),
|
||||
p = Uri("https://schema.org/name"),
|
||||
o = Literal("Richard J. Aldrich")
|
||||
),
|
||||
Triple(
|
||||
s = Uri("https://trustgraph.ai/doc/beyond-vigilant-state"),
|
||||
p = Uri("https://schema.org/keywords"),
|
||||
o = Literal("intelligence")
|
||||
),
|
||||
Triple(
|
||||
s = Uri("https://trustgraph.ai/doc/beyond-vigilant-state"),
|
||||
p = Uri("https://schema.org/keywords"),
|
||||
o = Literal("globalisation")
|
||||
),
|
||||
Triple(
|
||||
s = Uri("https://trustgraph.ai/doc/beyond-vigilant-state"),
|
||||
p = Uri("https://schema.org/keywords"),
|
||||
o = Literal("security-studies")
|
||||
),
|
||||
Triple(
|
||||
s = Uri("https://trustgraph.ai/doc/beyond-vigilant-state"),
|
||||
p = Uri("https://schema.org/keywords"),
|
||||
o = Literal("surveillance")
|
||||
),
|
||||
Triple(
|
||||
s = Uri("https://trustgraph.ai/doc/beyond-vigilant-state"),
|
||||
p = Uri("https://schema.org/keywords"),
|
||||
o = Literal("international-relations")
|
||||
),
|
||||
Triple(
|
||||
s = Uri("https://trustgraph.ai/doc/beyond-vigilant-state"),
|
||||
p = Uri("https://schema.org/keywords"),
|
||||
o = Literal("post-cold-war")
|
||||
),
|
||||
Triple(
|
||||
s = Uri("https://trustgraph.ai/doc/beyond-vigilant-state"),
|
||||
p = Uri("https://schema.org/publication"),
|
||||
o = Uri("https://trustgraph.ai/pubev/75c83dfa-6b2e-4d89-bda1-c8e92f0e3410")
|
||||
),
|
||||
Triple(
|
||||
s = Uri("https://trustgraph.ai/pubev/75c83dfa-6b2e-4d89-bda1-c8e92f0e3410"),
|
||||
p = Uri("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"),
|
||||
o = Uri("https://schema.org/PublicationEvent")
|
||||
),
|
||||
Triple(
|
||||
s = Uri("https://trustgraph.ai/pubev/75c83dfa-6b2e-4d89-bda1-c8e92f0e3410"),
|
||||
p = Uri("https://schema.org/description"),
|
||||
o = Literal("Published in Review of International Studies"),
|
||||
),
|
||||
Triple(
|
||||
s = Uri("https://trustgraph.ai/pubev/75c83dfa-6b2e-4d89-bda1-c8e92f0e3410"),
|
||||
p = Uri("https://schema.org/publishedBy"),
|
||||
o = Uri("https://trustgraph.ai/org/british-international-studies-association")
|
||||
),
|
||||
Triple(
|
||||
s = Uri("https://trustgraph.ai/org/british-international-studies-association"),
|
||||
p = Uri("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"),
|
||||
o = Uri("https://schema.org/Organization")
|
||||
),
|
||||
Triple(
|
||||
s = Uri("https://trustgraph.ai/org/british-international-studies-association"),
|
||||
p = Uri("http://www.w3.org/2000/01/rdf-schema#label"),
|
||||
o = Literal("British International Studies Association")
|
||||
),
|
||||
Triple(
|
||||
s = Uri("https://trustgraph.ai/org/british-international-studies-association"),
|
||||
p = Uri("https://schema.org/name"),
|
||||
o = Literal("British International Studies Association")
|
||||
),
|
||||
Triple(
|
||||
s = Uri("https://trustgraph.ai/doc/beyond-vigilant-state"),
|
||||
p = Uri("https://schema.org/url"),
|
||||
o = Uri("https://warwick.ac.uk/fac/soc/pais/people/aldrich/publications/beyond.pdf")
|
||||
)
|
||||
]
|
||||
}
|
||||
def load_metadata():
|
||||
data_path = get_data_path()
|
||||
metadata_file = data_path / "metadata.json"
|
||||
return json.loads(metadata_file.read_text(encoding="utf-8"))
|
||||
|
||||
]
|
||||
|
||||
class Loader:
|
||||
def convert_value(v):
|
||||
if v["type"] == "uri":
|
||||
return Uri(v["value"])
|
||||
else:
|
||||
return Literal(v["value"])
|
||||
|
||||
def __init__(
|
||||
self, url, token=None, workspace="default",
|
||||
):
|
||||
|
||||
self.api = Api(url, token=token, workspace=workspace).library()
|
||||
def convert_metadata(metadata_json):
|
||||
triples = []
|
||||
for t in metadata_json:
|
||||
triples.append(Triple(
|
||||
s=convert_value(t["s"]),
|
||||
p=convert_value(t["p"]),
|
||||
o=convert_value(t["o"]),
|
||||
))
|
||||
return triples
|
||||
|
||||
def load(self, documents):
|
||||
|
||||
for doc in documents:
|
||||
self.load_doc(doc)
|
||||
def load_document(api, doc_entry, data_path):
|
||||
|
||||
def load_doc(self, doc):
|
||||
doc_id = doc_entry["id"]
|
||||
title = doc_entry["title"]
|
||||
filename = doc_entry["file"]
|
||||
|
||||
try:
|
||||
print(f" [{filename}] {title}")
|
||||
|
||||
print(doc["title"], ":")
|
||||
print(f" reading content...")
|
||||
content_file = data_path / filename
|
||||
content = content_file.read_bytes()
|
||||
|
||||
hid = hash(doc["url"])
|
||||
cache_file = f"doc-cache/{hid}"
|
||||
print(f" loading into TrustGraph ({len(content) // 1024}KB)...")
|
||||
metadata = convert_metadata(doc_entry["metadata"])
|
||||
|
||||
if os.path.isfile(cache_file):
|
||||
print(" (use cache file)")
|
||||
content = open(cache_file, "rb").read()
|
||||
else:
|
||||
print(" downloading...")
|
||||
resp = session.get(doc["url"])
|
||||
content = resp.content
|
||||
open(cache_file, "wb").write(content)
|
||||
print(" done.")
|
||||
api.add_document(
|
||||
id=doc_id,
|
||||
metadata=metadata,
|
||||
kind=doc_entry["kind"],
|
||||
title=title,
|
||||
comments=doc_entry["comments"],
|
||||
tags=doc_entry["tags"],
|
||||
document=content,
|
||||
)
|
||||
|
||||
print(" adding...")
|
||||
print(f" done.")
|
||||
|
||||
self.api.add_document(
|
||||
id=doc["id"], metadata=doc["metadata"],
|
||||
kind=doc["kind"], title=doc["title"],
|
||||
comments=doc["comments"], tags=doc["tags"],
|
||||
document=content,
|
||||
)
|
||||
|
||||
print(" successful.")
|
||||
|
||||
except Exception as e:
|
||||
print("Failed: {str(e)}", flush=True)
|
||||
raise e
|
||||
|
||||
def main():
|
||||
|
||||
parser = argparse.ArgumentParser(
|
||||
prog='tg-add-library-document',
|
||||
prog='tg-load-sample-documents',
|
||||
description=__doc__,
|
||||
)
|
||||
|
||||
|
|
@ -729,18 +102,27 @@ def main():
|
|||
|
||||
try:
|
||||
|
||||
p = Loader(
|
||||
url=args.url,
|
||||
token=args.token,
|
||||
workspace=args.workspace,
|
||||
)
|
||||
api = Api(args.url, token=args.token, workspace=args.workspace)
|
||||
library = api.library()
|
||||
|
||||
p.load(documents)
|
||||
data_path = get_data_path()
|
||||
documents = load_metadata()
|
||||
|
||||
print(f"Loading {len(documents)} sample document(s)...\n")
|
||||
|
||||
for doc in documents:
|
||||
try:
|
||||
load_document(library, doc, data_path)
|
||||
except Exception as e:
|
||||
print(f" FAILED: {e}")
|
||||
print()
|
||||
|
||||
print("Complete.")
|
||||
|
||||
except Exception as e:
|
||||
|
||||
print("Exception:", e, flush=True)
|
||||
print(f"Exception: {e}")
|
||||
raise e
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
main()
|
||||
|
|
|
|||
119
trustgraph-cli/trustgraph/cli/put_de_core.py
Normal file
119
trustgraph-cli/trustgraph/cli/put_de_core.py
Normal file
|
|
@ -0,0 +1,119 @@
|
|||
"""
|
||||
Puts a document embeddings core into the knowledge manager via the API
|
||||
socket.
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import os
|
||||
import msgpack
|
||||
|
||||
from trustgraph.api import Api
|
||||
|
||||
default_url = os.getenv("TRUSTGRAPH_URL", 'http://localhost:8088/')
|
||||
default_token = os.getenv("TRUSTGRAPH_TOKEN", None)
|
||||
default_workspace = os.getenv("TRUSTGRAPH_WORKSPACE", "default")
|
||||
|
||||
def read_message(unpacked, id):
|
||||
|
||||
if unpacked[0] == "de":
|
||||
msg = unpacked[1]
|
||||
return {
|
||||
"metadata": {
|
||||
"id": id,
|
||||
"root": msg["m"]["m"],
|
||||
"collection": "default",
|
||||
},
|
||||
"chunks": [
|
||||
{
|
||||
"chunk_id": ch["i"],
|
||||
"vector": ch["v"],
|
||||
}
|
||||
for ch in msg["c"]
|
||||
],
|
||||
}
|
||||
else:
|
||||
raise RuntimeError("Unexpected message type", unpacked[0])
|
||||
|
||||
def put(url, workspace, id, input, token=None):
|
||||
|
||||
api = Api(url=url, token=token, workspace=workspace)
|
||||
socket = api.socket()
|
||||
|
||||
try:
|
||||
de = 0
|
||||
|
||||
with open(input, "rb") as f:
|
||||
|
||||
unpacker = msgpack.Unpacker(f, raw=False)
|
||||
|
||||
while True:
|
||||
|
||||
try:
|
||||
unpacked = unpacker.unpack()
|
||||
except msgpack.OutOfData:
|
||||
break
|
||||
|
||||
msg = read_message(unpacked, id)
|
||||
de += 1
|
||||
socket.put_de_core(id, document_embeddings=msg)
|
||||
|
||||
print(f"Put: {de} document embeddings messages.")
|
||||
|
||||
finally:
|
||||
socket.close()
|
||||
|
||||
def main():
|
||||
|
||||
parser = argparse.ArgumentParser(
|
||||
prog='tg-put-de-core',
|
||||
description=__doc__,
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
'-u', '--url',
|
||||
default=default_url,
|
||||
help=f'API URL (default: {default_url})',
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
'-w', '--workspace',
|
||||
default=default_workspace,
|
||||
help=f'Workspace (default: {default_workspace})',
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
'--id', '--identifier',
|
||||
required=True,
|
||||
help=f'Document embeddings core ID',
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
'-i', '--input',
|
||||
required=True,
|
||||
help=f'Input file'
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
'-t', '--token',
|
||||
default=default_token,
|
||||
help='Authentication token (default: $TRUSTGRAPH_TOKEN)',
|
||||
)
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
try:
|
||||
|
||||
put(
|
||||
url=args.url,
|
||||
workspace=args.workspace,
|
||||
id=args.id,
|
||||
input=args.input,
|
||||
token=args.token,
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
|
||||
print("Exception:", e, flush=True)
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
|
@ -4,13 +4,11 @@ Puts a knowledge core into the knowledge manager via the API socket.
|
|||
|
||||
import argparse
|
||||
import os
|
||||
import uuid
|
||||
import asyncio
|
||||
import json
|
||||
from websockets.asyncio.client import connect
|
||||
import msgpack
|
||||
|
||||
default_url = os.getenv("TRUSTGRAPH_URL", 'ws://localhost:8088/')
|
||||
from trustgraph.api import Api
|
||||
|
||||
default_url = os.getenv("TRUSTGRAPH_URL", 'http://localhost:8088/')
|
||||
default_token = os.getenv("TRUSTGRAPH_TOKEN", None)
|
||||
default_workspace = os.getenv("TRUSTGRAPH_WORKSPACE", "default")
|
||||
|
||||
|
|
@ -21,13 +19,13 @@ def read_message(unpacked, id):
|
|||
return "ge", {
|
||||
"metadata": {
|
||||
"id": id,
|
||||
"metadata": msg["m"]["m"],
|
||||
"collection": "default", # Not used?
|
||||
"root": msg["m"]["m"],
|
||||
"collection": "default",
|
||||
},
|
||||
"entities": [
|
||||
{
|
||||
"entity": ent["e"],
|
||||
"vectors": ent["v"],
|
||||
"vector": ent["v"],
|
||||
}
|
||||
for ent in msg["e"]
|
||||
],
|
||||
|
|
@ -37,26 +35,20 @@ def read_message(unpacked, id):
|
|||
return "t", {
|
||||
"metadata": {
|
||||
"id": id,
|
||||
"metadata": msg["m"]["m"],
|
||||
"collection": "default", # Not used by receiver?
|
||||
"root": msg["m"]["m"],
|
||||
"collection": "default",
|
||||
},
|
||||
"triples": msg["t"],
|
||||
}
|
||||
else:
|
||||
raise RuntimeError("Unpacked unexpected messsage type", unpacked[0])
|
||||
|
||||
async def put(url, workspace, id, input, token=None):
|
||||
def put(url, workspace, id, input, token=None):
|
||||
|
||||
if not url.endswith("/"):
|
||||
url += "/"
|
||||
|
||||
url = url + "api/v1/socket"
|
||||
|
||||
if token:
|
||||
url = f"{url}?token={token}"
|
||||
|
||||
async with connect(url) as ws:
|
||||
api = Api(url=url, token=token, workspace=workspace)
|
||||
socket = api.socket()
|
||||
|
||||
try:
|
||||
ge = 0
|
||||
t = 0
|
||||
|
||||
|
|
@ -68,69 +60,26 @@ async def put(url, workspace, id, input, token=None):
|
|||
|
||||
try:
|
||||
unpacked = unpacker.unpack()
|
||||
except:
|
||||
except msgpack.OutOfData:
|
||||
break
|
||||
|
||||
kind, msg = read_message(unpacked, id)
|
||||
|
||||
mid = str(uuid.uuid4())
|
||||
|
||||
if kind == "ge":
|
||||
|
||||
ge += 1
|
||||
|
||||
req = json.dumps({
|
||||
"id": mid,
|
||||
"workspace": workspace,
|
||||
"service": "knowledge",
|
||||
"request": {
|
||||
"operation": "put-kg-core",
|
||||
"workspace": workspace,
|
||||
"id": id,
|
||||
"graph-embeddings": msg
|
||||
}
|
||||
})
|
||||
socket.put_kg_core(id, graph_embeddings=msg)
|
||||
|
||||
elif kind == "t":
|
||||
|
||||
t += 1
|
||||
|
||||
req = json.dumps({
|
||||
"id": mid,
|
||||
"workspace": workspace,
|
||||
"service": "knowledge",
|
||||
"request": {
|
||||
"operation": "put-kg-core",
|
||||
"workspace": workspace,
|
||||
"id": id,
|
||||
"triples": msg
|
||||
}
|
||||
})
|
||||
socket.put_kg_core(id, triples=msg)
|
||||
|
||||
else:
|
||||
|
||||
raise RuntimeError("Unexpected message kind", kind)
|
||||
|
||||
await ws.send(req)
|
||||
|
||||
# Retry loop, wait for right response to come back
|
||||
while True:
|
||||
|
||||
msg = await ws.recv()
|
||||
msg = json.loads(msg)
|
||||
|
||||
if msg["id"] != mid:
|
||||
continue
|
||||
|
||||
if "response" in msg:
|
||||
if "error" in msg["response"]:
|
||||
raise RuntimeError(msg["response"]["error"])
|
||||
|
||||
break
|
||||
|
||||
print(f"Put: {t} triple, {ge} GE messages.")
|
||||
|
||||
await ws.close()
|
||||
finally:
|
||||
socket.close()
|
||||
|
||||
def main():
|
||||
|
||||
|
|
@ -173,14 +122,12 @@ def main():
|
|||
|
||||
try:
|
||||
|
||||
asyncio.run(
|
||||
put(
|
||||
url=args.url,
|
||||
workspace=args.workspace,
|
||||
id=args.id,
|
||||
input=args.input,
|
||||
token=args.token,
|
||||
)
|
||||
put(
|
||||
url=args.url,
|
||||
workspace=args.workspace,
|
||||
id=args.id,
|
||||
input=args.input,
|
||||
token=args.token,
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
|
|
|
|||
|
|
@ -0,0 +1,53 @@
|
|||
# The Brewing Traditions of Belgium
|
||||
|
||||
## Section 1: The Fermentation Foundations (Yeasts & Styles)
|
||||
|
||||
### Component A: The Ardennes Isolation Strain
|
||||
|
||||
- **Alternative Names:** Strain-71, The Trappist Yeast, Wallonian Culture.
|
||||
- **Origin:** Historically isolated in the rugged forests of the Ardennes region, shared via mutual trade agreements between various monastic brewing sites.
|
||||
- **Characteristics:** A high-attenuation, top-fermenting yeast culture (*Saccharomyces cerevisiae*) that thrives at elevated temperatures (20°C to 26°C). It produces heavy volatile esters reminiscent of clove, banana, and white pepper.
|
||||
- **Downstream Dependencies:** Essential biological engine required to brew The Westvleteren Quad and The Chimay Grand Réserve.
|
||||
|
||||
### Component B: Brettanomyces Bruxellensis
|
||||
|
||||
- **Alternative Names:** Wild Yeast, The Brussels Funk, Pajottenland Air.
|
||||
- **Origin:** Indigenous entirely to the Senne River Valley and the surrounding Pajottenland region southwest of Brussels. It cannot be cultured in a standard laboratory setting for primary fermentation; it must be caught ambiently from the atmosphere.
|
||||
- **Characteristics:** A slow-acting, wild yeast strain that consumes complex sugars that standard yeasts cannot digest. It introduces lactic acid and earthy, "barnyard" aroma characteristics over a 1 to 3-year aging cycle.
|
||||
- **Downstream Dependencies:** Biological prerequisite for Oude Geuze and The Flemish Red Ale.
|
||||
|
||||
## Section 2: Monastic & Trappist Hierarchies (Appellation Controlled)
|
||||
|
||||
### Beer 1: The Westvleteren Quad
|
||||
|
||||
- **Alternative Names:** Westvleteren 12, The Yellow Cap.
|
||||
- **Origin:** Brewed exclusively inside the walls of the Abbey of Saint-Sixtus in Westvleteren, Flanders. Holds the strict "Authentic Trappist Product" (ATP) legal designation.
|
||||
- **Ingredients:** The Ardennes Isolation Strain, local soft water, pale malt, dark liquid candi sugar (sucrose solution), and Northern Brewer hops.
|
||||
- **Process:** Primary fermentation utilizing the Ardennes strain for 7 days. Afterward, dark candi sugar is injected into the green beer to trigger a secondary fermentation stage. Crucially, the beer is bottled completely unfiltered with active yeast cells, requiring a mandatory 3-month cellar conditioning period to carbonate inside the bottle.
|
||||
|
||||
### Beer 2: The Chimay Grand Réserve
|
||||
|
||||
- **Alternative Names:** Chimay Blue, The Grande Réserve.
|
||||
- **Origin:** Brewed inside the Scourmont Abbey in Hainaut, Wallonia. Also carries the ATP designation.
|
||||
- **Ingredients:** The Ardennes Isolation Strain, estate-drawn well water, malted barley, Hallertau Mittelfrüh hops, and caramelized sugar.
|
||||
- **Process:** Follows a parallel fermentation profile to the Westvleteren Quad, using the exact same ancestral yeast strain but utilizing a different mineral profile in the water, resulting in a drier, more dark-fruit-forward profile.
|
||||
|
||||
## Section 3: Spontaneous & Sour Traditions (Wild Ecosystems)
|
||||
|
||||
### Beer 3: Oude Geuze
|
||||
|
||||
- **Alternative Names:** The Champagne of Belgium, Brussels Lambic.
|
||||
- **Origin:** The Pajottenland region. It is legally protected; it cannot be called "Oude Geuze" unless it is spontaneously fermented by the regional air.
|
||||
- **Ingredients:** Unmalted wheat (30%), Pale barley malt (70%), aged "suranné" hops (which lose their bitterness but retain preservative qualities), and ambient Brettanomyces Bruxellensis.
|
||||
- **Process:** Boiling wort is pumped into an open-air shallow vessel called a "coolship" overnight to cool down, absorbing wild microbes from the Senne Valley breeze.
|
||||
|
||||
**The Blending Protocol Dispute (Critical Logic Test):**
|
||||
|
||||
- **The Traditionalist Assembly:** A true Oude Geuze is a blend of 1-year-old young lambic (which provides active sugars) and 3-year-old vintage lambic (which provides complex sourness).
|
||||
- **The Industrial Controversy:** Some macro-breweries pasteurize the blend and inject artificial sweeteners (aspartame) to neutralize the sourness for commercial appeal. Traditionalists argue this strips the product of its geographic identity and violates the "Oude" (Old) designation.
|
||||
|
||||
### Beer 4: The Flemish Red Ale
|
||||
|
||||
- **Alternative Names:** Rodenbach style, West-Flemish Sour.
|
||||
- **Origin:** Roeselare, West Flanders.
|
||||
- **Ingredients:** Red-kilned malts, aged hops, standard top-fermenting yeast, and a secondary inoculation of Brettanomyces Bruxellensis.
|
||||
Binary file not shown.
Binary file not shown.
|
|
@ -0,0 +1,13 @@
|
|||
# The Domestic Canopy: A Unified Narrative of Companionship
|
||||
|
||||
The story of the human-animal bond begins not with a conscious decision to breed a companion, but with an ancient, mutual opportunism in the frozen wastes of the late Pleistocene. Long before the advent of agriculture, the gray wolf (*Canis lupus*) began to separate from its wild packs, drawn to the peripheral campfires of Eurasian hunter-gatherers. These ancestral canids, which would morph over millennia into the domesticated dog (*Canis lupus familiaris*), offered early humans an unparalleled early-warning system against apex predators and an invaluable partner in the persistence hunt. In return, humans provided a steady supply of megafauna marrow, cooked gristle, and proximity to warmth. This biological pact was so profound that it transcended mere utility, as evidenced by the late Paleolithic Natufian burial sites in the Levant, where human skeletons were interred with their hands resting gently upon the ribcages of wolf pups, marking the earliest archaeological signature of the transition from working tool to sentimental proxy.
|
||||
|
||||
As the ice sheets retreated and humanity anchored itself to the soil during the Neolithic Revolution, the nature of animal companionship shifted dramatically, giving rise to an entirely different ecological dynamic in the Fertile Crescent. The rise of grain storehouses in ancient Egypt attracted unprecedented swarms of rodents, creating a pristine ecological niche for the North African wildcat (*Felis lybica*). Unlike the highly structured social hierarchy of the wolf, the cat domesticated itself on terms of aloof independence, transitioning from a tolerated pest-control mechanism to a revered icon of divine protection. By the time of the Egyptian Middle Kingdom, cats were so thoroughly integrated into the domestic fabric that they were granted formal mourning rites; Roman historians like Herodotus noted that when a house cat died of natural causes, the entire human household would shave their eyebrows as a public manifestation of grief. These felines were often mummified using the same costly natron resins reserved for the nobility and entombed in specialized necropolises like Bubastis, dedicated to the feline-headed deity Bastet, effectively blending religious cosmology with domestic affection.
|
||||
|
||||
Parallel developments were unfolding across the globe, creating distinct regional pockets of companionship that would later collide through imperial trade. In the Andean highlands of South America, the Incas domesticated the guinea pig (*Cavia porcellus*), known locally as the cuy. While primarily a source of protein and a diagnostic tool used by folk healers to absorb illness from the sick, select lineages were kept by children as cherished house-dwellers. Meanwhile, in the imperial courts of Han Dynasty China, a parallel phenomenon saw the intensive breeding of the Pekingese dog. These small, flat-faced canids were selectively bred to resemble miniature lions — the mythical protectors of Buddhism — and were guarded so fiercely within the walls of the Forbidden City that stealing one was punishable by death. They lived a life of pampered luxury, carried in the sleeves of silk robes and tended to by dedicated eunuchs, establishing an early historical precedent where certain animal breeds functioned strictly as status symbols and manifestations of political sovereignty rather than utilitarian workers.
|
||||
|
||||
The classical antiquity of Europe further complicated this tapestry, as the Roman elite integrated exoticism into their definition of the domestic sphere. Roman matrons frequently kept ring-necked parakeets (*Psittacula krameri*) imported from the conquests of India, housing them in elaborate cages of ivory and silver, and teaching them to speak the name of the Emperor. Concurrently, the Roman fondness for the ferret (*Mustela furo*) emerged as a dual-purpose phenomenon; these mustelids were kept both to flush rabbits from agricultural burrows and as slinky, playful companions within the villa. This Roman domestic ecosystem was heavily documented by Pliny the Elder in his Natural History, where he noted that the elite often developed deep, seemingly irrational emotional attachments to their companion animals, including pet fish like the moray eel, which the orator Hortensius reportedly wept over when it died in his private ornamental pond.
|
||||
|
||||
The medieval period in Europe introduced a sharp class divide to the concept of the pet, often viewed through the suspicious lens of ecclesiastical authority. While the peasantry kept functional yard dogs and barn cats, the high nobility — particularly noblewomen and monastic figures — indulged in the keeping of lapdogs, such as the early Maltese, and refined birds of prey. These lapdogs were often criticized by conservative church theologians who argued that the excessive meat fed to pampered pets belonged in the mouths of the starving peasantry. Furthermore, during the height of the European witch trials, the domestic pet — particularly the black cat, the toad, or the ferret — was frequently demonized by inquisitors as a "familiar," a physical vessel housing a demonic spirit. This created a perilous cultural paradox where an animal could be viewed as a comforting hearth-companion in one household and an existential piece of heretical evidence in another.
|
||||
|
||||
The modern concept of pet-keeping as a universal consumer phenomenon crystallized during the Industrial Revolution and the rise of the Victorian middle class. As populations migrated from rural farms to dense urban centers, the severed connection to nature triggered a romanticized counter-movement. The Victorians elevated the domestic home into a moral sanctuary, and the pet was introduced as a pedagogical tool to teach children empathy, kindness, and middle-class domestic virtues. This era saw the birth of the commercial pet industry: standard kibble formulations were patented by James Spratt in the 1860s, dog shows like Crufts were established to formalize breed standards, and specialized pet cemeteries, like the one in London's Hyde Park, emerged to afford animals a dignified transition into the afterlife. The pet was no longer a working asset or an eccentric luxury of the aristocratic elite; it had become an institutionalized member of the nuclear family unit, setting the stage for the hyper-commodified, emotionally complex multi-billion dollar pet industry of the contemporary era.
|
||||
527
trustgraph-cli/trustgraph/cli/sample_documents/metadata.json
Normal file
527
trustgraph-cli/trustgraph/cli/sample_documents/metadata.json
Normal file
|
|
@ -0,0 +1,527 @@
|
|||
[
|
||||
{
|
||||
"id": "https://trustgraph.ai/doc/west-country-recipes",
|
||||
"title": "The Foundations of West Country Cooking",
|
||||
"comments": "A structured guide to traditional West Country recipes including clotted cream, scones, hedgerow jam, the cream tea, Cornish pasty, homity pie, and Devonshire fudge, with regional supply chain dependencies.",
|
||||
"file": "recipes.md",
|
||||
"kind": "text/markdown",
|
||||
"tags": ["cooking", "west-country", "devon", "cornwall", "recipes", "food-history"],
|
||||
"metadata": [
|
||||
{
|
||||
"s": {"type": "uri", "value": "https://trustgraph.ai/doc/west-country-recipes"},
|
||||
"p": {"type": "uri", "value": "http://www.w3.org/1999/02/22-rdf-syntax-ns#type"},
|
||||
"o": {"type": "uri", "value": "https://schema.org/DigitalDocument"}
|
||||
},
|
||||
{
|
||||
"s": {"type": "uri", "value": "https://trustgraph.ai/doc/west-country-recipes"},
|
||||
"p": {"type": "uri", "value": "http://www.w3.org/2000/01/rdf-schema#label"},
|
||||
"o": {"type": "literal", "value": "The Foundations of West Country Cooking"}
|
||||
},
|
||||
{
|
||||
"s": {"type": "uri", "value": "https://trustgraph.ai/doc/west-country-recipes"},
|
||||
"p": {"type": "uri", "value": "https://schema.org/name"},
|
||||
"o": {"type": "literal", "value": "The Foundations of West Country Cooking"}
|
||||
},
|
||||
{
|
||||
"s": {"type": "uri", "value": "https://trustgraph.ai/doc/west-country-recipes"},
|
||||
"p": {"type": "uri", "value": "https://schema.org/description"},
|
||||
"o": {"type": "literal", "value": "A structured guide to traditional West Country recipes including clotted cream, scones, hedgerow jam, the cream tea, Cornish pasty, homity pie, and Devonshire fudge, with regional supply chain dependencies."}
|
||||
},
|
||||
{
|
||||
"s": {"type": "uri", "value": "https://trustgraph.ai/doc/west-country-recipes"},
|
||||
"p": {"type": "uri", "value": "https://schema.org/copyrightNotice"},
|
||||
"o": {"type": "literal", "value": "Original content, public domain"}
|
||||
},
|
||||
{
|
||||
"s": {"type": "uri", "value": "https://trustgraph.ai/doc/west-country-recipes"},
|
||||
"p": {"type": "uri", "value": "https://schema.org/copyrightHolder"},
|
||||
"o": {"type": "literal", "value": "TrustGraph AI"}
|
||||
},
|
||||
{
|
||||
"s": {"type": "uri", "value": "https://trustgraph.ai/doc/west-country-recipes"},
|
||||
"p": {"type": "uri", "value": "https://schema.org/copyrightYear"},
|
||||
"o": {"type": "literal", "value": "2025"}
|
||||
},
|
||||
{
|
||||
"s": {"type": "uri", "value": "https://trustgraph.ai/doc/west-country-recipes"},
|
||||
"p": {"type": "uri", "value": "https://schema.org/keywords"},
|
||||
"o": {"type": "literal", "value": "cooking"}
|
||||
},
|
||||
{
|
||||
"s": {"type": "uri", "value": "https://trustgraph.ai/doc/west-country-recipes"},
|
||||
"p": {"type": "uri", "value": "https://schema.org/keywords"},
|
||||
"o": {"type": "literal", "value": "west-country"}
|
||||
},
|
||||
{
|
||||
"s": {"type": "uri", "value": "https://trustgraph.ai/doc/west-country-recipes"},
|
||||
"p": {"type": "uri", "value": "https://schema.org/keywords"},
|
||||
"o": {"type": "literal", "value": "devon"}
|
||||
},
|
||||
{
|
||||
"s": {"type": "uri", "value": "https://trustgraph.ai/doc/west-country-recipes"},
|
||||
"p": {"type": "uri", "value": "https://schema.org/keywords"},
|
||||
"o": {"type": "literal", "value": "cornwall"}
|
||||
},
|
||||
{
|
||||
"s": {"type": "uri", "value": "https://trustgraph.ai/doc/west-country-recipes"},
|
||||
"p": {"type": "uri", "value": "https://schema.org/keywords"},
|
||||
"o": {"type": "literal", "value": "recipes"}
|
||||
},
|
||||
{
|
||||
"s": {"type": "uri", "value": "https://trustgraph.ai/doc/west-country-recipes"},
|
||||
"p": {"type": "uri", "value": "https://schema.org/keywords"},
|
||||
"o": {"type": "literal", "value": "food-history"}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": "https://trustgraph.ai/doc/belgian-beer",
|
||||
"title": "The Brewing Traditions of Belgium",
|
||||
"comments": "An exploration of Belgian brewing traditions covering Trappist yeasts, wild fermentation, monastic beers including Westvleteren and Chimay, spontaneous sour traditions like Oude Geuze and Flemish Red Ale.",
|
||||
"file": "belgian-beer.md",
|
||||
"kind": "text/markdown",
|
||||
"tags": ["brewing", "belgium", "beer", "trappist", "fermentation", "food-history"],
|
||||
"metadata": [
|
||||
{
|
||||
"s": {"type": "uri", "value": "https://trustgraph.ai/doc/belgian-beer"},
|
||||
"p": {"type": "uri", "value": "http://www.w3.org/1999/02/22-rdf-syntax-ns#type"},
|
||||
"o": {"type": "uri", "value": "https://schema.org/DigitalDocument"}
|
||||
},
|
||||
{
|
||||
"s": {"type": "uri", "value": "https://trustgraph.ai/doc/belgian-beer"},
|
||||
"p": {"type": "uri", "value": "http://www.w3.org/2000/01/rdf-schema#label"},
|
||||
"o": {"type": "literal", "value": "The Brewing Traditions of Belgium"}
|
||||
},
|
||||
{
|
||||
"s": {"type": "uri", "value": "https://trustgraph.ai/doc/belgian-beer"},
|
||||
"p": {"type": "uri", "value": "https://schema.org/name"},
|
||||
"o": {"type": "literal", "value": "The Brewing Traditions of Belgium"}
|
||||
},
|
||||
{
|
||||
"s": {"type": "uri", "value": "https://trustgraph.ai/doc/belgian-beer"},
|
||||
"p": {"type": "uri", "value": "https://schema.org/description"},
|
||||
"o": {"type": "literal", "value": "An exploration of Belgian brewing traditions covering Trappist yeasts, wild fermentation, monastic beers including Westvleteren and Chimay, spontaneous sour traditions like Oude Geuze and Flemish Red Ale."}
|
||||
},
|
||||
{
|
||||
"s": {"type": "uri", "value": "https://trustgraph.ai/doc/belgian-beer"},
|
||||
"p": {"type": "uri", "value": "https://schema.org/copyrightNotice"},
|
||||
"o": {"type": "literal", "value": "Original content, public domain"}
|
||||
},
|
||||
{
|
||||
"s": {"type": "uri", "value": "https://trustgraph.ai/doc/belgian-beer"},
|
||||
"p": {"type": "uri", "value": "https://schema.org/copyrightHolder"},
|
||||
"o": {"type": "literal", "value": "TrustGraph AI"}
|
||||
},
|
||||
{
|
||||
"s": {"type": "uri", "value": "https://trustgraph.ai/doc/belgian-beer"},
|
||||
"p": {"type": "uri", "value": "https://schema.org/copyrightYear"},
|
||||
"o": {"type": "literal", "value": "2025"}
|
||||
},
|
||||
{
|
||||
"s": {"type": "uri", "value": "https://trustgraph.ai/doc/belgian-beer"},
|
||||
"p": {"type": "uri", "value": "https://schema.org/keywords"},
|
||||
"o": {"type": "literal", "value": "brewing"}
|
||||
},
|
||||
{
|
||||
"s": {"type": "uri", "value": "https://trustgraph.ai/doc/belgian-beer"},
|
||||
"p": {"type": "uri", "value": "https://schema.org/keywords"},
|
||||
"o": {"type": "literal", "value": "belgium"}
|
||||
},
|
||||
{
|
||||
"s": {"type": "uri", "value": "https://trustgraph.ai/doc/belgian-beer"},
|
||||
"p": {"type": "uri", "value": "https://schema.org/keywords"},
|
||||
"o": {"type": "literal", "value": "beer"}
|
||||
},
|
||||
{
|
||||
"s": {"type": "uri", "value": "https://trustgraph.ai/doc/belgian-beer"},
|
||||
"p": {"type": "uri", "value": "https://schema.org/keywords"},
|
||||
"o": {"type": "literal", "value": "trappist"}
|
||||
},
|
||||
{
|
||||
"s": {"type": "uri", "value": "https://trustgraph.ai/doc/belgian-beer"},
|
||||
"p": {"type": "uri", "value": "https://schema.org/keywords"},
|
||||
"o": {"type": "literal", "value": "fermentation"}
|
||||
},
|
||||
{
|
||||
"s": {"type": "uri", "value": "https://trustgraph.ai/doc/belgian-beer"},
|
||||
"p": {"type": "uri", "value": "https://schema.org/keywords"},
|
||||
"o": {"type": "literal", "value": "food-history"}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": "https://trustgraph.ai/doc/trade-routes-europe",
|
||||
"title": "Traditional Trade Routes of Pre-Modern Europe",
|
||||
"comments": "A structured overview of pre-modern European trade networks including the Hanseatic Baltic Route and Venetian Maritime Route, hub cities like Bruges, Lübeck, and Constantinople, specialized commodities, and geopolitical chokepoints.",
|
||||
"file": "trade-routes-europe.md",
|
||||
"kind": "text/markdown",
|
||||
"tags": ["trade", "medieval", "europe", "hanseatic-league", "venice", "economic-history"],
|
||||
"metadata": [
|
||||
{
|
||||
"s": {"type": "uri", "value": "https://trustgraph.ai/doc/trade-routes-europe"},
|
||||
"p": {"type": "uri", "value": "http://www.w3.org/1999/02/22-rdf-syntax-ns#type"},
|
||||
"o": {"type": "uri", "value": "https://schema.org/DigitalDocument"}
|
||||
},
|
||||
{
|
||||
"s": {"type": "uri", "value": "https://trustgraph.ai/doc/trade-routes-europe"},
|
||||
"p": {"type": "uri", "value": "http://www.w3.org/2000/01/rdf-schema#label"},
|
||||
"o": {"type": "literal", "value": "Traditional Trade Routes of Pre-Modern Europe"}
|
||||
},
|
||||
{
|
||||
"s": {"type": "uri", "value": "https://trustgraph.ai/doc/trade-routes-europe"},
|
||||
"p": {"type": "uri", "value": "https://schema.org/name"},
|
||||
"o": {"type": "literal", "value": "Traditional Trade Routes of Pre-Modern Europe"}
|
||||
},
|
||||
{
|
||||
"s": {"type": "uri", "value": "https://trustgraph.ai/doc/trade-routes-europe"},
|
||||
"p": {"type": "uri", "value": "https://schema.org/description"},
|
||||
"o": {"type": "literal", "value": "A structured overview of pre-modern European trade networks including the Hanseatic Baltic Route and Venetian Maritime Route, hub cities like Bruges, Lübeck, and Constantinople, specialized commodities, and geopolitical chokepoints."}
|
||||
},
|
||||
{
|
||||
"s": {"type": "uri", "value": "https://trustgraph.ai/doc/trade-routes-europe"},
|
||||
"p": {"type": "uri", "value": "https://schema.org/copyrightNotice"},
|
||||
"o": {"type": "literal", "value": "Original content, public domain"}
|
||||
},
|
||||
{
|
||||
"s": {"type": "uri", "value": "https://trustgraph.ai/doc/trade-routes-europe"},
|
||||
"p": {"type": "uri", "value": "https://schema.org/copyrightHolder"},
|
||||
"o": {"type": "literal", "value": "TrustGraph AI"}
|
||||
},
|
||||
{
|
||||
"s": {"type": "uri", "value": "https://trustgraph.ai/doc/trade-routes-europe"},
|
||||
"p": {"type": "uri", "value": "https://schema.org/copyrightYear"},
|
||||
"o": {"type": "literal", "value": "2025"}
|
||||
},
|
||||
{
|
||||
"s": {"type": "uri", "value": "https://trustgraph.ai/doc/trade-routes-europe"},
|
||||
"p": {"type": "uri", "value": "https://schema.org/keywords"},
|
||||
"o": {"type": "literal", "value": "trade"}
|
||||
},
|
||||
{
|
||||
"s": {"type": "uri", "value": "https://trustgraph.ai/doc/trade-routes-europe"},
|
||||
"p": {"type": "uri", "value": "https://schema.org/keywords"},
|
||||
"o": {"type": "literal", "value": "medieval"}
|
||||
},
|
||||
{
|
||||
"s": {"type": "uri", "value": "https://trustgraph.ai/doc/trade-routes-europe"},
|
||||
"p": {"type": "uri", "value": "https://schema.org/keywords"},
|
||||
"o": {"type": "literal", "value": "europe"}
|
||||
},
|
||||
{
|
||||
"s": {"type": "uri", "value": "https://trustgraph.ai/doc/trade-routes-europe"},
|
||||
"p": {"type": "uri", "value": "https://schema.org/keywords"},
|
||||
"o": {"type": "literal", "value": "hanseatic-league"}
|
||||
},
|
||||
{
|
||||
"s": {"type": "uri", "value": "https://trustgraph.ai/doc/trade-routes-europe"},
|
||||
"p": {"type": "uri", "value": "https://schema.org/keywords"},
|
||||
"o": {"type": "literal", "value": "venice"}
|
||||
},
|
||||
{
|
||||
"s": {"type": "uri", "value": "https://trustgraph.ai/doc/trade-routes-europe"},
|
||||
"p": {"type": "uri", "value": "https://schema.org/keywords"},
|
||||
"o": {"type": "literal", "value": "economic-history"}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": "https://trustgraph.ai/doc/corporate-scandals",
|
||||
"title": "Global Corporate Fraud & Governance Failures",
|
||||
"comments": "Detailed analysis of major corporate fraud cases including Enron and Wirecard, covering the financial engineering mechanisms, key executives, audit failures, whistleblowers, and collapse sequences.",
|
||||
"file": "corporate-scandals.pdf",
|
||||
"kind": "application/pdf",
|
||||
"tags": ["corporate-fraud", "enron", "wirecard", "accounting", "governance", "financial-crime"],
|
||||
"metadata": [
|
||||
{
|
||||
"s": {"type": "uri", "value": "https://trustgraph.ai/doc/corporate-scandals"},
|
||||
"p": {"type": "uri", "value": "http://www.w3.org/1999/02/22-rdf-syntax-ns#type"},
|
||||
"o": {"type": "uri", "value": "https://schema.org/DigitalDocument"}
|
||||
},
|
||||
{
|
||||
"s": {"type": "uri", "value": "https://trustgraph.ai/doc/corporate-scandals"},
|
||||
"p": {"type": "uri", "value": "http://www.w3.org/2000/01/rdf-schema#label"},
|
||||
"o": {"type": "literal", "value": "Global Corporate Fraud & Governance Failures"}
|
||||
},
|
||||
{
|
||||
"s": {"type": "uri", "value": "https://trustgraph.ai/doc/corporate-scandals"},
|
||||
"p": {"type": "uri", "value": "https://schema.org/name"},
|
||||
"o": {"type": "literal", "value": "Global Corporate Fraud & Governance Failures"}
|
||||
},
|
||||
{
|
||||
"s": {"type": "uri", "value": "https://trustgraph.ai/doc/corporate-scandals"},
|
||||
"p": {"type": "uri", "value": "https://schema.org/description"},
|
||||
"o": {"type": "literal", "value": "Detailed analysis of major corporate fraud cases including Enron and Wirecard, covering the financial engineering mechanisms, key executives, audit failures, whistleblowers, and collapse sequences."}
|
||||
},
|
||||
{
|
||||
"s": {"type": "uri", "value": "https://trustgraph.ai/doc/corporate-scandals"},
|
||||
"p": {"type": "uri", "value": "https://schema.org/copyrightNotice"},
|
||||
"o": {"type": "literal", "value": "Original content, public domain"}
|
||||
},
|
||||
{
|
||||
"s": {"type": "uri", "value": "https://trustgraph.ai/doc/corporate-scandals"},
|
||||
"p": {"type": "uri", "value": "https://schema.org/copyrightHolder"},
|
||||
"o": {"type": "literal", "value": "TrustGraph AI"}
|
||||
},
|
||||
{
|
||||
"s": {"type": "uri", "value": "https://trustgraph.ai/doc/corporate-scandals"},
|
||||
"p": {"type": "uri", "value": "https://schema.org/copyrightYear"},
|
||||
"o": {"type": "literal", "value": "2025"}
|
||||
},
|
||||
{
|
||||
"s": {"type": "uri", "value": "https://trustgraph.ai/doc/corporate-scandals"},
|
||||
"p": {"type": "uri", "value": "https://schema.org/keywords"},
|
||||
"o": {"type": "literal", "value": "corporate-fraud"}
|
||||
},
|
||||
{
|
||||
"s": {"type": "uri", "value": "https://trustgraph.ai/doc/corporate-scandals"},
|
||||
"p": {"type": "uri", "value": "https://schema.org/keywords"},
|
||||
"o": {"type": "literal", "value": "enron"}
|
||||
},
|
||||
{
|
||||
"s": {"type": "uri", "value": "https://trustgraph.ai/doc/corporate-scandals"},
|
||||
"p": {"type": "uri", "value": "https://schema.org/keywords"},
|
||||
"o": {"type": "literal", "value": "wirecard"}
|
||||
},
|
||||
{
|
||||
"s": {"type": "uri", "value": "https://trustgraph.ai/doc/corporate-scandals"},
|
||||
"p": {"type": "uri", "value": "https://schema.org/keywords"},
|
||||
"o": {"type": "literal", "value": "accounting"}
|
||||
},
|
||||
{
|
||||
"s": {"type": "uri", "value": "https://trustgraph.ai/doc/corporate-scandals"},
|
||||
"p": {"type": "uri", "value": "https://schema.org/keywords"},
|
||||
"o": {"type": "literal", "value": "governance"}
|
||||
},
|
||||
{
|
||||
"s": {"type": "uri", "value": "https://trustgraph.ai/doc/corporate-scandals"},
|
||||
"p": {"type": "uri", "value": "https://schema.org/keywords"},
|
||||
"o": {"type": "literal", "value": "financial-crime"}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": "https://trustgraph.ai/doc/history-of-pets",
|
||||
"title": "The Domestic Canopy: A Unified Narrative of Companionship",
|
||||
"comments": "A narrative history of human-animal companionship from Pleistocene wolf domestication through Egyptian cat worship, Roman exotic pets, medieval class divides, to the Victorian birth of the commercial pet industry.",
|
||||
"file": "history-of-pets.md",
|
||||
"kind": "text/markdown",
|
||||
"tags": ["pets", "domestication", "animal-history", "cultural-history", "dogs", "cats"],
|
||||
"metadata": [
|
||||
{
|
||||
"s": {"type": "uri", "value": "https://trustgraph.ai/doc/history-of-pets"},
|
||||
"p": {"type": "uri", "value": "http://www.w3.org/1999/02/22-rdf-syntax-ns#type"},
|
||||
"o": {"type": "uri", "value": "https://schema.org/DigitalDocument"}
|
||||
},
|
||||
{
|
||||
"s": {"type": "uri", "value": "https://trustgraph.ai/doc/history-of-pets"},
|
||||
"p": {"type": "uri", "value": "http://www.w3.org/2000/01/rdf-schema#label"},
|
||||
"o": {"type": "literal", "value": "The Domestic Canopy: A Unified Narrative of Companionship"}
|
||||
},
|
||||
{
|
||||
"s": {"type": "uri", "value": "https://trustgraph.ai/doc/history-of-pets"},
|
||||
"p": {"type": "uri", "value": "https://schema.org/name"},
|
||||
"o": {"type": "literal", "value": "The Domestic Canopy: A Unified Narrative of Companionship"}
|
||||
},
|
||||
{
|
||||
"s": {"type": "uri", "value": "https://trustgraph.ai/doc/history-of-pets"},
|
||||
"p": {"type": "uri", "value": "https://schema.org/description"},
|
||||
"o": {"type": "literal", "value": "A narrative history of human-animal companionship from Pleistocene wolf domestication through Egyptian cat worship, Roman exotic pets, medieval class divides, to the Victorian birth of the commercial pet industry."}
|
||||
},
|
||||
{
|
||||
"s": {"type": "uri", "value": "https://trustgraph.ai/doc/history-of-pets"},
|
||||
"p": {"type": "uri", "value": "https://schema.org/copyrightNotice"},
|
||||
"o": {"type": "literal", "value": "Original content, public domain"}
|
||||
},
|
||||
{
|
||||
"s": {"type": "uri", "value": "https://trustgraph.ai/doc/history-of-pets"},
|
||||
"p": {"type": "uri", "value": "https://schema.org/copyrightHolder"},
|
||||
"o": {"type": "literal", "value": "TrustGraph AI"}
|
||||
},
|
||||
{
|
||||
"s": {"type": "uri", "value": "https://trustgraph.ai/doc/history-of-pets"},
|
||||
"p": {"type": "uri", "value": "https://schema.org/copyrightYear"},
|
||||
"o": {"type": "literal", "value": "2025"}
|
||||
},
|
||||
{
|
||||
"s": {"type": "uri", "value": "https://trustgraph.ai/doc/history-of-pets"},
|
||||
"p": {"type": "uri", "value": "https://schema.org/keywords"},
|
||||
"o": {"type": "literal", "value": "pets"}
|
||||
},
|
||||
{
|
||||
"s": {"type": "uri", "value": "https://trustgraph.ai/doc/history-of-pets"},
|
||||
"p": {"type": "uri", "value": "https://schema.org/keywords"},
|
||||
"o": {"type": "literal", "value": "domestication"}
|
||||
},
|
||||
{
|
||||
"s": {"type": "uri", "value": "https://trustgraph.ai/doc/history-of-pets"},
|
||||
"p": {"type": "uri", "value": "https://schema.org/keywords"},
|
||||
"o": {"type": "literal", "value": "animal-history"}
|
||||
},
|
||||
{
|
||||
"s": {"type": "uri", "value": "https://trustgraph.ai/doc/history-of-pets"},
|
||||
"p": {"type": "uri", "value": "https://schema.org/keywords"},
|
||||
"o": {"type": "literal", "value": "cultural-history"}
|
||||
},
|
||||
{
|
||||
"s": {"type": "uri", "value": "https://trustgraph.ai/doc/history-of-pets"},
|
||||
"p": {"type": "uri", "value": "https://schema.org/keywords"},
|
||||
"o": {"type": "literal", "value": "dogs"}
|
||||
},
|
||||
{
|
||||
"s": {"type": "uri", "value": "https://trustgraph.ai/doc/history-of-pets"},
|
||||
"p": {"type": "uri", "value": "https://schema.org/keywords"},
|
||||
"o": {"type": "literal", "value": "cats"}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": "https://trustgraph.ai/doc/mil-fortifications-america-19th-c",
|
||||
"title": "Military Fortifications in 19th Century America",
|
||||
"comments": "The evolution of American military fortification from the Third System masonry forts through Civil War obsolescence to earthwork defenses, plus the parallel development of frontier forts for westward expansion.",
|
||||
"file": "mil-fortifications-america-19th-c.md",
|
||||
"kind": "text/markdown",
|
||||
"tags": ["military-history", "fortifications", "civil-war", "engineering", "american-history", "coastal-defense"],
|
||||
"metadata": [
|
||||
{
|
||||
"s": {"type": "uri", "value": "https://trustgraph.ai/doc/mil-fortifications-america-19th-c"},
|
||||
"p": {"type": "uri", "value": "http://www.w3.org/1999/02/22-rdf-syntax-ns#type"},
|
||||
"o": {"type": "uri", "value": "https://schema.org/DigitalDocument"}
|
||||
},
|
||||
{
|
||||
"s": {"type": "uri", "value": "https://trustgraph.ai/doc/mil-fortifications-america-19th-c"},
|
||||
"p": {"type": "uri", "value": "http://www.w3.org/2000/01/rdf-schema#label"},
|
||||
"o": {"type": "literal", "value": "Military Fortifications in 19th Century America"}
|
||||
},
|
||||
{
|
||||
"s": {"type": "uri", "value": "https://trustgraph.ai/doc/mil-fortifications-america-19th-c"},
|
||||
"p": {"type": "uri", "value": "https://schema.org/name"},
|
||||
"o": {"type": "literal", "value": "Military Fortifications in 19th Century America"}
|
||||
},
|
||||
{
|
||||
"s": {"type": "uri", "value": "https://trustgraph.ai/doc/mil-fortifications-america-19th-c"},
|
||||
"p": {"type": "uri", "value": "https://schema.org/description"},
|
||||
"o": {"type": "literal", "value": "The evolution of American military fortification from the Third System masonry forts through Civil War obsolescence to earthwork defenses, plus the parallel development of frontier forts for westward expansion."}
|
||||
},
|
||||
{
|
||||
"s": {"type": "uri", "value": "https://trustgraph.ai/doc/mil-fortifications-america-19th-c"},
|
||||
"p": {"type": "uri", "value": "https://schema.org/copyrightNotice"},
|
||||
"o": {"type": "literal", "value": "Original content, public domain"}
|
||||
},
|
||||
{
|
||||
"s": {"type": "uri", "value": "https://trustgraph.ai/doc/mil-fortifications-america-19th-c"},
|
||||
"p": {"type": "uri", "value": "https://schema.org/copyrightHolder"},
|
||||
"o": {"type": "literal", "value": "TrustGraph AI"}
|
||||
},
|
||||
{
|
||||
"s": {"type": "uri", "value": "https://trustgraph.ai/doc/mil-fortifications-america-19th-c"},
|
||||
"p": {"type": "uri", "value": "https://schema.org/copyrightYear"},
|
||||
"o": {"type": "literal", "value": "2025"}
|
||||
},
|
||||
{
|
||||
"s": {"type": "uri", "value": "https://trustgraph.ai/doc/mil-fortifications-america-19th-c"},
|
||||
"p": {"type": "uri", "value": "https://schema.org/keywords"},
|
||||
"o": {"type": "literal", "value": "military-history"}
|
||||
},
|
||||
{
|
||||
"s": {"type": "uri", "value": "https://trustgraph.ai/doc/mil-fortifications-america-19th-c"},
|
||||
"p": {"type": "uri", "value": "https://schema.org/keywords"},
|
||||
"o": {"type": "literal", "value": "fortifications"}
|
||||
},
|
||||
{
|
||||
"s": {"type": "uri", "value": "https://trustgraph.ai/doc/mil-fortifications-america-19th-c"},
|
||||
"p": {"type": "uri", "value": "https://schema.org/keywords"},
|
||||
"o": {"type": "literal", "value": "civil-war"}
|
||||
},
|
||||
{
|
||||
"s": {"type": "uri", "value": "https://trustgraph.ai/doc/mil-fortifications-america-19th-c"},
|
||||
"p": {"type": "uri", "value": "https://schema.org/keywords"},
|
||||
"o": {"type": "literal", "value": "engineering"}
|
||||
},
|
||||
{
|
||||
"s": {"type": "uri", "value": "https://trustgraph.ai/doc/mil-fortifications-america-19th-c"},
|
||||
"p": {"type": "uri", "value": "https://schema.org/keywords"},
|
||||
"o": {"type": "literal", "value": "american-history"}
|
||||
},
|
||||
{
|
||||
"s": {"type": "uri", "value": "https://trustgraph.ai/doc/mil-fortifications-america-19th-c"},
|
||||
"p": {"type": "uri", "value": "https://schema.org/keywords"},
|
||||
"o": {"type": "literal", "value": "coastal-defense"}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": "https://trustgraph.ai/doc/bronze-age-collapse",
|
||||
"title": "Echoes of the Void: The Late Bronze Age Collapse",
|
||||
"comments": "A synthesis of the Late Bronze Age Collapse (c. 1200-1150 BCE) covering the interconnected trade networks, the cascade of failure from drought through the Sea Peoples to internal revolt, archaeological evidence, and the transition to the Iron Age.",
|
||||
"file": "bronze-age-collapse.pdf",
|
||||
"kind": "application/pdf",
|
||||
"tags": ["bronze-age", "ancient-history", "archaeology", "mediterranean", "trade", "collapse"],
|
||||
"metadata": [
|
||||
{
|
||||
"s": {"type": "uri", "value": "https://trustgraph.ai/doc/bronze-age-collapse"},
|
||||
"p": {"type": "uri", "value": "http://www.w3.org/1999/02/22-rdf-syntax-ns#type"},
|
||||
"o": {"type": "uri", "value": "https://schema.org/DigitalDocument"}
|
||||
},
|
||||
{
|
||||
"s": {"type": "uri", "value": "https://trustgraph.ai/doc/bronze-age-collapse"},
|
||||
"p": {"type": "uri", "value": "http://www.w3.org/2000/01/rdf-schema#label"},
|
||||
"o": {"type": "literal", "value": "Echoes of the Void: The Late Bronze Age Collapse"}
|
||||
},
|
||||
{
|
||||
"s": {"type": "uri", "value": "https://trustgraph.ai/doc/bronze-age-collapse"},
|
||||
"p": {"type": "uri", "value": "https://schema.org/name"},
|
||||
"o": {"type": "literal", "value": "Echoes of the Void: The Late Bronze Age Collapse"}
|
||||
},
|
||||
{
|
||||
"s": {"type": "uri", "value": "https://trustgraph.ai/doc/bronze-age-collapse"},
|
||||
"p": {"type": "uri", "value": "https://schema.org/description"},
|
||||
"o": {"type": "literal", "value": "A synthesis of the Late Bronze Age Collapse (c. 1200-1150 BCE) covering the interconnected trade networks, the cascade of failure from drought through the Sea Peoples to internal revolt, archaeological evidence, and the transition to the Iron Age."}
|
||||
},
|
||||
{
|
||||
"s": {"type": "uri", "value": "https://trustgraph.ai/doc/bronze-age-collapse"},
|
||||
"p": {"type": "uri", "value": "https://schema.org/copyrightNotice"},
|
||||
"o": {"type": "literal", "value": "Original content, public domain"}
|
||||
},
|
||||
{
|
||||
"s": {"type": "uri", "value": "https://trustgraph.ai/doc/bronze-age-collapse"},
|
||||
"p": {"type": "uri", "value": "https://schema.org/copyrightHolder"},
|
||||
"o": {"type": "literal", "value": "TrustGraph AI"}
|
||||
},
|
||||
{
|
||||
"s": {"type": "uri", "value": "https://trustgraph.ai/doc/bronze-age-collapse"},
|
||||
"p": {"type": "uri", "value": "https://schema.org/copyrightYear"},
|
||||
"o": {"type": "literal", "value": "2025"}
|
||||
},
|
||||
{
|
||||
"s": {"type": "uri", "value": "https://trustgraph.ai/doc/bronze-age-collapse"},
|
||||
"p": {"type": "uri", "value": "https://schema.org/keywords"},
|
||||
"o": {"type": "literal", "value": "bronze-age"}
|
||||
},
|
||||
{
|
||||
"s": {"type": "uri", "value": "https://trustgraph.ai/doc/bronze-age-collapse"},
|
||||
"p": {"type": "uri", "value": "https://schema.org/keywords"},
|
||||
"o": {"type": "literal", "value": "ancient-history"}
|
||||
},
|
||||
{
|
||||
"s": {"type": "uri", "value": "https://trustgraph.ai/doc/bronze-age-collapse"},
|
||||
"p": {"type": "uri", "value": "https://schema.org/keywords"},
|
||||
"o": {"type": "literal", "value": "archaeology"}
|
||||
},
|
||||
{
|
||||
"s": {"type": "uri", "value": "https://trustgraph.ai/doc/bronze-age-collapse"},
|
||||
"p": {"type": "uri", "value": "https://schema.org/keywords"},
|
||||
"o": {"type": "literal", "value": "mediterranean"}
|
||||
},
|
||||
{
|
||||
"s": {"type": "uri", "value": "https://trustgraph.ai/doc/bronze-age-collapse"},
|
||||
"p": {"type": "uri", "value": "https://schema.org/keywords"},
|
||||
"o": {"type": "literal", "value": "trade"}
|
||||
},
|
||||
{
|
||||
"s": {"type": "uri", "value": "https://trustgraph.ai/doc/bronze-age-collapse"},
|
||||
"p": {"type": "uri", "value": "https://schema.org/keywords"},
|
||||
"o": {"type": "literal", "value": "collapse"}
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
|
|
@ -0,0 +1,11 @@
|
|||
# Military Fortifications in 19th Century America
|
||||
|
||||
The evolution of coastal and frontier defense across North America during the nineteenth century reflects a turbulent transition from traditional European masonry concepts to the brutal realities of industrialized warfare. At the dawn of the century, the young United States found its sprawling coastline dangerously exposed to the naval might of European empires. In response, the federal government embarked on a massive, highly centralized building program known as the Third System of fortifications. Orchestrated primarily by the newly formed U.S. Army Corps of Engineers and heavily influenced by French military engineer Simon Bernard, this system sought to seal off strategic harbors, naval shipyards, and commercial estuaries from maritime invasion. These fortifications were characterized by massive, multi-tiered masonry walls constructed of brick and stone, designed to mount several tiers of heavy cannon firing through vaulted casemates, thereby concentrating overwhelming firepower against hostile warships.
|
||||
|
||||
The architectural pinnacle of this philosophy was realized in structures like Fort Jefferson, situated on a remote key in the Dry Tortugas of the Gulf of Mexico, and Fort Sumter, guarding the entrance to Charleston Harbor. These fortresses were essentially artificial islands of masonry, featuring complex geometric designs — often pentagonal or hexagonal — to eliminate dead angles where an enemy could seek shelter from the garrison's fire. The walls were constructed using millions of locally fired bricks, backed by concrete and earth, creating a dense barrier meant to absorb the impact of smoothbore solid shot. Within these structures, the invention of the Totten shutter — a pair of iron doors that automatically closed over the cannon embrasure after firing — protected the artillerists from incoming musket fire and grape shot. For the first half of the century, these towering masonry sentinels were considered functionally impregnable to naval assault, as wooden warships could rarely sustain the prolonged, concentrated bombardment required to breach such thick brick facades.
|
||||
|
||||
However, the catastrophic vulnerabilities of the Third System were violently exposed during the American Civil War, rendering traditional masonry fortifications obsolete almost overnight. The catalyst for this military revolution was the introduction of the rifled cannon, most notably the James and Parrott rifles. Unlike the round, smoothbore cannonballs that shattered against brickwork with diminishing effect, elongated rifled projectiles spun like rifle bullets, striking the masonry with immense kinetic energy and drilling into the brickwork with a devastating, jackhammer-like effect. This paradigm shift was demonstrated at the Siege of Fort Pulaski near Savannah, Georgia, in April 1862. Union forces stationed on Tybee Island opened fire with rifled artillery from a distance of over a mile — a range previously considered entirely safe by the fort's defenders. Within thirty hours, the rifled shells tore through the massive brick walls of Fort Pulaski, breaching the solid masonry and threatening to ignite the fort's main powder magazine, forcing an immediate surrender.
|
||||
|
||||
This sudden obsolescence forced military engineers to radically re-evaluate defensive architecture, pivoting away from vertical masonry toward low-profile earthworks and subterranean engineering. It was discovered that simple mounds of loose sand and compacted earth absorbed the impact of rifled shells far better than rigid brick, as the displaced soil naturally filled in the craters left by explosions. This gave rise to formidable improvisations like Fort Fisher in North Carolina, often dubbed the "Malachite of America." Fort Fisher was an immense earthen stronghold constructed of sand face-traverses and underground bombproofs, which successfully withstood the largest naval bombardments of the war because the Union fleet's shells merely rearranged the sand rather than shattering a structural foundation. Consequently, post-Civil War modifications to coastal defense saw engineers cutting down the towering brick walls of older forts, burying them under massive earth glacis, and preparing the way for the Endicott System at the end of the century, which utilized reinforced concrete, low-profile designs, and disappearing guns.
|
||||
|
||||
Concurrently, a completely different doctrine of military architecture was unfolding along the interior frontiers of the continent, where the purpose of fortification was not to resist heavy naval artillery, but to project geopolitical power, control trade routes, and subjugate Indigenous populations. These interior strongholds, such as Fort Laramie in Wyoming or Fort Snelling in Minnesota, abandoned the complex geometry and multi-tiered casemates of coastal engineering in favor of practical, localized utility. Often constructed initially of timber palisades or sun-dried adobe brick depending on the regional geography, these frontier forts served as fortified outposts for the U.S. Army, fur trading companies, and westward migrants. Rather than being designed to withstand a siege by a peer military force, their layouts usually featured a wide, open central parade ground surrounded by barracks, officer quarters, and a defensive perimeter designed to repel swift cavalry raids, protect supply depots, and enforce the shifting boundaries of American westward expansion.
|
||||
70
trustgraph-cli/trustgraph/cli/sample_documents/recipes.md
Normal file
70
trustgraph-cli/trustgraph/cli/sample_documents/recipes.md
Normal file
|
|
@ -0,0 +1,70 @@
|
|||
# The Foundations of West Country Cooking
|
||||
|
||||
## Section 1: The Foundations (Primary Ingredients & Sub-Recipes)
|
||||
|
||||
### Component A: West Country Clotted Cream
|
||||
|
||||
- **Alternative Names:** Devonshire Cream, Cornish Cream.
|
||||
- **Origin:** Universally produced across the pastures of Devon and Cornwall, utilizing milk from Red Ruby Devon cattle.
|
||||
- **Ingredients:** 2 Litres of Unpasteurized Whole Milk (High-fat dairy).
|
||||
- **Process:** Pour the raw milk into a shallow brass pan. Allow it to sit for 12 hours until a thick layer of cream rises to the surface. Heat the pan slowly over a low, indirect flame (traditionally over a wood-fired stove) until the cream begins to "crinkle" but never boil. Remove from heat and cool in a larder for 24 hours. Gently skim the thick, golden crust from the top.
|
||||
- **Downstream Dependencies:** Essential component for The South West Cream Tea and Devonshire Fudge.
|
||||
|
||||
### Component B: Sweet Scones
|
||||
|
||||
- **Alternative Names:** Hearth Cakes, Country Splits.
|
||||
- **Origin:** Common across Somerset, Dorset, Devon, and Cornwall.
|
||||
- **Ingredients:** 450g Self-Raising Flour, 100g Salted Butter, 50g Caster Sugar, 1 pinch of Salt, 250ml Whole Milk.
|
||||
- **Process:** Rub the salted butter into the self-raising flour until it resembles fine breadcrumbs. Stir in the caster sugar and salt. Pour in the whole milk gradually, mixing with a blunt knife until a soft dough forms. Roll out on a floured surface to 2cm thickness and stamp out rounds. Bake at 220°C for 12 minutes until golden.
|
||||
- **Downstream Dependencies:** Required component for The South West Cream Tea.
|
||||
|
||||
### Component C: Hedgerow Jam
|
||||
|
||||
- **Alternative Names:** Whortleberry Preserve, Blackberry Jam.
|
||||
- **Origin:** Produced extensively in the Exmoor and Dartmoor regions of Devon and Somerset.
|
||||
- **Ingredients:** 1kg Wild Blackberries (Whortleberries), 1kg Granulated Sugar, Juice of 1 Lemon (for pectin).
|
||||
- **Process:** Combine blackberries, sugar, and lemon juice in a large copper preserving pan. Heat gently until the sugar dissolves entirely. Bring to a rolling boil for 15 minutes until the setting point of 105°C is reached. Pour into sterilized jars.
|
||||
- **Downstream Dependencies:** Required component for The South West Cream Tea.
|
||||
|
||||
## Section 2: Regional Showcases (Assembled Dishes)
|
||||
|
||||
### Dish 1: The South West Cream Tea
|
||||
|
||||
- **Alternative Names:** Afternoon Tea, Devonshire Tea.
|
||||
- **Origin:** Heavily disputed between the historical houses of Devon and Cornwall.
|
||||
- **Core Components:** Requires 1 batch of Sweet Scones, 1 jar of Hedgerow Jam, and a generous portion of West Country Clotted Cream. Always served alongside a pot of hot Black Tea.
|
||||
|
||||
**The Assembly Protocol Dispute (Critical Logic Test):**
|
||||
|
||||
- **The Devon Method:** The scone is split in half. The West Country Clotted Cream is spread first onto the warm scone, acting as butter, and is then topped with a dollop of Hedgerow Jam.
|
||||
- **The Cornish Method:** The scone is split in half. The Hedgerow Jam is spread directly onto the scone first, and is subsequently topped with a spoonful of West Country Clotted Cream.
|
||||
|
||||
### Dish 2: The Cornish Pasty
|
||||
|
||||
- **Alternative Names:** Oggy, The Miner's Lunch.
|
||||
- **Origin:** Cornwall (Specifically protected by PGI status, meaning it must be prepared within the county borders).
|
||||
- **Ingredients:** 500g Shortcrust Pastry, 400g Beef Skirt (cubed), 300g Potato (peeled and diced), 150g Swede (locally referred to as 'Turnip' in Cornwall), 150g Onion (finely chopped), Salt and Black Pepper, 1 Egg (beaten, for glaze).
|
||||
- **Process:** Roll the pastry and cut into large circles. Place the raw beef, potato, swede, and onion on one half of the circle. Season generously. Fold the pastry over to form a D-shape and crimp the edges firmly to create a thick ridge. Brush with the beaten egg and bake at 200°C for 45 minutes.
|
||||
- **Historical Context:** The thick crimped ridge allowed Cornish tin miners to hold the pasty with dirty, arsenic-covered hands and discard the crust afterward.
|
||||
|
||||
### Dish 3: Devonshire Homity Pie
|
||||
|
||||
- **Alternative Names:** Land Girls' Pie.
|
||||
- **Origin:** Popularized in Devon by the Women's Land Army during world war rationing.
|
||||
- **Ingredients:** 1 Pre-baked Shortcrust Pastry Case, 500g Potatoes (boiled and cubed), 2 Large Leeks (sliced), 1 Large Onion (chopped), 50g Salted Butter, 150g Mature Cheddar Cheese (sourced from Somerset), 2 tbsps Fresh Parsley.
|
||||
- **Process:** Melt the salted butter in a pan and sauté the leeks and onions until soft. Stir in the cubed potatoes, parsley, and half of the Somerset-sourced Mature Cheddar Cheese. Spoon the filling into the pre-baked pastry case. Top with the remaining cheese and bake at 190°C for 25 minutes until bubbling.
|
||||
|
||||
## Section 3: Sweets & Confectionery
|
||||
|
||||
### Dish 4: Devonshire Fudge
|
||||
|
||||
- **Alternative Names:** Clotted Cream Fudge.
|
||||
- **Origin:** Widely sold along the coastal towns of South Devon.
|
||||
- **Ingredients:** 175g West Country Clotted Cream, 450g Caster Sugar, 115g Glucose Syrup, 1 tbsp Vanilla Extract.
|
||||
- **Process:** Combine the caster sugar, glucose syrup, and West Country Clotted Cream in a heavy-based saucepan. Bring to a boil, stirring continuously until the mixture reaches the "soft ball" stage (116°C). Remove from heat, add vanilla extract, and beat vigorously with a wooden spoon until the mixture thickens and loses its gloss. Pour into a lined tin and allow to set.
|
||||
|
||||
## Section 4: Local Supply Chain & Geopolitical Geography
|
||||
|
||||
- **The Dairy Belt:** Devon and Cornwall are primary rivals in dairy manufacturing. Devon relies heavily on the Red Ruby cattle breed, whereas Cornwall utilizes traditional mixed-pasture herds. Both rely on Somerset for auxiliary hard cheeses like Cheddar.
|
||||
- **The Allium Link:** Onions and leeks are the common denominator connecting the savory dishes of Devon (Homity Pie) and Cornwall (Cornish Pasty).
|
||||
- **The Pectin Shortage Risk:** Should a frost hit the Tamar Valley, the production of Hedgerow Jam ceases, creating a supply bottleneck that directly disables the serving of The South West Cream Tea, despite dairy availability.
|
||||
|
|
@ -0,0 +1,63 @@
|
|||
# Traditional Trade Routes of Pre-Modern Europe
|
||||
|
||||
## Section 1: The Arteries (The Core Networks)
|
||||
|
||||
### Network A: The Hanseatic Baltic Route
|
||||
|
||||
- **Alternative Names:** The Hansa Network, The Northern Guild Rim.
|
||||
- **Geographical Span:** Spans from the North Sea across the Baltic Sea, linking London, Bruges, Lübeck, Danzig, and Novgorod.
|
||||
- **Primary Commodities:** Timber, Fur, Flax, Stockfish, Amber.
|
||||
- **Downstream Dependencies:** Provides raw materials for Western European shipbuilding and winter clothing markets.
|
||||
|
||||
### Network B: The Venetian Maritime Route
|
||||
|
||||
- **Alternative Names:** The Levantine Silk Spoke, The Adriatic Lifeline.
|
||||
- **Geographical Span:** Connects Venice through the Adriatic Sea, around Greece, to Constantinople and Alexandria.
|
||||
- **Primary Commodities:** Silk, Pepper, Cinnamon, Alum, Glassware.
|
||||
- **Downstream Dependencies:** Feeds the luxury markets of the Holy Roman Empire via alpine passes.
|
||||
|
||||
## Section 2: Hub Cities & Commodity Crossings
|
||||
|
||||
### Hub 1: Bruges (The Low Countries)
|
||||
|
||||
- **Alternative Names:** Brugge, The Flanders Staple.
|
||||
- **Geographical Intersection:** The primary terminus where The Hanseatic Baltic Route meets Western European land routes.
|
||||
|
||||
**The Staple Right Dispute (Critical Logic Test):**
|
||||
|
||||
- **The Guild Law:** By ducal decree, all foreign merchants traveling through Flanders must unload their ships at Bruges and offer their goods for sale for a mandatory 15 days before they can proceed.
|
||||
- **The English Subversion:** English wool merchants, seeking to bypass the Bruges tax, began smuggling raw wool directly to Antwerp, sparking an economic blockade by the Hanseatic League against English shipping.
|
||||
|
||||
### Hub 2: Lübeck (The Baltic Capital)
|
||||
|
||||
- **Alternative Names:** Lubeca, The Queen of the Hansa.
|
||||
- **Geographical Intersection:** Located in Northern Germany, acting as the administrative node connecting the North Sea (via the Kiel land-bridge) to the wider Baltic Sea.
|
||||
- **Resource Matrix:** Completely dependent on the Lüneburg Salt Works for its primary processing industry (herring preservation).
|
||||
|
||||
### Hub 3: Constantinople (The Gateway)
|
||||
|
||||
- **Alternative Names:** Byzantium, Istanbul, Miklagard.
|
||||
- **Geographical Intersection:** The western terminus of the Silk Road land routes and the northern terminus of The Venetian Maritime Route.
|
||||
- **Controlling Entity:** Transferred from Byzantine control to Ottoman control in 1453, altering the tariff structures for all Christian merchants.
|
||||
|
||||
## Section 3: Specialized Commodities & Processing Nodes
|
||||
|
||||
### Item 1: Lüneburg Salt
|
||||
|
||||
- **Alternative Names:** White Gold, Northern Brine.
|
||||
- **Origin:** Extracted from the brine springs of Lüneburg, Germany.
|
||||
- **Process:** Boiled in massive lead pans using timber sourced from local forests.
|
||||
- **Critical Dependency Link:** This salt is shipped directly to Bergen (Norway) via Lübeck to pack and preserve Scania Herring. Without this specific salt supply, Baltic fish rots before reaching Western markets.
|
||||
|
||||
### Item 2: Phocaean Alum
|
||||
|
||||
- **Alternative Names:** The Weaver's Fixative, Anatolian Alum.
|
||||
- **Origin:** Mined in the hills of Phocaea (Asia Minor) under the jurisdiction of the Genoese Republic, later seized by regional powers.
|
||||
- **Process:** Shipped via Mediterranean maritime routes to Flanders and Florence.
|
||||
- **Chemical Function:** A mandatory chemical mordant required to fix dyes to wool and textiles. Without Alum, the famous Flemish textile industry cannot produce colored cloth.
|
||||
|
||||
## Section 4: Geopolitical Disruptions & Chokepoints
|
||||
|
||||
- **The Sound Toll Bottleneck:** The King of Denmark levies a mandatory tax on all ships entering or leaving the Baltic Sea through the Øresund strait. A diplomatic dispute or military blockade of the Sound by Denmark instantly halts the flow of Russian timber to the English Royal Dockyards.
|
||||
- **The Sound-to-Salt Ripple Effect:** If the forests around Lüneburg are depleted, salt production drops. This directly causes a collapse in the Bergen fish trade, which in turn causes a protein shortage and subsequent famine in the labor forces of the Flemish textile hubs.
|
||||
- **The Alum Monopolization:** Following conflicts in the East, the discovery of a domestic alum mine in Tolfa (Papal States) in 1461 caused a massive geopolitical shift, as the Pope banned the import of "infidel alum" from the East, forcing Venetian merchants to pivot their supply lines inward.
|
||||
|
|
@ -10,8 +10,8 @@ description = "HuggingFace embeddings support for TrustGraph."
|
|||
readme = "README.md"
|
||||
requires-python = ">=3.8"
|
||||
dependencies = [
|
||||
"trustgraph-base>=2.4,<2.5",
|
||||
"trustgraph-flow>=2.4,<2.5",
|
||||
"trustgraph-base>=2.5,<2.6",
|
||||
"trustgraph-flow>=2.5,<2.6",
|
||||
"torch",
|
||||
"urllib3",
|
||||
"transformers",
|
||||
|
|
|
|||
|
|
@ -10,7 +10,7 @@ description = "TrustGraph provides a means to run a pipeline of flexible AI proc
|
|||
readme = "README.md"
|
||||
requires-python = ">=3.8"
|
||||
dependencies = [
|
||||
"trustgraph-base>=2.4,<2.5",
|
||||
"trustgraph-base>=2.5,<2.6",
|
||||
"aiohttp",
|
||||
"anthropic",
|
||||
"scylla-driver",
|
||||
|
|
@ -64,6 +64,7 @@ bootstrap = "trustgraph.bootstrap.bootstrapper:run"
|
|||
config-svc = "trustgraph.config.service:run"
|
||||
flow-svc = "trustgraph.flow.service:run"
|
||||
iam-svc = "trustgraph.iam.service:run"
|
||||
no-auth-svc = "trustgraph.iam.noauth:run"
|
||||
doc-embeddings-query-milvus = "trustgraph.query.doc_embeddings.milvus:run"
|
||||
doc-embeddings-query-pinecone = "trustgraph.query.doc_embeddings.pinecone:run"
|
||||
doc-embeddings-query-qdrant = "trustgraph.query.doc_embeddings.qdrant:run"
|
||||
|
|
|
|||
|
|
@ -326,6 +326,58 @@ class Processor(AsyncProcessor):
|
|||
# Main loop.
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
async def _run_pre_service(self):
|
||||
"""Run pre-service initialisers before opening pub/sub clients.
|
||||
|
||||
These bring up infrastructure that other services depend on
|
||||
(e.g. Pulsar tenant/namespaces). They use out-of-band APIs
|
||||
(HTTP admin), not pub/sub, so they don't need a config client.
|
||||
They run without flag tracking — they must be idempotent.
|
||||
"""
|
||||
pre_specs = [
|
||||
s for s in self.specs
|
||||
if not s.instance.wait_for_services
|
||||
]
|
||||
if not pre_specs:
|
||||
return
|
||||
|
||||
for spec in pre_specs:
|
||||
child_logger = logger.getChild(spec.name)
|
||||
child_ctx = InitContext(
|
||||
logger=child_logger,
|
||||
config=None,
|
||||
make_flow_client=self._make_flow_client,
|
||||
make_iam_client=self._make_iam_client,
|
||||
)
|
||||
child_logger.info(f"Running pre-service initialiser")
|
||||
try:
|
||||
await spec.instance.run(child_ctx, None, spec.flag)
|
||||
child_logger.info(f"Pre-service initialiser completed")
|
||||
except Exception as e:
|
||||
child_logger.error(
|
||||
f"Pre-service initialiser failed: "
|
||||
f"{type(e).__name__}: {e}",
|
||||
exc_info=True,
|
||||
)
|
||||
raise
|
||||
|
||||
async def start(self):
|
||||
# Run pre-service initialisers before opening any pub/sub
|
||||
# connections. They bring up infrastructure (Pulsar
|
||||
# namespaces, etc.) that super().start() depends on.
|
||||
while self.running:
|
||||
try:
|
||||
await self._run_pre_service()
|
||||
break
|
||||
except Exception as e:
|
||||
logger.info(
|
||||
f"Pre-service initialisation failed "
|
||||
f"({type(e).__name__}: {e}); retry in {GATE_BACKOFF}s"
|
||||
)
|
||||
await asyncio.sleep(GATE_BACKOFF)
|
||||
|
||||
await super().start()
|
||||
|
||||
async def run(self):
|
||||
|
||||
logger.info(
|
||||
|
|
@ -347,29 +399,18 @@ class Processor(AsyncProcessor):
|
|||
continue
|
||||
|
||||
try:
|
||||
# Phase 1: pre-service initialisers run unconditionally.
|
||||
pre_specs = [
|
||||
s for s in self.specs
|
||||
if not s.instance.wait_for_services
|
||||
]
|
||||
pre_results = {}
|
||||
for spec in pre_specs:
|
||||
pre_results[spec.name] = await self._run_spec(
|
||||
spec, config,
|
||||
)
|
||||
|
||||
# Phase 2: gate.
|
||||
# Phase 1: gate.
|
||||
gate_ok = await self._gate_ready(config)
|
||||
|
||||
# Phase 3: post-service initialisers, if gate passed.
|
||||
post_results = {}
|
||||
# Phase 2: post-service initialisers, if gate passed.
|
||||
results = {}
|
||||
if gate_ok:
|
||||
post_specs = [
|
||||
s for s in self.specs
|
||||
if s.instance.wait_for_services
|
||||
]
|
||||
for spec in post_specs:
|
||||
post_results[spec.name] = await self._run_spec(
|
||||
results[spec.name] = await self._run_spec(
|
||||
spec, config,
|
||||
)
|
||||
|
||||
|
|
@ -377,8 +418,7 @@ class Processor(AsyncProcessor):
|
|||
if not gate_ok:
|
||||
sleep_for = GATE_BACKOFF
|
||||
else:
|
||||
all_results = {**pre_results, **post_results}
|
||||
if any(r != "skip" for r in all_results.values()):
|
||||
if any(r != "skip" for r in results.values()):
|
||||
sleep_for = INIT_RETRY
|
||||
else:
|
||||
sleep_for = STEADY_INTERVAL
|
||||
|
|
|
|||
|
|
@ -112,6 +112,10 @@ class PulsarTopology(Initialiser):
|
|||
def _reconcile_sync(self, logger):
|
||||
if not self._tenant_exists():
|
||||
clusters = self._get_clusters()
|
||||
if not clusters:
|
||||
raise RuntimeError(
|
||||
"Pulsar cluster list is empty — broker not ready yet"
|
||||
)
|
||||
logger.info(
|
||||
f"Creating tenant {self.tenant!r} with clusters {clusters}"
|
||||
)
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
|
||||
from .. schema import KnowledgeResponse, Error, Triples, GraphEmbeddings
|
||||
from .. schema import DocumentEmbeddings
|
||||
from .. knowledge import hash
|
||||
from .. exceptions import RequestError
|
||||
from .. tables.knowledge import KnowledgeTableStore
|
||||
|
|
@ -157,6 +158,98 @@ class KnowledgeManager:
|
|||
)
|
||||
)
|
||||
|
||||
async def list_de_cores(self, request, respond, workspace):
|
||||
|
||||
ids = await self.table_store.list_de_cores(workspace)
|
||||
|
||||
await respond(
|
||||
KnowledgeResponse(
|
||||
error = None,
|
||||
ids = ids,
|
||||
eos = False,
|
||||
triples = None,
|
||||
graph_embeddings = None,
|
||||
)
|
||||
)
|
||||
|
||||
async def get_de_core(self, request, respond, workspace):
|
||||
|
||||
logger.info("Getting document embeddings core...")
|
||||
|
||||
async def publish_de(de):
|
||||
await respond(
|
||||
KnowledgeResponse(
|
||||
error = None,
|
||||
ids = None,
|
||||
eos = False,
|
||||
triples = None,
|
||||
graph_embeddings = None,
|
||||
document_embeddings = de,
|
||||
)
|
||||
)
|
||||
|
||||
await self.table_store.get_document_embeddings(
|
||||
workspace,
|
||||
request.id,
|
||||
publish_de,
|
||||
)
|
||||
|
||||
logger.debug("Document embeddings core retrieval complete")
|
||||
|
||||
await respond(
|
||||
KnowledgeResponse(
|
||||
error = None,
|
||||
ids = None,
|
||||
eos = True,
|
||||
triples = None,
|
||||
graph_embeddings = None,
|
||||
)
|
||||
)
|
||||
|
||||
async def put_de_core(self, request, respond, workspace):
|
||||
|
||||
if request.document_embeddings:
|
||||
await self.table_store.add_document_embeddings(
|
||||
workspace, request.document_embeddings
|
||||
)
|
||||
|
||||
await respond(
|
||||
KnowledgeResponse(
|
||||
error = None,
|
||||
ids = None,
|
||||
eos = False,
|
||||
triples = None,
|
||||
graph_embeddings = None,
|
||||
)
|
||||
)
|
||||
|
||||
async def delete_de_core(self, request, respond, workspace):
|
||||
|
||||
logger.info("Deleting document embeddings core...")
|
||||
|
||||
await self.table_store.delete_document_embeddings(
|
||||
workspace, request.id
|
||||
)
|
||||
|
||||
await respond(
|
||||
KnowledgeResponse(
|
||||
error = None,
|
||||
ids = None,
|
||||
eos = False,
|
||||
triples = None,
|
||||
graph_embeddings = None,
|
||||
)
|
||||
)
|
||||
|
||||
async def load_de_core(self, request, respond, workspace):
|
||||
|
||||
if self.background_task is None:
|
||||
self.background_task = asyncio.create_task(
|
||||
self.core_loader()
|
||||
)
|
||||
|
||||
await self.loader_queue.put((request, respond, workspace))
|
||||
|
||||
async def core_loader(self):
|
||||
|
||||
logger.info("Knowledge background processor running...")
|
||||
|
|
@ -165,7 +258,7 @@ class KnowledgeManager:
|
|||
logger.debug("Waiting for next load...")
|
||||
request, respond, workspace = await self.loader_queue.get()
|
||||
|
||||
logger.info(f"Loading knowledge: {request.id}")
|
||||
logger.info(f"Loading: {request.operation} {request.id}")
|
||||
|
||||
try:
|
||||
|
||||
|
|
@ -187,25 +280,14 @@ class KnowledgeManager:
|
|||
if "interfaces" not in flow:
|
||||
raise RuntimeError("No defined interfaces")
|
||||
|
||||
if "triples-store" not in flow["interfaces"]:
|
||||
raise RuntimeError("Flow has no triples-store")
|
||||
|
||||
if "graph-embeddings-store" not in flow["interfaces"]:
|
||||
raise RuntimeError("Flow has no graph-embeddings-store")
|
||||
|
||||
t_q = flow["interfaces"]["triples-store"]["flow"]
|
||||
ge_q = flow["interfaces"]["graph-embeddings-store"]["flow"]
|
||||
|
||||
# Got this far, it should all work
|
||||
await respond(
|
||||
KnowledgeResponse(
|
||||
error = None,
|
||||
ids = None,
|
||||
eos = False,
|
||||
triples = None,
|
||||
graph_embeddings = None
|
||||
if request.operation == "load-de-core":
|
||||
await self._load_de_core(
|
||||
request, respond, workspace, flow,
|
||||
)
|
||||
else:
|
||||
await self._load_kg_core(
|
||||
request, respond, workspace, flow,
|
||||
)
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
|
||||
|
|
@ -223,72 +305,145 @@ class KnowledgeManager:
|
|||
)
|
||||
)
|
||||
|
||||
|
||||
logger.debug("Starting knowledge loading process...")
|
||||
|
||||
try:
|
||||
|
||||
t_pub = None
|
||||
ge_pub = None
|
||||
|
||||
logger.debug(f"Triples queue: {t_q}")
|
||||
logger.debug(f"Graph embeddings queue: {ge_q}")
|
||||
|
||||
t_pub = Publisher(
|
||||
self.flow_config.pubsub, t_q,
|
||||
schema=Triples,
|
||||
)
|
||||
ge_pub = Publisher(
|
||||
self.flow_config.pubsub, ge_q,
|
||||
schema=GraphEmbeddings
|
||||
)
|
||||
|
||||
logger.debug("Starting publishers...")
|
||||
|
||||
await t_pub.start()
|
||||
await ge_pub.start()
|
||||
|
||||
async def publish_triples(t):
|
||||
# Override collection with request collection
|
||||
if hasattr(t, 'metadata') and hasattr(t.metadata, 'collection'):
|
||||
t.metadata.collection = request.collection or "default"
|
||||
await t_pub.send(None, t)
|
||||
|
||||
logger.debug("Publishing triples...")
|
||||
|
||||
await self.table_store.get_triples(
|
||||
workspace,
|
||||
request.id,
|
||||
publish_triples,
|
||||
)
|
||||
|
||||
async def publish_ge(g):
|
||||
# Override collection with request collection
|
||||
if hasattr(g, 'metadata') and hasattr(g.metadata, 'collection'):
|
||||
g.metadata.collection = request.collection or "default"
|
||||
await ge_pub.send(None, g)
|
||||
|
||||
logger.debug("Publishing graph embeddings...")
|
||||
|
||||
await self.table_store.get_graph_embeddings(
|
||||
workspace,
|
||||
request.id,
|
||||
publish_ge,
|
||||
)
|
||||
|
||||
logger.debug("Knowledge loading completed")
|
||||
|
||||
except Exception as e:
|
||||
|
||||
logger.error(f"Knowledge exception: {e}", exc_info=True)
|
||||
|
||||
finally:
|
||||
|
||||
logger.debug("Stopping publishers...")
|
||||
|
||||
if t_pub: await t_pub.stop()
|
||||
if ge_pub: await ge_pub.stop()
|
||||
|
||||
logger.debug("Knowledge processing done")
|
||||
|
||||
continue
|
||||
|
||||
async def _load_kg_core(self, request, respond, workspace, flow):
|
||||
|
||||
if "triples-store" not in flow["interfaces"]:
|
||||
raise RuntimeError("Flow has no triples-store")
|
||||
|
||||
if "graph-embeddings-store" not in flow["interfaces"]:
|
||||
raise RuntimeError("Flow has no graph-embeddings-store")
|
||||
|
||||
t_q = flow["interfaces"]["triples-store"]["flow"]
|
||||
ge_q = flow["interfaces"]["graph-embeddings-store"]["flow"]
|
||||
|
||||
await respond(
|
||||
KnowledgeResponse(
|
||||
error = None,
|
||||
ids = None,
|
||||
eos = False,
|
||||
triples = None,
|
||||
graph_embeddings = None
|
||||
)
|
||||
)
|
||||
|
||||
t_pub = None
|
||||
ge_pub = None
|
||||
|
||||
try:
|
||||
|
||||
logger.debug(f"Triples queue: {t_q}")
|
||||
logger.debug(f"Graph embeddings queue: {ge_q}")
|
||||
|
||||
t_pub = Publisher(
|
||||
self.flow_config.pubsub, t_q,
|
||||
schema=Triples,
|
||||
)
|
||||
ge_pub = Publisher(
|
||||
self.flow_config.pubsub, ge_q,
|
||||
schema=GraphEmbeddings
|
||||
)
|
||||
|
||||
logger.debug("Starting publishers...")
|
||||
|
||||
await t_pub.start()
|
||||
await ge_pub.start()
|
||||
|
||||
async def publish_triples(t):
|
||||
if hasattr(t, 'metadata') and hasattr(t.metadata, 'collection'):
|
||||
t.metadata.collection = request.collection or "default"
|
||||
await t_pub.send(None, t)
|
||||
|
||||
logger.debug("Publishing triples...")
|
||||
|
||||
await self.table_store.get_triples(
|
||||
workspace,
|
||||
request.id,
|
||||
publish_triples,
|
||||
)
|
||||
|
||||
async def publish_ge(g):
|
||||
if hasattr(g, 'metadata') and hasattr(g.metadata, 'collection'):
|
||||
g.metadata.collection = request.collection or "default"
|
||||
await ge_pub.send(None, g)
|
||||
|
||||
logger.debug("Publishing graph embeddings...")
|
||||
|
||||
await self.table_store.get_graph_embeddings(
|
||||
workspace,
|
||||
request.id,
|
||||
publish_ge,
|
||||
)
|
||||
|
||||
logger.debug("Knowledge core loading completed")
|
||||
|
||||
except Exception as e:
|
||||
|
||||
logger.error(f"Knowledge exception: {e}", exc_info=True)
|
||||
|
||||
finally:
|
||||
|
||||
logger.debug("Stopping publishers...")
|
||||
|
||||
if t_pub: await t_pub.stop()
|
||||
if ge_pub: await ge_pub.stop()
|
||||
|
||||
async def _load_de_core(self, request, respond, workspace, flow):
|
||||
|
||||
if "document-embeddings-store" not in flow["interfaces"]:
|
||||
raise RuntimeError("Flow has no document-embeddings-store")
|
||||
|
||||
de_q = flow["interfaces"]["document-embeddings-store"]["flow"]
|
||||
|
||||
await respond(
|
||||
KnowledgeResponse(
|
||||
error = None,
|
||||
ids = None,
|
||||
eos = False,
|
||||
triples = None,
|
||||
graph_embeddings = None
|
||||
)
|
||||
)
|
||||
|
||||
de_pub = None
|
||||
|
||||
try:
|
||||
|
||||
logger.debug(f"Document embeddings queue: {de_q}")
|
||||
|
||||
de_pub = Publisher(
|
||||
self.flow_config.pubsub, de_q,
|
||||
schema=DocumentEmbeddings,
|
||||
)
|
||||
|
||||
logger.debug("Starting publisher...")
|
||||
|
||||
await de_pub.start()
|
||||
|
||||
async def publish_de(de):
|
||||
if hasattr(de, 'metadata') and hasattr(de.metadata, 'collection'):
|
||||
de.metadata.collection = request.collection or "default"
|
||||
await de_pub.send(None, de)
|
||||
|
||||
logger.debug("Publishing document embeddings...")
|
||||
|
||||
await self.table_store.get_document_embeddings(
|
||||
workspace,
|
||||
request.id,
|
||||
publish_de,
|
||||
)
|
||||
|
||||
logger.debug("Document embeddings core loading completed")
|
||||
|
||||
except Exception as e:
|
||||
|
||||
logger.error(f"Knowledge exception: {e}", exc_info=True)
|
||||
|
||||
finally:
|
||||
|
||||
logger.debug("Stopping publisher...")
|
||||
|
||||
if de_pub: await de_pub.stop()
|
||||
|
|
|
|||
|
|
@ -187,6 +187,11 @@ class Processor(WorkspaceProcessor):
|
|||
"put-kg-core": self.knowledge.put_kg_core,
|
||||
"load-kg-core": self.knowledge.load_kg_core,
|
||||
"unload-kg-core": self.knowledge.unload_kg_core,
|
||||
"list-de-cores": self.knowledge.list_de_cores,
|
||||
"get-de-core": self.knowledge.get_de_core,
|
||||
"delete-de-core": self.knowledge.delete_de_core,
|
||||
"put-de-core": self.knowledge.put_de_core,
|
||||
"load-de-core": self.knowledge.load_de_core,
|
||||
}
|
||||
|
||||
if v.operation not in impls:
|
||||
|
|
|
|||
|
|
@ -1,10 +1,14 @@
|
|||
|
||||
import datetime
|
||||
import os
|
||||
import logging
|
||||
|
||||
from cassandra.cluster import Cluster
|
||||
from cassandra.auth import PlainTextAuthProvider
|
||||
from cassandra.query import BatchStatement, SimpleStatement
|
||||
from ssl import SSLContext, PROTOCOL_TLSv1_2
|
||||
import os
|
||||
import logging
|
||||
|
||||
from ..tables.cassandra_async import async_execute
|
||||
|
||||
# Global list to track clusters for cleanup
|
||||
_active_clusters = []
|
||||
|
|
@ -461,7 +465,6 @@ class KnowledgeGraph:
|
|||
def create_collection(self, collection):
|
||||
"""Create collection by inserting metadata row"""
|
||||
try:
|
||||
import datetime
|
||||
self.session.execute(
|
||||
f"INSERT INTO {self.collection_metadata_table} (collection, created_at) VALUES (%s, %s)",
|
||||
(collection, datetime.datetime.now())
|
||||
|
|
@ -954,7 +957,6 @@ class EntityCentricKnowledgeGraph:
|
|||
def create_collection(self, collection):
|
||||
"""Create collection by inserting metadata row"""
|
||||
try:
|
||||
import datetime
|
||||
self.session.execute(
|
||||
f"INSERT INTO {self.collection_metadata_table} (collection, created_at) VALUES (%s, %s)",
|
||||
(collection, datetime.datetime.now())
|
||||
|
|
@ -1045,6 +1047,222 @@ class EntityCentricKnowledgeGraph:
|
|||
|
||||
logger.info(f"Deleted collection {collection}: {len(entities)} entity partitions, {len(quads)} quads")
|
||||
|
||||
# ========================================================================
|
||||
# Async methods — use cassandra driver's native async API via async_execute
|
||||
# ========================================================================
|
||||
|
||||
async def async_insert(self, collection, s, p, o, g=None, otype=None, dtype="", lang=""):
|
||||
if g is None:
|
||||
g = DEFAULT_GRAPH
|
||||
if otype is None:
|
||||
if o.startswith("http://") or o.startswith("https://"):
|
||||
otype = "u"
|
||||
else:
|
||||
otype = "l"
|
||||
|
||||
batch = BatchStatement()
|
||||
batch.add(self.insert_entity_stmt, (collection, s, 'S', p, otype, s, o, g, dtype, lang))
|
||||
batch.add(self.insert_entity_stmt, (collection, p, 'P', p, otype, s, o, g, dtype, lang))
|
||||
if otype == 'u' or otype == 't':
|
||||
batch.add(self.insert_entity_stmt, (collection, o, 'O', p, otype, s, o, g, dtype, lang))
|
||||
if g != DEFAULT_GRAPH:
|
||||
batch.add(self.insert_entity_stmt, (collection, g, 'G', p, otype, s, o, g, dtype, lang))
|
||||
batch.add(self.insert_collection_stmt, (collection, g, s, p, o, otype, dtype, lang))
|
||||
|
||||
await async_execute(self.session, batch)
|
||||
|
||||
async def async_get_all(self, collection, limit=50):
|
||||
return await async_execute(
|
||||
self.session, self.get_collection_all_stmt, (collection, limit)
|
||||
)
|
||||
|
||||
async def async_get_s(self, collection, s, g=None, limit=10):
|
||||
rows = await async_execute(
|
||||
self.session, self.get_entity_as_s_stmt, (collection, s, limit)
|
||||
)
|
||||
results = []
|
||||
for row in rows:
|
||||
d = row.d if hasattr(row, 'd') else DEFAULT_GRAPH
|
||||
if g is not None and d != g:
|
||||
continue
|
||||
results.append(QuadResult(
|
||||
s=row.s, p=row.p, o=row.o, g=d,
|
||||
otype=row.otype, dtype=row.dtype, lang=row.lang
|
||||
))
|
||||
return results
|
||||
|
||||
async def async_get_p(self, collection, p, g=None, limit=10):
|
||||
rows = await async_execute(
|
||||
self.session, self.get_entity_as_p_stmt, (collection, p, limit)
|
||||
)
|
||||
results = []
|
||||
for row in rows:
|
||||
d = row.d if hasattr(row, 'd') else DEFAULT_GRAPH
|
||||
if g is not None and d != g:
|
||||
continue
|
||||
results.append(QuadResult(
|
||||
s=row.s, p=row.p, o=row.o, g=d,
|
||||
otype=row.otype, dtype=row.dtype, lang=row.lang
|
||||
))
|
||||
return results
|
||||
|
||||
async def async_get_o(self, collection, o, g=None, limit=10):
|
||||
rows = await async_execute(
|
||||
self.session, self.get_entity_as_o_stmt, (collection, o, limit)
|
||||
)
|
||||
results = []
|
||||
for row in rows:
|
||||
d = row.d if hasattr(row, 'd') else DEFAULT_GRAPH
|
||||
if g is not None and d != g:
|
||||
continue
|
||||
results.append(QuadResult(
|
||||
s=row.s, p=row.p, o=row.o, g=d,
|
||||
otype=row.otype, dtype=row.dtype, lang=row.lang
|
||||
))
|
||||
return results
|
||||
|
||||
async def async_get_sp(self, collection, s, p, g=None, limit=10):
|
||||
rows = await async_execute(
|
||||
self.session, self.get_entity_as_s_p_stmt, (collection, s, p, limit)
|
||||
)
|
||||
results = []
|
||||
for row in rows:
|
||||
d = row.d if hasattr(row, 'd') else DEFAULT_GRAPH
|
||||
if g is not None and d != g:
|
||||
continue
|
||||
results.append(QuadResult(
|
||||
s=s, p=p, o=row.o, g=d,
|
||||
otype=row.otype, dtype=row.dtype, lang=row.lang
|
||||
))
|
||||
return results
|
||||
|
||||
async def async_get_po(self, collection, p, o, g=None, limit=10):
|
||||
rows = await async_execute(
|
||||
self.session, self.get_entity_as_o_p_stmt, (collection, o, p, limit)
|
||||
)
|
||||
results = []
|
||||
for row in rows:
|
||||
d = row.d if hasattr(row, 'd') else DEFAULT_GRAPH
|
||||
if g is not None and d != g:
|
||||
continue
|
||||
results.append(QuadResult(
|
||||
s=row.s, p=p, o=o, g=d,
|
||||
otype=row.otype, dtype=row.dtype, lang=row.lang
|
||||
))
|
||||
return results
|
||||
|
||||
async def async_get_os(self, collection, o, s, g=None, limit=10):
|
||||
rows = await async_execute(
|
||||
self.session, self.get_entity_as_s_stmt, (collection, s, limit)
|
||||
)
|
||||
results = []
|
||||
for row in rows:
|
||||
if row.o != o:
|
||||
continue
|
||||
d = row.d if hasattr(row, 'd') else DEFAULT_GRAPH
|
||||
if g is not None and d != g:
|
||||
continue
|
||||
results.append(QuadResult(
|
||||
s=s, p=row.p, o=o, g=d,
|
||||
otype=row.otype, dtype=row.dtype, lang=row.lang
|
||||
))
|
||||
return results
|
||||
|
||||
async def async_get_spo(self, collection, s, p, o, g=None, limit=10):
|
||||
rows = await async_execute(
|
||||
self.session, self.get_entity_as_s_p_stmt, (collection, s, p, limit)
|
||||
)
|
||||
results = []
|
||||
for row in rows:
|
||||
if row.o != o:
|
||||
continue
|
||||
d = row.d if hasattr(row, 'd') else DEFAULT_GRAPH
|
||||
if g is not None and d != g:
|
||||
continue
|
||||
results.append(QuadResult(
|
||||
s=s, p=p, o=o, g=d,
|
||||
otype=row.otype, dtype=row.dtype, lang=row.lang
|
||||
))
|
||||
return results
|
||||
|
||||
async def async_get_g(self, collection, g, limit=50):
|
||||
if g is None:
|
||||
g = DEFAULT_GRAPH
|
||||
return await async_execute(
|
||||
self.session, self.get_collection_by_graph_stmt, (collection, g, limit)
|
||||
)
|
||||
|
||||
async def async_collection_exists(self, collection):
|
||||
try:
|
||||
result = await async_execute(
|
||||
self.session,
|
||||
f"SELECT collection FROM {self.collection_metadata_table} WHERE collection = %s LIMIT 1",
|
||||
(collection,)
|
||||
)
|
||||
return bool(result)
|
||||
except Exception as e:
|
||||
logger.error(f"Error checking collection existence: {e}")
|
||||
return False
|
||||
|
||||
async def async_create_collection(self, collection):
|
||||
await async_execute(
|
||||
self.session,
|
||||
f"INSERT INTO {self.collection_metadata_table} (collection, created_at) VALUES (%s, %s)",
|
||||
(collection, datetime.datetime.now())
|
||||
)
|
||||
logger.info(f"Created collection metadata for {collection}")
|
||||
|
||||
async def async_delete_collection(self, collection):
|
||||
rows = await async_execute(
|
||||
self.session,
|
||||
f"SELECT d, s, p, o, otype, dtype, lang FROM {self.collection_table} WHERE collection = %s",
|
||||
(collection,)
|
||||
)
|
||||
|
||||
entities = set()
|
||||
quads = []
|
||||
for row in rows:
|
||||
d, s, p, o = row.d, row.s, row.p, row.o
|
||||
otype = row.otype
|
||||
dtype = row.dtype if hasattr(row, 'dtype') else ''
|
||||
lang = row.lang if hasattr(row, 'lang') else ''
|
||||
quads.append((d, s, p, o, otype, dtype, lang))
|
||||
entities.add(s)
|
||||
entities.add(p)
|
||||
if otype == 'u' or otype == 't':
|
||||
entities.add(o)
|
||||
if d != DEFAULT_GRAPH:
|
||||
entities.add(d)
|
||||
|
||||
batch = BatchStatement()
|
||||
count = 0
|
||||
for entity in entities:
|
||||
batch.add(self.delete_entity_partition_stmt, (collection, entity))
|
||||
count += 1
|
||||
if count % 50 == 0:
|
||||
await async_execute(self.session, batch)
|
||||
batch = BatchStatement()
|
||||
if count % 50 != 0:
|
||||
await async_execute(self.session, batch)
|
||||
|
||||
batch = BatchStatement()
|
||||
count = 0
|
||||
for d, s, p, o, otype, dtype, lang in quads:
|
||||
batch.add(self.delete_collection_row_stmt, (collection, d, s, p, o, otype, dtype, lang))
|
||||
count += 1
|
||||
if count % 50 == 0:
|
||||
await async_execute(self.session, batch)
|
||||
batch = BatchStatement()
|
||||
if count % 50 != 0:
|
||||
await async_execute(self.session, batch)
|
||||
|
||||
await async_execute(
|
||||
self.session,
|
||||
f"DELETE FROM {self.collection_metadata_table} WHERE collection = %s",
|
||||
(collection,)
|
||||
)
|
||||
logger.info(f"Deleted collection {collection}: {len(entities)} entity partitions, {len(quads)} quads")
|
||||
|
||||
def close(self):
|
||||
"""Close connections"""
|
||||
if hasattr(self, 'session') and self.session:
|
||||
|
|
|
|||
|
|
@ -121,6 +121,7 @@ class Processor(FlowProcessor):
|
|||
# Configuration
|
||||
self.top_k = params.get("top_k", 10)
|
||||
self.similarity_threshold = params.get("similarity_threshold", 0.3)
|
||||
self.bypass_selector_below = params.get("bypass_selector_below", 5)
|
||||
|
||||
# Per-workspace ontology version tracking
|
||||
self.current_ontology_versions = {} # workspace -> version
|
||||
|
|
@ -187,7 +188,8 @@ class Processor(FlowProcessor):
|
|||
ontology_embedder=ontology_embedder,
|
||||
ontology_loader=loader,
|
||||
top_k=self.top_k,
|
||||
similarity_threshold=self.similarity_threshold
|
||||
similarity_threshold=self.similarity_threshold,
|
||||
bypass_selector_below=self.bypass_selector_below,
|
||||
)
|
||||
|
||||
# Store flow-specific components
|
||||
|
|
@ -981,6 +983,13 @@ class Processor(FlowProcessor):
|
|||
default=0.3,
|
||||
help='Similarity threshold for ontology matching (default: 0.3, range: 0.0-1.0)'
|
||||
)
|
||||
parser.add_argument(
|
||||
'--bypass-selector-below',
|
||||
type=int,
|
||||
default=5,
|
||||
help='Bypass ontology selector when total ontology elements '
|
||||
'(classes + properties) is below this value (default: 5)'
|
||||
)
|
||||
parser.add_argument(
|
||||
'--triples-batch-size',
|
||||
type=int,
|
||||
|
|
|
|||
|
|
@ -33,19 +33,44 @@ class OntologySelector:
|
|||
def __init__(self, ontology_embedder: OntologyEmbedder,
|
||||
ontology_loader: OntologyLoader,
|
||||
top_k: int = 10,
|
||||
similarity_threshold: float = 0.7):
|
||||
"""Initialize the ontology selector.
|
||||
|
||||
Args:
|
||||
ontology_embedder: Embedder with vector store
|
||||
ontology_loader: Loader with ontology definitions
|
||||
top_k: Number of top results to retrieve per segment
|
||||
similarity_threshold: Minimum similarity score
|
||||
"""
|
||||
similarity_threshold: float = 0.3,
|
||||
bypass_selector_below: int = 5):
|
||||
self.embedder = ontology_embedder
|
||||
self.loader = ontology_loader
|
||||
self.top_k = top_k
|
||||
self.similarity_threshold = similarity_threshold
|
||||
self.bypass_selector_below = bypass_selector_below
|
||||
|
||||
def _total_ontology_elements(self) -> int:
|
||||
total = 0
|
||||
for ontology in self.loader.get_all_ontologies().values():
|
||||
total += len(ontology.classes)
|
||||
total += len(ontology.object_properties)
|
||||
total += len(ontology.datatype_properties)
|
||||
return total
|
||||
|
||||
def _build_full_subsets(self) -> List[OntologySubset]:
|
||||
subsets = []
|
||||
for ont_id, ontology in self.loader.get_all_ontologies().items():
|
||||
subset = OntologySubset(
|
||||
ontology_id=ont_id,
|
||||
classes={
|
||||
cid: cls.__dict__
|
||||
for cid, cls in ontology.classes.items()
|
||||
},
|
||||
object_properties={
|
||||
pid: prop.__dict__
|
||||
for pid, prop in ontology.object_properties.items()
|
||||
},
|
||||
datatype_properties={
|
||||
pid: prop.__dict__
|
||||
for pid, prop in ontology.datatype_properties.items()
|
||||
},
|
||||
metadata=ontology.metadata,
|
||||
relevance_score=1.0,
|
||||
)
|
||||
subsets.append(subset)
|
||||
return subsets
|
||||
|
||||
async def select_ontology_subset(self, segments: List[TextSegment]) -> List[OntologySubset]:
|
||||
"""Select relevant ontology subsets for text segments.
|
||||
|
|
@ -56,6 +81,15 @@ class OntologySelector:
|
|||
Returns:
|
||||
List of ontology subsets with relevant elements
|
||||
"""
|
||||
total = self._total_ontology_elements()
|
||||
if total < self.bypass_selector_below:
|
||||
logger.info(
|
||||
f"Ontology has {total} elements (below "
|
||||
f"bypass_selector_below={self.bypass_selector_below}), "
|
||||
f"using full ontology"
|
||||
)
|
||||
return self._build_full_subsets()
|
||||
|
||||
# Collect all relevant elements
|
||||
relevant_elements = await self._find_relevant_elements(segments)
|
||||
|
||||
|
|
|
|||
|
|
@ -6,7 +6,7 @@ with full URIs and correct is_uri flags.
|
|||
"""
|
||||
|
||||
import logging
|
||||
from typing import List, Optional
|
||||
from typing import List, Optional, Set
|
||||
|
||||
from .... schema import Triple, Term, IRI, LITERAL
|
||||
from .... rdf import RDF_TYPE, RDF_LABEL
|
||||
|
|
@ -32,6 +32,25 @@ class TripleConverter:
|
|||
self.ontology_id = ontology_id
|
||||
self.entity_registry = EntityRegistry(ontology_id)
|
||||
|
||||
def _get_ancestor_classes(self, class_id: str) -> Set[str]:
|
||||
ancestors = set()
|
||||
current = class_id
|
||||
while current:
|
||||
cls_def = self.ontology_subset.classes.get(current)
|
||||
if not cls_def:
|
||||
break
|
||||
parent = cls_def.get("subclass_of") if isinstance(cls_def, dict) else getattr(cls_def, "subclass_of", None)
|
||||
if not parent or parent in ancestors:
|
||||
break
|
||||
ancestors.add(parent)
|
||||
current = parent
|
||||
return ancestors
|
||||
|
||||
def _matches_class_constraint(self, actual_type: str, expected_type: str) -> bool:
|
||||
if actual_type == expected_type:
|
||||
return True
|
||||
return expected_type in self._get_ancestor_classes(actual_type)
|
||||
|
||||
def convert_all(self, extraction: ExtractionResult) -> List[Triple]:
|
||||
"""Convert complete extraction result to RDF triples.
|
||||
|
||||
|
|
@ -129,6 +148,29 @@ class TripleConverter:
|
|||
logger.warning(f"Unknown relationship '{relationship.relation}', skipping")
|
||||
return None
|
||||
|
||||
# Enforce domain/range constraints when declared
|
||||
prop_def = self.ontology_subset.object_properties.get(
|
||||
relationship.relation, {}
|
||||
)
|
||||
domain = prop_def.get("domain") if isinstance(prop_def, dict) else getattr(prop_def, "domain", None)
|
||||
range_ = prop_def.get("range") if isinstance(prop_def, dict) else getattr(prop_def, "range", None)
|
||||
|
||||
if domain and not self._matches_class_constraint(relationship.subject_type, domain):
|
||||
logger.warning(
|
||||
f"Domain violation: '{relationship.relation}' expects "
|
||||
f"domain '{domain}', got subject type "
|
||||
f"'{relationship.subject_type}', skipping"
|
||||
)
|
||||
return None
|
||||
|
||||
if range_ and not self._matches_class_constraint(relationship.object_type, range_):
|
||||
logger.warning(
|
||||
f"Range violation: '{relationship.relation}' expects "
|
||||
f"range '{range_}', got object type "
|
||||
f"'{relationship.object_type}', skipping"
|
||||
)
|
||||
return None
|
||||
|
||||
# Generate triple: subject property object
|
||||
return Triple(
|
||||
s=Term(type=IRI, iri=subject_uri),
|
||||
|
|
@ -157,11 +199,25 @@ class TripleConverter:
|
|||
logger.warning(f"Unknown attribute '{attribute.attribute}', skipping")
|
||||
return None
|
||||
|
||||
# Enforce domain constraint when declared
|
||||
prop_def = self.ontology_subset.datatype_properties.get(
|
||||
attribute.attribute, {}
|
||||
)
|
||||
domain = prop_def.get("domain") if isinstance(prop_def, dict) else getattr(prop_def, "domain", None)
|
||||
|
||||
if domain and not self._matches_class_constraint(attribute.entity_type, domain):
|
||||
logger.warning(
|
||||
f"Domain violation: attribute '{attribute.attribute}' "
|
||||
f"expects domain '{domain}', got entity type "
|
||||
f"'{attribute.entity_type}', skipping"
|
||||
)
|
||||
return None
|
||||
|
||||
# Generate triple: entity property "literal value"
|
||||
return Triple(
|
||||
s=Term(type=IRI, iri=entity_uri),
|
||||
p=Term(type=IRI, iri=property_uri),
|
||||
o=Term(type=LITERAL, value=attribute.value) # Literal!
|
||||
o=Term(type=LITERAL, value=attribute.value)
|
||||
)
|
||||
|
||||
def _get_class_uri(self, class_id: str) -> Optional[str]:
|
||||
|
|
|
|||
|
|
@ -233,10 +233,10 @@ class IamAuth:
|
|||
|
||||
header = request.headers.get("Authorization", "")
|
||||
if not header.startswith("Bearer "):
|
||||
raise _auth_failure()
|
||||
return await self._authenticate_anonymous()
|
||||
token = header[len("Bearer "):].strip()
|
||||
if not token:
|
||||
raise _auth_failure()
|
||||
return await self._authenticate_anonymous()
|
||||
|
||||
# API keys always start with "tg_". JWTs have two dots and
|
||||
# no "tg_" prefix. Discriminate cheaply.
|
||||
|
|
@ -266,6 +266,26 @@ class IamAuth:
|
|||
handle=sub, workspace=ws, principal_id=sub, source="jwt",
|
||||
)
|
||||
|
||||
async def _authenticate_anonymous(self):
|
||||
try:
|
||||
async def _call(client):
|
||||
return await client.authenticate_anonymous()
|
||||
user_id, workspace, _roles = await self._with_client(_call)
|
||||
except Exception as e:
|
||||
logger.debug(
|
||||
f"Anonymous authentication rejected: "
|
||||
f"{type(e).__name__}: {e}"
|
||||
)
|
||||
raise _auth_failure()
|
||||
|
||||
if not user_id or not workspace:
|
||||
raise _auth_failure()
|
||||
|
||||
return Identity(
|
||||
handle=user_id, workspace=workspace,
|
||||
principal_id=user_id, source="anonymous",
|
||||
)
|
||||
|
||||
async def _resolve_api_key(self, plaintext):
|
||||
h = hashlib.sha256(plaintext.encode("utf-8")).hexdigest()
|
||||
|
||||
|
|
|
|||
|
|
@ -135,13 +135,19 @@ class DispatcherWrapper:
|
|||
class DispatcherManager:
|
||||
|
||||
def __init__(self, backend, config_receiver, auth,
|
||||
prefix="api-gateway", queue_overrides=None):
|
||||
prefix="api-gateway", queue_overrides=None, timeout=120):
|
||||
"""
|
||||
``auth`` is required. It flows into the Mux for first-frame
|
||||
WebSocket authentication and into downstream dispatcher
|
||||
construction. There is no permissive default — constructing
|
||||
a DispatcherManager without an authenticator would be a
|
||||
silent downgrade to no-auth on the socket path.
|
||||
|
||||
``timeout`` is the per-request timeout in seconds, propagated
|
||||
to every dispatcher created by this manager. Must match the
|
||||
gateway's ``--timeout`` flag so that long-running requests
|
||||
are not prematurely cut off at the old hard-coded 120 s
|
||||
ceiling.
|
||||
"""
|
||||
if auth is None:
|
||||
raise ValueError(
|
||||
|
|
@ -149,6 +155,8 @@ class DispatcherManager:
|
|||
"is no no-auth mode"
|
||||
)
|
||||
|
||||
self.timeout = timeout
|
||||
|
||||
self.backend = backend
|
||||
self.config_receiver = config_receiver
|
||||
self.config_receiver.add_handler(self)
|
||||
|
|
@ -291,7 +299,7 @@ class DispatcherManager:
|
|||
|
||||
dispatcher = global_dispatchers[kind](
|
||||
backend = self.backend,
|
||||
timeout = 120,
|
||||
timeout = self.timeout,
|
||||
consumer = consumer_name,
|
||||
subscriber = consumer_name,
|
||||
request_queue = request_queue,
|
||||
|
|
@ -448,7 +456,7 @@ class DispatcherManager:
|
|||
backend = self.backend,
|
||||
request_queue = qconfig["request"],
|
||||
response_queue = qconfig["response"],
|
||||
timeout = 120,
|
||||
timeout = self.timeout,
|
||||
consumer = f"{self.prefix}-{workspace}-{flow}-{kind}-request",
|
||||
subscriber = f"{self.prefix}-{workspace}-{flow}-{kind}-request",
|
||||
)
|
||||
|
|
|
|||
|
|
@ -57,16 +57,13 @@ class Mux:
|
|||
(important for browsers, which treat a handshake-time 401
|
||||
as terminal)."""
|
||||
token = data.get("token", "")
|
||||
if not token:
|
||||
await self.ws.send_json({
|
||||
"type": "auth-failed",
|
||||
"error": "auth failure",
|
||||
})
|
||||
return
|
||||
|
||||
class _Shim:
|
||||
def __init__(self, tok):
|
||||
self.headers = {"Authorization": f"Bearer {tok}"}
|
||||
self.headers = (
|
||||
{"Authorization": f"Bearer {tok}"} if tok
|
||||
else {}
|
||||
)
|
||||
|
||||
try:
|
||||
identity = await self.auth.authenticate(_Shim(token))
|
||||
|
|
|
|||
|
|
@ -457,6 +457,12 @@ for _op in ("put-kg-core", "delete-kg-core",
|
|||
"load-kg-core", "unload-kg-core"):
|
||||
_register_kind_op("knowledge", _op, "knowledge:write")
|
||||
|
||||
# knowledge: document-embeddings core service.
|
||||
for _op in ("get-de-core", "list-de-cores"):
|
||||
_register_kind_op("knowledge", _op, "knowledge:read")
|
||||
for _op in ("put-de-core", "delete-de-core", "load-de-core"):
|
||||
_register_kind_op("knowledge", _op, "knowledge:write")
|
||||
|
||||
|
||||
# collection-management: workspace collection lifecycle.
|
||||
_register_kind_op("collection-management", "list-collections", "collections:read")
|
||||
|
|
|
|||
|
|
@ -119,6 +119,7 @@ class Api:
|
|||
prefix = "gateway",
|
||||
queue_overrides = queue_overrides,
|
||||
auth = self.auth,
|
||||
timeout = self.timeout,
|
||||
)
|
||||
|
||||
self.endpoint_manager = EndpointManager(
|
||||
|
|
|
|||
1
trustgraph-flow/trustgraph/iam/noauth/__init__.py
Normal file
1
trustgraph-flow/trustgraph/iam/noauth/__init__.py
Normal file
|
|
@ -0,0 +1 @@
|
|||
from . service import *
|
||||
4
trustgraph-flow/trustgraph/iam/noauth/__main__.py
Normal file
4
trustgraph-flow/trustgraph/iam/noauth/__main__.py
Normal file
|
|
@ -0,0 +1,4 @@
|
|||
|
||||
from . service import run
|
||||
|
||||
run()
|
||||
131
trustgraph-flow/trustgraph/iam/noauth/handler.py
Normal file
131
trustgraph-flow/trustgraph/iam/noauth/handler.py
Normal file
|
|
@ -0,0 +1,131 @@
|
|||
"""
|
||||
No-auth IAM handler. Implements the IAM contract with every operation
|
||||
returning a permissive or stub response. No database, no crypto,
|
||||
no state.
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
|
||||
from trustgraph.schema import IamResponse, Error, UserRecord
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _err(type, message):
|
||||
return IamResponse(error=Error(type=type, message=message))
|
||||
|
||||
|
||||
class NoAuthHandler:
|
||||
|
||||
def __init__(self, default_user_id="anonymous",
|
||||
default_workspace="default",
|
||||
on_workspace_created=None):
|
||||
self.default_user_id = default_user_id
|
||||
self.default_workspace = default_workspace
|
||||
self._on_workspace_created = on_workspace_created
|
||||
|
||||
def _default_identity_response(self):
|
||||
return IamResponse(
|
||||
resolved_user_id=self.default_user_id,
|
||||
resolved_workspace=self.default_workspace,
|
||||
resolved_roles=["admin"],
|
||||
)
|
||||
|
||||
def _default_user_record(self):
|
||||
return UserRecord(
|
||||
id=self.default_user_id,
|
||||
workspace=self.default_workspace,
|
||||
username=self.default_user_id,
|
||||
name="Anonymous User",
|
||||
roles=["admin"],
|
||||
enabled=True,
|
||||
)
|
||||
|
||||
async def handle(self, v):
|
||||
op = v.operation
|
||||
|
||||
try:
|
||||
if op == "authenticate-anonymous":
|
||||
return self._default_identity_response()
|
||||
|
||||
if op == "resolve-api-key":
|
||||
return self._default_identity_response()
|
||||
|
||||
if op == "authorise":
|
||||
return IamResponse(
|
||||
decision_allow=True,
|
||||
decision_ttl_seconds=3600,
|
||||
)
|
||||
|
||||
if op == "authorise-many":
|
||||
checks = json.loads(v.authorise_checks or "[]")
|
||||
decisions = [
|
||||
{"allow": True, "ttl": 3600}
|
||||
for _ in checks
|
||||
]
|
||||
return IamResponse(
|
||||
decisions_json=json.dumps(decisions),
|
||||
)
|
||||
|
||||
if op == "get-signing-key-public":
|
||||
return IamResponse(signing_key_public="")
|
||||
|
||||
if op == "bootstrap":
|
||||
return IamResponse()
|
||||
|
||||
if op == "bootstrap-status":
|
||||
return IamResponse(bootstrap_available=False)
|
||||
|
||||
if op == "whoami":
|
||||
return IamResponse(user=self._default_user_record())
|
||||
|
||||
if op == "login":
|
||||
return IamResponse()
|
||||
|
||||
if op in (
|
||||
"create-user", "get-user", "update-user",
|
||||
"disable-user", "enable-user",
|
||||
):
|
||||
return IamResponse(user=self._default_user_record())
|
||||
|
||||
if op == "list-users":
|
||||
return IamResponse(users=[self._default_user_record()])
|
||||
|
||||
if op == "delete-user":
|
||||
return IamResponse()
|
||||
|
||||
if op == "create-workspace":
|
||||
if self._on_workspace_created and v.workspace_record:
|
||||
await self._on_workspace_created(v.workspace_record.id)
|
||||
return IamResponse()
|
||||
|
||||
if op in (
|
||||
"get-workspace", "update-workspace",
|
||||
"disable-workspace",
|
||||
):
|
||||
return IamResponse()
|
||||
|
||||
if op == "list-workspaces":
|
||||
return IamResponse()
|
||||
|
||||
if op in ("create-api-key", "list-api-keys", "revoke-api-key"):
|
||||
return IamResponse()
|
||||
|
||||
if op in ("change-password", "reset-password"):
|
||||
return IamResponse()
|
||||
|
||||
if op == "rotate-signing-key":
|
||||
return IamResponse()
|
||||
|
||||
return _err(
|
||||
"invalid-argument",
|
||||
f"unknown operation: {op!r}",
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
f"no-auth {op} failed: {type(e).__name__}: {e}",
|
||||
exc_info=True,
|
||||
)
|
||||
return _err("internal-error", str(e))
|
||||
182
trustgraph-flow/trustgraph/iam/noauth/service.py
Normal file
182
trustgraph-flow/trustgraph/iam/noauth/service.py
Normal file
|
|
@ -0,0 +1,182 @@
|
|||
"""
|
||||
No-auth IAM service. Drop-in replacement for iam-svc that permits
|
||||
all access unconditionally. No database, no bootstrap, no signing keys.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import uuid
|
||||
|
||||
from trustgraph.schema import Error
|
||||
from trustgraph.schema import IamRequest, IamResponse
|
||||
from trustgraph.schema import iam_request_queue, iam_response_queue
|
||||
from trustgraph.schema import ConfigRequest, ConfigResponse, ConfigValue
|
||||
from trustgraph.schema import config_request_queue, config_response_queue
|
||||
|
||||
from trustgraph.base import AsyncProcessor, Consumer, Producer
|
||||
from trustgraph.base import ConsumerMetrics, ProducerMetrics
|
||||
from trustgraph.base.metrics import SubscriberMetrics
|
||||
from trustgraph.base.request_response_spec import RequestResponse
|
||||
|
||||
from . handler import NoAuthHandler
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
default_ident = "no-auth-svc"
|
||||
|
||||
default_iam_request_queue = iam_request_queue
|
||||
default_iam_response_queue = iam_response_queue
|
||||
|
||||
|
||||
class Processor(AsyncProcessor):
|
||||
|
||||
def __init__(self, **params):
|
||||
|
||||
iam_req_q = params.get(
|
||||
"iam_request_queue", default_iam_request_queue,
|
||||
)
|
||||
iam_resp_q = params.get(
|
||||
"iam_response_queue", default_iam_response_queue,
|
||||
)
|
||||
|
||||
default_user_id = params.get("default_user_id", "anonymous")
|
||||
default_workspace = params.get("default_workspace", "default")
|
||||
|
||||
super().__init__(**params)
|
||||
|
||||
iam_request_metrics = ConsumerMetrics(
|
||||
processor=self.id, flow=None, name="iam-request",
|
||||
)
|
||||
iam_response_metrics = ProducerMetrics(
|
||||
processor=self.id, flow=None, name="iam-response",
|
||||
)
|
||||
|
||||
self.iam_request_topic = iam_req_q
|
||||
|
||||
self.iam_request_consumer = Consumer(
|
||||
taskgroup=self.taskgroup,
|
||||
backend=self.pubsub,
|
||||
flow=None,
|
||||
topic=iam_req_q,
|
||||
subscriber=self.id,
|
||||
schema=IamRequest,
|
||||
handler=self.on_iam_request,
|
||||
metrics=iam_request_metrics,
|
||||
)
|
||||
|
||||
self.iam_response_producer = Producer(
|
||||
backend=self.pubsub,
|
||||
topic=iam_resp_q,
|
||||
schema=IamResponse,
|
||||
metrics=iam_response_metrics,
|
||||
)
|
||||
|
||||
self.handler = NoAuthHandler(
|
||||
default_user_id=default_user_id,
|
||||
default_workspace=default_workspace,
|
||||
on_workspace_created=self._ensure_workspace_registered,
|
||||
)
|
||||
|
||||
logger.info(
|
||||
f"No-auth IAM service initialised "
|
||||
f"(user={default_user_id}, workspace={default_workspace})"
|
||||
)
|
||||
|
||||
async def start(self):
|
||||
await self.pubsub.ensure_topic(self.iam_request_topic)
|
||||
await self.iam_request_consumer.start()
|
||||
|
||||
def _create_config_client(self):
|
||||
config_rr_id = str(uuid.uuid4())
|
||||
config_req_metrics = ProducerMetrics(
|
||||
processor=self.id, flow=None, name="config-request",
|
||||
)
|
||||
config_resp_metrics = SubscriberMetrics(
|
||||
processor=self.id, flow=None, name="config-response",
|
||||
)
|
||||
return RequestResponse(
|
||||
backend=self.pubsub,
|
||||
subscription=f"{self.id}--config--{config_rr_id}",
|
||||
consumer_name=self.id,
|
||||
request_topic=config_request_queue,
|
||||
request_schema=ConfigRequest,
|
||||
request_metrics=config_req_metrics,
|
||||
response_topic=config_response_queue,
|
||||
response_schema=ConfigResponse,
|
||||
response_metrics=config_resp_metrics,
|
||||
)
|
||||
|
||||
async def _ensure_workspace_registered(self, workspace_id):
|
||||
client = self._create_config_client()
|
||||
try:
|
||||
await client.start()
|
||||
await client.request(
|
||||
ConfigRequest(
|
||||
operation="put",
|
||||
workspace="__workspaces__",
|
||||
values=[ConfigValue(
|
||||
type="workspace", key=workspace_id,
|
||||
value='{"enabled": true}',
|
||||
)],
|
||||
),
|
||||
timeout=10,
|
||||
)
|
||||
finally:
|
||||
await client.stop()
|
||||
logger.info(
|
||||
f"Registered workspace in config: {workspace_id}"
|
||||
)
|
||||
|
||||
async def on_iam_request(self, msg, consumer, flow):
|
||||
|
||||
id = None
|
||||
try:
|
||||
v = msg.value()
|
||||
id = msg.properties()["id"]
|
||||
logger.debug(
|
||||
f"Handling IAM request {id} op={v.operation!r}"
|
||||
)
|
||||
resp = await self.handler.handle(v)
|
||||
await self.iam_response_producer.send(
|
||||
resp, properties={"id": id},
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
f"IAM request failed: {type(e).__name__}: {e}",
|
||||
exc_info=True,
|
||||
)
|
||||
resp = IamResponse(
|
||||
error=Error(type="internal-error", message=str(e)),
|
||||
)
|
||||
if id is not None:
|
||||
await self.iam_response_producer.send(
|
||||
resp, properties={"id": id},
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def add_args(parser):
|
||||
AsyncProcessor.add_args(parser)
|
||||
|
||||
parser.add_argument(
|
||||
"--iam-request-queue",
|
||||
default=default_iam_request_queue,
|
||||
help=f"IAM request queue (default: {default_iam_request_queue})",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--iam-response-queue",
|
||||
default=default_iam_response_queue,
|
||||
help=f"IAM response queue (default: {default_iam_response_queue})",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--default-user-id",
|
||||
default="anonymous",
|
||||
help="User ID for all requests (default: anonymous)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--default-workspace",
|
||||
default="default",
|
||||
help="Workspace for all requests (default: default)",
|
||||
)
|
||||
|
||||
|
||||
def run():
|
||||
Processor.launch(default_ident, __doc__)
|
||||
|
|
@ -287,6 +287,9 @@ class IamService:
|
|||
op = v.operation
|
||||
|
||||
try:
|
||||
if op == "authenticate-anonymous":
|
||||
return _err("auth-failed", "anonymous access not permitted")
|
||||
|
||||
if op == "bootstrap":
|
||||
return await self.handle_bootstrap(v)
|
||||
if op == "bootstrap-status":
|
||||
|
|
@ -394,8 +397,8 @@ class IamService:
|
|||
|
||||
async def auto_bootstrap_if_token_mode(self):
|
||||
"""Called from the service processor at startup. In
|
||||
``token`` mode, if tables are empty, seeds the default
|
||||
workspace / admin / signing key using the operator-provided
|
||||
``token`` mode, if tables are empty, seeds the admin user,
|
||||
API key, and signing key using the operator-provided
|
||||
bootstrap token. The admin's API key plaintext is *the*
|
||||
``bootstrap_token`` — the operator already knows it, nothing
|
||||
needs to be returned or logged.
|
||||
|
|
@ -405,7 +408,7 @@ class IamService:
|
|||
if self.bootstrap_mode != "token":
|
||||
return
|
||||
|
||||
if await self.table_store.any_workspace_exists():
|
||||
if await self.table_store.any_signing_key_exists():
|
||||
logger.info(
|
||||
"IAM: token mode, tables already populated; skipping "
|
||||
"auto-bootstrap"
|
||||
|
|
@ -420,22 +423,13 @@ class IamService:
|
|||
|
||||
async def _seed_tables(self, api_key_plaintext):
|
||||
"""Shared seeding logic used by token-mode auto-bootstrap and
|
||||
bootstrap-mode handle_bootstrap. Creates the default
|
||||
workspace, admin user, admin API key (using the given
|
||||
plaintext), and an initial signing key. Returns the admin
|
||||
bootstrap-mode handle_bootstrap. Creates the admin user,
|
||||
admin API key (using the given plaintext), and an initial
|
||||
signing key. The workspace is created separately by the
|
||||
bootstrapper's WorkspaceInit initialiser. Returns the admin
|
||||
user id."""
|
||||
now = _now_dt()
|
||||
|
||||
await self.table_store.put_workspace(
|
||||
id=DEFAULT_WORKSPACE,
|
||||
name="Default",
|
||||
enabled=True,
|
||||
created=now,
|
||||
)
|
||||
|
||||
if self._on_workspace_created:
|
||||
await self._on_workspace_created(DEFAULT_WORKSPACE)
|
||||
|
||||
admin_user_id = str(uuid.uuid4())
|
||||
admin_password = secrets.token_urlsafe(32)
|
||||
await self.table_store.put_user(
|
||||
|
|
@ -488,7 +482,7 @@ class IamService:
|
|||
if self.bootstrap_mode != "bootstrap":
|
||||
return _err("auth-failed", "auth failure")
|
||||
|
||||
if await self.table_store.any_workspace_exists():
|
||||
if await self.table_store.any_signing_key_exists():
|
||||
return _err("auth-failed", "auth failure")
|
||||
|
||||
plaintext = _generate_api_key()
|
||||
|
|
@ -528,7 +522,7 @@ class IamService:
|
|||
instead of forcing callers to probe the masked-failure path."""
|
||||
available = (
|
||||
self.bootstrap_mode == "bootstrap"
|
||||
and not await self.table_store.any_workspace_exists()
|
||||
and not await self.table_store.any_signing_key_exists()
|
||||
)
|
||||
return IamResponse(bootstrap_available=available)
|
||||
|
||||
|
|
|
|||
|
|
@ -104,7 +104,15 @@ class Processor(LlmService):
|
|||
|
||||
return resp
|
||||
|
||||
except RateLimitError:
|
||||
except RateLimitError as e:
|
||||
try:
|
||||
body = getattr(e, 'body', {})
|
||||
if isinstance(body, dict):
|
||||
code = body.get('error', {}).get('code')
|
||||
if code in ('insufficient_quota', 'invalid_api_key', 'account_deactivated'):
|
||||
raise RuntimeError(f"OpenAI unrecoverable error: {code} - {body['error'].get('message', '')}")
|
||||
except (ValueError, KeyError, TypeError, AttributeError):
|
||||
pass
|
||||
# Leave rate limit retries to the base handler
|
||||
raise TooManyRequests()
|
||||
|
||||
|
|
@ -188,7 +196,16 @@ class Processor(LlmService):
|
|||
|
||||
logger.debug("Streaming complete")
|
||||
|
||||
except RateLimitError:
|
||||
except RateLimitError as e:
|
||||
try:
|
||||
body = getattr(e, 'body', {})
|
||||
if isinstance(body, dict):
|
||||
code = body.get('error', {}).get('code')
|
||||
if code in ('insufficient_quota', 'invalid_api_key', 'account_deactivated'):
|
||||
logger.warning(f"Hit unrecoverable rate limit error during streaming: {code}")
|
||||
raise RuntimeError(f"OpenAI unrecoverable error: {code} - {body['error'].get('message', '')}")
|
||||
except (ValueError, KeyError, TypeError, AttributeError):
|
||||
pass
|
||||
logger.warning("Hit rate limit during streaming")
|
||||
raise TooManyRequests()
|
||||
|
||||
|
|
|
|||
|
|
@ -4,11 +4,10 @@ Document embeddings query service. Input is vector, output is an array
|
|||
of chunk_ids
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
|
||||
from qdrant_client import QdrantClient
|
||||
from qdrant_client.models import PointStruct
|
||||
from qdrant_client.models import Distance, VectorParams
|
||||
|
||||
from .... schema import DocumentEmbeddingsResponse, ChunkMatch
|
||||
from .... schema import Error
|
||||
|
|
@ -38,32 +37,6 @@ class Processor(DocumentEmbeddingsQueryService):
|
|||
)
|
||||
|
||||
self.qdrant = QdrantClient(url=store_uri, api_key=api_key)
|
||||
self.last_collection = None
|
||||
|
||||
def ensure_collection_exists(self, collection, dim):
|
||||
"""Ensure collection exists, create if it doesn't"""
|
||||
if collection != self.last_collection:
|
||||
if not self.qdrant.collection_exists(collection):
|
||||
try:
|
||||
self.qdrant.create_collection(
|
||||
collection_name=collection,
|
||||
vectors_config=VectorParams(
|
||||
size=dim, distance=Distance.COSINE
|
||||
),
|
||||
)
|
||||
logger.info(f"Created collection: {collection}")
|
||||
except Exception as e:
|
||||
logger.error(f"Qdrant collection creation failed: {e}")
|
||||
raise e
|
||||
self.last_collection = collection
|
||||
|
||||
def collection_exists(self, collection):
|
||||
"""Check if collection exists (no implicit creation)"""
|
||||
return self.qdrant.collection_exists(collection)
|
||||
|
||||
def collection_exists(self, collection):
|
||||
"""Check if collection exists (no implicit creation)"""
|
||||
return self.qdrant.collection_exists(collection)
|
||||
|
||||
async def query_document_embeddings(self, workspace, msg):
|
||||
|
||||
|
|
@ -73,21 +46,24 @@ class Processor(DocumentEmbeddingsQueryService):
|
|||
if not vec:
|
||||
return []
|
||||
|
||||
# Use dimension suffix in collection name
|
||||
dim = len(vec)
|
||||
collection = f"d_{workspace}_{msg.collection}_{dim}"
|
||||
|
||||
# Check if collection exists - return empty if not
|
||||
if not self.collection_exists(collection):
|
||||
exists = await asyncio.to_thread(
|
||||
self.qdrant.collection_exists, collection
|
||||
)
|
||||
if not exists:
|
||||
logger.info(f"Collection {collection} does not exist, returning empty results")
|
||||
return []
|
||||
|
||||
search_result = self.qdrant.query_points(
|
||||
result = await asyncio.to_thread(
|
||||
self.qdrant.query_points,
|
||||
collection_name=collection,
|
||||
query=vec,
|
||||
limit=msg.limit,
|
||||
with_payload=True,
|
||||
).points
|
||||
)
|
||||
search_result = result.points
|
||||
|
||||
chunks = []
|
||||
for r in search_result:
|
||||
|
|
|
|||
|
|
@ -4,11 +4,10 @@ Graph embeddings query service. Input is vector, output is list of
|
|||
entities
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
|
||||
from qdrant_client import QdrantClient
|
||||
from qdrant_client.models import PointStruct
|
||||
from qdrant_client.models import Distance, VectorParams
|
||||
|
||||
from .... schema import GraphEmbeddingsResponse, EntityMatch
|
||||
from .... schema import Error, Term, IRI, LITERAL
|
||||
|
|
@ -38,32 +37,6 @@ class Processor(GraphEmbeddingsQueryService):
|
|||
)
|
||||
|
||||
self.qdrant = QdrantClient(url=store_uri, api_key=api_key)
|
||||
self.last_collection = None
|
||||
|
||||
def ensure_collection_exists(self, collection, dim):
|
||||
"""Ensure collection exists, create if it doesn't"""
|
||||
if collection != self.last_collection:
|
||||
if not self.qdrant.collection_exists(collection):
|
||||
try:
|
||||
self.qdrant.create_collection(
|
||||
collection_name=collection,
|
||||
vectors_config=VectorParams(
|
||||
size=dim, distance=Distance.COSINE
|
||||
),
|
||||
)
|
||||
logger.info(f"Created collection: {collection}")
|
||||
except Exception as e:
|
||||
logger.error(f"Qdrant collection creation failed: {e}")
|
||||
raise e
|
||||
self.last_collection = collection
|
||||
|
||||
def collection_exists(self, collection):
|
||||
"""Check if collection exists (no implicit creation)"""
|
||||
return self.qdrant.collection_exists(collection)
|
||||
|
||||
def collection_exists(self, collection):
|
||||
"""Check if collection exists (no implicit creation)"""
|
||||
return self.qdrant.collection_exists(collection)
|
||||
|
||||
def create_value(self, ent):
|
||||
if ent.startswith("http://") or ent.startswith("https://"):
|
||||
|
|
@ -79,23 +52,26 @@ class Processor(GraphEmbeddingsQueryService):
|
|||
if not vec:
|
||||
return []
|
||||
|
||||
# Use dimension suffix in collection name
|
||||
dim = len(vec)
|
||||
collection = f"t_{workspace}_{msg.collection}_{dim}"
|
||||
|
||||
# Check if collection exists - return empty if not
|
||||
if not self.collection_exists(collection):
|
||||
exists = await asyncio.to_thread(
|
||||
self.qdrant.collection_exists, collection
|
||||
)
|
||||
if not exists:
|
||||
logger.info(f"Collection {collection} does not exist")
|
||||
return []
|
||||
|
||||
# Heuristic hack, get (2*limit), so that we have more chance
|
||||
# of getting (limit) unique entities
|
||||
search_result = self.qdrant.query_points(
|
||||
result = await asyncio.to_thread(
|
||||
self.qdrant.query_points,
|
||||
collection_name=collection,
|
||||
query=vec,
|
||||
limit=msg.limit * 2,
|
||||
with_payload=True,
|
||||
).points
|
||||
)
|
||||
search_result = result.points
|
||||
|
||||
entity_set = set()
|
||||
entities = []
|
||||
|
|
|
|||
|
|
@ -7,7 +7,7 @@ Provides semantic query understanding, ontology matching, and answer generation.
|
|||
|
||||
from .query_service import OntoRAGQueryService, QueryRequest, QueryResponse
|
||||
from .question_analyzer import QuestionAnalyzer, QuestionComponents, QuestionType
|
||||
from .ontology_matcher import OntologyMatcher, QueryOntologySubset
|
||||
from .ontology_matcher import OntologyMatcherForQueries, QueryOntologySubset
|
||||
from .backend_router import BackendRouter, BackendType, QueryRoute
|
||||
from .sparql_generator import SPARQLGenerator, SPARQLQuery
|
||||
from .sparql_cassandra import SPARQLCassandraEngine, SPARQLResult
|
||||
|
|
@ -27,7 +27,7 @@ __all__ = [
|
|||
'QuestionType',
|
||||
|
||||
# Ontology matching
|
||||
'OntologyMatcher',
|
||||
'OntologyMatcherForQueries',
|
||||
'QueryOntologySubset',
|
||||
|
||||
# Backend routing
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@ Provides comprehensive monitoring of system performance, query patterns, and res
|
|||
"""
|
||||
|
||||
import logging
|
||||
import re
|
||||
import time
|
||||
import asyncio
|
||||
import inspect
|
||||
|
|
@ -276,6 +277,26 @@ class MetricsCollector:
|
|||
return f"{name}{{{label_str}}}"
|
||||
|
||||
|
||||
def _extract_metric_label(metric_name: str, label: str) -> Optional[str]:
|
||||
"""Extract a label value from an internal metric key."""
|
||||
labels_start = metric_name.find('{')
|
||||
labels_end = metric_name.find('}', labels_start + 1)
|
||||
|
||||
if labels_start == -1 or labels_end == -1:
|
||||
return None
|
||||
|
||||
labels = metric_name[labels_start + 1:labels_end]
|
||||
label_match = re.search(
|
||||
rf'(?:^|,){re.escape(label)}=(?:"([^"]*)"|([^,]*))',
|
||||
labels,
|
||||
)
|
||||
if not label_match:
|
||||
return None
|
||||
|
||||
quoted_value, unquoted_value = label_match.groups()
|
||||
return quoted_value if quoted_value is not None else unquoted_value
|
||||
|
||||
|
||||
class PerformanceMonitor:
|
||||
"""Monitors system performance and component health."""
|
||||
|
||||
|
|
@ -474,8 +495,8 @@ class PerformanceMonitor:
|
|||
# Cache performance
|
||||
cache_types = set()
|
||||
for metric_name in self.metrics_collector.counters.keys():
|
||||
if 'cache_type=' in metric_name:
|
||||
cache_type = metric_name.split('cache_type=')[1].split(',')[0].split('}')[0]
|
||||
cache_type = _extract_metric_label(metric_name, 'cache_type')
|
||||
if cache_type is not None:
|
||||
cache_types.add(cache_type)
|
||||
|
||||
for cache_type in cache_types:
|
||||
|
|
|
|||
|
|
@ -7,10 +7,10 @@ import logging
|
|||
from typing import List, Dict, Any, Set, Optional
|
||||
from dataclasses import dataclass
|
||||
|
||||
from ...extract.kg.ontology.ontology_loader import Ontology, OntologyLoader
|
||||
from ...extract.kg.ontology.ontology_embedder import OntologyEmbedder
|
||||
from ...extract.kg.ontology.text_processor import TextSegment
|
||||
from ...extract.kg.ontology.ontology_selector import OntologySelector, OntologySubset
|
||||
from trustgraph.extract.kg.ontology.ontology_loader import Ontology, OntologyLoader
|
||||
from trustgraph.extract.kg.ontology.ontology_embedder import OntologyEmbedder
|
||||
from trustgraph.extract.kg.ontology.text_processor import TextSegment
|
||||
from trustgraph.extract.kg.ontology.ontology_selector import OntologySelector, OntologySubset
|
||||
from .question_analyzer import QuestionComponents, QuestionType
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
|
|
|||
|
|
@ -8,13 +8,13 @@ from typing import Dict, Any, List, Optional, Union
|
|||
from dataclasses import dataclass
|
||||
from datetime import datetime
|
||||
|
||||
from ....flow.flow_processor import FlowProcessor
|
||||
from ....tables.config import ConfigTableStore
|
||||
from ...extract.kg.ontology.ontology_loader import OntologyLoader
|
||||
from ...extract.kg.ontology.vector_store import InMemoryVectorStore
|
||||
from trustgraph.base.flow_processor import FlowProcessor
|
||||
from trustgraph.tables.config import ConfigTableStore
|
||||
from trustgraph.extract.kg.ontology.ontology_loader import OntologyLoader
|
||||
from trustgraph.extract.kg.ontology.vector_store import InMemoryVectorStore
|
||||
|
||||
from .question_analyzer import QuestionAnalyzer, QuestionComponents
|
||||
from .ontology_matcher import OntologyMatcher, QueryOntologySubset
|
||||
from .ontology_matcher import OntologyMatcherForQueries, QueryOntologySubset
|
||||
from .backend_router import BackendRouter, QueryRoute, BackendType
|
||||
from .sparql_generator import SPARQLGenerator, SPARQLQuery
|
||||
from .sparql_cassandra import SPARQLCassandraEngine, SPARQLResult
|
||||
|
|
@ -105,7 +105,7 @@ class OntoRAGQueryService(FlowProcessor):
|
|||
|
||||
# Initialize ontology matcher
|
||||
matcher_config = self.config.get('ontology_matcher', {})
|
||||
self.ontology_matcher = OntologyMatcher(
|
||||
self.ontology_matcher = OntologyMatcherForQueries(
|
||||
vector_store=self.vector_store,
|
||||
embedding_service=self.embedding_service,
|
||||
config=matcher_config
|
||||
|
|
|
|||
|
|
@ -28,7 +28,7 @@ try:
|
|||
except ImportError:
|
||||
CASSANDRA_AVAILABLE = False
|
||||
|
||||
from ....tables.config import ConfigTableStore
|
||||
from trustgraph.tables.config import ConfigTableStore
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
|
|
|||
|
|
@ -202,11 +202,14 @@ ASK {{
|
|||
|
||||
if response and isinstance(response, dict):
|
||||
query = response.get('query', '').strip()
|
||||
if query.upper().startswith(('SELECT', 'ASK', 'CONSTRUCT', 'DESCRIBE')):
|
||||
parts = query.split()
|
||||
if parts and parts[0].upper() in (
|
||||
'SELECT', 'ASK', 'CONSTRUCT', 'DESCRIBE',
|
||||
):
|
||||
return SPARQLQuery(
|
||||
query=query,
|
||||
variables=self._extract_variables(query),
|
||||
query_type=query.split()[0].upper(),
|
||||
query_type=parts[0].upper(),
|
||||
explanation=response.get('explanation', 'Generated by LLM'),
|
||||
complexity_score=self._calculate_complexity(query)
|
||||
)
|
||||
|
|
|
|||
|
|
@ -6,6 +6,7 @@ Output is matching row index information (index_name, index_value) for
|
|||
use in subsequent Cassandra lookups.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
import re
|
||||
from typing import Optional
|
||||
|
|
@ -70,7 +71,7 @@ class Processor(FlowProcessor):
|
|||
safe_name = 'r_' + safe_name
|
||||
return safe_name.lower()
|
||||
|
||||
def find_collection(self, workspace: str, collection: str, schema_name: str) -> Optional[str]:
|
||||
async def find_collection(self, workspace: str, collection: str, schema_name: str) -> Optional[str]:
|
||||
"""Find the Qdrant collection for a given workspace/collection/schema"""
|
||||
prefix = (
|
||||
f"rows_{self.sanitize_name(workspace)}_"
|
||||
|
|
@ -78,14 +79,15 @@ class Processor(FlowProcessor):
|
|||
)
|
||||
|
||||
try:
|
||||
all_collections = self.qdrant.get_collections().collections
|
||||
all_collections = await asyncio.to_thread(
|
||||
lambda: self.qdrant.get_collections().collections
|
||||
)
|
||||
matching = [
|
||||
coll.name for coll in all_collections
|
||||
if coll.name.startswith(prefix)
|
||||
]
|
||||
|
||||
if matching:
|
||||
# Return first match (there should typically be only one per dimension)
|
||||
return matching[0]
|
||||
|
||||
except Exception as e:
|
||||
|
|
@ -100,8 +102,7 @@ class Processor(FlowProcessor):
|
|||
if not vec:
|
||||
return []
|
||||
|
||||
# Find the collection for this workspace/collection/schema
|
||||
qdrant_collection = self.find_collection(
|
||||
qdrant_collection = await self.find_collection(
|
||||
workspace, request.collection, request.schema_name
|
||||
)
|
||||
|
||||
|
|
@ -113,7 +114,6 @@ class Processor(FlowProcessor):
|
|||
return []
|
||||
|
||||
try:
|
||||
# Build optional filter for index_name
|
||||
query_filter = None
|
||||
if request.index_name:
|
||||
query_filter = Filter(
|
||||
|
|
@ -125,16 +125,16 @@ class Processor(FlowProcessor):
|
|||
]
|
||||
)
|
||||
|
||||
# Query Qdrant
|
||||
search_result = self.qdrant.query_points(
|
||||
result = await asyncio.to_thread(
|
||||
self.qdrant.query_points,
|
||||
collection_name=qdrant_collection,
|
||||
query=vec,
|
||||
limit=request.limit,
|
||||
with_payload=True,
|
||||
query_filter=query_filter,
|
||||
).points
|
||||
)
|
||||
search_result = result.points
|
||||
|
||||
# Convert to RowIndexMatch objects
|
||||
matches = []
|
||||
for point in search_result:
|
||||
payload = point.payload or {}
|
||||
|
|
|
|||
|
|
@ -11,6 +11,7 @@ Queries against the unified 'rows' table with schema:
|
|||
- source: text
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
import re
|
||||
|
|
@ -97,34 +98,38 @@ class Processor(FlowProcessor):
|
|||
# Cassandra session
|
||||
self.cluster = None
|
||||
self.session = None
|
||||
self._setup_lock = asyncio.Lock()
|
||||
|
||||
# Known keyspaces
|
||||
self.known_keyspaces: Set[str] = set()
|
||||
|
||||
def connect_cassandra(self):
|
||||
async def connect_cassandra(self):
|
||||
"""Connect to Cassandra cluster"""
|
||||
if self.session:
|
||||
return
|
||||
async with self._setup_lock:
|
||||
if self.session:
|
||||
return
|
||||
|
||||
try:
|
||||
if self.cassandra_username and self.cassandra_password:
|
||||
auth_provider = PlainTextAuthProvider(
|
||||
username=self.cassandra_username,
|
||||
password=self.cassandra_password
|
||||
)
|
||||
self.cluster = Cluster(
|
||||
contact_points=self.cassandra_host,
|
||||
auth_provider=auth_provider
|
||||
)
|
||||
else:
|
||||
self.cluster = Cluster(contact_points=self.cassandra_host)
|
||||
try:
|
||||
if self.cassandra_username and self.cassandra_password:
|
||||
auth_provider = PlainTextAuthProvider(
|
||||
username=self.cassandra_username,
|
||||
password=self.cassandra_password
|
||||
)
|
||||
cluster = Cluster(
|
||||
contact_points=self.cassandra_host,
|
||||
auth_provider=auth_provider
|
||||
)
|
||||
else:
|
||||
cluster = Cluster(contact_points=self.cassandra_host)
|
||||
|
||||
self.session = self.cluster.connect()
|
||||
logger.info(f"Connected to Cassandra cluster at {self.cassandra_host}")
|
||||
session = await asyncio.to_thread(cluster.connect)
|
||||
self.cluster = cluster
|
||||
self.session = session
|
||||
logger.info(f"Connected to Cassandra cluster at {self.cassandra_host}")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to connect to Cassandra: {e}", exc_info=True)
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to connect to Cassandra: {e}", exc_info=True)
|
||||
raise
|
||||
|
||||
def sanitize_name(self, name: str) -> str:
|
||||
"""Sanitize names for Cassandra compatibility"""
|
||||
|
|
@ -140,14 +145,17 @@ class Processor(FlowProcessor):
|
|||
f"for workspace {workspace}"
|
||||
)
|
||||
|
||||
# Replace existing schemas for this workspace
|
||||
async with self._setup_lock:
|
||||
await self._apply_schema_config(workspace, config)
|
||||
|
||||
async def _apply_schema_config(self, workspace, config):
|
||||
|
||||
ws_schemas: Dict[str, RowSchema] = {}
|
||||
self.schemas[workspace] = ws_schemas
|
||||
|
||||
builder = GraphQLSchemaBuilder()
|
||||
self.schema_builders[workspace] = builder
|
||||
|
||||
# Check if our config type exists
|
||||
if self.config_key not in config:
|
||||
logger.warning(
|
||||
f"No '{self.config_key}' type in configuration "
|
||||
|
|
@ -156,16 +164,12 @@ class Processor(FlowProcessor):
|
|||
self.graphql_schemas[workspace] = None
|
||||
return
|
||||
|
||||
# Get the schemas dictionary for our type
|
||||
schemas_config = config[self.config_key]
|
||||
|
||||
# Process each schema in the schemas config
|
||||
for schema_name, schema_json in schemas_config.items():
|
||||
try:
|
||||
# Parse the JSON schema definition
|
||||
schema_def = json.loads(schema_json)
|
||||
|
||||
# Create Field objects
|
||||
fields = []
|
||||
for field_def in schema_def.get("fields", []):
|
||||
field = SchemaField(
|
||||
|
|
@ -180,7 +184,6 @@ class Processor(FlowProcessor):
|
|||
)
|
||||
fields.append(field)
|
||||
|
||||
# Create RowSchema
|
||||
row_schema = RowSchema(
|
||||
name=schema_def.get("name", schema_name),
|
||||
description=schema_def.get("description", ""),
|
||||
|
|
@ -202,7 +205,6 @@ class Processor(FlowProcessor):
|
|||
f"{len(ws_schemas)} schemas"
|
||||
)
|
||||
|
||||
# Regenerate GraphQL schema for this workspace
|
||||
self.graphql_schemas[workspace] = builder.build(self.query_cassandra)
|
||||
|
||||
def get_index_names(self, schema: RowSchema) -> List[str]:
|
||||
|
|
@ -254,7 +256,7 @@ class Processor(FlowProcessor):
|
|||
For other queries, we need to scan and post-filter.
|
||||
"""
|
||||
# Connect if needed
|
||||
self.connect_cassandra()
|
||||
await self.connect_cassandra()
|
||||
|
||||
safe_keyspace = self.sanitize_name(workspace)
|
||||
|
||||
|
|
|
|||
|
|
@ -4,6 +4,10 @@ SPARQL algebra evaluator.
|
|||
Recursively evaluates an rdflib SPARQL algebra tree by issuing triple
|
||||
pattern queries via TriplesClient (streaming) and performing in-memory
|
||||
joins, filters, and projections.
|
||||
|
||||
Handlers are async generators that yield solutions incrementally.
|
||||
Blocking operators (joins, sort, group, distinct) materialise their
|
||||
upstream into a list at the boundary, then yield results.
|
||||
"""
|
||||
|
||||
import logging
|
||||
|
|
@ -17,7 +21,7 @@ from ... knowledge import Uri
|
|||
from ... knowledge import Literal as KgLiteral
|
||||
from . parser import rdflib_term_to_term
|
||||
from . solutions import (
|
||||
hash_join, left_join, union, project, distinct,
|
||||
hash_join, left_join, minus, union, project, distinct,
|
||||
order_by, slice_solutions, _term_key,
|
||||
)
|
||||
from . expressions import evaluate_expression, _effective_boolean
|
||||
|
|
@ -30,61 +34,60 @@ class EvaluationError(Exception):
|
|||
pass
|
||||
|
||||
|
||||
async def evaluate(node, triples_client, workspace, collection, limit=10000):
|
||||
async def evaluate(node, triples_client, collection, limit=10000):
|
||||
"""
|
||||
Evaluate a SPARQL algebra node.
|
||||
|
||||
Args:
|
||||
node: rdflib CompValue algebra node
|
||||
triples_client: TriplesClient instance for triple pattern queries
|
||||
workspace: workspace/keyspace identifier
|
||||
collection: collection identifier
|
||||
limit: safety limit on results
|
||||
|
||||
Returns:
|
||||
list of solutions (dicts mapping variable names to Term values)
|
||||
Yields solutions (dicts mapping variable names to Term values)
|
||||
incrementally as an async generator.
|
||||
"""
|
||||
if not isinstance(node, CompValue):
|
||||
logger.warning(f"Expected CompValue, got {type(node)}: {node}")
|
||||
return [{}]
|
||||
yield {}
|
||||
return
|
||||
|
||||
name = node.name
|
||||
handler = _HANDLERS.get(name)
|
||||
|
||||
if handler is None:
|
||||
logger.warning(f"Unsupported algebra node: {name}")
|
||||
return [{}]
|
||||
yield {}
|
||||
return
|
||||
|
||||
return await handler(node, triples_client, workspace, collection, limit)
|
||||
async for sol in handler(node, triples_client, collection, limit):
|
||||
yield sol
|
||||
|
||||
|
||||
# --- Node handlers ---
|
||||
|
||||
async def _eval_select_query(node, tc, workspace, collection, limit):
|
||||
"""Evaluate a SelectQuery node."""
|
||||
return await evaluate(node.p, tc, workspace, collection, limit)
|
||||
async def materialise(node, triples_client, collection, limit=10000):
|
||||
"""Collect all solutions from evaluate() into a list."""
|
||||
return [sol async for sol in evaluate(node, triples_client, collection, limit)]
|
||||
|
||||
|
||||
async def _eval_project(node, tc, workspace, collection, limit):
|
||||
"""Evaluate a Project node (SELECT variable projection)."""
|
||||
solutions = await evaluate(node.p, tc, workspace, collection, limit)
|
||||
# --- Node handlers (async generators) ---
|
||||
|
||||
async def _eval_select_query(node, tc, collection, limit):
|
||||
async for sol in evaluate(node.p, tc, collection, limit):
|
||||
yield sol
|
||||
|
||||
|
||||
async def _eval_project(node, tc, collection, limit):
|
||||
variables = [str(v) for v in node.PV]
|
||||
return project(solutions, variables)
|
||||
async for sol in evaluate(node.p, tc, collection, limit):
|
||||
yield {v: sol[v] for v in variables if v in sol}
|
||||
|
||||
|
||||
async def _eval_bgp(node, tc, workspace, collection, limit):
|
||||
async def _eval_bgp(node, tc, collection, limit):
|
||||
"""
|
||||
Evaluate a Basic Graph Pattern.
|
||||
|
||||
Issues streaming triple pattern queries and joins results. Patterns
|
||||
are ordered by selectivity (more bound terms first) and evaluated
|
||||
sequentially with bound-variable substitution.
|
||||
Patterns are ordered by selectivity and evaluated sequentially.
|
||||
For the final pattern, results stream directly from the triple store.
|
||||
"""
|
||||
triples = node.triples
|
||||
if not triples:
|
||||
return [{}]
|
||||
yield {}
|
||||
return
|
||||
|
||||
# Sort patterns by selectivity: more bound terms = more selective
|
||||
def selectivity(pattern):
|
||||
return sum(1 for t in pattern if not isinstance(t, Variable))
|
||||
|
||||
|
|
@ -92,55 +95,222 @@ async def _eval_bgp(node, tc, workspace, collection, limit):
|
|||
enumerate(triples), key=lambda x: -selectivity(x[1])
|
||||
)
|
||||
|
||||
# For all patterns except the last, we must materialise intermediate
|
||||
# solutions because each pattern depends on bindings from prior ones.
|
||||
# The last pattern streams directly.
|
||||
solutions = [{}]
|
||||
|
||||
for _, pattern in sorted_patterns:
|
||||
for pattern_idx, (_, pattern) in enumerate(sorted_patterns):
|
||||
s_tmpl, p_tmpl, o_tmpl = pattern
|
||||
is_last = (pattern_idx == len(sorted_patterns) - 1)
|
||||
|
||||
new_solutions = []
|
||||
if is_last:
|
||||
# Stream the final pattern — yield as triples arrive
|
||||
count = 0
|
||||
for sol in solutions:
|
||||
s_val = _resolve_term(s_tmpl, sol)
|
||||
p_val = _resolve_term(p_tmpl, sol)
|
||||
o_val = _resolve_term(o_tmpl, sol)
|
||||
|
||||
for sol in solutions:
|
||||
# Substitute known bindings into the pattern
|
||||
s_val = _resolve_term(s_tmpl, sol)
|
||||
p_val = _resolve_term(p_tmpl, sol)
|
||||
o_val = _resolve_term(o_tmpl, sol)
|
||||
async for triple in tc.query_gen(
|
||||
s=s_val, p=p_val, o=o_val,
|
||||
limit=limit, collection=collection,
|
||||
):
|
||||
binding = dict(sol)
|
||||
if isinstance(s_tmpl, Variable):
|
||||
binding[str(s_tmpl)] = _to_term(triple.s)
|
||||
if isinstance(p_tmpl, Variable):
|
||||
binding[str(p_tmpl)] = _to_term(triple.p)
|
||||
if isinstance(o_tmpl, Variable):
|
||||
binding[str(o_tmpl)] = _to_term(triple.o)
|
||||
yield binding
|
||||
count += 1
|
||||
if count >= limit:
|
||||
return
|
||||
else:
|
||||
# Materialise intermediate patterns
|
||||
new_solutions = []
|
||||
for sol in solutions:
|
||||
s_val = _resolve_term(s_tmpl, sol)
|
||||
p_val = _resolve_term(p_tmpl, sol)
|
||||
o_val = _resolve_term(o_tmpl, sol)
|
||||
|
||||
# Query the triples store
|
||||
results = await _query_pattern(
|
||||
tc, s_val, p_val, o_val, workspace, collection, limit
|
||||
)
|
||||
async for triple in tc.query_gen(
|
||||
s=s_val, p=p_val, o=o_val,
|
||||
limit=limit, collection=collection,
|
||||
):
|
||||
binding = dict(sol)
|
||||
if isinstance(s_tmpl, Variable):
|
||||
binding[str(s_tmpl)] = _to_term(triple.s)
|
||||
if isinstance(p_tmpl, Variable):
|
||||
binding[str(p_tmpl)] = _to_term(triple.p)
|
||||
if isinstance(o_tmpl, Variable):
|
||||
binding[str(o_tmpl)] = _to_term(triple.o)
|
||||
new_solutions.append(binding)
|
||||
|
||||
# Map results back to variable bindings,
|
||||
# converting Uri/Literal to Term objects
|
||||
for triple in results:
|
||||
binding = dict(sol)
|
||||
if isinstance(s_tmpl, Variable):
|
||||
binding[str(s_tmpl)] = _to_term(triple.s)
|
||||
if isinstance(p_tmpl, Variable):
|
||||
binding[str(p_tmpl)] = _to_term(triple.p)
|
||||
if isinstance(o_tmpl, Variable):
|
||||
binding[str(o_tmpl)] = _to_term(triple.o)
|
||||
new_solutions.append(binding)
|
||||
|
||||
solutions = new_solutions
|
||||
|
||||
if not solutions:
|
||||
break
|
||||
|
||||
return solutions[:limit]
|
||||
solutions = new_solutions
|
||||
if not solutions:
|
||||
return
|
||||
|
||||
|
||||
async def _eval_join(node, tc, workspace, collection, limit):
|
||||
"""Evaluate a Join node."""
|
||||
left = await evaluate(node.p1, tc, workspace, collection, limit)
|
||||
right = await evaluate(node.p2, tc, workspace, collection, limit)
|
||||
return hash_join(left, right)[:limit]
|
||||
# --- Blocking operators: materialise upstream, then yield ---
|
||||
|
||||
def _is_small_node(node):
|
||||
"""Check if a node is likely to produce a small number of solutions."""
|
||||
if not isinstance(node, CompValue):
|
||||
return False
|
||||
if node.name in ("values", "ToMultiSet"):
|
||||
return True
|
||||
if node.name == "Extend" and hasattr(node, "p"):
|
||||
return _is_small_node(node.p)
|
||||
return False
|
||||
|
||||
|
||||
async def _eval_left_join(node, tc, workspace, collection, limit):
|
||||
"""Evaluate a LeftJoin node (OPTIONAL)."""
|
||||
left_sols = await evaluate(node.p1, tc, workspace, collection, limit)
|
||||
right_sols = await evaluate(node.p2, tc, workspace, collection, limit)
|
||||
async def _eval_join(node, tc, collection, limit):
|
||||
# Bind join: if one side is small (e.g. VALUES), materialise it and
|
||||
# substitute its bindings into the other side's evaluation. This
|
||||
# turns wildcard BGP queries into selective ones.
|
||||
if _is_small_node(node.p1):
|
||||
yield_from = _bind_join(node.p1, node.p2, tc, collection, limit)
|
||||
elif _is_small_node(node.p2):
|
||||
yield_from = _bind_join(node.p2, node.p1, tc, collection, limit)
|
||||
else:
|
||||
yield_from = _hash_join(node, tc, collection, limit)
|
||||
|
||||
async for sol in yield_from:
|
||||
yield sol
|
||||
|
||||
|
||||
async def _hash_join(node, tc, collection, limit):
|
||||
left = await materialise(node.p1, tc, collection, limit)
|
||||
right = await materialise(node.p2, tc, collection, limit)
|
||||
for sol in hash_join(left, right)[:limit]:
|
||||
yield sol
|
||||
|
||||
|
||||
async def _bind_join(small_node, big_node, tc, collection, limit):
|
||||
"""Iterate over the small side and inject bindings into the big side."""
|
||||
small_sols = await materialise(small_node, tc, collection, limit)
|
||||
|
||||
count = 0
|
||||
for binding in small_sols:
|
||||
async for sol in _evaluate_with_bindings(
|
||||
big_node, binding, tc, collection, limit
|
||||
):
|
||||
yield sol
|
||||
count += 1
|
||||
if count >= limit:
|
||||
return
|
||||
|
||||
|
||||
def _merge_compatible(left, right):
|
||||
"""Merge two solutions if compatible (shared vars have equal values)."""
|
||||
merged = dict(left)
|
||||
for k, v in right.items():
|
||||
if k in merged:
|
||||
if _term_key(merged[k]) != _term_key(v):
|
||||
return None
|
||||
else:
|
||||
merged[k] = v
|
||||
return merged
|
||||
|
||||
|
||||
async def _evaluate_with_bindings(node, bindings, tc, collection, limit):
|
||||
"""Evaluate a node with pre-seeded variable bindings.
|
||||
|
||||
For BGP nodes, the bindings are injected so _resolve_term sees them,
|
||||
turning wildcard queries into selective ones. For other node types,
|
||||
evaluate normally and merge/filter against the bindings.
|
||||
"""
|
||||
if isinstance(node, CompValue) and node.name == "BGP":
|
||||
async for sol in _eval_bgp_with_bindings(
|
||||
node, bindings, tc, collection, limit
|
||||
):
|
||||
yield sol
|
||||
else:
|
||||
async for sol in evaluate(node, tc, collection, limit):
|
||||
merged = _merge_compatible(bindings, sol)
|
||||
if merged is not None:
|
||||
yield merged
|
||||
|
||||
|
||||
async def _eval_bgp_with_bindings(node, bindings, tc, collection, limit):
|
||||
"""Evaluate a BGP with pre-seeded bindings so variables resolve to terms."""
|
||||
triples = node.triples
|
||||
if not triples:
|
||||
yield dict(bindings)
|
||||
return
|
||||
|
||||
def selectivity(pattern):
|
||||
score = 0
|
||||
for t in pattern:
|
||||
if not isinstance(t, Variable):
|
||||
score += 1
|
||||
elif str(t) in bindings:
|
||||
score += 1
|
||||
return score
|
||||
|
||||
sorted_patterns = sorted(
|
||||
enumerate(triples), key=lambda x: -selectivity(x[1])
|
||||
)
|
||||
|
||||
solutions = [dict(bindings)]
|
||||
|
||||
for pattern_idx, (_, pattern) in enumerate(sorted_patterns):
|
||||
s_tmpl, p_tmpl, o_tmpl = pattern
|
||||
is_last = (pattern_idx == len(sorted_patterns) - 1)
|
||||
|
||||
if is_last:
|
||||
count = 0
|
||||
for sol in solutions:
|
||||
s_val = _resolve_term(s_tmpl, sol)
|
||||
p_val = _resolve_term(p_tmpl, sol)
|
||||
o_val = _resolve_term(o_tmpl, sol)
|
||||
|
||||
async for triple in tc.query_gen(
|
||||
s=s_val, p=p_val, o=o_val,
|
||||
limit=limit, collection=collection,
|
||||
):
|
||||
binding = dict(sol)
|
||||
if isinstance(s_tmpl, Variable):
|
||||
binding[str(s_tmpl)] = _to_term(triple.s)
|
||||
if isinstance(p_tmpl, Variable):
|
||||
binding[str(p_tmpl)] = _to_term(triple.p)
|
||||
if isinstance(o_tmpl, Variable):
|
||||
binding[str(o_tmpl)] = _to_term(triple.o)
|
||||
yield binding
|
||||
count += 1
|
||||
if count >= limit:
|
||||
return
|
||||
else:
|
||||
new_solutions = []
|
||||
for sol in solutions:
|
||||
s_val = _resolve_term(s_tmpl, sol)
|
||||
p_val = _resolve_term(p_tmpl, sol)
|
||||
o_val = _resolve_term(o_tmpl, sol)
|
||||
|
||||
async for triple in tc.query_gen(
|
||||
s=s_val, p=p_val, o=o_val,
|
||||
limit=limit, collection=collection,
|
||||
):
|
||||
binding = dict(sol)
|
||||
if isinstance(s_tmpl, Variable):
|
||||
binding[str(s_tmpl)] = _to_term(triple.s)
|
||||
if isinstance(p_tmpl, Variable):
|
||||
binding[str(p_tmpl)] = _to_term(triple.p)
|
||||
if isinstance(o_tmpl, Variable):
|
||||
binding[str(o_tmpl)] = _to_term(triple.o)
|
||||
new_solutions.append(binding)
|
||||
|
||||
solutions = new_solutions
|
||||
if not solutions:
|
||||
return
|
||||
|
||||
|
||||
async def _eval_left_join(node, tc, collection, limit):
|
||||
# Buffer right side for hash index; stream left through probe
|
||||
left_sols = await materialise(node.p1, tc, collection, limit)
|
||||
right_sols = await materialise(node.p2, tc, collection, limit)
|
||||
|
||||
filter_fn = None
|
||||
if hasattr(node, "expr") and node.expr is not None:
|
||||
|
|
@ -150,42 +320,35 @@ async def _eval_left_join(node, tc, workspace, collection, limit):
|
|||
evaluate_expression(expr, sol)
|
||||
)
|
||||
|
||||
return left_join(left_sols, right_sols, filter_fn)[:limit]
|
||||
for sol in left_join(left_sols, right_sols, filter_fn)[:limit]:
|
||||
yield sol
|
||||
|
||||
|
||||
async def _eval_union(node, tc, workspace, collection, limit):
|
||||
"""Evaluate a Union node."""
|
||||
left = await evaluate(node.p1, tc, workspace, collection, limit)
|
||||
right = await evaluate(node.p2, tc, workspace, collection, limit)
|
||||
return union(left, right)[:limit]
|
||||
async def _eval_minus(node, tc, collection, limit):
|
||||
left = await materialise(node.p1, tc, collection, limit)
|
||||
right = await materialise(node.p2, tc, collection, limit)
|
||||
for sol in minus(left, right):
|
||||
yield sol
|
||||
|
||||
|
||||
async def _eval_filter(node, tc, workspace, collection, limit):
|
||||
"""Evaluate a Filter node."""
|
||||
solutions = await evaluate(node.p, tc, workspace, collection, limit)
|
||||
expr = node.expr
|
||||
return [
|
||||
sol for sol in solutions
|
||||
if _effective_boolean(evaluate_expression(expr, sol))
|
||||
]
|
||||
async def _eval_distinct(node, tc, collection, limit):
|
||||
seen = set()
|
||||
async for sol in evaluate(node.p, tc, collection, limit):
|
||||
key = tuple(sorted(
|
||||
(k, _term_key(v)) for k, v in sol.items()
|
||||
))
|
||||
if key not in seen:
|
||||
seen.add(key)
|
||||
yield sol
|
||||
|
||||
|
||||
async def _eval_distinct(node, tc, workspace, collection, limit):
|
||||
"""Evaluate a Distinct node."""
|
||||
solutions = await evaluate(node.p, tc, workspace, collection, limit)
|
||||
return distinct(solutions)
|
||||
async def _eval_reduced(node, tc, collection, limit):
|
||||
async for sol in _eval_distinct(node, tc, collection, limit):
|
||||
yield sol
|
||||
|
||||
|
||||
async def _eval_reduced(node, tc, workspace, collection, limit):
|
||||
"""Evaluate a Reduced node (like Distinct but implementation-defined)."""
|
||||
# Treat same as Distinct
|
||||
solutions = await evaluate(node.p, tc, workspace, collection, limit)
|
||||
return distinct(solutions)
|
||||
|
||||
|
||||
async def _eval_order_by(node, tc, workspace, collection, limit):
|
||||
"""Evaluate an OrderBy node."""
|
||||
solutions = await evaluate(node.p, tc, workspace, collection, limit)
|
||||
async def _eval_order_by(node, tc, collection, limit):
|
||||
solutions = await materialise(node.p, tc, collection, limit)
|
||||
|
||||
key_fns = []
|
||||
for cond in node.expr:
|
||||
|
|
@ -197,36 +360,104 @@ async def _eval_order_by(node, tc, workspace, collection, limit):
|
|||
ascending,
|
||||
))
|
||||
else:
|
||||
# Simple variable or expression
|
||||
key_fns.append((
|
||||
lambda sol, e=cond: evaluate_expression(e, sol),
|
||||
True,
|
||||
))
|
||||
|
||||
return order_by(solutions, key_fns)
|
||||
for sol in order_by(solutions, key_fns):
|
||||
yield sol
|
||||
|
||||
|
||||
async def _eval_slice(node, tc, workspace, collection, limit):
|
||||
"""Evaluate a Slice node (LIMIT/OFFSET)."""
|
||||
# Pass tighter limit downstream if possible
|
||||
inner_limit = limit
|
||||
if node.length is not None:
|
||||
offset = node.start or 0
|
||||
inner_limit = min(limit, offset + node.length)
|
||||
# --- Streamable operators ---
|
||||
|
||||
solutions = await evaluate(node.p, tc, workspace, collection, inner_limit)
|
||||
return slice_solutions(solutions, node.start or 0, node.length)
|
||||
async def _eval_slice(node, tc, collection, limit):
|
||||
offset = node.start or 0
|
||||
length = node.length
|
||||
skipped = 0
|
||||
emitted = 0
|
||||
|
||||
async for sol in evaluate(node.p, tc, collection, limit):
|
||||
if skipped < offset:
|
||||
skipped += 1
|
||||
continue
|
||||
yield sol
|
||||
emitted += 1
|
||||
if length is not None and emitted >= length:
|
||||
return
|
||||
|
||||
|
||||
async def _eval_extend(node, tc, workspace, collection, limit):
|
||||
"""Evaluate an Extend node (BIND)."""
|
||||
solutions = await evaluate(node.p, tc, workspace, collection, limit)
|
||||
async def _eval_union(node, tc, collection, limit):
|
||||
async for sol in evaluate(node.p1, tc, collection, limit):
|
||||
yield sol
|
||||
async for sol in evaluate(node.p2, tc, collection, limit):
|
||||
yield sol
|
||||
|
||||
|
||||
async def _check_exists(graph_node, sol, tc, collection, limit):
|
||||
"""Evaluate an EXISTS graph pattern against a solution."""
|
||||
async for r in evaluate(graph_node, tc, collection, limit):
|
||||
shared = set(sol.keys()) & set(r.keys())
|
||||
if all(
|
||||
_term_key(sol[v]) == _term_key(r[v])
|
||||
for v in shared
|
||||
if sol.get(v) is not None and r.get(v) is not None
|
||||
):
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
async def _pre_eval_exists(expr, sol, tc, collection, limit, cache):
|
||||
"""Walk an expression tree, pre-evaluate EXISTS/NOT EXISTS, cache results."""
|
||||
if not isinstance(expr, CompValue):
|
||||
return
|
||||
if expr.name in ("Builtin_EXISTS", "Builtin_NOTEXISTS"):
|
||||
key = id(expr.graph), id(sol)
|
||||
if key not in cache:
|
||||
cache[key] = await _check_exists(
|
||||
expr.graph, sol, tc, collection, limit
|
||||
)
|
||||
return
|
||||
for attr in ("expr", "other", "arg", "arg1", "arg2", "arg3"):
|
||||
child = getattr(expr, attr, None)
|
||||
if child is None:
|
||||
continue
|
||||
if isinstance(child, CompValue):
|
||||
await _pre_eval_exists(child, sol, tc, collection, limit, cache)
|
||||
elif isinstance(child, (list, tuple)):
|
||||
for item in child:
|
||||
if isinstance(item, CompValue):
|
||||
await _pre_eval_exists(
|
||||
item, sol, tc, collection, limit, cache
|
||||
)
|
||||
|
||||
|
||||
async def _eval_filter(node, tc, collection, limit):
|
||||
expr = node.expr
|
||||
exists_cache = {}
|
||||
|
||||
def exists_cb(graph_node, sol):
|
||||
key = id(graph_node), id(sol)
|
||||
return exists_cache.get(key, False)
|
||||
|
||||
async for sol in evaluate(node.p, tc, collection, limit):
|
||||
await _pre_eval_exists(expr, sol, tc, collection, limit, exists_cache)
|
||||
if _effective_boolean(evaluate_expression(expr, sol, exists_cb=exists_cb)):
|
||||
yield sol
|
||||
|
||||
|
||||
async def _eval_extend(node, tc, collection, limit):
|
||||
var_name = str(node.var)
|
||||
expr = node.expr
|
||||
exists_cache = {}
|
||||
|
||||
result = []
|
||||
for sol in solutions:
|
||||
val = evaluate_expression(expr, sol)
|
||||
def exists_cb(graph_node, sol):
|
||||
key = id(graph_node), id(sol)
|
||||
return exists_cache.get(key, False)
|
||||
|
||||
async for sol in evaluate(node.p, tc, collection, limit):
|
||||
await _pre_eval_exists(expr, sol, tc, collection, limit, exists_cache)
|
||||
val = evaluate_expression(expr, sol, exists_cb=exists_cb)
|
||||
new_sol = dict(sol)
|
||||
if isinstance(val, Term):
|
||||
new_sol[var_name] = val
|
||||
|
|
@ -241,16 +472,14 @@ async def _eval_extend(node, tc, workspace, collection, limit):
|
|||
)
|
||||
elif val is not None:
|
||||
new_sol[var_name] = Term(type=LITERAL, value=str(val))
|
||||
result.append(new_sol)
|
||||
|
||||
return result
|
||||
yield new_sol
|
||||
|
||||
|
||||
async def _eval_group(node, tc, workspace, collection, limit):
|
||||
"""Evaluate a Group node (GROUP BY with aggregation)."""
|
||||
solutions = await evaluate(node.p, tc, workspace, collection, limit)
|
||||
# --- Aggregation (blocking) ---
|
||||
|
||||
async def _eval_group(node, tc, collection, limit):
|
||||
solutions = await materialise(node.p, tc, collection, limit)
|
||||
|
||||
# Extract grouping expressions
|
||||
group_exprs = []
|
||||
if hasattr(node, "expr") and node.expr:
|
||||
for expr in node.expr:
|
||||
|
|
@ -261,7 +490,6 @@ async def _eval_group(node, tc, workspace, collection, limit):
|
|||
else:
|
||||
group_exprs.append((expr, None))
|
||||
|
||||
# Group solutions
|
||||
groups = defaultdict(list)
|
||||
for sol in solutions:
|
||||
key_parts = []
|
||||
|
|
@ -271,81 +499,72 @@ async def _eval_group(node, tc, workspace, collection, limit):
|
|||
groups[tuple(key_parts)].append(sol)
|
||||
|
||||
if not group_exprs:
|
||||
# No GROUP BY - entire result is one group
|
||||
groups[()].extend(solutions)
|
||||
|
||||
# Build grouped solutions (one per group)
|
||||
result = []
|
||||
for key, group_sols in groups.items():
|
||||
sol = {}
|
||||
# Include group key variables
|
||||
if group_sols:
|
||||
for (expr, var_name), k in zip(group_exprs, key):
|
||||
if var_name and group_sols:
|
||||
sol[var_name] = evaluate_expression(expr, group_sols[0])
|
||||
sol["__group__"] = group_sols
|
||||
result.append(sol)
|
||||
|
||||
return result
|
||||
yield sol
|
||||
|
||||
|
||||
async def _eval_aggregate_join(node, tc, workspace, collection, limit):
|
||||
"""Evaluate an AggregateJoin (aggregation functions after GROUP BY)."""
|
||||
solutions = await evaluate(node.p, tc, workspace, collection, limit)
|
||||
|
||||
result = []
|
||||
for sol in solutions:
|
||||
async def _eval_aggregate_join(node, tc, collection, limit):
|
||||
async for sol in evaluate(node.p, tc, collection, limit):
|
||||
group = sol.get("__group__", [sol])
|
||||
new_sol = {k: v for k, v in sol.items() if k != "__group__"}
|
||||
|
||||
# Apply aggregate functions
|
||||
if hasattr(node, "A") and node.A:
|
||||
for agg in node.A:
|
||||
var_name = str(agg.res)
|
||||
agg_val = _compute_aggregate(agg, group)
|
||||
new_sol[var_name] = agg_val
|
||||
|
||||
result.append(new_sol)
|
||||
|
||||
return result
|
||||
yield new_sol
|
||||
|
||||
|
||||
async def _eval_graph(node, tc, workspace, collection, limit):
|
||||
"""Evaluate a Graph node (GRAPH clause)."""
|
||||
async def _eval_graph(node, tc, collection, limit):
|
||||
term = node.term
|
||||
|
||||
if isinstance(term, URIRef):
|
||||
# GRAPH <uri> { ... } — fixed graph
|
||||
# We'd need to pass graph to triples queries
|
||||
# For now, evaluate inner pattern normally
|
||||
logger.info(f"GRAPH <{term}> clause - graph filtering not yet wired")
|
||||
return await evaluate(node.p, tc, workspace, collection, limit)
|
||||
elif isinstance(term, Variable):
|
||||
# GRAPH ?g { ... } — variable graph
|
||||
logger.info(f"GRAPH ?{term} clause - variable graph not yet wired")
|
||||
return await evaluate(node.p, tc, workspace, collection, limit)
|
||||
else:
|
||||
return await evaluate(node.p, tc, workspace, collection, limit)
|
||||
|
||||
async for sol in evaluate(node.p, tc, collection, limit):
|
||||
yield sol
|
||||
|
||||
|
||||
async def _eval_values(node, tc, workspace, collection, limit):
|
||||
"""Evaluate a VALUES clause (inline data)."""
|
||||
async def _eval_values(node, tc, collection, limit):
|
||||
# rdflib has two representations for VALUES:
|
||||
# 1. var=[Variable...], value=[[val, ...], ...] — positional
|
||||
# 2. var=None, res=[{Variable: val, ...}, ...] — dict-based
|
||||
if hasattr(node, "res") and node.res:
|
||||
for row in node.res:
|
||||
sol = {}
|
||||
for var, val in row.items():
|
||||
if val is not None and str(val) != "UNDEF":
|
||||
sol[str(var)] = rdflib_term_to_term(val)
|
||||
yield sol
|
||||
return
|
||||
|
||||
if not node.var or not node.value:
|
||||
yield {}
|
||||
return
|
||||
variables = [str(v) for v in node.var]
|
||||
solutions = []
|
||||
|
||||
for row in node.value:
|
||||
sol = {}
|
||||
for var_name, val in zip(variables, row):
|
||||
if val is not None and str(val) != "UNDEF":
|
||||
sol[var_name] = rdflib_term_to_term(val)
|
||||
solutions.append(sol)
|
||||
|
||||
return solutions
|
||||
yield sol
|
||||
|
||||
|
||||
async def _eval_to_multiset(node, tc, workspace, collection, limit):
|
||||
"""Evaluate a ToMultiSet node (subquery)."""
|
||||
return await evaluate(node.p, tc, workspace, collection, limit)
|
||||
async def _eval_to_multiset(node, tc, collection, limit):
|
||||
async for sol in evaluate(node.p, tc, collection, limit):
|
||||
yield sol
|
||||
|
||||
|
||||
# --- Aggregate computation ---
|
||||
|
|
@ -354,7 +573,6 @@ def _compute_aggregate(agg, group):
|
|||
"""Compute a single aggregate function over a group of solutions."""
|
||||
agg_name = agg.name if hasattr(agg, "name") else ""
|
||||
|
||||
# Get the expression to aggregate
|
||||
expr = agg.vars if hasattr(agg, "vars") else None
|
||||
|
||||
if agg_name == "Aggregate_Count":
|
||||
|
|
@ -487,7 +705,7 @@ def _resolve_term(tmpl, solution):
|
|||
return rdflib_term_to_term(tmpl)
|
||||
|
||||
|
||||
async def _query_pattern(tc, s, p, o, workspace, collection, limit):
|
||||
async def _query_pattern(tc, s, p, o, collection, limit):
|
||||
"""
|
||||
Issue a streaming triple pattern query via TriplesClient.
|
||||
|
||||
|
|
@ -496,7 +714,6 @@ async def _query_pattern(tc, s, p, o, workspace, collection, limit):
|
|||
results = await tc.query(
|
||||
s=s, p=p, o=o,
|
||||
limit=limit,
|
||||
workspace=workspace,
|
||||
collection=collection,
|
||||
)
|
||||
return results
|
||||
|
|
@ -527,6 +744,7 @@ _HANDLERS = {
|
|||
"Join": _eval_join,
|
||||
"LeftJoin": _eval_left_join,
|
||||
"Union": _eval_union,
|
||||
"Minus": _eval_minus,
|
||||
"Filter": _eval_filter,
|
||||
"Distinct": _eval_distinct,
|
||||
"Reduced": _eval_reduced,
|
||||
|
|
|
|||
|
|
@ -5,9 +5,15 @@ Evaluates rdflib algebra expression nodes against a solution (variable
|
|||
binding) to produce a value or boolean result.
|
||||
"""
|
||||
|
||||
import hashlib
|
||||
import math
|
||||
import random
|
||||
import re
|
||||
import logging
|
||||
import operator
|
||||
import uuid
|
||||
from datetime import datetime, date, timezone
|
||||
from urllib.parse import quote
|
||||
|
||||
from rdflib.term import Variable, URIRef, Literal, BNode
|
||||
from rdflib.plugins.sparql.parserutils import CompValue
|
||||
|
|
@ -17,23 +23,31 @@ from . parser import rdflib_term_to_term
|
|||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_exists_callback = None
|
||||
|
||||
|
||||
class ExpressionError(Exception):
|
||||
"""Raised when a SPARQL expression cannot be evaluated."""
|
||||
pass
|
||||
|
||||
|
||||
def evaluate_expression(expr, solution):
|
||||
def evaluate_expression(expr, solution, exists_cb=None):
|
||||
"""
|
||||
Evaluate a SPARQL expression against a solution binding.
|
||||
|
||||
Args:
|
||||
expr: rdflib algebra expression node
|
||||
solution: dict mapping variable names to Term values
|
||||
exists_cb: optional callback(graph_node, solution) -> bool for
|
||||
EXISTS/NOT EXISTS evaluation; provided by algebra.py
|
||||
|
||||
Returns:
|
||||
The result value (Term, bool, number, string, or None)
|
||||
"""
|
||||
global _exists_callback
|
||||
if exists_cb is not None:
|
||||
_exists_callback = exists_cb
|
||||
|
||||
if expr is None:
|
||||
return True
|
||||
|
||||
|
|
@ -111,6 +125,13 @@ def _evaluate_comp_value(node, solution):
|
|||
if name == "MultiplicativeExpression":
|
||||
return _eval_multiplicative(node, solution)
|
||||
|
||||
# IN / NOT IN — must be checked before the generic Builtin_ dispatch
|
||||
if name == "Builtin_IN":
|
||||
return _eval_in(node, solution)
|
||||
|
||||
if name == "Builtin_NOTIN":
|
||||
return not _eval_in(node, solution)
|
||||
|
||||
# SPARQL built-in functions
|
||||
if name.startswith("Builtin_"):
|
||||
return _eval_builtin(name, node, solution)
|
||||
|
|
@ -119,27 +140,10 @@ def _evaluate_comp_value(node, solution):
|
|||
if name == "Function":
|
||||
return _eval_function(node, solution)
|
||||
|
||||
# Exists / NotExists
|
||||
if name == "Builtin_EXISTS":
|
||||
# EXISTS requires graph pattern evaluation - not handled here
|
||||
logger.warning("EXISTS not supported in filter expressions")
|
||||
return True
|
||||
|
||||
if name == "Builtin_NOTEXISTS":
|
||||
logger.warning("NOT EXISTS not supported in filter expressions")
|
||||
return True
|
||||
|
||||
# TrueFilter (used with OPTIONAL)
|
||||
if name == "TrueFilter":
|
||||
return True
|
||||
|
||||
# IN / NOT IN
|
||||
if name == "Builtin_IN":
|
||||
return _eval_in(node, solution)
|
||||
|
||||
if name == "Builtin_NOTIN":
|
||||
return not _eval_in(node, solution)
|
||||
|
||||
logger.warning(f"Unknown CompValue expression: {name}")
|
||||
return None
|
||||
|
||||
|
|
@ -165,6 +169,22 @@ def _eval_relational(node, solution):
|
|||
">=": operator.ge,
|
||||
}
|
||||
|
||||
if str(op) == "IN":
|
||||
items = node.other if isinstance(node.other, list) else [node.other]
|
||||
for item in items:
|
||||
other_val = evaluate_expression(item, solution)
|
||||
if _comparable_value(left) == _comparable_value(other_val):
|
||||
return True
|
||||
return False
|
||||
|
||||
if str(op) == "NOT IN":
|
||||
items = node.other if isinstance(node.other, list) else [node.other]
|
||||
for item in items:
|
||||
other_val = evaluate_expression(item, solution)
|
||||
if _comparable_value(left) == _comparable_value(other_val):
|
||||
return False
|
||||
return True
|
||||
|
||||
op_fn = ops.get(str(op))
|
||||
if op_fn is None:
|
||||
logger.warning(f"Unknown relational operator: {op}")
|
||||
|
|
@ -335,6 +355,197 @@ def _eval_builtin(name, node, solution):
|
|||
return val
|
||||
return None
|
||||
|
||||
if builtin == "YEAR":
|
||||
dt = _to_datetime(evaluate_expression(node.arg, solution))
|
||||
return dt.year if dt is not None else None
|
||||
|
||||
if builtin == "MONTH":
|
||||
dt = _to_datetime(evaluate_expression(node.arg, solution))
|
||||
return dt.month if dt is not None else None
|
||||
|
||||
if builtin == "DAY":
|
||||
dt = _to_datetime(evaluate_expression(node.arg, solution))
|
||||
return dt.day if dt is not None else None
|
||||
|
||||
if builtin == "HOURS":
|
||||
dt = _to_datetime(evaluate_expression(node.arg, solution))
|
||||
if dt is None:
|
||||
return None
|
||||
return dt.hour if isinstance(dt, datetime) else 0
|
||||
|
||||
if builtin == "MINUTES":
|
||||
dt = _to_datetime(evaluate_expression(node.arg, solution))
|
||||
if dt is None:
|
||||
return None
|
||||
return dt.minute if isinstance(dt, datetime) else 0
|
||||
|
||||
if builtin == "SECONDS":
|
||||
dt = _to_datetime(evaluate_expression(node.arg, solution))
|
||||
if dt is None:
|
||||
return None
|
||||
return dt.second if isinstance(dt, datetime) else 0
|
||||
|
||||
if builtin == "FLOOR":
|
||||
val = _to_numeric(evaluate_expression(node.arg, solution))
|
||||
if val is None:
|
||||
return None
|
||||
return int(math.floor(val))
|
||||
|
||||
if builtin == "CEIL":
|
||||
val = _to_numeric(evaluate_expression(node.arg, solution))
|
||||
if val is None:
|
||||
return None
|
||||
return int(math.ceil(val))
|
||||
|
||||
if builtin == "ABS":
|
||||
val = _to_numeric(evaluate_expression(node.arg, solution))
|
||||
if val is None:
|
||||
return None
|
||||
return abs(val)
|
||||
|
||||
if builtin == "ROUND":
|
||||
val = _to_numeric(evaluate_expression(node.arg, solution))
|
||||
if val is None:
|
||||
return None
|
||||
return round(val)
|
||||
|
||||
if builtin == "STRBEFORE":
|
||||
string = _to_string(evaluate_expression(node.arg1, solution))
|
||||
sep = _to_string(evaluate_expression(node.arg2, solution))
|
||||
idx = string.find(sep)
|
||||
if idx < 0:
|
||||
return Term(type=LITERAL, value="")
|
||||
return Term(type=LITERAL, value=string[:idx])
|
||||
|
||||
if builtin == "STRAFTER":
|
||||
string = _to_string(evaluate_expression(node.arg1, solution))
|
||||
sep = _to_string(evaluate_expression(node.arg2, solution))
|
||||
idx = string.find(sep)
|
||||
if idx < 0:
|
||||
return Term(type=LITERAL, value="")
|
||||
return Term(type=LITERAL, value=string[idx + len(sep):])
|
||||
|
||||
if builtin == "ENCODE_FOR_URI":
|
||||
val = _to_string(evaluate_expression(node.arg, solution))
|
||||
return Term(type=LITERAL, value=quote(val, safe=""))
|
||||
|
||||
if builtin == "REPLACE":
|
||||
string = _to_string(evaluate_expression(node.arg, solution))
|
||||
pattern = _to_string(evaluate_expression(node.pattern, solution))
|
||||
replacement = _to_string(
|
||||
evaluate_expression(node.replacement, solution)
|
||||
)
|
||||
flags_str = ""
|
||||
if hasattr(node, "flags") and node.flags is not None:
|
||||
flags_str = _to_string(evaluate_expression(node.flags, solution))
|
||||
re_flags = 0
|
||||
if "i" in flags_str:
|
||||
re_flags |= re.IGNORECASE
|
||||
if "m" in flags_str:
|
||||
re_flags |= re.MULTILINE
|
||||
if "s" in flags_str:
|
||||
re_flags |= re.DOTALL
|
||||
try:
|
||||
result = re.sub(pattern, replacement, string, flags=re_flags)
|
||||
return Term(type=LITERAL, value=result)
|
||||
except re.error:
|
||||
return None
|
||||
|
||||
if builtin == "SUBSTR":
|
||||
string = _to_string(evaluate_expression(node.arg, solution))
|
||||
start = _to_numeric(evaluate_expression(node.start, solution))
|
||||
if start is None:
|
||||
return None
|
||||
start_idx = max(int(start) - 1, 0)
|
||||
if hasattr(node, "length") and node.length is not None:
|
||||
length = _to_numeric(evaluate_expression(node.length, solution))
|
||||
if length is None:
|
||||
return None
|
||||
return Term(
|
||||
type=LITERAL, value=string[start_idx:start_idx + int(length)]
|
||||
)
|
||||
return Term(type=LITERAL, value=string[start_idx:])
|
||||
|
||||
if builtin == "EXISTS":
|
||||
if _exists_callback is not None:
|
||||
return _exists_callback(node.graph, solution)
|
||||
logger.warning("EXISTS requires an exists_cb; not available")
|
||||
return True
|
||||
|
||||
if builtin == "NOTEXISTS":
|
||||
if _exists_callback is not None:
|
||||
return not _exists_callback(node.graph, solution)
|
||||
logger.warning("NOT EXISTS requires an exists_cb; not available")
|
||||
return True
|
||||
|
||||
if builtin == "LANGMATCHES":
|
||||
tag = _to_string(evaluate_expression(node.arg1, solution))
|
||||
rng = _to_string(evaluate_expression(node.arg2, solution))
|
||||
if rng == "*":
|
||||
return len(tag) > 0
|
||||
return tag.lower().startswith(rng.lower())
|
||||
|
||||
if builtin == "IRI" or builtin == "URI":
|
||||
val = _to_string(evaluate_expression(node.arg, solution))
|
||||
return Term(type=IRI, iri=val)
|
||||
|
||||
if builtin == "BNODE":
|
||||
if hasattr(node, "arg") and node.arg is not None:
|
||||
label = _to_string(evaluate_expression(node.arg, solution))
|
||||
return Term(type=BLANK, id=label)
|
||||
return Term(type=BLANK, id=str(uuid.uuid4()))
|
||||
|
||||
if builtin == "NOW":
|
||||
now = datetime.now(timezone.utc)
|
||||
return Term(
|
||||
type=LITERAL,
|
||||
value=now.strftime("%Y-%m-%dT%H:%M:%S%z"),
|
||||
datatype="http://www.w3.org/2001/XMLSchema#dateTime",
|
||||
)
|
||||
|
||||
if builtin == "TZ":
|
||||
dt = _to_datetime(evaluate_expression(node.arg, solution))
|
||||
if dt is None:
|
||||
return Term(type=LITERAL, value="")
|
||||
if dt.tzinfo is not None:
|
||||
offset = dt.strftime("%z")
|
||||
if offset:
|
||||
return Term(type=LITERAL, value=offset[:3] + ":" + offset[3:])
|
||||
return Term(type=LITERAL, value="")
|
||||
|
||||
if builtin == "RAND":
|
||||
return random.random()
|
||||
|
||||
if builtin == "UUID":
|
||||
return Term(type=IRI, iri="urn:uuid:" + str(uuid.uuid4()))
|
||||
|
||||
if builtin == "STRUUID":
|
||||
return Term(type=LITERAL, value=str(uuid.uuid4()))
|
||||
|
||||
if builtin == "MD5":
|
||||
val = _to_string(evaluate_expression(node.arg, solution))
|
||||
return Term(
|
||||
type=LITERAL, value=hashlib.md5(val.encode()).hexdigest()
|
||||
)
|
||||
|
||||
if builtin == "SHA1":
|
||||
val = _to_string(evaluate_expression(node.arg, solution))
|
||||
return Term(
|
||||
type=LITERAL, value=hashlib.sha1(val.encode()).hexdigest()
|
||||
)
|
||||
|
||||
if builtin == "SHA256":
|
||||
val = _to_string(evaluate_expression(node.arg, solution))
|
||||
return Term(
|
||||
type=LITERAL, value=hashlib.sha256(val.encode()).hexdigest()
|
||||
)
|
||||
|
||||
if builtin == "SHA512":
|
||||
val = _to_string(evaluate_expression(node.arg, solution))
|
||||
return Term(
|
||||
type=LITERAL, value=hashlib.sha512(val.encode()).hexdigest()
|
||||
)
|
||||
|
||||
if builtin == "sameTerm":
|
||||
left = evaluate_expression(node.arg1, solution)
|
||||
right = evaluate_expression(node.arg2, solution)
|
||||
|
|
@ -454,6 +665,27 @@ def _to_numeric(val):
|
|||
return None
|
||||
|
||||
|
||||
def _to_datetime(val):
|
||||
"""Convert a value to a date or datetime object."""
|
||||
if val is None:
|
||||
return None
|
||||
s = _to_string(val)
|
||||
for fmt in (
|
||||
"%Y-%m-%dT%H:%M:%S", "%Y-%m-%dT%H:%M:%S%z",
|
||||
"%Y-%m-%dT%H:%M:%SZ", "%Y-%m-%dT%H:%M",
|
||||
"%Y-%m-%d",
|
||||
):
|
||||
try:
|
||||
return datetime.strptime(s, fmt)
|
||||
except ValueError:
|
||||
continue
|
||||
try:
|
||||
return datetime.fromisoformat(s)
|
||||
except ValueError:
|
||||
pass
|
||||
return None
|
||||
|
||||
|
||||
def _comparable_value(val):
|
||||
"""
|
||||
Convert a value to a form suitable for comparison.
|
||||
|
|
|
|||
Some files were not shown because too many files have changed in this diff Show more
Loading…
Add table
Add a link
Reference in a new issue