From b536d78b574e95d76535ba9860665b10dda1651c Mon Sep 17 00:00:00 2001 From: Cyber MacGeddon Date: Wed, 20 Nov 2024 19:55:05 +0000 Subject: [PATCH 01/37] Prepare for 0.16: Change Python dep restrictions and Gitlab merge criteria --- .github/workflows/release.yaml | 2 +- trustgraph-bedrock/setup.py | 2 +- trustgraph-cli/setup.py | 2 +- trustgraph-embeddings-hf/setup.py | 4 ++-- trustgraph-flow/setup.py | 2 +- trustgraph-parquet/setup.py | 2 +- trustgraph-vertexai/setup.py | 2 +- trustgraph/setup.py | 14 +++++++------- 8 files changed, 15 insertions(+), 15 deletions(-) diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index 5ca3b735..0d6d2d29 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -5,7 +5,7 @@ on: workflow_dispatch: push: tags: - - v0.15.* + - v0.16.* permissions: contents: read diff --git a/trustgraph-bedrock/setup.py b/trustgraph-bedrock/setup.py index 80cee09c..1a99e227 100644 --- a/trustgraph-bedrock/setup.py +++ b/trustgraph-bedrock/setup.py @@ -34,7 +34,7 @@ setuptools.setup( python_requires='>=3.8', download_url = "https://github.com/trustgraph-ai/trustgraph/archive/refs/tags/v" + version + ".tar.gz", install_requires=[ - "trustgraph-base>=0.15,<0.16", + "trustgraph-base>=0.16,<0.17", "pulsar-client", "prometheus-client", "boto3", diff --git a/trustgraph-cli/setup.py b/trustgraph-cli/setup.py index 651fdc27..ec541c8b 100644 --- a/trustgraph-cli/setup.py +++ b/trustgraph-cli/setup.py @@ -34,7 +34,7 @@ setuptools.setup( python_requires='>=3.8', download_url = "https://github.com/trustgraph-ai/trustgraph/archive/refs/tags/v" + version + ".tar.gz", install_requires=[ - "trustgraph-base>=0.15,<0.16", + "trustgraph-base>=0.16,<0.17", "requests", "pulsar-client", "rdflib", diff --git a/trustgraph-embeddings-hf/setup.py b/trustgraph-embeddings-hf/setup.py index ad01667f..2fbe079e 100644 --- a/trustgraph-embeddings-hf/setup.py +++ b/trustgraph-embeddings-hf/setup.py @@ -34,8 +34,8 @@ 
setuptools.setup( python_requires='>=3.8', download_url = "https://github.com/trustgraph-ai/trustgraph/archive/refs/tags/v" + version + ".tar.gz", install_requires=[ - "trustgraph-base>=0.15,<0.16", - "trustgraph-flow>=0.15,<0.16", + "trustgraph-base>=0.16,<0.17", + "trustgraph-flow>=0.16,<0.17", "torch", "urllib3", "transformers", diff --git a/trustgraph-flow/setup.py b/trustgraph-flow/setup.py index 8b46b2d2..8aeb7ce2 100644 --- a/trustgraph-flow/setup.py +++ b/trustgraph-flow/setup.py @@ -34,7 +34,7 @@ setuptools.setup( python_requires='>=3.8', download_url = "https://github.com/trustgraph-ai/trustgraph/archive/refs/tags/v" + version + ".tar.gz", install_requires=[ - "trustgraph-base>=0.15,<0.16", + "trustgraph-base>=0.16,<0.17", "urllib3", "rdflib", "pymilvus", diff --git a/trustgraph-parquet/setup.py b/trustgraph-parquet/setup.py index 668cde1c..7dab60ac 100644 --- a/trustgraph-parquet/setup.py +++ b/trustgraph-parquet/setup.py @@ -34,7 +34,7 @@ setuptools.setup( python_requires='>=3.8', download_url = "https://github.com/trustgraph-ai/trustgraph/archive/refs/tags/v" + version + ".tar.gz", install_requires=[ - "trustgraph-base>=0.15,<0.16", + "trustgraph-base>=0.16,<0.17", "pulsar-client", "prometheus-client", "pyarrow", diff --git a/trustgraph-vertexai/setup.py b/trustgraph-vertexai/setup.py index 0cdc3a97..d19e8c0d 100644 --- a/trustgraph-vertexai/setup.py +++ b/trustgraph-vertexai/setup.py @@ -34,7 +34,7 @@ setuptools.setup( python_requires='>=3.8', download_url = "https://github.com/trustgraph-ai/trustgraph/archive/refs/tags/v" + version + ".tar.gz", install_requires=[ - "trustgraph-base>=0.15,<0.16", + "trustgraph-base>=0.16,<0.17", "pulsar-client", "google-cloud-aiplatform", "prometheus-client", diff --git a/trustgraph/setup.py b/trustgraph/setup.py index 8e50aed5..7bb8dfd3 100644 --- a/trustgraph/setup.py +++ b/trustgraph/setup.py @@ -34,13 +34,13 @@ setuptools.setup( python_requires='>=3.8', download_url = 
"https://github.com/trustgraph-ai/trustgraph/archive/refs/tags/v" + version + ".tar.gz", install_requires=[ - "trustgraph-base>=0.15,<0.16", - "trustgraph-bedrock>=0.15,<0.16", - "trustgraph-cli>=0.15,<0.16", - "trustgraph-embeddings-hf>=0.15,<0.16", - "trustgraph-flow>=0.15,<0.16", - "trustgraph-parquet>=0.15,<0.16", - "trustgraph-vertexai>=0.15,<0.16", + "trustgraph-base>=0.16,<0.17", + "trustgraph-bedrock>=0.16,<0.17", + "trustgraph-cli>=0.16,<0.17", + "trustgraph-embeddings-hf>=0.16,<0.17", + "trustgraph-flow>=0.16,<0.17", + "trustgraph-parquet>=0.16,<0.17", + "trustgraph-vertexai>=0.16,<0.17", ], scripts=[ ] From 92b84441eb8cd5b4afffe34fe800c0744256cd37 Mon Sep 17 00:00:00 2001 From: cybermaggedon Date: Wed, 20 Nov 2024 19:55:40 +0000 Subject: [PATCH 02/37] Feature/api gateway (#164) * Bare bones API gateway * Working for LLM + prompt * RAG query works * Triples query * Added agent API * Embeddings API * Put API tests in a subdir --- test-api/test-agent-api | 28 ++ test-api/test-embeddings-api | 25 ++ test-api/test-graph-rag-api | 31 ++ test-api/test-llm-api | 31 ++ test-api/test-prompt-api | 38 ++ test-api/test-prompt2-api | 39 ++ test-api/test-triples-query-api | 35 ++ trustgraph-flow/scripts/api-gateway | 540 ++++++++++++++++++++++++++++ trustgraph-flow/setup.py | 1 + 9 files changed, 768 insertions(+) create mode 100755 test-api/test-agent-api create mode 100755 test-api/test-embeddings-api create mode 100755 test-api/test-graph-rag-api create mode 100755 test-api/test-llm-api create mode 100755 test-api/test-prompt-api create mode 100755 test-api/test-prompt2-api create mode 100755 test-api/test-triples-query-api create mode 100755 trustgraph-flow/scripts/api-gateway diff --git a/test-api/test-agent-api b/test-api/test-agent-api new file mode 100755 index 00000000..f36ba196 --- /dev/null +++ b/test-api/test-agent-api @@ -0,0 +1,28 @@ +#!/usr/bin/env python3 + +import requests +import json +import sys + +url = "http://localhost:8088/api/v1/" + 
+############################################################################ + +input = { + "question": "What is the highest risk aspect of running a space shuttle program? Provide 5 detailed reasons to justify our answer.", +} + +resp = requests.post( + f"{url}agent", + json=input, +) + +resp = resp.json() + +if "error" in resp: + print(f"Error: {resp['error']}") + sys.exit(1) + +print(resp["answer"]) + + diff --git a/test-api/test-embeddings-api b/test-api/test-embeddings-api new file mode 100755 index 00000000..ef9ea099 --- /dev/null +++ b/test-api/test-embeddings-api @@ -0,0 +1,25 @@ +#!/usr/bin/env python3 + +import requests +import json +import sys + +url = "http://localhost:8088/api/v1/" + +############################################################################ + +input = { + "text": "What is the highest risk aspect of running a space shuttle program? Provide 5 detailed reasons to justify our answer.", +} + +resp = requests.post( + f"{url}embeddings", + json=input, +) + +resp = resp.json() + +if "error" in resp: + print(f"Error: {resp['error']}") + sys.exit(1) + diff --git a/test-api/test-graph-rag-api b/test-api/test-graph-rag-api new file mode 100755 index 00000000..c329934c --- /dev/null +++ b/test-api/test-graph-rag-api @@ -0,0 +1,31 @@ +#!/usr/bin/env python3 + +import requests +import json +import sys + +url = "http://localhost:8088/api/v1/" + +############################################################################ + +input = { + "query": "Give me 10 facts", +} + +resp = requests.post( + f"{url}graph-rag", + json=input, +) + +resp = resp.json() + +print(resp) +if "error" in resp: + print(f"Error: {resp['error']}") + sys.exit(1) + +print(resp["response"]) + +sys.exit(0) +############################################################################ + diff --git a/test-api/test-llm-api b/test-api/test-llm-api new file mode 100755 index 00000000..c33c6634 --- /dev/null +++ b/test-api/test-llm-api @@ -0,0 +1,31 @@ +#!/usr/bin/env python3 + +import 
requests +import json +import sys + +url = "http://localhost:8088/api/v1/" + +############################################################################ + +input = { + "system": "Respond in French. Use long word, form of numbers, no digits", +# "prompt": "Add 2 and 12" + "prompt": "Add 12 and 14, and then make a poem about llamas which incorporates that number. Then write a joke about llamas" +} + +resp = requests.post( + f"{url}text-completion", + json=input, +) + +resp = resp.json() + +if "error" in resp: + print(f"Error: {resp['error']}") + sys.exit(1) + +print(resp["response"]) + +############################################################################ + diff --git a/test-api/test-prompt-api b/test-api/test-prompt-api new file mode 100755 index 00000000..1005bc90 --- /dev/null +++ b/test-api/test-prompt-api @@ -0,0 +1,38 @@ +#!/usr/bin/env python3 + +import requests +import json +import sys + +url = "http://localhost:8088/api/v1/" + +############################################################################ + +input = { + "id": "question", + "variables": { + "question": "Write a joke about llamas." + } +} + +resp = requests.post( + f"{url}prompt", + json=input, +) + +resp = resp.json() + +print(resp) +if "error" in resp: + print(f"Error: {resp['error']}") + sys.exit(1) + +if "object" in resp: + print(f"Object: {resp['object']}") + sys.exit(1) + +print(resp["text"]) + +sys.exit(0) +############################################################################ + diff --git a/test-api/test-prompt2-api b/test-api/test-prompt2-api new file mode 100755 index 00000000..f1b80c48 --- /dev/null +++ b/test-api/test-prompt2-api @@ -0,0 +1,39 @@ +#!/usr/bin/env python3 + +import requests +import json +import sys + +url = "http://localhost:8088/api/v1/" + +############################################################################ + +input = { + "id": "extract-definitions", + "variables": { + "text": "A cat is a large mammal." 
+ } +} + +resp = requests.post( + f"{url}prompt", + json=input, +) + +resp = resp.json() + +print(resp) +if "error" in resp: + print(f"Error: {resp['error']}") + sys.exit(1) + +if "object" in resp: + object = json.loads(resp["object"]) + print(json.dumps(object, indent=4)) + sys.exit(1) + +print(resp["text"]) + +sys.exit(0) +############################################################################ + diff --git a/test-api/test-triples-query-api b/test-api/test-triples-query-api new file mode 100755 index 00000000..e2895a28 --- /dev/null +++ b/test-api/test-triples-query-api @@ -0,0 +1,35 @@ +#!/usr/bin/env python3 + +import requests +import json +import sys + +url = "http://localhost:8088/api/v1/" + +############################################################################ + +input = { + "p": "http://www.w3.org/2000/01/rdf-schema#label", + "limit": 10 +} + +resp = requests.post( + f"{url}triples-query", + json=input, +) + +print(resp.text) +resp = resp.json() + + +print(resp) +if "error" in resp: + print(f"Error: {resp['error']}") + sys.exit(1) + +print(resp["response"]) + +sys.exit(0) + +############################################################################ + diff --git a/trustgraph-flow/scripts/api-gateway b/trustgraph-flow/scripts/api-gateway new file mode 100755 index 00000000..748b5c7d --- /dev/null +++ b/trustgraph-flow/scripts/api-gateway @@ -0,0 +1,540 @@ +#!/usr/bin/env python3 + +import asyncio +from aiohttp import web +import json +import logging +import uuid + +import pulsar +from pulsar.asyncio import Client +from pulsar.schema import JsonSchema +import _pulsar +import aiopulsar + +from trustgraph.clients.llm_client import LlmClient +from trustgraph.clients.prompt_client import PromptClient + +from trustgraph.schema import TextCompletionRequest, TextCompletionResponse +from trustgraph.schema import text_completion_request_queue +from trustgraph.schema import text_completion_response_queue + +from trustgraph.schema import PromptRequest, 
PromptResponse +from trustgraph.schema import prompt_request_queue +from trustgraph.schema import prompt_response_queue + +from trustgraph.schema import GraphRagQuery, GraphRagResponse +from trustgraph.schema import graph_rag_request_queue +from trustgraph.schema import graph_rag_response_queue + +from trustgraph.schema import TriplesQueryRequest, TriplesQueryResponse, Value +from trustgraph.schema import triples_request_queue +from trustgraph.schema import triples_response_queue + +from trustgraph.schema import AgentRequest, AgentResponse +from trustgraph.schema import agent_request_queue +from trustgraph.schema import agent_response_queue + +from trustgraph.schema import EmbeddingsRequest, EmbeddingsResponse +from trustgraph.schema import embeddings_request_queue +from trustgraph.schema import embeddings_response_queue + +logger = logging.getLogger("api") +logger.setLevel(logging.INFO) + +pulsar_host = "pulsar://localhost:6650" +TIME_OUT = 600 + +class Publisher: + + def __init__(self, pulsar_host, topic, schema=None, max_size=10): + self.pulsar_host = pulsar_host + self.topic = topic + self.schema = schema + self.q = asyncio.Queue(maxsize=max_size) + + async def run(self): + async with aiopulsar.connect(self.pulsar_host) as client: + async with client.create_producer( + topic=self.topic, + schema=self.schema, + ) as producer: + while True: + id, item = await self.q.get() + await producer.send(item, { "id": id }) +# print("message out") + + async def send(self, id, msg): + await self.q.put((id, msg)) + +class Subscriber: + + def __init__(self, pulsar_host, topic, subscription, consumer_name, + schema=None, max_size=10): + self.pulsar_host = pulsar_host + self.topic = topic + self.subscription = subscription + self.consumer_name = consumer_name + self.schema = schema + self.q = {} + + async def run(self): + async with aiopulsar.connect(pulsar_host) as client: + async with client.subscribe( + topic=self.topic, + subscription_name=self.subscription, + 
consumer_name=self.consumer_name, + schema=self.schema, + ) as consumer: + while True: + msg = await consumer.receive() +# print("message in", self.topic) + id = msg.properties()["id"] + value = msg.value() + if id in self.q: + await self.q[id].put(value) + + async def subscribe(self, id): + q = asyncio.Queue() + self.q[id] = q + return q + + async def unsubscribe(self, id): + if id in self.q: + del self.q[id] + +class Api: + + def __init__(self, **config): + + self.port = int(config.get("port", "8088")) + self.app = web.Application(middlewares=[]) + + self.llm_out = Publisher( + pulsar_host, text_completion_request_queue, + schema=JsonSchema(TextCompletionRequest) + ) + + self.llm_in = Subscriber( + pulsar_host, text_completion_response_queue, + "api-gateway", "api-gateway", + JsonSchema(TextCompletionResponse) + ) + + self.prompt_out = Publisher( + pulsar_host, prompt_request_queue, + schema=JsonSchema(PromptRequest) + ) + + self.prompt_in = Subscriber( + pulsar_host, prompt_response_queue, + "api-gateway", "api-gateway", + JsonSchema(PromptResponse) + ) + + self.graph_rag_out = Publisher( + pulsar_host, graph_rag_request_queue, + schema=JsonSchema(GraphRagQuery) + ) + + self.graph_rag_in = Subscriber( + pulsar_host, graph_rag_response_queue, + "api-gateway", "api-gateway", + JsonSchema(GraphRagResponse) + ) + + self.triples_query_out = Publisher( + pulsar_host, triples_request_queue, + schema=JsonSchema(TriplesQueryRequest) + ) + + self.triples_query_in = Subscriber( + pulsar_host, triples_response_queue, + "api-gateway", "api-gateway", + JsonSchema(TriplesQueryResponse) + ) + + self.agent_out = Publisher( + pulsar_host, agent_request_queue, + schema=JsonSchema(AgentRequest) + ) + + self.agent_in = Subscriber( + pulsar_host, agent_response_queue, + "api-gateway", "api-gateway", + JsonSchema(AgentResponse) + ) + + self.embeddings_out = Publisher( + pulsar_host, embeddings_request_queue, + schema=JsonSchema(EmbeddingsRequest) + ) + + self.embeddings_in = 
Subscriber( + pulsar_host, embeddings_response_queue, + "api-gateway", "api-gateway", + JsonSchema(EmbeddingsResponse) + ) + + self.app.add_routes([ + web.post("/api/v1/text-completion", self.llm), + web.post("/api/v1/prompt", self.prompt), + web.post("/api/v1/graph-rag", self.graph_rag), + web.post("/api/v1/triples-query", self.triples_query), + web.post("/api/v1/agent", self.agent), + web.post("/api/v1/embeddings", self.embeddings), + ]) + + async def llm(self, request): + + id = str(uuid.uuid4()) + + try: + + data = await request.json() + + q = await self.llm_in.subscribe(id) + + await self.llm_out.send( + id, + TextCompletionRequest( + system=data["system"], + prompt=data["prompt"] + ) + ) + + try: + resp = await asyncio.wait_for(q.get(), TIME_OUT) + except: + raise RuntimeError("Timeout waiting for response") + + if resp.error: + return web.json_response( + { "error": resp.error.message } + ) + + return web.json_response( + { "response": resp.response } + ) + + except Exception as e: + logging.error(f"Exception: {e}") + + return web.json_response( + { "error": str(e) } + ) + + finally: + await self.llm_in.unsubscribe(id) + + async def prompt(self, request): + + id = str(uuid.uuid4()) + + try: + + data = await request.json() + + q = await self.prompt_in.subscribe(id) + + terms = { + k: json.dumps(v) + for k, v in data["variables"].items() + } + + await self.prompt_out.send( + id, + PromptRequest( + id=data["id"], + terms=terms + ) + ) + + try: + resp = await asyncio.wait_for(q.get(), TIME_OUT) + except: + raise RuntimeError("Timeout waiting for response") + + if resp.error: + return web.json_response( + { "error": resp.error.message } + ) + + if resp.object: + return web.json_response( + { "object": resp.object } + ) + + return web.json_response( + { "text": resp.text } + ) + + except Exception as e: + logging.error(f"Exception: {e}") + + return web.json_response( + { "error": str(e) } + ) + + finally: + await self.prompt_in.unsubscribe(id) + + async def 
graph_rag(self, request): + + id = str(uuid.uuid4()) + + try: + + data = await request.json() + + q = await self.graph_rag_in.subscribe(id) + + await self.graph_rag_out.send( + id, + GraphRagQuery( + query=data["query"], + user=data.get("user", "trustgraph"), + collection=data.get("collection", "default"), + ) + ) + + try: + resp = await asyncio.wait_for(q.get(), TIME_OUT) + except: + raise RuntimeError("Timeout waiting for response") + + if resp.error: + return web.json_response( + { "error": resp.error.message } + ) + + return web.json_response( + { "response": resp.response } + ) + + except Exception as e: + logging.error(f"Exception: {e}") + + return web.json_response( + { "error": str(e) } + ) + + finally: + await self.graph_rag_in.unsubscribe(id) + + async def triples_query(self, request): + + id = str(uuid.uuid4()) + + try: + + data = await request.json() + + q = await self.triples_query_in.subscribe(id) + + if "s" in data: + if data["s"].startswith("http:") or data["s"].startswith("https:"): + s = Value(value=data["s"], is_uri=True) + else: + s = Value(value=data["s"], is_uri=True) + else: + s = None + + if "p" in data: + if data["p"].startswith("http:") or data["p"].startswith("https:"): + p = Value(value=data["p"], is_uri=True) + else: + p = Value(value=data["p"], is_uri=True) + else: + p = None + + if "o" in data: + if data["o"].startswith("http:") or data["o"].startswith("https:"): + o = Value(value=data["o"], is_uri=True) + else: + o = Value(value=data["o"], is_uri=True) + else: + o = None + + limit = int(data.get("limit", 10000)) + + await self.triples_query_out.send( + id, + TriplesQueryRequest( + s = s, p = p, o = o, + limit = limit, + user = data.get("user", "trustgraph"), + collection = data.get("collection", "default"), + ) + ) + + try: + resp = await asyncio.wait_for(q.get(), TIME_OUT) + except: + raise RuntimeError("Timeout waiting for response") + + if resp.error: + return web.json_response( + { "error": resp.error.message } + ) + + return 
web.json_response( + { + "response": [ + { + "s": { + "v": t.s.value, + "e": t.s.is_uri, + }, + "p": { + "v": t.p.value, + "e": t.p.is_uri, + }, + "o": { + "v": t.o.value, + "e": t.o.is_uri, + } + } + for t in resp.triples + ] + } + ) + + except Exception as e: + logging.error(f"Exception: {e}") + + return web.json_response( + { "error": str(e) } + ) + + finally: + await self.graph_rag_in.unsubscribe(id) + + async def agent(self, request): + + id = str(uuid.uuid4()) + + try: + + data = await request.json() + + q = await self.agent_in.subscribe(id) + + await self.agent_out.send( + id, + AgentRequest( + question=data["question"], + ) + ) + + while True: + try: + resp = await asyncio.wait_for(q.get(), TIME_OUT) + except: + raise RuntimeError("Timeout waiting for response") + + if resp.error: + return web.json_response( + { "error": resp.error.message } + ) + + if resp.answer: break + + if resp.thought: print("thought:", resp.thought) + if resp.observation: print("observation:", resp.observation) + + if resp.answer: + return web.json_response( + { "answer": resp.answer } + ) + + # Can't happen, ook at the logic + raise RuntimeError("Strange state") + + except Exception as e: + logging.error(f"Exception: {e}") + + return web.json_response( + { "error": str(e) } + ) + + finally: + await self.agent_in.unsubscribe(id) + + async def embeddings(self, request): + + id = str(uuid.uuid4()) + + try: + + data = await request.json() + + q = await self.embeddings_in.subscribe(id) + + await self.embeddings_out.send( + id, + EmbeddingsRequest( + text=data["text"], + ) + ) + + try: + resp = await asyncio.wait_for(q.get(), TIME_OUT) + except: + raise RuntimeError("Timeout waiting for response") + + if resp.error: + return web.json_response( + { "error": resp.error.message } + ) + + return web.json_response( + { "vectors": resp.vectors } + ) + + except Exception as e: + logging.error(f"Exception: {e}") + + return web.json_response( + { "error": str(e) } + ) + + finally: + await 
self.embeddings_in.unsubscribe(id) + + async def app_factory(self): + + self.llm_pub_task = asyncio.create_task(self.llm_in.run()) + self.llm_sub_task = asyncio.create_task(self.llm_out.run()) + + self.prompt_pub_task = asyncio.create_task(self.prompt_in.run()) + self.prompt_sub_task = asyncio.create_task(self.prompt_out.run()) + + self.graph_rag_pub_task = asyncio.create_task(self.graph_rag_in.run()) + self.graph_rag_sub_task = asyncio.create_task(self.graph_rag_out.run()) + + self.triples_query_pub_task = asyncio.create_task( + self.triples_query_in.run() + ) + self.triples_query_sub_task = asyncio.create_task( + self.triples_query_out.run() + ) + + self.agent_pub_task = asyncio.create_task(self.agent_in.run()) + self.agent_sub_task = asyncio.create_task(self.agent_out.run()) + + self.embeddings_pub_task = asyncio.create_task( + self.embeddings_in.run() + ) + self.embeddings_sub_task = asyncio.create_task( + self.embeddings_out.run() + ) + + return self.app + + def run(self): + web.run_app(self.app_factory(), port=self.port) + +a = Api() +a.run() + diff --git a/trustgraph-flow/setup.py b/trustgraph-flow/setup.py index 8aeb7ce2..4f7b3383 100644 --- a/trustgraph-flow/setup.py +++ b/trustgraph-flow/setup.py @@ -58,6 +58,7 @@ setuptools.setup( "google-generativeai", "ibis", "jsonschema", + "aiohttp", ], scripts=[ "scripts/agent-manager-react", From ba6d6c13afce518e9fbeba32ccbecdb89412fffd Mon Sep 17 00:00:00 2001 From: cybermaggedon Date: Wed, 20 Nov 2024 20:56:23 +0000 Subject: [PATCH 03/37] Fix API gateway integration, added to templates (#165) --- templates/components/trustgraph.jsonnet | 42 +++++++++++++++++ trustgraph-flow/scripts/api-gateway | 61 +++++++++++++++---------- trustgraph-flow/setup.py | 2 + 3 files changed, 81 insertions(+), 24 deletions(-) diff --git a/templates/components/trustgraph.jsonnet b/templates/components/trustgraph.jsonnet index e178cc27..37c05dae 100644 --- a/templates/components/trustgraph.jsonnet +++ 
b/templates/components/trustgraph.jsonnet @@ -5,9 +5,51 @@ local prompt = import "prompt-template.jsonnet"; { + "api-gateway-port":: 8088, + "api-gateway-timeout":: 600, + "chunk-size":: 250, "chunk-overlap":: 15, + "api-gateway" +: { + + create:: function(engine) + + local port = $["api-gateway-port"]; + + local container = + engine.container("api-gateway") + .with_image(images.trustgraph) + .with_command([ + "api-gateway", + "-p", + url.pulsar, + "--timeout", + std.toString($["api-gateway-timeout"]), + "--port", + std.toString(port), + ]) + .with_limits("0.5", "256M") + .with_reservations("0.1", "256M") + .with_port(8000, 8000, "metrics") + .with_port(port, port, "api"); + + local containerSet = engine.containers( + "api-gateway", [ container ] + ); + + local service = + engine.internalService(containerSet) + .with_port(8000, 8000, "metrics") + .with_port(port, port, "api"); + + engine.resources([ + containerSet, + service, + ]) + + }, + "chunker" +: { create:: function(engine) diff --git a/trustgraph-flow/scripts/api-gateway b/trustgraph-flow/scripts/api-gateway index 748b5c7d..dd7d54ac 100755 --- a/trustgraph-flow/scripts/api-gateway +++ b/trustgraph-flow/scripts/api-gateway @@ -1,10 +1,17 @@ #!/usr/bin/env python3 +# FIXME: Subscribes to Pulsar unnecessarily, should only do it when there +# are active listeners + +# FIXME: Connection errors in publishers / subscribers cause those threads +# to fail and are not failed or retried + import asyncio from aiohttp import web import json import logging import uuid +import os import pulsar from pulsar.asyncio import Client @@ -42,7 +49,7 @@ from trustgraph.schema import embeddings_response_queue logger = logging.getLogger("api") logger.setLevel(logging.INFO) -pulsar_host = "pulsar://localhost:6650" +pulsar_host = os.getenv("PULSAR_HOST", "pulsar://pulsar:6650") TIME_OUT = 600 class Publisher: @@ -54,15 +61,18 @@ class Publisher: self.q = asyncio.Queue(maxsize=max_size) async def run(self): - async with 
aiopulsar.connect(self.pulsar_host) as client: - async with client.create_producer( - topic=self.topic, - schema=self.schema, - ) as producer: - while True: - id, item = await self.q.get() - await producer.send(item, { "id": id }) -# print("message out") + try: + async with aiopulsar.connect(self.pulsar_host) as client: + async with client.create_producer( + topic=self.topic, + schema=self.schema, + ) as producer: + while True: + id, item = await self.q.get() + await producer.send(item, { "id": id }) + # print("message out") + except Exception as e: + print("Exception:", e, flush=True) async def send(self, id, msg): await self.q.put((id, msg)) @@ -79,20 +89,23 @@ class Subscriber: self.q = {} async def run(self): - async with aiopulsar.connect(pulsar_host) as client: - async with client.subscribe( - topic=self.topic, - subscription_name=self.subscription, - consumer_name=self.consumer_name, - schema=self.schema, - ) as consumer: - while True: - msg = await consumer.receive() -# print("message in", self.topic) - id = msg.properties()["id"] - value = msg.value() - if id in self.q: - await self.q[id].put(value) + try: + async with aiopulsar.connect(pulsar_host) as client: + async with client.subscribe( + topic=self.topic, + subscription_name=self.subscription, + consumer_name=self.consumer_name, + schema=self.schema, + ) as consumer: + while True: + msg = await consumer.receive() + # print("message in", self.topic) + id = msg.properties()["id"] + value = msg.value() + if id in self.q: + await self.q[id].put(value) + except Exception as e: + print("Exception:", e, flush=True) async def subscribe(self, id): q = asyncio.Queue() diff --git a/trustgraph-flow/setup.py b/trustgraph-flow/setup.py index 4f7b3383..44901119 100644 --- a/trustgraph-flow/setup.py +++ b/trustgraph-flow/setup.py @@ -59,8 +59,10 @@ setuptools.setup( "ibis", "jsonschema", "aiohttp", + "aiopulsar-py", ], scripts=[ + "scripts/api-gateway", "scripts/agent-manager-react", "scripts/chunker-recursive", 
"scripts/chunker-token", From a1e0edd96f70bbd01cea6a33e0ea671565413b16 Mon Sep 17 00:00:00 2001 From: cybermaggedon Date: Wed, 20 Nov 2024 23:17:55 +0000 Subject: [PATCH 04/37] API gateway in a proper module, restarting publishers & subscribers as appropriate (#166) --- trustgraph-flow/scripts/api-gateway | 551 +-------------- trustgraph-flow/trustgraph/api/__init__.py | 0 .../trustgraph/api/gateway/__init__.py | 3 + .../trustgraph/api/gateway/__main__.py | 7 + .../trustgraph/api/gateway/service.py | 631 ++++++++++++++++++ 5 files changed, 643 insertions(+), 549 deletions(-) create mode 100644 trustgraph-flow/trustgraph/api/__init__.py create mode 100644 trustgraph-flow/trustgraph/api/gateway/__init__.py create mode 100755 trustgraph-flow/trustgraph/api/gateway/__main__.py create mode 100755 trustgraph-flow/trustgraph/api/gateway/service.py diff --git a/trustgraph-flow/scripts/api-gateway b/trustgraph-flow/scripts/api-gateway index dd7d54ac..d28a5b8a 100755 --- a/trustgraph-flow/scripts/api-gateway +++ b/trustgraph-flow/scripts/api-gateway @@ -1,553 +1,6 @@ #!/usr/bin/env python3 -# FIXME: Subscribes to Pulsar unnecessarily, should only do it when there -# are active listeners +from trustgraph.api.gateway import run -# FIXME: Connection errors in publishers / subscribers cause those threads -# to fail and are not failed or retried - -import asyncio -from aiohttp import web -import json -import logging -import uuid -import os - -import pulsar -from pulsar.asyncio import Client -from pulsar.schema import JsonSchema -import _pulsar -import aiopulsar - -from trustgraph.clients.llm_client import LlmClient -from trustgraph.clients.prompt_client import PromptClient - -from trustgraph.schema import TextCompletionRequest, TextCompletionResponse -from trustgraph.schema import text_completion_request_queue -from trustgraph.schema import text_completion_response_queue - -from trustgraph.schema import PromptRequest, PromptResponse -from trustgraph.schema import 
prompt_request_queue -from trustgraph.schema import prompt_response_queue - -from trustgraph.schema import GraphRagQuery, GraphRagResponse -from trustgraph.schema import graph_rag_request_queue -from trustgraph.schema import graph_rag_response_queue - -from trustgraph.schema import TriplesQueryRequest, TriplesQueryResponse, Value -from trustgraph.schema import triples_request_queue -from trustgraph.schema import triples_response_queue - -from trustgraph.schema import AgentRequest, AgentResponse -from trustgraph.schema import agent_request_queue -from trustgraph.schema import agent_response_queue - -from trustgraph.schema import EmbeddingsRequest, EmbeddingsResponse -from trustgraph.schema import embeddings_request_queue -from trustgraph.schema import embeddings_response_queue - -logger = logging.getLogger("api") -logger.setLevel(logging.INFO) - -pulsar_host = os.getenv("PULSAR_HOST", "pulsar://pulsar:6650") -TIME_OUT = 600 - -class Publisher: - - def __init__(self, pulsar_host, topic, schema=None, max_size=10): - self.pulsar_host = pulsar_host - self.topic = topic - self.schema = schema - self.q = asyncio.Queue(maxsize=max_size) - - async def run(self): - try: - async with aiopulsar.connect(self.pulsar_host) as client: - async with client.create_producer( - topic=self.topic, - schema=self.schema, - ) as producer: - while True: - id, item = await self.q.get() - await producer.send(item, { "id": id }) - # print("message out") - except Exception as e: - print("Exception:", e, flush=True) - - async def send(self, id, msg): - await self.q.put((id, msg)) - -class Subscriber: - - def __init__(self, pulsar_host, topic, subscription, consumer_name, - schema=None, max_size=10): - self.pulsar_host = pulsar_host - self.topic = topic - self.subscription = subscription - self.consumer_name = consumer_name - self.schema = schema - self.q = {} - - async def run(self): - try: - async with aiopulsar.connect(pulsar_host) as client: - async with client.subscribe( - topic=self.topic, - 
subscription_name=self.subscription, - consumer_name=self.consumer_name, - schema=self.schema, - ) as consumer: - while True: - msg = await consumer.receive() - # print("message in", self.topic) - id = msg.properties()["id"] - value = msg.value() - if id in self.q: - await self.q[id].put(value) - except Exception as e: - print("Exception:", e, flush=True) - - async def subscribe(self, id): - q = asyncio.Queue() - self.q[id] = q - return q - - async def unsubscribe(self, id): - if id in self.q: - del self.q[id] - -class Api: - - def __init__(self, **config): - - self.port = int(config.get("port", "8088")) - self.app = web.Application(middlewares=[]) - - self.llm_out = Publisher( - pulsar_host, text_completion_request_queue, - schema=JsonSchema(TextCompletionRequest) - ) - - self.llm_in = Subscriber( - pulsar_host, text_completion_response_queue, - "api-gateway", "api-gateway", - JsonSchema(TextCompletionResponse) - ) - - self.prompt_out = Publisher( - pulsar_host, prompt_request_queue, - schema=JsonSchema(PromptRequest) - ) - - self.prompt_in = Subscriber( - pulsar_host, prompt_response_queue, - "api-gateway", "api-gateway", - JsonSchema(PromptResponse) - ) - - self.graph_rag_out = Publisher( - pulsar_host, graph_rag_request_queue, - schema=JsonSchema(GraphRagQuery) - ) - - self.graph_rag_in = Subscriber( - pulsar_host, graph_rag_response_queue, - "api-gateway", "api-gateway", - JsonSchema(GraphRagResponse) - ) - - self.triples_query_out = Publisher( - pulsar_host, triples_request_queue, - schema=JsonSchema(TriplesQueryRequest) - ) - - self.triples_query_in = Subscriber( - pulsar_host, triples_response_queue, - "api-gateway", "api-gateway", - JsonSchema(TriplesQueryResponse) - ) - - self.agent_out = Publisher( - pulsar_host, agent_request_queue, - schema=JsonSchema(AgentRequest) - ) - - self.agent_in = Subscriber( - pulsar_host, agent_response_queue, - "api-gateway", "api-gateway", - JsonSchema(AgentResponse) - ) - - self.embeddings_out = Publisher( - pulsar_host, 
embeddings_request_queue, - schema=JsonSchema(EmbeddingsRequest) - ) - - self.embeddings_in = Subscriber( - pulsar_host, embeddings_response_queue, - "api-gateway", "api-gateway", - JsonSchema(EmbeddingsResponse) - ) - - self.app.add_routes([ - web.post("/api/v1/text-completion", self.llm), - web.post("/api/v1/prompt", self.prompt), - web.post("/api/v1/graph-rag", self.graph_rag), - web.post("/api/v1/triples-query", self.triples_query), - web.post("/api/v1/agent", self.agent), - web.post("/api/v1/embeddings", self.embeddings), - ]) - - async def llm(self, request): - - id = str(uuid.uuid4()) - - try: - - data = await request.json() - - q = await self.llm_in.subscribe(id) - - await self.llm_out.send( - id, - TextCompletionRequest( - system=data["system"], - prompt=data["prompt"] - ) - ) - - try: - resp = await asyncio.wait_for(q.get(), TIME_OUT) - except: - raise RuntimeError("Timeout waiting for response") - - if resp.error: - return web.json_response( - { "error": resp.error.message } - ) - - return web.json_response( - { "response": resp.response } - ) - - except Exception as e: - logging.error(f"Exception: {e}") - - return web.json_response( - { "error": str(e) } - ) - - finally: - await self.llm_in.unsubscribe(id) - - async def prompt(self, request): - - id = str(uuid.uuid4()) - - try: - - data = await request.json() - - q = await self.prompt_in.subscribe(id) - - terms = { - k: json.dumps(v) - for k, v in data["variables"].items() - } - - await self.prompt_out.send( - id, - PromptRequest( - id=data["id"], - terms=terms - ) - ) - - try: - resp = await asyncio.wait_for(q.get(), TIME_OUT) - except: - raise RuntimeError("Timeout waiting for response") - - if resp.error: - return web.json_response( - { "error": resp.error.message } - ) - - if resp.object: - return web.json_response( - { "object": resp.object } - ) - - return web.json_response( - { "text": resp.text } - ) - - except Exception as e: - logging.error(f"Exception: {e}") - - return web.json_response( - { 
"error": str(e) } - ) - - finally: - await self.prompt_in.unsubscribe(id) - - async def graph_rag(self, request): - - id = str(uuid.uuid4()) - - try: - - data = await request.json() - - q = await self.graph_rag_in.subscribe(id) - - await self.graph_rag_out.send( - id, - GraphRagQuery( - query=data["query"], - user=data.get("user", "trustgraph"), - collection=data.get("collection", "default"), - ) - ) - - try: - resp = await asyncio.wait_for(q.get(), TIME_OUT) - except: - raise RuntimeError("Timeout waiting for response") - - if resp.error: - return web.json_response( - { "error": resp.error.message } - ) - - return web.json_response( - { "response": resp.response } - ) - - except Exception as e: - logging.error(f"Exception: {e}") - - return web.json_response( - { "error": str(e) } - ) - - finally: - await self.graph_rag_in.unsubscribe(id) - - async def triples_query(self, request): - - id = str(uuid.uuid4()) - - try: - - data = await request.json() - - q = await self.triples_query_in.subscribe(id) - - if "s" in data: - if data["s"].startswith("http:") or data["s"].startswith("https:"): - s = Value(value=data["s"], is_uri=True) - else: - s = Value(value=data["s"], is_uri=True) - else: - s = None - - if "p" in data: - if data["p"].startswith("http:") or data["p"].startswith("https:"): - p = Value(value=data["p"], is_uri=True) - else: - p = Value(value=data["p"], is_uri=True) - else: - p = None - - if "o" in data: - if data["o"].startswith("http:") or data["o"].startswith("https:"): - o = Value(value=data["o"], is_uri=True) - else: - o = Value(value=data["o"], is_uri=True) - else: - o = None - - limit = int(data.get("limit", 10000)) - - await self.triples_query_out.send( - id, - TriplesQueryRequest( - s = s, p = p, o = o, - limit = limit, - user = data.get("user", "trustgraph"), - collection = data.get("collection", "default"), - ) - ) - - try: - resp = await asyncio.wait_for(q.get(), TIME_OUT) - except: - raise RuntimeError("Timeout waiting for response") - - if 
resp.error: - return web.json_response( - { "error": resp.error.message } - ) - - return web.json_response( - { - "response": [ - { - "s": { - "v": t.s.value, - "e": t.s.is_uri, - }, - "p": { - "v": t.p.value, - "e": t.p.is_uri, - }, - "o": { - "v": t.o.value, - "e": t.o.is_uri, - } - } - for t in resp.triples - ] - } - ) - - except Exception as e: - logging.error(f"Exception: {e}") - - return web.json_response( - { "error": str(e) } - ) - - finally: - await self.graph_rag_in.unsubscribe(id) - - async def agent(self, request): - - id = str(uuid.uuid4()) - - try: - - data = await request.json() - - q = await self.agent_in.subscribe(id) - - await self.agent_out.send( - id, - AgentRequest( - question=data["question"], - ) - ) - - while True: - try: - resp = await asyncio.wait_for(q.get(), TIME_OUT) - except: - raise RuntimeError("Timeout waiting for response") - - if resp.error: - return web.json_response( - { "error": resp.error.message } - ) - - if resp.answer: break - - if resp.thought: print("thought:", resp.thought) - if resp.observation: print("observation:", resp.observation) - - if resp.answer: - return web.json_response( - { "answer": resp.answer } - ) - - # Can't happen, ook at the logic - raise RuntimeError("Strange state") - - except Exception as e: - logging.error(f"Exception: {e}") - - return web.json_response( - { "error": str(e) } - ) - - finally: - await self.agent_in.unsubscribe(id) - - async def embeddings(self, request): - - id = str(uuid.uuid4()) - - try: - - data = await request.json() - - q = await self.embeddings_in.subscribe(id) - - await self.embeddings_out.send( - id, - EmbeddingsRequest( - text=data["text"], - ) - ) - - try: - resp = await asyncio.wait_for(q.get(), TIME_OUT) - except: - raise RuntimeError("Timeout waiting for response") - - if resp.error: - return web.json_response( - { "error": resp.error.message } - ) - - return web.json_response( - { "vectors": resp.vectors } - ) - - except Exception as e: - logging.error(f"Exception: 
{e}") - - return web.json_response( - { "error": str(e) } - ) - - finally: - await self.embeddings_in.unsubscribe(id) - - async def app_factory(self): - - self.llm_pub_task = asyncio.create_task(self.llm_in.run()) - self.llm_sub_task = asyncio.create_task(self.llm_out.run()) - - self.prompt_pub_task = asyncio.create_task(self.prompt_in.run()) - self.prompt_sub_task = asyncio.create_task(self.prompt_out.run()) - - self.graph_rag_pub_task = asyncio.create_task(self.graph_rag_in.run()) - self.graph_rag_sub_task = asyncio.create_task(self.graph_rag_out.run()) - - self.triples_query_pub_task = asyncio.create_task( - self.triples_query_in.run() - ) - self.triples_query_sub_task = asyncio.create_task( - self.triples_query_out.run() - ) - - self.agent_pub_task = asyncio.create_task(self.agent_in.run()) - self.agent_sub_task = asyncio.create_task(self.agent_out.run()) - - self.embeddings_pub_task = asyncio.create_task( - self.embeddings_in.run() - ) - self.embeddings_sub_task = asyncio.create_task( - self.embeddings_out.run() - ) - - return self.app - - def run(self): - web.run_app(self.app_factory(), port=self.port) - -a = Api() -a.run() +run() diff --git a/trustgraph-flow/trustgraph/api/__init__.py b/trustgraph-flow/trustgraph/api/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/trustgraph-flow/trustgraph/api/gateway/__init__.py b/trustgraph-flow/trustgraph/api/gateway/__init__.py new file mode 100644 index 00000000..ba844705 --- /dev/null +++ b/trustgraph-flow/trustgraph/api/gateway/__init__.py @@ -0,0 +1,3 @@ + +from . service import * + diff --git a/trustgraph-flow/trustgraph/api/gateway/__main__.py b/trustgraph-flow/trustgraph/api/gateway/__main__.py new file mode 100755 index 00000000..e9136855 --- /dev/null +++ b/trustgraph-flow/trustgraph/api/gateway/__main__.py @@ -0,0 +1,7 @@ +#!/usr/bin/env python3 + +from . 
service import run + +if __name__ == '__main__': + run() + diff --git a/trustgraph-flow/trustgraph/api/gateway/service.py b/trustgraph-flow/trustgraph/api/gateway/service.py new file mode 100755 index 00000000..b955af1e --- /dev/null +++ b/trustgraph-flow/trustgraph/api/gateway/service.py @@ -0,0 +1,631 @@ + +""" +API gateway. Offers HTTP services which are translated to interaction on the +Pulsar bus. +""" + +module = ".".join(__name__.split(".")[1:-1]) + +# FIXME: Subscribes to Pulsar unnecessarily, should only do it when there +# are active listeners + +# FIXME: Connection errors in publishers / subscribers cause those threads +# to fail and are not failed or retried + +import asyncio +import argparse +from aiohttp import web +import json +import logging +import uuid +import os + +import pulsar +from pulsar.asyncio import Client +from pulsar.schema import JsonSchema +import _pulsar +import aiopulsar +from prometheus_client import start_http_server + +from ... log_level import LogLevel + +from trustgraph.clients.llm_client import LlmClient +from trustgraph.clients.prompt_client import PromptClient + +from ... schema import TextCompletionRequest, TextCompletionResponse +from ... schema import text_completion_request_queue +from ... schema import text_completion_response_queue + +from ... schema import PromptRequest, PromptResponse +from ... schema import prompt_request_queue +from ... schema import prompt_response_queue + +from ... schema import GraphRagQuery, GraphRagResponse +from ... schema import graph_rag_request_queue +from ... schema import graph_rag_response_queue + +from ... schema import TriplesQueryRequest, TriplesQueryResponse, Value +from ... schema import triples_request_queue +from ... schema import triples_response_queue + +from ... schema import AgentRequest, AgentResponse +from ... schema import agent_request_queue +from ... schema import agent_response_queue + +from ... schema import EmbeddingsRequest, EmbeddingsResponse +from ... 
schema import embeddings_request_queue +from ... schema import embeddings_response_queue + +logger = logging.getLogger("api") +logger.setLevel(logging.INFO) + +default_pulsar_host = os.getenv("PULSAR_HOST", "pulsar://pulsar:6650") +default_timeout = 600 +default_port = 8088 + +class Publisher: + + def __init__(self, pulsar_host, topic, schema=None, max_size=10): + self.pulsar_host = pulsar_host + self.topic = topic + self.schema = schema + self.q = asyncio.Queue(maxsize=max_size) + + async def run(self): + + while True: + + try: + async with aiopulsar.connect(self.pulsar_host) as client: + async with client.create_producer( + topic=self.topic, + schema=self.schema, + ) as producer: + while True: + id, item = await self.q.get() + await producer.send(item, { "id": id }) + except Exception as e: + print("Exception:", e, flush=True) + + # If handler drops out, sleep a retry + await asyncio.sleep(2) + + async def send(self, id, msg): + await self.q.put((id, msg)) + +class Subscriber: + + def __init__(self, pulsar_host, topic, subscription, consumer_name, + schema=None, max_size=10): + self.pulsar_host = pulsar_host + self.topic = topic + self.subscription = subscription + self.consumer_name = consumer_name + self.schema = schema + self.q = {} + + async def run(self): + while True: + try: + async with aiopulsar.connect(self.pulsar_host) as client: + async with client.subscribe( + topic=self.topic, + subscription_name=self.subscription, + consumer_name=self.consumer_name, + schema=self.schema, + ) as consumer: + while True: + msg = await consumer.receive() + id = msg.properties()["id"] + value = msg.value() + if id in self.q: + await self.q[id].put(value) + except Exception as e: + print("Exception:", e, flush=True) + + # If handler drops out, sleep a retry + await asyncio.sleep(2) + + async def subscribe(self, id): + q = asyncio.Queue() + self.q[id] = q + return q + + async def unsubscribe(self, id): + if id in self.q: + del self.q[id] + +class Api: + + def 
__init__(self, **config): + + self.app = web.Application(middlewares=[]) + + self.port = int(config.get("port", default_port)) + self.timeout = int(config.get("timeout", default_timeout)) + self.pulsar_host = config.get("pulsar_host", default_pulsar_host) + + self.llm_out = Publisher( + self.pulsar_host, text_completion_request_queue, + schema=JsonSchema(TextCompletionRequest) + ) + + self.llm_in = Subscriber( + self.pulsar_host, text_completion_response_queue, + "api-gateway", "api-gateway", + JsonSchema(TextCompletionResponse) + ) + + self.prompt_out = Publisher( + self.pulsar_host, prompt_request_queue, + schema=JsonSchema(PromptRequest) + ) + + self.prompt_in = Subscriber( + self.pulsar_host, prompt_response_queue, + "api-gateway", "api-gateway", + JsonSchema(PromptResponse) + ) + + self.graph_rag_out = Publisher( + self.pulsar_host, graph_rag_request_queue, + schema=JsonSchema(GraphRagQuery) + ) + + self.graph_rag_in = Subscriber( + self.pulsar_host, graph_rag_response_queue, + "api-gateway", "api-gateway", + JsonSchema(GraphRagResponse) + ) + + self.triples_query_out = Publisher( + self.pulsar_host, triples_request_queue, + schema=JsonSchema(TriplesQueryRequest) + ) + + self.triples_query_in = Subscriber( + self.pulsar_host, triples_response_queue, + "api-gateway", "api-gateway", + JsonSchema(TriplesQueryResponse) + ) + + self.agent_out = Publisher( + self.pulsar_host, agent_request_queue, + schema=JsonSchema(AgentRequest) + ) + + self.agent_in = Subscriber( + self.pulsar_host, agent_response_queue, + "api-gateway", "api-gateway", + JsonSchema(AgentResponse) + ) + + self.embeddings_out = Publisher( + self.pulsar_host, embeddings_request_queue, + schema=JsonSchema(EmbeddingsRequest) + ) + + self.embeddings_in = Subscriber( + self.pulsar_host, embeddings_response_queue, + "api-gateway", "api-gateway", + JsonSchema(EmbeddingsResponse) + ) + + self.app.add_routes([ + web.post("/api/v1/text-completion", self.llm), + web.post("/api/v1/prompt", self.prompt), + 
web.post("/api/v1/graph-rag", self.graph_rag), + web.post("/api/v1/triples-query", self.triples_query), + web.post("/api/v1/agent", self.agent), + web.post("/api/v1/embeddings", self.embeddings), + ]) + + async def llm(self, request): + + id = str(uuid.uuid4()) + + try: + + data = await request.json() + + q = await self.llm_in.subscribe(id) + + await self.llm_out.send( + id, + TextCompletionRequest( + system=data["system"], + prompt=data["prompt"] + ) + ) + + try: + resp = await asyncio.wait_for(q.get(), self.timeout) + except: + raise RuntimeError("Timeout waiting for response") + + if resp.error: + return web.json_response( + { "error": resp.error.message } + ) + + return web.json_response( + { "response": resp.response } + ) + + except Exception as e: + logging.error(f"Exception: {e}") + + return web.json_response( + { "error": str(e) } + ) + + finally: + await self.llm_in.unsubscribe(id) + + async def prompt(self, request): + + id = str(uuid.uuid4()) + + try: + + data = await request.json() + + q = await self.prompt_in.subscribe(id) + + terms = { + k: json.dumps(v) + for k, v in data["variables"].items() + } + + await self.prompt_out.send( + id, + PromptRequest( + id=data["id"], + terms=terms + ) + ) + + try: + resp = await asyncio.wait_for(q.get(), self.timeout) + except: + raise RuntimeError("Timeout waiting for response") + + if resp.error: + return web.json_response( + { "error": resp.error.message } + ) + + if resp.object: + return web.json_response( + { "object": resp.object } + ) + + return web.json_response( + { "text": resp.text } + ) + + except Exception as e: + logging.error(f"Exception: {e}") + + return web.json_response( + { "error": str(e) } + ) + + finally: + await self.prompt_in.unsubscribe(id) + + async def graph_rag(self, request): + + id = str(uuid.uuid4()) + + try: + + data = await request.json() + + q = await self.graph_rag_in.subscribe(id) + + await self.graph_rag_out.send( + id, + GraphRagQuery( + query=data["query"], + 
user=data.get("user", "trustgraph"), + collection=data.get("collection", "default"), + ) + ) + + try: + resp = await asyncio.wait_for(q.get(), self.timeout) + except: + raise RuntimeError("Timeout waiting for response") + + if resp.error: + return web.json_response( + { "error": resp.error.message } + ) + + return web.json_response( + { "response": resp.response } + ) + + except Exception as e: + logging.error(f"Exception: {e}") + + return web.json_response( + { "error": str(e) } + ) + + finally: + await self.graph_rag_in.unsubscribe(id) + + async def triples_query(self, request): + + id = str(uuid.uuid4()) + + try: + + data = await request.json() + + q = await self.triples_query_in.subscribe(id) + + if "s" in data: + if data["s"].startswith("http:") or data["s"].startswith("https:"): + s = Value(value=data["s"], is_uri=True) + else: + s = Value(value=data["s"], is_uri=True) + else: + s = None + + if "p" in data: + if data["p"].startswith("http:") or data["p"].startswith("https:"): + p = Value(value=data["p"], is_uri=True) + else: + p = Value(value=data["p"], is_uri=True) + else: + p = None + + if "o" in data: + if data["o"].startswith("http:") or data["o"].startswith("https:"): + o = Value(value=data["o"], is_uri=True) + else: + o = Value(value=data["o"], is_uri=True) + else: + o = None + + limit = int(data.get("limit", 10000)) + + await self.triples_query_out.send( + id, + TriplesQueryRequest( + s = s, p = p, o = o, + limit = limit, + user = data.get("user", "trustgraph"), + collection = data.get("collection", "default"), + ) + ) + + try: + resp = await asyncio.wait_for(q.get(), self.timeout) + except: + raise RuntimeError("Timeout waiting for response") + + if resp.error: + return web.json_response( + { "error": resp.error.message } + ) + + return web.json_response( + { + "response": [ + { + "s": { + "v": t.s.value, + "e": t.s.is_uri, + }, + "p": { + "v": t.p.value, + "e": t.p.is_uri, + }, + "o": { + "v": t.o.value, + "e": t.o.is_uri, + } + } + for t in 
resp.triples + ] + } + ) + + except Exception as e: + logging.error(f"Exception: {e}") + + return web.json_response( + { "error": str(e) } + ) + + finally: + await self.graph_rag_in.unsubscribe(id) + + async def agent(self, request): + + id = str(uuid.uuid4()) + + try: + + data = await request.json() + + q = await self.agent_in.subscribe(id) + + await self.agent_out.send( + id, + AgentRequest( + question=data["question"], + ) + ) + + while True: + try: + resp = await asyncio.wait_for(q.get(), self.timeout) + except: + raise RuntimeError("Timeout waiting for response") + + if resp.error: + return web.json_response( + { "error": resp.error.message } + ) + + if resp.answer: break + + if resp.thought: print("thought:", resp.thought) + if resp.observation: print("observation:", resp.observation) + + if resp.answer: + return web.json_response( + { "answer": resp.answer } + ) + + # Can't happen, ook at the logic + raise RuntimeError("Strange state") + + except Exception as e: + logging.error(f"Exception: {e}") + + return web.json_response( + { "error": str(e) } + ) + + finally: + await self.agent_in.unsubscribe(id) + + async def embeddings(self, request): + + id = str(uuid.uuid4()) + + try: + + data = await request.json() + + q = await self.embeddings_in.subscribe(id) + + await self.embeddings_out.send( + id, + EmbeddingsRequest( + text=data["text"], + ) + ) + + try: + resp = await asyncio.wait_for(q.get(), self.timeout) + except: + raise RuntimeError("Timeout waiting for response") + + if resp.error: + return web.json_response( + { "error": resp.error.message } + ) + + return web.json_response( + { "vectors": resp.vectors } + ) + + except Exception as e: + logging.error(f"Exception: {e}") + + return web.json_response( + { "error": str(e) } + ) + + finally: + await self.embeddings_in.unsubscribe(id) + + async def app_factory(self): + + self.llm_pub_task = asyncio.create_task(self.llm_in.run()) + self.llm_sub_task = asyncio.create_task(self.llm_out.run()) + + 
self.prompt_pub_task = asyncio.create_task(self.prompt_in.run()) + self.prompt_sub_task = asyncio.create_task(self.prompt_out.run()) + + self.graph_rag_pub_task = asyncio.create_task(self.graph_rag_in.run()) + self.graph_rag_sub_task = asyncio.create_task(self.graph_rag_out.run()) + + self.triples_query_pub_task = asyncio.create_task( + self.triples_query_in.run() + ) + self.triples_query_sub_task = asyncio.create_task( + self.triples_query_out.run() + ) + + self.agent_pub_task = asyncio.create_task(self.agent_in.run()) + self.agent_sub_task = asyncio.create_task(self.agent_out.run()) + + self.embeddings_pub_task = asyncio.create_task( + self.embeddings_in.run() + ) + self.embeddings_sub_task = asyncio.create_task( + self.embeddings_out.run() + ) + + return self.app + + def run(self): + web.run_app(self.app_factory(), port=self.port) + +def run(): + + + parser = argparse.ArgumentParser( + prog="api-gateway", + description=__doc__ + ) + + parser.add_argument( + '-p', '--pulsar-host', + default=default_pulsar_host, + help=f'Pulsar host (default: {default_pulsar_host})', + ) + + parser.add_argument( + '--port', + type=int, + default=default_port, + help=f'Port number to listen on (default: {default_port})', + ) + + parser.add_argument( + '--timeout', + type=int, + default=default_timeout, + help=f'API request timeout in seconds (default: {default_timeout})', + ) + + parser.add_argument( + '-l', '--log-level', + type=LogLevel, + default=LogLevel.INFO, + choices=list(LogLevel), + help=f'Output queue (default: info)' + ) + + parser.add_argument( + '--metrics', + action=argparse.BooleanOptionalAction, + default=True, + help=f'Metrics enabled (default: true)', + ) + + parser.add_argument( + '-P', '--metrics-port', + type=int, + default=8000, + help=f'Prometheus metrics port (default: 8000)', + ) + + args = parser.parse_args() + args = vars(args) + + if args["metrics"]: + start_http_server(args["metrics_port"]) + + a = Api(**args) + a.run() + From 
dc0f54f236f4bdacfb4515ebaa2c0da0a4dcf4fc Mon Sep 17 00:00:00 2001 From: cybermaggedon Date: Thu, 21 Nov 2024 14:53:53 +0000 Subject: [PATCH 05/37] API supports doc & text load (#167) --- trustgraph-cli/scripts/tg-load-text | 1 - .../trustgraph/api/gateway/service.py | 159 ++++++++++++++++-- 2 files changed, 143 insertions(+), 17 deletions(-) diff --git a/trustgraph-cli/scripts/tg-load-text b/trustgraph-cli/scripts/tg-load-text index 88dc8e17..e49ee7a9 100755 --- a/trustgraph-cli/scripts/tg-load-text +++ b/trustgraph-cli/scripts/tg-load-text @@ -6,7 +6,6 @@ Loads a text document into TrustGraph processing. import pulsar from pulsar.schema import JsonSchema -import base64 import hashlib import argparse import os diff --git a/trustgraph-flow/trustgraph/api/gateway/service.py b/trustgraph-flow/trustgraph/api/gateway/service.py index b955af1e..2ac22892 100755 --- a/trustgraph-flow/trustgraph/api/gateway/service.py +++ b/trustgraph-flow/trustgraph/api/gateway/service.py @@ -19,6 +19,7 @@ import json import logging import uuid import os +import base64 import pulsar from pulsar.asyncio import Client @@ -32,6 +33,8 @@ from ... log_level import LogLevel from trustgraph.clients.llm_client import LlmClient from trustgraph.clients.prompt_client import PromptClient +from ... schema import Value, Metadata, Document, TextDocument, Triple + from ... schema import TextCompletionRequest, TextCompletionResponse from ... schema import text_completion_request_queue from ... schema import text_completion_response_queue @@ -44,7 +47,7 @@ from ... schema import GraphRagQuery, GraphRagResponse from ... schema import graph_rag_request_queue from ... schema import graph_rag_response_queue -from ... schema import TriplesQueryRequest, TriplesQueryResponse, Value +from ... schema import TriplesQueryRequest, TriplesQueryResponse from ... schema import triples_request_queue from ... schema import triples_response_queue @@ -56,6 +59,8 @@ from ... 
schema import EmbeddingsRequest, EmbeddingsResponse from ... schema import embeddings_request_queue from ... schema import embeddings_response_queue +from ... schema import document_ingest_queue, text_ingest_queue + logger = logging.getLogger("api") logger.setLevel(logging.INFO) @@ -63,13 +68,31 @@ default_pulsar_host = os.getenv("PULSAR_HOST", "pulsar://pulsar:6650") default_timeout = 600 default_port = 8088 +def to_value(x): + if x.startswith("http:") or x.startswith("https:"): + return Value(value=x, is_uri=True) + else: + return Value(value=x, is_uri=True) + +def to_subgraph(x): + return [ + Triple( + s=to_value(t["s"]), + p=to_value(t["p"]), + o=to_value(t["o"]) + ) + for t in x + ] + class Publisher: - def __init__(self, pulsar_host, topic, schema=None, max_size=10): + def __init__(self, pulsar_host, topic, schema=None, max_size=10, + chunking_enabled=False): self.pulsar_host = pulsar_host self.topic = topic self.schema = schema self.q = asyncio.Queue(maxsize=max_size) + self.chunking_enabled = chunking_enabled async def run(self): @@ -80,10 +103,16 @@ class Publisher: async with client.create_producer( topic=self.topic, schema=self.schema, + chunking_enabled=self.chunking_enabled, ) as producer: while True: id, item = await self.q.get() - await producer.send(item, { "id": id }) + + if id: + await producer.send(item, { "id": id }) + else: + await producer.send(item) + except Exception as e: print("Exception:", e, flush=True) @@ -139,7 +168,10 @@ class Api: def __init__(self, **config): - self.app = web.Application(middlewares=[]) + self.app = web.Application( + middlewares=[], + client_max_size=256 * 1024 * 1024 + ) self.port = int(config.get("port", default_port)) self.timeout = int(config.get("timeout", default_timeout)) @@ -211,6 +243,18 @@ class Api: JsonSchema(EmbeddingsResponse) ) + self.document_out = Publisher( + self.pulsar_host, document_ingest_queue, + schema=JsonSchema(Document), + chunking_enabled=True, + ) + + self.text_out = Publisher( + 
self.pulsar_host, text_ingest_queue, + schema=JsonSchema(TextDocument), + chunking_enabled=True, + ) + self.app.add_routes([ web.post("/api/v1/text-completion", self.llm), web.post("/api/v1/prompt", self.prompt), @@ -218,6 +262,8 @@ class Api: web.post("/api/v1/triples-query", self.triples_query), web.post("/api/v1/agent", self.agent), web.post("/api/v1/embeddings", self.embeddings), + web.post("/api/v1/load/document", self.load_document), + web.post("/api/v1/load/text", self.load_text), ]) async def llm(self, request): @@ -368,26 +414,17 @@ class Api: q = await self.triples_query_in.subscribe(id) if "s" in data: - if data["s"].startswith("http:") or data["s"].startswith("https:"): - s = Value(value=data["s"], is_uri=True) - else: - s = Value(value=data["s"], is_uri=True) + s = to_value(data["s"]) else: s = None if "p" in data: - if data["p"].startswith("http:") or data["p"].startswith("https:"): - p = Value(value=data["p"], is_uri=True) - else: - p = Value(value=data["p"], is_uri=True) + p = to_value(data["p"]) else: p = None if "o" in data: - if data["o"].startswith("http:") or data["o"].startswith("https:"): - o = Value(value=data["o"], is_uri=True) - else: - o = Value(value=data["o"], is_uri=True) + o = to_value(data["o"]) else: o = None @@ -537,6 +574,92 @@ class Api: finally: await self.embeddings_in.unsubscribe(id) + async def load_document(self, request): + + try: + + data = await request.json() + + if "metadata" in data: + metadata = to_subgraph(data["metadata"]) + else: + metadata = [] + + # Doing a base64 decode/encode here to make sure the + # content is valid base64 + doc = base64.b64decode(data["data"]) + + resp = await self.document_out.send( + None, + Document( + metadata=Metadata( + id=data.get("id"), + metadata=metadata, + user=data.get("user", "trustgraph"), + collection=data.get("collection", "default"), + ), + data=base64.b64encode(doc).decode("utf-8") + ) + ) + + print("Document loaded.") + + return web.json_response( + { } + ) + + except 
Exception as e: + logging.error(f"Exception: {e}") + + return web.json_response( + { "error": str(e) } + ) + + async def load_text(self, request): + + try: + + data = await request.json() + + if "metadata" in data: + metadata = to_subgraph(data["metadata"]) + else: + metadata = [] + + if "charset" in data: + charset = data["charset"] + else: + charset = "utf-8" + + # Text is base64 encoded + text = base64.b64decode(data["text"]).decode(charset) + + resp = await self.text_out.send( + None, + TextDocument( + metadata=Metadata( + id=data.get("id"), + metadata=metadata, + user=data.get("user", "trustgraph"), + collection=data.get("collection", "default"), + ), + text=text, + ) + ) + + print("Text document loaded.") + + return web.json_response( + { } + ) + + except Exception as e: + logging.error(f"Exception: {e}") + + return web.json_response( + { "error": str(e) } + ) + async def app_factory(self): self.llm_pub_task = asyncio.create_task(self.llm_in.run()) @@ -565,6 +688,10 @@ class Api: self.embeddings_out.run() ) + self.doc_ingest_pub_task = asyncio.create_task(self.document_out.run()) + + self.text_ingest_pub_task = asyncio.create_task(self.text_out.run()) + return self.app def run(self): From 7a64385a575400951f266caeba576ae4bc8da9c7 Mon Sep 17 00:00:00 2001 From: cybermaggedon Date: Thu, 21 Nov 2024 18:02:49 +0000 Subject: [PATCH 06/37] Fix graph query in Cassandra (#168) --- trustgraph-flow/trustgraph/api/gateway/service.py | 2 +- trustgraph-flow/trustgraph/direct/cassandra.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/trustgraph-flow/trustgraph/api/gateway/service.py b/trustgraph-flow/trustgraph/api/gateway/service.py index 2ac22892..148bc321 100755 --- a/trustgraph-flow/trustgraph/api/gateway/service.py +++ b/trustgraph-flow/trustgraph/api/gateway/service.py @@ -72,7 +72,7 @@ def to_value(x): if x.startswith("http:") or x.startswith("https:"): return Value(value=x, is_uri=True) else: - return Value(value=x, is_uri=True) + return 
Value(value=x, is_uri=False) def to_subgraph(x): return [ diff --git a/trustgraph-flow/trustgraph/direct/cassandra.py b/trustgraph-flow/trustgraph/direct/cassandra.py index 2b577df1..568411a9 100644 --- a/trustgraph-flow/trustgraph/direct/cassandra.py +++ b/trustgraph-flow/trustgraph/direct/cassandra.py @@ -97,7 +97,7 @@ class TrustGraph: def get_po(self, p, o, limit=10): return self.session.execute( - f"select s from {self.table} where p = %s and o = %s allow filtering limit {limit}", + f"select s from {self.table} where p = %s and o = %s limit {limit} allow filtering", (p, o) ) From ae1264f5c4c3c351b60cde1c20986fec8551e6c1 Mon Sep 17 00:00:00 2001 From: cybermaggedon Date: Fri, 22 Nov 2024 15:55:32 +0000 Subject: [PATCH 07/37] Add Python support to calling the API (#169) --- trustgraph-base/trustgraph/api/__init__.py | 3 + trustgraph-base/trustgraph/api/api.py | 336 +++++++++++++++++++++ 2 files changed, 339 insertions(+) create mode 100644 trustgraph-base/trustgraph/api/__init__.py create mode 100644 trustgraph-base/trustgraph/api/api.py diff --git a/trustgraph-base/trustgraph/api/__init__.py b/trustgraph-base/trustgraph/api/__init__.py new file mode 100644 index 00000000..daa6a964 --- /dev/null +++ b/trustgraph-base/trustgraph/api/__init__.py @@ -0,0 +1,3 @@ + +from . 
api import * + diff --git a/trustgraph-base/trustgraph/api/api.py b/trustgraph-base/trustgraph/api/api.py new file mode 100644 index 00000000..818e42c3 --- /dev/null +++ b/trustgraph-base/trustgraph/api/api.py @@ -0,0 +1,336 @@ + +import requests +import json +import dataclasses +import base64 + +from trustgraph.knowledge import hash + +class ProtocolException(Exception): + pass + +class ApplicationException(Exception): + pass + +class Uri(str): + def is_uri(self): return True + def is_literal(self): return False + +class Literal(str): + def is_uri(self): return False + def is_literal(self): return True + +@dataclasses.dataclass +class Triple: + s : str + p : str + o : str + +class Api: + + def __init__(self, url="http://localhost:8088/"): + + self.url = url + + if not url.endswith("/"): + self.url += "/" + + self.url += "api/v1/" + + def check_error(self, response): + + if "error" in response: + + try: + msg = response["error"]["message"] + tp = response["error"]["message"] + except: + raise ApplicationException( + "Error, but the error object is broken" + ) + + raise ApplicationException(f"{tp}: {msg}") + + def text_completion(self, system, prompt): + + # The input consists of system and prompt strings + input = { + "system": system, + "prompt": prompt + } + + url = f"{self.url}text-completion" + + # Invoke the API, input is passed as JSON + resp = requests.post(url, json=input) + + # Should be a 200 status code + if resp.status_code != 200: + raise ProtocolException(f"Status code {resp.status_code}") + + try: + # Parse the response as JSON + object = resp.json() + except: + raise ProtocolException(f"Expected JSON response") + + self.check_error(resp) + + try: + return object["response"] + except: + raise ProtocolException(f"Response not formatted correctly") + + def agent(self, question): + + # The input consists of a question + input = { + "question": question + } + + url = f"{self.url}agent" + + # Invoke the API, input is passed as JSON + resp = 
requests.post(url, json=input) + + # Should be a 200 status code + if resp.status_code != 200: + raise ProtocolException(f"Status code {resp.status_code}") + + try: + # Parse the response as JSON + object = resp.json() + except: + raise ProtocolException(f"Expected JSON response") + + self.check_error(resp) + + try: + return object["answer"] + except: + raise ProtocolException(f"Response not formatted correctly") + + def graph_rag(self, question): + + # The input consists of a question + input = { + "query": question + } + + url = f"{self.url}graph-rag" + + # Invoke the API, input is passed as JSON + resp = requests.post(url, json=input) + + # Should be a 200 status code + if resp.status_code != 200: + raise ProtocolException(f"Status code {resp.status_code}") + + try: + # Parse the response as JSON + object = resp.json() + except: + raise ProtocolException(f"Expected JSON response") + + self.check_error(resp) + + try: + return object["response"] + except: + raise ProtocolException(f"Response not formatted correctly") + + def embeddings(self, text): + + # The input consists of a text block + input = { + "text": text + } + + url = f"{self.url}embeddings" + + # Invoke the API, input is passed as JSON + resp = requests.post(url, json=input) + + # Should be a 200 status code + if resp.status_code != 200: + raise ProtocolException(f"Status code {resp.status_code}") + + try: + # Parse the response as JSON + object = resp.json() + except: + raise ProtocolException(f"Expected JSON response") + + self.check_error(resp) + + try: + return object["vectors"] + except: + raise ProtocolException(f"Response not formatted correctly") + + def prompt(self, id, variables): + + # The input consists of system and prompt strings + input = { + "id": id, + "variables": variables + } + + url = f"{self.url}prompt" + + # Invoke the API, input is passed as JSON + resp = requests.post(url, json=input) + + # Should be a 200 status code + if resp.status_code != 200: + raise 
ProtocolException(f"Status code {resp.status_code}") + + try: + # Parse the response as JSON + object = resp.json() + except: + raise ProtocolException("Expected JSON response") + + self.check_error(resp) + + if "text" in object: + return object["text"] + + if "object" in object: + try: + return json.loads(object["object"]) + except Exception as e: + raise ProtocolException( + "Returned object not well-formed JSON" + ) + + raise ProtocolException("Response not formatted correctly") + + def triples_query(self, s=None, p=None, o=None, limit=10000): + + # The input consists of system and prompt strings + input = { + "limit": limit + } + + if s: input["s"] = s + if p: input["p"] = p + if o: input["o"] = o + + url = f"{self.url}triples-query" + + # Invoke the API, input is passed as JSON + resp = requests.post(url, json=input) + + # Should be a 200 status code + if resp.status_code != 200: + raise ProtocolException(f"Status code {resp.status_code}") + + try: + # Parse the response as JSON + object = resp.json() + except: + raise ProtocolException("Expected JSON response") + + self.check_error(resp) + + if "response" not in object: + raise ProtocolException("Response not formatted correctly") + + def to_value(x): + if x["e"]: return Uri(x["v"]) + return Literal(x["v"]) + + return [ + Triple( + s=to_value(t["s"]), + p=to_value(t["p"]), + o=to_value(t["o"]) + ) + for t in object["response"] + ] + + return object["response"] + + def load_document(self, document, id=None, metadata=None): + + if id is None: + + if metadata is not None: + + # Situation makes no sense. What can the metadata possibly + # mean if the caller doesn't know the document ID. 
+ # Metadata should relate to the document by ID + raise RuntimeError("Can't specify metadata without id") + + id = hash(document) + + triples = [] + + def emit(t): + triples.append(t) + + if metadata: + metadata.emit( + lambda t: triples.append({ + "s": t.s.value, + "p": t.p.value, + "o": t.o.value + }) + ) + + input = { + "id": id, + "metadata": triples, + "data": base64.b64encode(document).decode("utf-8"), + } + + url = f"{self.url}load/document" + + # Invoke the API, input is passed as JSON + resp = requests.post(url, json=input) + + # Should be a 200 status code + if resp.status_code != 200: + raise ProtocolException(f"Status code {resp.status_code}") + + def load_text(self, text, id=None, metadata=None, charset="utf-8"): + + if id is None: + + if metadata is not None: + + # Situation makes no sense. What can the metadata possibly + # mean if the caller doesn't know the document ID. + # Metadata should relate to the document by ID + raise RuntimeError("Can't specify metadata without id") + + id = hash(text) + + triples = [] + + if metadata: + metadata.emit( + lambda t: triples.append({ + "s": t.s.value, + "p": t.p.value, + "o": t.o.value + }) + ) + + input = { + "id": id, + "metadata": triples, + "charset": charset, + "text": base64.b64encode(text).decode("utf-8"), + } + + url = f"{self.url}load/text" + + # Invoke the API, input is passed as JSON + resp = requests.post(url, json=input) + + # Should be a 200 status code + if resp.status_code != 200: + raise ProtocolException(f"Status code {resp.status_code}") + From 319f9ac04a0a272f9f710d1e9abb4f77166dbbd3 Mon Sep 17 00:00:00 2001 From: cybermaggedon Date: Fri, 22 Nov 2024 23:48:21 +0000 Subject: [PATCH 08/37] Feature/pinecone integration (#170) * Added Pinecone for GE write & query * Add templates * Doc embedding support --- templates/components.jsonnet | 2 + templates/components/pinecone.jsonnet | 153 ++++++++++++++++ trustgraph-flow/scripts/ge-query-pinecone | 6 + trustgraph-flow/scripts/ge-write-pinecone | 
6 + trustgraph-flow/setup.py | 3 + .../query/doc_embeddings/pinecone/__init__.py | 3 + .../query/doc_embeddings/pinecone/__main__.py | 7 + .../query/doc_embeddings/pinecone/service.py | 142 +++++++++++++++ .../graph_embeddings/pinecone/__init__.py | 3 + .../graph_embeddings/pinecone/__main__.py | 7 + .../graph_embeddings/pinecone/service.py | 156 ++++++++++++++++ .../doc_embeddings/pinecone/__init__.py | 3 + .../doc_embeddings/pinecone/__main__.py | 7 + .../storage/doc_embeddings/pinecone/write.py | 167 ++++++++++++++++++ .../graph_embeddings/pinecone/__init__.py | 3 + .../graph_embeddings/pinecone/__main__.py | 7 + .../graph_embeddings/pinecone/write.py | 167 ++++++++++++++++++ 17 files changed, 842 insertions(+) create mode 100644 templates/components/pinecone.jsonnet create mode 100755 trustgraph-flow/scripts/ge-query-pinecone create mode 100755 trustgraph-flow/scripts/ge-write-pinecone create mode 100644 trustgraph-flow/trustgraph/query/doc_embeddings/pinecone/__init__.py create mode 100755 trustgraph-flow/trustgraph/query/doc_embeddings/pinecone/__main__.py create mode 100755 trustgraph-flow/trustgraph/query/doc_embeddings/pinecone/service.py create mode 100644 trustgraph-flow/trustgraph/query/graph_embeddings/pinecone/__init__.py create mode 100755 trustgraph-flow/trustgraph/query/graph_embeddings/pinecone/__main__.py create mode 100755 trustgraph-flow/trustgraph/query/graph_embeddings/pinecone/service.py create mode 100644 trustgraph-flow/trustgraph/storage/doc_embeddings/pinecone/__init__.py create mode 100644 trustgraph-flow/trustgraph/storage/doc_embeddings/pinecone/__main__.py create mode 100644 trustgraph-flow/trustgraph/storage/doc_embeddings/pinecone/write.py create mode 100644 trustgraph-flow/trustgraph/storage/graph_embeddings/pinecone/__init__.py create mode 100755 trustgraph-flow/trustgraph/storage/graph_embeddings/pinecone/__main__.py create mode 100755 trustgraph-flow/trustgraph/storage/graph_embeddings/pinecone/write.py diff --git 
a/templates/components.jsonnet b/templates/components.jsonnet index ec7f862b..26368deb 100644 --- a/templates/components.jsonnet +++ b/templates/components.jsonnet @@ -25,6 +25,7 @@ "trustgraph-base": import "components/trustgraph.jsonnet", "vector-store-milvus": import "components/milvus.jsonnet", "vector-store-qdrant": import "components/qdrant.jsonnet", + "vector-store-pinecone": import "components/pinecone.jsonnet", "vertexai": import "components/vertexai.jsonnet", "null": {}, @@ -34,6 +35,7 @@ "cassandra": import "components/cassandra.jsonnet", "neo4j": import "components/neo4j.jsonnet", "qdrant": import "components/qdrant.jsonnet", + "pinecone": import "components/pinecone.jsonnet", "milvus": import "components/milvus.jsonnet", "trustgraph": import "components/trustgraph.jsonnet", diff --git a/templates/components/pinecone.jsonnet b/templates/components/pinecone.jsonnet new file mode 100644 index 00000000..3422952a --- /dev/null +++ b/templates/components/pinecone.jsonnet @@ -0,0 +1,153 @@ +local base = import "base/base.jsonnet"; +local images = import "values/images.jsonnet"; +local url = import "values/url.jsonnet"; +local cassandra_hosts = "cassandra"; + +{ + + "pinecone-cloud":: "aws", + "pinecone-region":: "us-east-1", + + "store-graph-embeddings" +: { + + create:: function(engine) + + local envSecrets = engine.envSecrets("pinecone-api-key") + .with_env_var("PINECONE_API_KEY", "pinecone-api-key"); + + local container = + engine.container("store-graph-embeddings") + .with_image(images.trustgraph) + .with_command([ + "ge-write-pinecone", + "-p", + url.pulsar, + ]) + .with_env_var_secrets(envSecrets) + .with_limits("0.5", "128M") + .with_reservations("0.1", "128M"); + + local containerSet = engine.containers( + "store-graph-embeddings", [ container ] + ); + + local service = + engine.internalService(containerSet) + .with_port(8080, 8080, "metrics"); + + engine.resources([ + envSecrets, + containerSet, + service, + ]) + + }, + + "query-graph-embeddings" +: 
{ + + create:: function(engine) + + local envSecrets = engine.envSecrets("pinecone-api-key") + .with_env_var("PINECONE_API_KEY", "pinecone-api-key"); + + local container = + engine.container("query-graph-embeddings") + .with_image(images.trustgraph) + .with_command([ + "ge-query-pinecone", + "-p", + url.pulsar, + ]) + .with_env_var_secrets(envSecrets) + .with_limits("0.5", "128M") + .with_reservations("0.1", "128M"); + + local containerSet = engine.containers( + "query-graph-embeddings", [ container ] + ); + + local service = + engine.internalService(containerSet) + .with_port(8080, 8080, "metrics"); + + engine.resources([ + envSecrets, + containerSet, + service, + ]) + + }, + + "store-doc-embeddings" +: { + + create:: function(engine) + + local envSecrets = engine.envSecrets("pinecone-api-key") + .with_env_var("PINECONE_API_KEY", "pinecone-api-key"); + + local container = + engine.container("store-doc-embeddings") + .with_image(images.trustgraph) + .with_command([ + "de-write-pinecone", + "-p", + url.pulsar, + ]) + .with_env_var_secrets(envSecrets) + .with_limits("0.5", "128M") + .with_reservations("0.1", "128M"); + + local containerSet = engine.containers( + "store-doc-embeddings", [ container ] + ); + + local service = + engine.internalService(containerSet) + .with_port(8080, 8080, "metrics"); + + engine.resources([ + envSecrets, + containerSet, + service, + ]) + + }, + + "query-doc-embeddings" +: { + + create:: function(engine) + + local envSecrets = engine.envSecrets("pinecone-api-key") + .with_env_var("PINECONE_API_KEY", "pinecone-api-key"); + + local container = + engine.container("query-doc-embeddings") + .with_image(images.trustgraph) + .with_command([ + "de-query-pinecone", + "-p", + url.pulsar, + ]) + .with_env_var_secrets(envSecrets) + .with_limits("0.5", "128M") + .with_reservations("0.1", "128M"); + + local containerSet = engine.containers( + "query-doc-embeddings", [ container ] + ); + + local service = + engine.internalService(containerSet) + 
.with_port(8080, 8080, "metrics"); + + engine.resources([ + envSecrets, + containerSet, + service, + ]) + + + } + +} + diff --git a/trustgraph-flow/scripts/ge-query-pinecone b/trustgraph-flow/scripts/ge-query-pinecone new file mode 100755 index 00000000..b75aec78 --- /dev/null +++ b/trustgraph-flow/scripts/ge-query-pinecone @@ -0,0 +1,6 @@ +#!/usr/bin/env python3 + +from trustgraph.query.graph_embeddings.pinecone import run + +run() + diff --git a/trustgraph-flow/scripts/ge-write-pinecone b/trustgraph-flow/scripts/ge-write-pinecone new file mode 100755 index 00000000..802a8377 --- /dev/null +++ b/trustgraph-flow/scripts/ge-write-pinecone @@ -0,0 +1,6 @@ +#!/usr/bin/env python3 + +from trustgraph.storage.graph_embeddings.pinecone import run + +run() + diff --git a/trustgraph-flow/setup.py b/trustgraph-flow/setup.py index 44901119..1650122f 100644 --- a/trustgraph-flow/setup.py +++ b/trustgraph-flow/setup.py @@ -60,6 +60,7 @@ setuptools.setup( "jsonschema", "aiohttp", "aiopulsar-py", + "pinecone[grpc]", ], scripts=[ "scripts/api-gateway", @@ -74,8 +75,10 @@ setuptools.setup( "scripts/embeddings-ollama", "scripts/embeddings-vectorize", "scripts/ge-query-milvus", + "scripts/ge-query-pinecone", "scripts/ge-query-qdrant", "scripts/ge-write-milvus", + "scripts/ge-write-pinecone", "scripts/ge-write-qdrant", "scripts/graph-rag", "scripts/kg-extract-definitions", diff --git a/trustgraph-flow/trustgraph/query/doc_embeddings/pinecone/__init__.py b/trustgraph-flow/trustgraph/query/doc_embeddings/pinecone/__init__.py new file mode 100644 index 00000000..ba844705 --- /dev/null +++ b/trustgraph-flow/trustgraph/query/doc_embeddings/pinecone/__init__.py @@ -0,0 +1,3 @@ + +from . 
service import * + diff --git a/trustgraph-flow/trustgraph/query/doc_embeddings/pinecone/__main__.py b/trustgraph-flow/trustgraph/query/doc_embeddings/pinecone/__main__.py new file mode 100755 index 00000000..89684e3e --- /dev/null +++ b/trustgraph-flow/trustgraph/query/doc_embeddings/pinecone/__main__.py @@ -0,0 +1,7 @@ +#!/usr/bin/env python3 + +from . hf import run + +if __name__ == '__main__': + run() + diff --git a/trustgraph-flow/trustgraph/query/doc_embeddings/pinecone/service.py b/trustgraph-flow/trustgraph/query/doc_embeddings/pinecone/service.py new file mode 100755 index 00000000..3fcbfb21 --- /dev/null +++ b/trustgraph-flow/trustgraph/query/doc_embeddings/pinecone/service.py @@ -0,0 +1,142 @@ + +""" +Document embeddings query service. Input is vector, output is an array +of chunks. Pinecone implementation. +""" + +from pinecone import Pinecone, ServerlessSpec +from pinecone.grpc import PineconeGRPC, GRPCClientConfig + +import uuid +import os + +from .... schema import DocumentEmbeddingsRequest, DocumentEmbeddingsResponse +from .... schema import Error, Value +from .... schema import document_embeddings_request_queue +from .... schema import document_embeddings_response_queue +from .... 
base import ConsumerProducer + +module = ".".join(__name__.split(".")[1:-1]) + +default_input_queue = document_embeddings_request_queue +default_output_queue = document_embeddings_response_queue +default_subscriber = module +default_api_key = os.getenv("PINECONE_API_KEY", "not-specified") + +class Processor(ConsumerProducer): + + def __init__(self, **params): + + input_queue = params.get("input_queue", default_input_queue) + output_queue = params.get("output_queue", default_output_queue) + subscriber = params.get("subscriber", default_subscriber) + + self.url = params.get("url", None) + self.api_key = params.get("api_key", default_api_key) + + if self.url: + + self.pinecone = PineconeGRPC( + api_key = self.api_key, + host = self.url + ) + + else: + + self.pinecone = Pinecone(api_key = self.api_key) + + super(Processor, self).__init__( + **params | { + "input_queue": input_queue, + "output_queue": output_queue, + "subscriber": subscriber, + "input_schema": DocumentEmbeddingsRequest, + "output_schema": DocumentEmbeddingsResponse, + "url": self.url, + } + ) + + def handle(self, msg): + + try: + + v = msg.value() + + # Sender-produced ID + id = msg.properties()["id"] + + print(f"Handling input {id}...", flush=True) + + chunks = [] + + for vec in v.vectors: + + dim = len(vec) + + index_name = ( + "d-" + v.user + "-" + str(dim) + ) + + index = self.pinecone.Index(index_name) + + results = index.query( + namespace=v.collection, + vector=vec, + top_k=v.limit, + include_values=False, + include_metadata=True + ) + + search_result = self.client.query_points( + collection_name=collection, + query=vec, + limit=v.limit, + with_payload=True, + ).points + + for r in results.matches: + doc = r.metadata["doc"] + chunks.add(doc) + + print("Send response...", flush=True) + r = DocumentEmbeddingsResponse(documents=chunks, error=None) + self.producer.send(r, properties={"id": id}) + + print("Done.", flush=True) + + except Exception as e: + + print(f"Exception: {e}") + + print("Send 
error response...", flush=True) + + r = DocumentEmbeddingsResponse( + error=Error( + type = "llm-error", + message = str(e), + ), + documents=None, + ) + + self.producer.send(r, properties={"id": id}) + + self.consumer.acknowledge(msg) + + @staticmethod + def add_args(parser): + + ConsumerProducer.add_args( + parser, default_input_queue, default_subscriber, + default_output_queue, + ) + + parser.add_argument( + '-t', '--store-uri', + default=default_store_uri, + help=f'Milvus store URI (default: {default_store_uri})' + ) + +def run(): + + Processor.start(module, __doc__) + diff --git a/trustgraph-flow/trustgraph/query/graph_embeddings/pinecone/__init__.py b/trustgraph-flow/trustgraph/query/graph_embeddings/pinecone/__init__.py new file mode 100644 index 00000000..ba844705 --- /dev/null +++ b/trustgraph-flow/trustgraph/query/graph_embeddings/pinecone/__init__.py @@ -0,0 +1,3 @@ + +from . service import * + diff --git a/trustgraph-flow/trustgraph/query/graph_embeddings/pinecone/__main__.py b/trustgraph-flow/trustgraph/query/graph_embeddings/pinecone/__main__.py new file mode 100755 index 00000000..89684e3e --- /dev/null +++ b/trustgraph-flow/trustgraph/query/graph_embeddings/pinecone/__main__.py @@ -0,0 +1,7 @@ +#!/usr/bin/env python3 + +from . hf import run + +if __name__ == '__main__': + run() + diff --git a/trustgraph-flow/trustgraph/query/graph_embeddings/pinecone/service.py b/trustgraph-flow/trustgraph/query/graph_embeddings/pinecone/service.py new file mode 100755 index 00000000..64ae4d32 --- /dev/null +++ b/trustgraph-flow/trustgraph/query/graph_embeddings/pinecone/service.py @@ -0,0 +1,156 @@ + +""" +Graph embeddings query service. Input is vector, output is list of +entities. Pinecone implementation. +""" + +from pinecone import Pinecone, ServerlessSpec +from pinecone.grpc import PineconeGRPC, GRPCClientConfig + +import uuid +import os + +from .... schema import GraphEmbeddingsRequest, GraphEmbeddingsResponse +from .... schema import Error, Value +from .... 
schema import graph_embeddings_request_queue +from .... schema import graph_embeddings_response_queue +from .... base import ConsumerProducer + +module = ".".join(__name__.split(".")[1:-1]) + +default_input_queue = graph_embeddings_request_queue +default_output_queue = graph_embeddings_response_queue +default_subscriber = module +default_api_key = os.getenv("PINECONE_API_KEY", "not-specified") + +class Processor(ConsumerProducer): + + def __init__(self, **params): + + input_queue = params.get("input_queue", default_input_queue) + output_queue = params.get("output_queue", default_output_queue) + subscriber = params.get("subscriber", default_subscriber) + + self.url = params.get("url", None) + self.api_key = params.get("api_key", default_api_key) + + if self.url: + + self.pinecone = PineconeGRPC( + api_key = self.api_key, + host = self.url + ) + + else: + + self.pinecone = Pinecone(api_key = self.api_key) + + super(Processor, self).__init__( + **params | { + "input_queue": input_queue, + "output_queue": output_queue, + "subscriber": subscriber, + "input_schema": GraphEmbeddingsRequest, + "output_schema": GraphEmbeddingsResponse, + "url": self.url, + } + ) + + def create_value(self, ent): + if ent.startswith("http://") or ent.startswith("https://"): + return Value(value=ent, is_uri=True) + else: + return Value(value=ent, is_uri=False) + + def handle(self, msg): + + try: + + v = msg.value() + + # Sender-produced ID + id = msg.properties()["id"] + + print(f"Handling input {id}...", flush=True) + + entities = set() + + for vec in v.vectors: + + dim = len(vec) + + index_name = ( + "t-" + v.user + "-" + str(dim) + ) + + index = self.pinecone.Index(index_name) + + results = index.query( + namespace=v.collection, + vector=vec, + top_k=v.limit, + include_values=False, + include_metadata=True + ) + + for r in results.matches: + ent = r.metadata["entity"] + entities.add(ent) + + # Convert set to list + entities = list(entities) + + ents2 = [] + + for ent in entities: + 
ents2.append(self.create_value(ent)) + + entities = ents2 + + print("Send response...", flush=True) + r = GraphEmbeddingsResponse(entities=entities, error=None) + self.producer.send(r, properties={"id": id}) + + print("Done.", flush=True) + + except Exception as e: + + print(f"Exception: {e}") + + print("Send error response...", flush=True) + + r = GraphEmbeddingsResponse( + error=Error( + type = "llm-error", + message = str(e), + ), + entities=None, + ) + + self.producer.send(r, properties={"id": id}) + + self.consumer.acknowledge(msg) + + @staticmethod + def add_args(parser): + + ConsumerProducer.add_args( + parser, default_input_queue, default_subscriber, + default_output_queue, + ) + + parser.add_argument( + '-a', '--api-key', + default=default_api_key, + help='Pinecone API key. (default from PINECONE_API_KEY)' + ) + + parser.add_argument( + '-u', '--url', + help='Pinecone URL. If unspecified, serverless is used' + ) + +def run(): + + Processor.start(module, __doc__) + diff --git a/trustgraph-flow/trustgraph/storage/doc_embeddings/pinecone/__init__.py b/trustgraph-flow/trustgraph/storage/doc_embeddings/pinecone/__init__.py new file mode 100644 index 00000000..d891d55f --- /dev/null +++ b/trustgraph-flow/trustgraph/storage/doc_embeddings/pinecone/__init__.py @@ -0,0 +1,3 @@ + +from . write import * + diff --git a/trustgraph-flow/trustgraph/storage/doc_embeddings/pinecone/__main__.py b/trustgraph-flow/trustgraph/storage/doc_embeddings/pinecone/__main__.py new file mode 100644 index 00000000..c05d8c6d --- /dev/null +++ b/trustgraph-flow/trustgraph/storage/doc_embeddings/pinecone/__main__.py @@ -0,0 +1,7 @@ +#!/usr/bin/env python3 + +from . 
write import run + +if __name__ == '__main__': + run() + diff --git a/trustgraph-flow/trustgraph/storage/doc_embeddings/pinecone/write.py b/trustgraph-flow/trustgraph/storage/doc_embeddings/pinecone/write.py new file mode 100644 index 00000000..24cfcb78 --- /dev/null +++ b/trustgraph-flow/trustgraph/storage/doc_embeddings/pinecone/write.py @@ -0,0 +1,167 @@ + +""" +Accepts entity/vector pairs and writes them to a Qdrant store. +""" + +from qdrant_client import QdrantClient +from qdrant_client.models import PointStruct +from qdrant_client.models import Distance, VectorParams + +import time +import uuid +import os + +from .... schema import ChunkEmbeddings +from .... schema import chunk_embeddings_ingest_queue +from .... log_level import LogLevel +from .... base import Consumer + +module = ".".join(__name__.split(".")[1:-1]) + +default_input_queue = chunk_embeddings_ingest_queue +default_subscriber = module +default_api_key = os.getenv("PINECONE_API_KEY", "not-specified") +default_cloud = "aws" +default_region = "us-east-1" + +class Processor(Consumer): + + def __init__(self, **params): + + input_queue = params.get("input_queue", default_input_queue) + subscriber = params.get("subscriber", default_subscriber) + + self.url = params.get("url", None) + self.cloud = params.get("cloud", default_cloud) + self.region = params.get("region", default_region) + self.api_key = params.get("api_key", default_api_key) + + if self.api_key is None: + raise RuntimeError("Pinecone API key must be specified") + + if self.url: + + self.pinecone = PineconeGRPC( + api_key = self.api_key, + host = self.url + ) + + else: + + self.pinecone = Pinecone(api_key = self.api_key) + + super(Processor, self).__init__( + **params | { + "input_queue": input_queue, + "subscriber": subscriber, + "input_schema": ChunkEmbeddings, + "url": self.url, + } + ) + + self.last_index_name = None + + def handle(self, msg): + + v = msg.value() + + chunk = v.chunk.decode("utf-8") + + if chunk == "": return + + for 
vec in v.vectors: + + dim = len(vec) + collection = ( + "d-" + v.metadata.user + "-" + str(dim) + ) + + if index_name != self.last_index_name: + + if not self.pinecone.has_index(index_name): + + try: + + self.pinecone.create_index( + name = index_name, + dimension = dim, + metric = "cosine", + spec = ServerlessSpec( + cloud = self.cloud, + region = self.region, + ) + ) + + for i in range(0, 1000): + + if self.pinecone.describe_index( + index_name + ).status["ready"]: + break + + time.sleep(1) + + if not self.pinecone.describe_index( + index_name + ).status["ready"]: + raise RuntimeError( + "Gave up waiting for index creation" + ) + + except Exception as e: + print("Pinecone index creation failed") + raise e + + print(f"Index {index_name} created", flush=True) + + self.last_index_name = index_name + + index = self.pinecone.Index(index_name) + + records = [ + { + "id": id, + "values": vec, + "metadata": { "doc": chunk }, + } + ] + + index.upsert( + vectors = records, + namespace = v.metadata.collection, + ) + + @staticmethod + def add_args(parser): + + Consumer.add_args( + parser, default_input_queue, default_subscriber, + ) + + parser.add_argument( + '-a', '--api-key', + default=default_api_key, + help='Pinecone API key. (default from PINECONE_API_KEY)' + ) + + parser.add_argument( + '-u', '--url', + help='Pinecone URL. 
If unspecified, serverless is used' + ) + + parser.add_argument( + '--cloud', + default=default_cloud, + help=f'Pinecone cloud, (default: {default_cloud}' + ) + + parser.add_argument( + '--region', + default=default_region, + help=f'Pinecone region, (default: {default_region}' + ) + +def run(): + + Processor.start(module, __doc__) + diff --git a/trustgraph-flow/trustgraph/storage/graph_embeddings/pinecone/__init__.py b/trustgraph-flow/trustgraph/storage/graph_embeddings/pinecone/__init__.py new file mode 100644 index 00000000..d891d55f --- /dev/null +++ b/trustgraph-flow/trustgraph/storage/graph_embeddings/pinecone/__init__.py @@ -0,0 +1,3 @@ + +from . write import * + diff --git a/trustgraph-flow/trustgraph/storage/graph_embeddings/pinecone/__main__.py b/trustgraph-flow/trustgraph/storage/graph_embeddings/pinecone/__main__.py new file mode 100755 index 00000000..c05d8c6d --- /dev/null +++ b/trustgraph-flow/trustgraph/storage/graph_embeddings/pinecone/__main__.py @@ -0,0 +1,7 @@ +#!/usr/bin/env python3 + +from . write import run + +if __name__ == '__main__': + run() + diff --git a/trustgraph-flow/trustgraph/storage/graph_embeddings/pinecone/write.py b/trustgraph-flow/trustgraph/storage/graph_embeddings/pinecone/write.py new file mode 100755 index 00000000..b918c10b --- /dev/null +++ b/trustgraph-flow/trustgraph/storage/graph_embeddings/pinecone/write.py @@ -0,0 +1,167 @@ + +""" +Accepts entity/vector pairs and writes them to a Pinecone store. +""" + +from pinecone import Pinecone, ServerlessSpec +from pinecone.grpc import PineconeGRPC, GRPCClientConfig + +import time +import uuid +import os + +from .... schema import GraphEmbeddings +from .... schema import graph_embeddings_store_queue +from .... log_level import LogLevel +from .... 
base import Consumer + +module = ".".join(__name__.split(".")[1:-1]) + +default_input_queue = graph_embeddings_store_queue +default_subscriber = module +default_api_key = os.getenv("PINECONE_API_KEY", "not-specified") +default_cloud = "aws" +default_region = "us-east-1" + +class Processor(Consumer): + + def __init__(self, **params): + + input_queue = params.get("input_queue", default_input_queue) + subscriber = params.get("subscriber", default_subscriber) + + self.url = params.get("url", None) + self.cloud = params.get("cloud", default_cloud) + self.region = params.get("region", default_region) + self.api_key = params.get("api_key", default_api_key) + + if self.api_key is None: + raise RuntimeError("Pinecone API key must be specified") + + if self.url: + + self.pinecone = PineconeGRPC( + api_key = self.api_key, + host = self.url + ) + + else: + + self.pinecone = Pinecone(api_key = self.api_key) + + super(Processor, self).__init__( + **params | { + "input_queue": input_queue, + "subscriber": subscriber, + "input_schema": GraphEmbeddings, + "url": self.url, + } + ) + + self.last_index_name = None + + def handle(self, msg): + + v = msg.value() + + id = str(uuid.uuid4()) + + if v.entity.value == "" or v.entity.value is None: return + + for vec in v.vectors: + + dim = len(vec) + + index_name = ( + "t-" + v.metadata.user + "-" + str(dim) + ) + + if index_name != self.last_index_name: + + if not self.pinecone.has_index(index_name): + + try: + + self.pinecone.create_index( + name = index_name, + dimension = dim, + metric = "cosine", + spec = ServerlessSpec( + cloud = self.cloud, + region = self.region, + ) + ) + + for i in range(0, 1000): + + if self.pinecone.describe_index( + index_name + ).status["ready"]: + break + + time.sleep(1) + + if not self.pinecone.describe_index( + index_name + ).status["ready"]: + raise RuntimeError( + "Gave up waiting for index creation" + ) + + except Exception as e: + print("Pinecone index creation failed") + raise e + + print(f"Index 
{index_name} created", flush=True) + + self.last_index_name = index_name + + index = self.pinecone.Index(index_name) + + records = [ + { + "id": id, + "values": vec, + "metadata": { "entity": v.entity.value }, + } + ] + + index.upsert( + vectors = records, + namespace = v.metadata.collection, + ) + + @staticmethod + def add_args(parser): + + Consumer.add_args( + parser, default_input_queue, default_subscriber, + ) + + parser.add_argument( + '-a', '--api-key', + default=default_api_key, + help='Pinecone API key. (default from PINECONE_API_KEY)' + ) + + parser.add_argument( + '-u', '--url', + help='Pinecone URL. If unspecified, serverless is used' + ) + + parser.add_argument( + '--cloud', + default=default_cloud, + help=f'Pinecone cloud, (default: {default_cloud}' + ) + + parser.add_argument( + '--region', + default=default_region, + help=f'Pinecone region, (default: {default_region}' + ) + +def run(): + + Processor.start(module, __doc__) + From 340d7a224f51b259cbca9184131dc337b08ef59d Mon Sep 17 00:00:00 2001 From: cybermaggedon Date: Mon, 25 Nov 2024 20:46:35 +0000 Subject: [PATCH 09/37] Feature/rework kg core (#171) * Knowledge cores with msgpack * Put it in the cli package * Tidy up msgpack dumper * Created a loader --- test-api/test-embeddings-api | 3 + trustgraph-cli/scripts/tg-dump-msgpack | 34 ++ trustgraph-cli/scripts/tg-load-kg-core | 179 +++++++++++ trustgraph-cli/scripts/tg-save-kg-core | 190 +++++++++++ trustgraph-cli/setup.py | 4 + .../trustgraph/api/gateway/service.py | 294 +++++++++++++++++- 6 files changed, 700 insertions(+), 4 deletions(-) create mode 100755 trustgraph-cli/scripts/tg-dump-msgpack create mode 100755 trustgraph-cli/scripts/tg-load-kg-core create mode 100755 trustgraph-cli/scripts/tg-save-kg-core diff --git a/test-api/test-embeddings-api b/test-api/test-embeddings-api index ef9ea099..b1defd01 100755 --- a/test-api/test-embeddings-api +++ b/test-api/test-embeddings-api @@ -23,3 +23,6 @@ if "error" in resp: print(f"Error: 
{resp['error']}") sys.exit(1) +print(resp["vectors"]) + + diff --git a/trustgraph-cli/scripts/tg-dump-msgpack b/trustgraph-cli/scripts/tg-dump-msgpack new file mode 100755 index 00000000..9f91394f --- /dev/null +++ b/trustgraph-cli/scripts/tg-dump-msgpack @@ -0,0 +1,34 @@ +#!/usr/bin/env python3 + +import msgpack +import sys +import argparse + +def run(input_file): + + with open(input_file, 'rb') as f: + + unpacker = msgpack.Unpacker(f, raw=False) + + for unpacked in unpacker: + print(unpacked) + +def main(): + + parser = argparse.ArgumentParser( + prog='tg-load-pdf', + description=__doc__, + ) + + parser.add_argument( + '-i', '--input-file', + required=True, + help=f'Input file' + ) + + args = parser.parse_args() + + run(**vars(args)) + +main() + diff --git a/trustgraph-cli/scripts/tg-load-kg-core b/trustgraph-cli/scripts/tg-load-kg-core new file mode 100755 index 00000000..2469772d --- /dev/null +++ b/trustgraph-cli/scripts/tg-load-kg-core @@ -0,0 +1,179 @@ +#!/usr/bin/env python3 + +import aiohttp +import asyncio +import msgpack +import json +import sys +import argparse +import os + +async def load_ge(queue, url): + + async with aiohttp.ClientSession() as session: + + async with session.ws_connect(f"{url}load/graph-embeddings") as ws: + + while True: + + msg = await queue.get() + + msg = { + "metadata": { + "id": msg["m"]["i"], + "metadata": msg["m"]["m"], + "user": msg["m"]["u"], + "collection": msg["m"]["c"], + }, + "vectors": msg["v"], + "entity": msg["e"], + } + + await ws.send_json(msg) + +async def load_triples(queue, url): + async with aiohttp.ClientSession() as session: + async with session.ws_connect(f"{url}load/triples") as ws: + + while True: + + msg = await queue.get() + + msg ={ + "metadata": { + "id": msg["m"]["i"], + "metadata": msg["m"]["m"], + "user": msg["m"]["u"], + "collection": msg["m"]["c"], + }, + "triples": msg["t"], + } + + await ws.send_json(msg) + +ge_counts = 0 +t_counts = 0 + +async def stats(): + + global t_counts + global ge_counts 
+ + while True: + await asyncio.sleep(5) + print( + f"Graph embeddings: {ge_counts:10d} Triples: {t_counts:10d}" + ) + +async def loader(ge_queue, t_queue, path, format, user, collection): + + global t_counts + global ge_counts + + if format == "json": + + raise RuntimeError("Not implemented") + + else: + + with open(path, "rb") as f: + + unpacker = msgpack.Unpacker(f, raw=False) + + for unpacked in unpacker: + + if user: + unpacked["metadata"]["user"] = user + + if collection: + unpacked["metadata"]["collection"] = collection + + + if unpacked[0] == "t": + await t_queue.put(unpacked[1]) + t_counts += 1 + else: + if unpacked[0] == "ge": + await ge_queue.put(unpacked[1]) + ge_counts += 1 + +async def run(**args): + + ge_q = asyncio.Queue() + t_q = asyncio.Queue() + + load_task = asyncio.create_task( + loader( + ge_queue=ge_q, t_queue=t_q, + path=args["input_file"], format=args["format"], + user=args["user"], collection=args["collection"], + ) + + ) + + ge_task = asyncio.create_task( + load_ge( + queue=ge_q, url=args["url"] + "api/v1/" + ) + ) + + triples_task = asyncio.create_task( + load_triples( + queue=t_q, url=args["url"] + "api/v1/" + ) + ) + + stats_task = asyncio.create_task(stats()) + + await load_task + await triples_task + await ge_task + await stats_task + +async def main(): + + parser = argparse.ArgumentParser( + prog='tg-load-pdf', + description=__doc__, + ) + + default_url = os.getenv("TRUSTGRAPH_API", "http://localhost:8088/") + default_user = "trustgraph" + collection = "default" + + parser.add_argument( + '-u', '--url', + default=default_url, + help=f'TrustGraph API URL (default: {default_url})', + ) + + parser.add_argument( + '-i', '--input-file', + # Make it mandatory, difficult to over-write an existing file + required=True, + help=f'Output file' + ) + + parser.add_argument( + '--format', + default="msgpack", + choices=["msgpack", "json"], + help=f'Output format (default: msgpack)', + ) + + parser.add_argument( + '--user', + help=f'User ID to 
load as (default: from input)' + ) + + parser.add_argument( + '--collection', + help=f'Collection ID to load as (default: from input)' + ) + + args = parser.parse_args() + + await run(**vars(args)) + +asyncio.run(main()) + diff --git a/trustgraph-cli/scripts/tg-save-kg-core b/trustgraph-cli/scripts/tg-save-kg-core new file mode 100755 index 00000000..feeea1ef --- /dev/null +++ b/trustgraph-cli/scripts/tg-save-kg-core @@ -0,0 +1,190 @@ +#!/usr/bin/env python3 + +import aiohttp +import asyncio +import msgpack +import json +import sys +import argparse +import os + +async def fetch_ge(queue, user, collection, url): + async with aiohttp.ClientSession() as session: + async with session.ws_connect(f"{url}stream/graph-embeddings") as ws: + async for msg in ws: + if msg.type == aiohttp.WSMsgType.TEXT: + + data = msg.json() + + if user: + if data["metadata"]["user"] != user: + continue + + if collection: + if data["metadata"]["collection"] != collection: + continue + + await queue.put([ + "ge", + { + "m": { + "i": data["metadata"]["id"], + "m": data["metadata"]["metadata"], + "u": data["metadata"]["user"], + "c": data["metadata"]["collection"], + }, + "v": data["vectors"], + "e": data["entity"], + } + ]) + if msg.type == aiohttp.WSMsgType.ERROR: + print("Error") + break + +async def fetch_triples(queue, user, collection, url): + async with aiohttp.ClientSession() as session: + async with session.ws_connect(f"{url}stream/triples") as ws: + async for msg in ws: + if msg.type == aiohttp.WSMsgType.TEXT: + + data = msg.json() + + if user: + if data["metadata"]["user"] != user: + continue + + if collection: + if data["metadata"]["collection"] != collection: + continue + + await queue.put(( + "t", + { + "m": { + "i": data["metadata"]["id"], + "m": data["metadata"]["metadata"], + "u": data["metadata"]["user"], + "c": data["metadata"]["collection"], + }, + "t": data["triples"], + } + )) + if msg.type == aiohttp.WSMsgType.ERROR: + print("Error") + break + +ge_counts = 0 +t_counts = 0 
+ +async def stats(): + + global t_counts + global ge_counts + + while True: + await asyncio.sleep(5) + print( + f"Graph embeddings: {ge_counts:10d} Triples: {t_counts:10d}" + ) + +async def output(queue, path, format): + + global t_counts + global ge_counts + + with open(path, "wb") as f: + + while True: + + msg = await queue.get() + + if format == "msgpack": + f.write(msgpack.packb(msg, use_bin_type=True)) + else: + f.write(json.dumps(msg).encode("utf-8")) + + if msg[0] == "t": + t_counts += 1 + else: + if msg[0] == "ge": + ge_counts += 1 + +async def run(**args): + + q = asyncio.Queue() + + ge_task = asyncio.create_task( + fetch_ge( + queue=q, user=args["user"], collection=args["collection"], + url=args["url"] + "api/v1/" + ) + ) + + triples_task = asyncio.create_task( + fetch_triples( + queue=q, user=args["user"], collection=args["collection"], + url=args["url"] + "api/v1/" + ) + ) + + output_task = asyncio.create_task( + output( + queue=q, path=args["output_file"], format=args["format"], + ) + + ) + + stats_task = asyncio.create_task(stats()) + + await output_task + await triples_task + await ge_task + await stats_task + +async def main(): + + parser = argparse.ArgumentParser( + prog='tg-load-pdf', + description=__doc__, + ) + + default_url = os.getenv("TRUSTGRAPH_API", "http://localhost:8088/") + default_user = "trustgraph" + collection = "default" + + parser.add_argument( + '-u', '--url', + default=default_url, + help=f'TrustGraph API URL (default: {default_url})', + ) + + parser.add_argument( + '-o', '--output-file', + # Make it mandatory, difficult to over-write an existing file + required=True, + help=f'Output file' + ) + + parser.add_argument( + '--format', + default="msgpack", + choices=["msgpack", "json"], + help=f'Output format (default: msgpack)', + ) + + parser.add_argument( + '--user', + help=f'User ID to filter on (default: no filter)' + ) + + parser.add_argument( + '--collection', + help=f'Collection ID to filter on (default: no filter)' + ) + + 
args = parser.parse_args() + + await run(**vars(args)) + +asyncio.run(main()) + diff --git a/trustgraph-cli/setup.py b/trustgraph-cli/setup.py index ec541c8b..1608cfdb 100644 --- a/trustgraph-cli/setup.py +++ b/trustgraph-cli/setup.py @@ -39,6 +39,7 @@ setuptools.setup( "pulsar-client", "rdflib", "tabulate", + "msgpack", ], scripts=[ "scripts/tg-graph-show", @@ -54,5 +55,8 @@ setuptools.setup( "scripts/tg-invoke-agent", "scripts/tg-invoke-prompt", "scripts/tg-invoke-llm", + "scripts/tg-save-kg-core", + "scripts/tg-load-kg-core", + "scripts/tg-dump-msgpack", ] ) diff --git a/trustgraph-flow/trustgraph/api/gateway/service.py b/trustgraph-flow/trustgraph/api/gateway/service.py index 148bc321..6d5f70ce 100755 --- a/trustgraph-flow/trustgraph/api/gateway/service.py +++ b/trustgraph-flow/trustgraph/api/gateway/service.py @@ -14,7 +14,7 @@ module = ".".join(__name__.split(".")[1:-1]) import asyncio import argparse -from aiohttp import web +from aiohttp import web, WSMsgType import json import logging import uuid @@ -47,9 +47,13 @@ from ... schema import GraphRagQuery, GraphRagResponse from ... schema import graph_rag_request_queue from ... schema import graph_rag_response_queue -from ... schema import TriplesQueryRequest, TriplesQueryResponse +from ... schema import TriplesQueryRequest, TriplesQueryResponse, Triples from ... schema import triples_request_queue from ... schema import triples_response_queue +from ... schema import triples_store_queue + +from ... schema import GraphEmbeddings +from ... schema import graph_embeddings_store_queue from ... schema import AgentRequest, AgentResponse from ... 
schema import agent_request_queue @@ -84,6 +88,11 @@ def to_subgraph(x): for t in x ] +class Running: + def __init__(self): self.running = True + def get(self): return self.running + def stop(self): self.running = False + class Publisher: def __init__(self, pulsar_host, topic, schema=None, max_size=10, @@ -132,6 +141,7 @@ class Subscriber: self.consumer_name = consumer_name self.schema = schema self.q = {} + self.full = {} async def run(self): while True: @@ -145,10 +155,19 @@ class Subscriber: ) as consumer: while True: msg = await consumer.receive() - id = msg.properties()["id"] + + try: + id = msg.properties()["id"] + except: + id = None + value = msg.value() if id in self.q: await self.q[id].put(value) + + for q in self.full.values(): + await q.put(value) + except Exception as e: print("Exception:", e, flush=True) @@ -164,6 +183,59 @@ class Subscriber: if id in self.q: del self.q[id] + async def subscribe_all(self, id): + q = asyncio.Queue() + self.full[id] = q + return q + + async def unsubscribe_all(self, id): + if id in self.full: + del self.full[id] + +def serialize_triples(message): + return { + "metadata": { + "id": message.metadata.id, + "metadata": [ + { + "s": t.s.value, + "p": t.p.value, + "o": t.o.value, + } + for t in message.metadata.metadata + ], + "user": message.metadata.user, + "collection": message.metadata.collection, + }, + "triples": [ + { + "s": t.s.value, + "p": t.p.value, + "o": t.o.value, + } + for t in message.triples + ] + } + +def serialize_graph_embeddings(message): + return { + "metadata": { + "id": message.metadata.id, + "metadata": [ + { + "s": t.s.value, + "p": t.p.value, + "o": t.o.value, + } + for t in message.metadata.metadata + ], + "user": message.metadata.user, + "collection": message.metadata.collection, + }, + "vectors": message.vectors, + "entity": message.entity.value, + } + class Api: def __init__(self, **config): @@ -243,6 +315,28 @@ class Api: JsonSchema(EmbeddingsResponse) ) + self.triples_tap = Subscriber( + 
self.pulsar_host, triples_store_queue, + "api-gateway", "api-gateway", + schema=JsonSchema(Triples) + ) + + self.triples_pub = Publisher( + self.pulsar_host, triples_store_queue, + schema=JsonSchema(Triples) + ) + + self.graph_embeddings_tap = Subscriber( + self.pulsar_host, graph_embeddings_store_queue, + "api-gateway", "api-gateway", + schema=JsonSchema(GraphEmbeddings) + ) + + self.graph_embeddings_pub = Publisher( + self.pulsar_host, graph_embeddings_store_queue, + schema=JsonSchema(GraphEmbeddings) + ) + self.document_out = Publisher( self.pulsar_host, document_ingest_queue, schema=JsonSchema(Document), @@ -264,6 +358,20 @@ class Api: web.post("/api/v1/embeddings", self.embeddings), web.post("/api/v1/load/document", self.load_document), web.post("/api/v1/load/text", self.load_text), + web.get("/api/v1/ws", self.socket), + + web.get("/api/v1/stream/triples", self.stream_triples), + web.get( + "/api/v1/stream/graph-embeddings", + self.stream_graph_embeddings + ), + + web.get("/api/v1/load/triples", self.load_triples), + web.get( + "/api/v1/load/graph-embeddings", + self.load_graph_embeddings + ), + ]) async def llm(self, request): @@ -660,6 +768,169 @@ class Api: { "error": str(e) } ) + async def socket(self, request): + + ws = web.WebSocketResponse() + await ws.prepare(request) + + async for msg in ws: + if msg.type == WSMsgType.TEXT: + if msg.data == 'close': + await ws.close() + else: + await ws.send_str(msg.data + '/answer') + elif msg.type == WSMsgType.ERROR: + print('ws connection closed with exception %s' % + ws.exception()) + + print('websocket connection closed') + + return ws + + async def stream(self, q, ws, running, fn): + + while running.get(): + try: + resp = await asyncio.wait_for(q.get(), 0.5) + await ws.send_json(fn(resp)) + + except TimeoutError: + continue + + except Exception as e: + print(f"Exception: {str(e)}", flush=True) + + async def stream_triples(self, request): + + id = str(uuid.uuid4()) + + q = await 
self.triples_tap.subscribe_all(id) + running = Running() + + ws = web.WebSocketResponse() + await ws.prepare(request) + + tsk = asyncio.create_task(self.stream( + q, + ws, + running, + serialize_triples, + )) + + async for msg in ws: + if msg.type == WSMsgType.ERROR: + break + else: + # Ignore incoming messages + pass + + running.stop() + + await self.triples_tap.unsubscribe_all(id) + await tsk + + return ws + + async def stream_graph_embeddings(self, request): + + id = str(uuid.uuid4()) + + q = await self.graph_embeddings_tap.subscribe_all(id) + running = Running() + + ws = web.WebSocketResponse() + await ws.prepare(request) + + tsk = asyncio.create_task(self.stream( + q, + ws, + running, + serialize_graph_embeddings, + )) + + async for msg in ws: + if msg.type == WSMsgType.ERROR: + break + else: + # Ignore incoming messages + pass + + running.stop() + + await self.graph_embeddings_tap.unsubscribe_all(id) + await tsk + + return ws + + async def load_triples(self, request): + + ws = web.WebSocketResponse() + await ws.prepare(request) + + async for msg in ws: + + try: + + if msg.type == WSMsgType.TEXT: + + data = msg.json() + + elt = Triples( + metadata=Metadata( + id=data["metadata"]["id"], + metadata=to_subgraph(data["metadata"]["metadata"]), + user=data["metadata"]["user"], + collection=data["metadata"]["collection"], + ), + triples=to_subgraph(data["triples"]), + ) + + await self.triples_pub.send(None, elt) + + elif msg.type == WSMsgType.ERROR: + break + + except Exception as e: + + print("Exception:", e) + + return ws + + async def load_graph_embeddings(self, request): + + ws = web.WebSocketResponse() + await ws.prepare(request) + + async for msg in ws: + + try: + + if msg.type == WSMsgType.TEXT: + + data = msg.json() + + elt = GraphEmbeddings( + metadata=Metadata( + id=data["metadata"]["id"], + metadata=to_subgraph(data["metadata"]["metadata"]), + user=data["metadata"]["user"], + collection=data["metadata"]["collection"], + ), + 
entity=to_value(data["entity"]), + vectors=data["vectors"], + ) + + await self.graph_embeddings_pub.send(None, elt) + + elif msg.type == WSMsgType.ERROR: + break + + except Exception as e: + + print("Exception:", e) + + return ws + async def app_factory(self): self.llm_pub_task = asyncio.create_task(self.llm_in.run()) @@ -688,6 +959,22 @@ class Api: self.embeddings_out.run() ) + self.triples_tap_task = asyncio.create_task( + self.triples_tap.run() + ) + + self.triples_pub_task = asyncio.create_task( + self.triples_pub.run() + ) + + self.graph_embeddings_tap_task = asyncio.create_task( + self.graph_embeddings_tap.run() + ) + + self.graph_embeddings_pub_task = asyncio.create_task( + self.graph_embeddings_pub.run() + ) + self.doc_ingest_pub_task = asyncio.create_task(self.document_out.run()) self.text_ingest_pub_task = asyncio.create_task(self.text_out.run()) @@ -699,7 +986,6 @@ class Api: def run(): - parser = argparse.ArgumentParser( prog="api-gateway", description=__doc__ From 887fafcf8ca2c3a09df7c5092022406dbb0b4ec4 Mon Sep 17 00:00:00 2001 From: cybermaggedon Date: Tue, 26 Nov 2024 16:46:38 +0000 Subject: [PATCH 10/37] Fix/core save api (#172) * Acknowledge messaages from Pulsar, doh! 
* Change API to deliver a boolean e if value is an entity * Change loaders to use new API * Changes, entity-aware API is complete --- trustgraph-base/trustgraph/api/api.py | 35 +++++---- .../trustgraph/knowledge/__init__.py | 1 + trustgraph-base/trustgraph/knowledge/defs.py | 8 +++ .../trustgraph/knowledge/document.py | 12 +++- .../trustgraph/knowledge/organization.py | 12 +++- .../trustgraph/knowledge/publication.py | 12 +++- trustgraph-cli/scripts/tg-load-pdf | 11 ++- trustgraph-cli/scripts/tg-load-text | 11 ++- .../trustgraph/api/gateway/service.py | 72 +++++++------------ 9 files changed, 104 insertions(+), 70 deletions(-) diff --git a/trustgraph-base/trustgraph/api/api.py b/trustgraph-base/trustgraph/api/api.py index 818e42c3..7942e081 100644 --- a/trustgraph-base/trustgraph/api/api.py +++ b/trustgraph-base/trustgraph/api/api.py @@ -4,7 +4,7 @@ import json import dataclasses import base64 -from trustgraph.knowledge import hash +from trustgraph.knowledge import hash, Uri, Literal class ProtocolException(Exception): pass @@ -12,14 +12,6 @@ class ProtocolException(Exception): class ApplicationException(Exception): pass -class Uri(str): - def is_uri(self): return True - def is_literal(self): return False - -class Literal(str): - def is_uri(self): return False - def is_literal(self): return True - @dataclasses.dataclass class Triple: s : str @@ -213,9 +205,16 @@ class Api: "limit": limit } - if s: input["s"] = s - if p: input["p"] = p - if o: input["o"] = o + if not isinstance(s, Uri): + raise RuntimeError("s must be Uri") + if not isinstance(p, Uri): + raise RuntimeError("p must be Uri") + if not isinstance(o, Uri) and not isinstance(o, Literal): + raise RuntimeError("o must be Uri or Literal") + + if s: input["s"] = { "v": str(s), "e": isinstance(s, Uri), } + if p: input["p"] = { "v": str(p), "e": isinstance(p, Uri), } + if o: input["o"] = { "v": str(o), "e": isinstance(o, Uri), } url = f"{self.url}triples-query" @@ -273,9 +272,9 @@ class Api: if metadata: 
metadata.emit( lambda t: triples.append({ - "s": t.s.value, - "p": t.p.value, - "o": t.o.value + "s": { "v": t["s"], "e": isinstance(t["s"], Uri) }, + "p": { "v": t["p"], "e": isinstance(t["p"], Uri) }, + "o": { "v": t["o"], "e": isinstance(t["o"], Uri) } }) ) @@ -312,9 +311,9 @@ class Api: if metadata: metadata.emit( lambda t: triples.append({ - "s": t.s.value, - "p": t.p.value, - "o": t.o.value + "s": { "v": t["s"], "e": isinstance(t["s"], Uri) }, + "p": { "v": t["p"], "e": isinstance(t["p"], Uri) }, + "o": { "v": t["o"], "e": isinstance(t["o"], Uri) } }) ) diff --git a/trustgraph-base/trustgraph/knowledge/__init__.py b/trustgraph-base/trustgraph/knowledge/__init__.py index 0ab6b5db..8349abf0 100644 --- a/trustgraph-base/trustgraph/knowledge/__init__.py +++ b/trustgraph-base/trustgraph/knowledge/__init__.py @@ -1,4 +1,5 @@ +from . defs import * from . identifier import * from . publication import * from . document import * diff --git a/trustgraph-base/trustgraph/knowledge/defs.py b/trustgraph-base/trustgraph/knowledge/defs.py index b95863c6..d6290930 100644 --- a/trustgraph-base/trustgraph/knowledge/defs.py +++ b/trustgraph-base/trustgraph/knowledge/defs.py @@ -23,3 +23,11 @@ URL = 'https://schema.org/url' IDENTIFIER = 'https://schema.org/identifier' KEYWORD = 'https://schema.org/keywords' +class Uri(str): + def is_uri(self): return True + def is_literal(self): return False + +class Literal(str): + def is_uri(self): return False + def is_literal(self): return True + diff --git a/trustgraph-base/trustgraph/knowledge/document.py b/trustgraph-base/trustgraph/knowledge/document.py index dc2f43e3..99d06c72 100644 --- a/trustgraph-base/trustgraph/knowledge/document.py +++ b/trustgraph-base/trustgraph/knowledge/document.py @@ -1,6 +1,16 @@ from . defs import * -from .. 
schema import Triple, Value + +def Value(value, is_uri): + if is_uri: + return Uri(value) + else: + return Literal(value) + +def Triple(s, p, o): + return { + "s": s, "p": p, "o": o, + } class DigitalDocument: diff --git a/trustgraph-base/trustgraph/knowledge/organization.py b/trustgraph-base/trustgraph/knowledge/organization.py index 1129dd6c..5653aa97 100644 --- a/trustgraph-base/trustgraph/knowledge/organization.py +++ b/trustgraph-base/trustgraph/knowledge/organization.py @@ -1,6 +1,16 @@ from . defs import * -from .. schema import Triple, Value + +def Value(value, is_uri): + if is_uri: + return Uri(value) + else: + return Literal(value) + +def Triple(s, p, o): + return { + "s": s, "p": p, "o": o, + } class Organization: def __init__(self, id, name=None, description=None): diff --git a/trustgraph-base/trustgraph/knowledge/publication.py b/trustgraph-base/trustgraph/knowledge/publication.py index 3c9d41c8..d197df93 100644 --- a/trustgraph-base/trustgraph/knowledge/publication.py +++ b/trustgraph-base/trustgraph/knowledge/publication.py @@ -1,6 +1,16 @@ from . defs import * -from .. 
schema import Triple, Value + +def Value(value, is_uri): + if is_uri: + return Uri(value) + else: + return Literal(value) + +def Triple(s, p, o): + return { + "s": s, "p": p, "o": o, + } class PublicationEvent: def __init__( diff --git a/trustgraph-cli/scripts/tg-load-pdf b/trustgraph-cli/scripts/tg-load-pdf index 18ac57cb..0dc8ced6 100755 --- a/trustgraph-cli/scripts/tg-load-pdf +++ b/trustgraph-cli/scripts/tg-load-pdf @@ -14,7 +14,7 @@ import time import uuid from trustgraph.schema import Document, document_ingest_queue -from trustgraph.schema import Metadata +from trustgraph.schema import Metadata, Triple, Value from trustgraph.log_level import LogLevel from trustgraph.knowledge import hash, to_uri from trustgraph.knowledge import PREF_PUBEV, PREF_DOC, PREF_ORG @@ -79,7 +79,14 @@ class Loader: r = Document( metadata=Metadata( id=id, - metadata=triples, + metadata=[ + Triple( + s=Value(value=t["s"]["v"], is_uri=t["s"]["e"]), + p=Value(value=t["p"]["v"], is_uri=t["p"]["e"]), + o=Value(value=t["o"]["v"], is_uri=t["o"]["e"]) + ) + for t in triples + ], user=self.user, collection=self.collection, ), diff --git a/trustgraph-cli/scripts/tg-load-text b/trustgraph-cli/scripts/tg-load-text index e49ee7a9..6ff8d09a 100755 --- a/trustgraph-cli/scripts/tg-load-text +++ b/trustgraph-cli/scripts/tg-load-text @@ -13,7 +13,7 @@ import time import uuid from trustgraph.schema import TextDocument, text_ingest_queue -from trustgraph.schema import Metadata +from trustgraph.schema import Metadata, Triple, Value from trustgraph.log_level import LogLevel from trustgraph.knowledge import hash, to_uri from trustgraph.knowledge import PREF_PUBEV, PREF_DOC, PREF_ORG @@ -78,7 +78,14 @@ class Loader: r = TextDocument( metadata=Metadata( id=id, - metadata=triples, + metadata=[ + Triple( + s=Value(value=t["s"]["v"], is_uri=t["s"]["e"]), + p=Value(value=t["p"]["v"], is_uri=t["p"]["e"]), + o=Value(value=t["o"]["v"], is_uri=t["o"]["e"]) + ) + for t in triples + ], user=self.user, 
collection=self.collection, ), diff --git a/trustgraph-flow/trustgraph/api/gateway/service.py b/trustgraph-flow/trustgraph/api/gateway/service.py index 6d5f70ce..0ae01d3a 100755 --- a/trustgraph-flow/trustgraph/api/gateway/service.py +++ b/trustgraph-flow/trustgraph/api/gateway/service.py @@ -73,10 +73,7 @@ default_timeout = 600 default_port = 8088 def to_value(x): - if x.startswith("http:") or x.startswith("https:"): - return Value(value=x, is_uri=True) - else: - return Value(value=x, is_uri=False) + return Value(value=x["v"], is_uri=x["e"]) def to_subgraph(x): return [ @@ -156,6 +153,9 @@ class Subscriber: while True: msg = await consumer.receive() + # Acknowledge successful reception of the message + await consumer.acknowledge(msg) + try: id = msg.properties()["id"] except: @@ -192,43 +192,41 @@ class Subscriber: if id in self.full: del self.full[id] +def serialize_value(v): + return { + "v": v.value, + "e": v.is_uri, + } + +def serialize_triple(t): + return { + "s": serialize_value(t.s), + "p": serialize_value(t.p), + "o": serialize_value(t.o) + } + +def serialize_subgraph(sg): + return [ + serialize_triple(t) + for t in sg + ] + def serialize_triples(message): return { "metadata": { "id": message.metadata.id, - "metadata": [ - { - "s": t.s.value, - "p": t.p.value, - "o": t.o.value, - } - for t in message.metadata.metadata - ], + "metadata": serialize_subgraph(message.metadata.metadata), "user": message.metadata.user, "collection": message.metadata.collection, }, - "triples": [ - { - "s": t.s.value, - "p": t.p.value, - "o": t.o.value, - } - for t in message.triples - ] + "triples": serialize_subgraph(message.triples), } def serialize_graph_embeddings(message): return { "metadata": { "id": message.metadata.id, - "metadata": [ - { - "s": t.s.value, - "p": t.p.value, - "o": t.o.value, - } - for t in message.metadata.metadata - ], + "metadata": serialize_subgraph(message.metadata.metadata), "user": message.metadata.user, "collection": message.metadata.collection, 
}, @@ -560,23 +558,7 @@ class Api: return web.json_response( { - "response": [ - { - "s": { - "v": t.s.value, - "e": t.s.is_uri, - }, - "p": { - "v": t.p.value, - "e": t.p.is_uri, - }, - "o": { - "v": t.o.value, - "e": t.o.is_uri, - } - } - for t in resp.triples - ] + "response": serialize_subgraph(resp.triples), } ) From 99e3e43f7ba621f8e28bab77d6b84d4a996053b5 Mon Sep 17 00:00:00 2001 From: cybermaggedon Date: Tue, 26 Nov 2024 16:58:47 +0000 Subject: [PATCH 11/37] Fix/kg cli help (#173) * Fix kg-core-help --- trustgraph-cli/scripts/tg-dump-msgpack | 8 +++++++- trustgraph-cli/scripts/tg-load-kg-core | 7 ++++++- trustgraph-cli/scripts/tg-save-kg-core | 11 ++++++++++- 3 files changed, 23 insertions(+), 3 deletions(-) diff --git a/trustgraph-cli/scripts/tg-dump-msgpack b/trustgraph-cli/scripts/tg-dump-msgpack index 9f91394f..dc4a8139 100755 --- a/trustgraph-cli/scripts/tg-dump-msgpack +++ b/trustgraph-cli/scripts/tg-dump-msgpack @@ -1,5 +1,11 @@ #!/usr/bin/env python3 +"" +This utility reads a knowledge core in msgpack format and outputs its +contents in JSON form to standard output. This is useful only as a +diagnostic utility. +""" + import msgpack import sys import argparse @@ -16,7 +22,7 @@ def run(input_file): def main(): parser = argparse.ArgumentParser( - prog='tg-load-pdf', + prog='tg-dump-msgpack', description=__doc__, ) diff --git a/trustgraph-cli/scripts/tg-load-kg-core b/trustgraph-cli/scripts/tg-load-kg-core index 2469772d..13fac153 100755 --- a/trustgraph-cli/scripts/tg-load-kg-core +++ b/trustgraph-cli/scripts/tg-load-kg-core @@ -1,5 +1,10 @@ #!/usr/bin/env python3 +"""This utility takes a knowledge core and loads it into a running TrustGraph +through the API. The knowledge core should be in msgpack format, which is the +default format produce by tg-save-kg-core. 
+""" + import aiohttp import asyncio import msgpack @@ -133,7 +138,7 @@ async def run(**args): async def main(): parser = argparse.ArgumentParser( - prog='tg-load-pdf', + prog='tg-load-kg-core', description=__doc__, ) diff --git a/trustgraph-cli/scripts/tg-save-kg-core b/trustgraph-cli/scripts/tg-save-kg-core index feeea1ef..3c03383f 100755 --- a/trustgraph-cli/scripts/tg-save-kg-core +++ b/trustgraph-cli/scripts/tg-save-kg-core @@ -1,5 +1,14 @@ #!/usr/bin/env python3 +""" +This utility connects to a running TrustGraph through the API and creates +a knowledge core from the data streaming through the processing queues. +For completeness of data, tg-save-kg-core should be initiated before data +loading takes place. The default output format, msgpack should be used. +JSON output format is also available - msgpack produces a more compact +representation, which is also more performant to load. +""" + import aiohttp import asyncio import msgpack @@ -144,7 +153,7 @@ async def run(**args): async def main(): parser = argparse.ArgumentParser( - prog='tg-load-pdf', + prog='tg-save-kg-core', description=__doc__, ) From 2f7ccb2ef85fd70b08b25f5fdc4f9671db193c62 Mon Sep 17 00:00:00 2001 From: cybermaggedon Date: Wed, 27 Nov 2024 17:24:27 +0000 Subject: [PATCH 12/37] - Reduce back-pressure on tg-load-kg-core (#179) - Save entity in correct format in tg-save-core --- trustgraph-cli/scripts/tg-load-kg-core | 6 ++++-- trustgraph-flow/trustgraph/api/gateway/service.py | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/trustgraph-cli/scripts/tg-load-kg-core b/trustgraph-cli/scripts/tg-load-kg-core index 13fac153..e2d0a405 100755 --- a/trustgraph-cli/scripts/tg-load-kg-core +++ b/trustgraph-cli/scripts/tg-load-kg-core @@ -104,8 +104,10 @@ async def loader(ge_queue, t_queue, path, format, user, collection): async def run(**args): - ge_q = asyncio.Queue() - t_q = asyncio.Queue() + # Maxsize on queues reduces back-pressure so tg-load-kg-core doesn't + # grow to eat all 
memory + ge_q = asyncio.Queue(maxsize=500) + t_q = asyncio.Queue(maxsize=500) load_task = asyncio.create_task( loader( diff --git a/trustgraph-flow/trustgraph/api/gateway/service.py b/trustgraph-flow/trustgraph/api/gateway/service.py index 0ae01d3a..7b12e1a2 100755 --- a/trustgraph-flow/trustgraph/api/gateway/service.py +++ b/trustgraph-flow/trustgraph/api/gateway/service.py @@ -231,7 +231,7 @@ def serialize_graph_embeddings(message): "collection": message.metadata.collection, }, "vectors": message.vectors, - "entity": message.entity.value, + "entity": message.entity, } class Api: From b2f7b3452926dd6156be9713032609ffb9c5593a Mon Sep 17 00:00:00 2001 From: cybermaggedon Date: Wed, 27 Nov 2024 17:36:15 +0000 Subject: [PATCH 13/37] Fix/pinecone integration (#180) * Add missing pinecone references * Add missing Pinecone executables --- trustgraph-flow/scripts/de-query-pinecone | 6 ++++++ trustgraph-flow/scripts/de-write-pinecone | 6 ++++++ trustgraph-flow/setup.py | 2 ++ 3 files changed, 14 insertions(+) create mode 100755 trustgraph-flow/scripts/de-query-pinecone create mode 100755 trustgraph-flow/scripts/de-write-pinecone diff --git a/trustgraph-flow/scripts/de-query-pinecone b/trustgraph-flow/scripts/de-query-pinecone new file mode 100755 index 00000000..b21d9045 --- /dev/null +++ b/trustgraph-flow/scripts/de-query-pinecone @@ -0,0 +1,6 @@ +#!/usr/bin/env python3 + +from trustgraph.query.doc_embeddings.pinecone import run + +run() + diff --git a/trustgraph-flow/scripts/de-write-pinecone b/trustgraph-flow/scripts/de-write-pinecone new file mode 100755 index 00000000..eb604747 --- /dev/null +++ b/trustgraph-flow/scripts/de-write-pinecone @@ -0,0 +1,6 @@ +#!/usr/bin/env python3 + +from trustgraph.storage.doc_embeddings.pinecone import run + +run() + diff --git a/trustgraph-flow/setup.py b/trustgraph-flow/setup.py index 1650122f..2cbbdee4 100644 --- a/trustgraph-flow/setup.py +++ b/trustgraph-flow/setup.py @@ -69,8 +69,10 @@ setuptools.setup( "scripts/chunker-token", 
"scripts/de-query-milvus", "scripts/de-query-qdrant", + "scripts/de-query-pinecone", "scripts/de-write-milvus", "scripts/de-write-qdrant", + "scripts/de-write-pinecone", "scripts/document-rag", "scripts/embeddings-ollama", "scripts/embeddings-vectorize", From 9c97ca32f6e7792ecd29fbd77093126b5df84253 Mon Sep 17 00:00:00 2001 From: cybermaggedon Date: Thu, 28 Nov 2024 19:21:28 +0000 Subject: [PATCH 14/37] Feature/memgraph (#182) * Add database override to bolt output, default is neo4j * Add memgraph templates --- templates/components.jsonnet | 2 + templates/components/memgraph.jsonnet | 81 +++++++++++++++++++ templates/stores/memgraph.jsonnet | 65 +++++++++++++++ templates/values/images.jsonnet | 4 +- .../trustgraph/query/triples/neo4j/service.py | 12 ++- .../trustgraph/storage/triples/neo4j/write.py | 12 ++- 6 files changed, 171 insertions(+), 5 deletions(-) create mode 100644 templates/components/memgraph.jsonnet create mode 100644 templates/stores/memgraph.jsonnet diff --git a/templates/components.jsonnet b/templates/components.jsonnet index 26368deb..1abf44a4 100644 --- a/templates/components.jsonnet +++ b/templates/components.jsonnet @@ -12,6 +12,7 @@ "graph-rag": import "components/graph-rag.jsonnet", "triple-store-cassandra": import "components/cassandra.jsonnet", "triple-store-neo4j": import "components/neo4j.jsonnet", + "triple-store-memgraph": import "components/memgraph.jsonnet", "llamafile": import "components/llamafile.jsonnet", "ollama": import "components/ollama.jsonnet", "openai": import "components/openai.jsonnet", @@ -34,6 +35,7 @@ // FIXME: Dupes "cassandra": import "components/cassandra.jsonnet", "neo4j": import "components/neo4j.jsonnet", + "memgraph": import "components/memgraph.jsonnet", "qdrant": import "components/qdrant.jsonnet", "pinecone": import "components/pinecone.jsonnet", "milvus": import "components/milvus.jsonnet", diff --git a/templates/components/memgraph.jsonnet b/templates/components/memgraph.jsonnet new file mode 100644 index 
00000000..5ec0a76e --- /dev/null +++ b/templates/components/memgraph.jsonnet @@ -0,0 +1,81 @@ +local base = import "base/base.jsonnet"; +local images = import "values/images.jsonnet"; +local url = import "values/url.jsonnet"; +local memgraph = import "stores/memgraph.jsonnet"; + +memgraph + { + + "memgraph-url":: "bolt://memgraph:7687", + "memgraph-database":: "memgraph", + + "store-triples" +: { + + create:: function(engine) + + local container = + engine.container("store-triples") + .with_image(images.trustgraph) + .with_command([ + "triples-write-neo4j", + "-p", + url.pulsar, + "-g", + $["memgraph-url"], + "--database", + $["memgraph-database"], + ]) + .with_limits("0.5", "128M") + .with_reservations("0.1", "128M"); + + local containerSet = engine.containers( + "store-triples", [ container ] + ); + + local service = + engine.internalService(containerSet) + .with_port(8080, 8080, "metrics"); + + engine.resources([ + containerSet, + service, + ]) + + }, + + "query-triples" +: { + + create:: function(engine) + + local container = + engine.container("query-triples") + .with_image(images.trustgraph) + .with_command([ + "triples-query-neo4j", + "-p", + url.pulsar, + "-g", + $["memgraph-url"], + "--database", + $["memgraph-database"], + ]) + .with_limits("0.5", "128M") + .with_reservations("0.1", "128M"); + + local containerSet = engine.containers( + "query-triples", [ container ] + ); + + local service = + engine.internalService(containerSet) + .with_port(8080, 8080, "metrics"); + + engine.resources([ + containerSet, + service, + ]) + + + } + +} + diff --git a/templates/stores/memgraph.jsonnet b/templates/stores/memgraph.jsonnet new file mode 100644 index 00000000..8f8b6216 --- /dev/null +++ b/templates/stores/memgraph.jsonnet @@ -0,0 +1,65 @@ +local base = import "base/base.jsonnet"; +local images = import "values/images.jsonnet"; + +{ + + "memgraph" +: { + + create:: function(engine) + + local container = + engine.container("memgraph") + 
.with_image(images.memgraph_mage) + .with_limits("1.0", "1000M") + .with_reservations("0.5", "1000M") + .with_port(7474, 7474, "api") + .with_port(7687, 7687, "api2"); + + local containerSet = engine.containers( + "memgraph", [ container ] + ); + + local service = + engine.service(containerSet) + .with_port(7474, 7474, "api") + .with_port(7687, 7687, "api2"); + + engine.resources([ + containerSet, + service, + ]) + + }, + + "memgraph-lab" +: { + + create:: function(engine) + + local container = + engine.container("lab") + .with_image(images.memgraph_lab) + .with_environment({ + QUICK_CONNECT_MG_HOST: "memgraph", + QUICK_CONNECT_MG_PORT: "7687", + }) + .with_limits("1.0", "512M") + .with_reservations("0.5", "512M") + .with_port(3010, 3000, "http"); + + local containerSet = engine.containers( + "lab", [ container ] + ); + + local service = + engine.service(containerSet) + .with_port(3010, 3010, "http"); + + engine.resources([ + containerSet, + service, + ]) + + }, + +} + diff --git a/templates/values/images.jsonnet b/templates/values/images.jsonnet index 01ecee4d..c583815b 100644 --- a/templates/values/images.jsonnet +++ b/templates/values/images.jsonnet @@ -10,5 +10,7 @@ local version = import "version.jsonnet"; prometheus: "docker.io/prom/prometheus:v2.53.2", grafana: "docker.io/grafana/grafana:11.1.4", trustgraph: "docker.io/trustgraph/trustgraph-flow:" + version, - qdrant: "docker.io/qdrant/qdrant:v1.11.1" + qdrant: "docker.io/qdrant/qdrant:v1.11.1", + memgraph_mage: "docker.io/memgraph/memgraph-mage:1.22-memgraph-2.22", + memgraph_lab: "docker.io/memgraph/lab:2.19.1", } diff --git a/trustgraph-flow/trustgraph/query/triples/neo4j/service.py b/trustgraph-flow/trustgraph/query/triples/neo4j/service.py index 9038f76d..2caa0193 100755 --- a/trustgraph-flow/trustgraph/query/triples/neo4j/service.py +++ b/trustgraph-flow/trustgraph/query/triples/neo4j/service.py @@ -21,6 +21,7 @@ default_subscriber = module default_graph_host = 'bolt://neo4j:7687' default_username = 
'neo4j' default_password = 'password' +default_database = 'neo4j' class Processor(ConsumerProducer): @@ -31,7 +32,8 @@ class Processor(ConsumerProducer): subscriber = params.get("subscriber", default_subscriber) graph_host = params.get("graph_host", default_graph_host) username = params.get("username", default_username) - password = params.get("passowrd", default_password) + password = params.get("password", default_password) + database = params.get("database", default_database) super(Processor, self).__init__( **params | { @@ -44,7 +46,7 @@ class Processor(ConsumerProducer): } ) - self.db = "neo4j" + self.db = database self.io = GraphDatabase.driver(graph_host, auth=(username, password)) @@ -342,6 +344,12 @@ class Processor(ConsumerProducer): help=f'Neo4j password (default: {default_password})' ) + parser.add_argument( + '--database', + default=default_database, + help=f'Neo4j database (default: {default_database})' + ) + def run(): Processor.start(module, __doc__) diff --git a/trustgraph-flow/trustgraph/storage/triples/neo4j/write.py b/trustgraph-flow/trustgraph/storage/triples/neo4j/write.py index 82302e96..929333e5 100755 --- a/trustgraph-flow/trustgraph/storage/triples/neo4j/write.py +++ b/trustgraph-flow/trustgraph/storage/triples/neo4j/write.py @@ -24,6 +24,7 @@ default_subscriber = module default_graph_host = 'bolt://neo4j:7687' default_username = 'neo4j' default_password = 'password' +default_database = 'neo4j' class Processor(Consumer): @@ -33,7 +34,8 @@ class Processor(Consumer): subscriber = params.get("subscriber", default_subscriber) graph_host = params.get("graph_host", default_graph_host) username = params.get("username", default_username) - password = params.get("passowrd", default_password) + password = params.get("password", default_password) + database = params.get("database", default_database) super(Processor, self).__init__( **params | { @@ -44,7 +46,7 @@ class Processor(Consumer): } ) - self.db = "neo4j" + self.db = database self.io = 
GraphDatabase.driver(graph_host, auth=(username, password)) @@ -152,6 +154,12 @@ class Processor(Consumer): help=f'Neo4j password (default: {default_password})' ) + parser.add_argument( + '--database', + default=default_database, + help=f'Neo4j database (default: {default_database})' + ) + def run(): Processor.start(module, __doc__) From c52b70c2864a6085b45c6e73432b921f539d3d80 Mon Sep 17 00:00:00 2001 From: cybermaggedon Date: Thu, 28 Nov 2024 19:21:41 +0000 Subject: [PATCH 15/37] Fix metadata load format (#181) --- trustgraph-cli/scripts/tg-load-pdf | 15 ++++++++++++--- trustgraph-cli/scripts/tg-load-text | 17 +++++++++++++---- 2 files changed, 25 insertions(+), 7 deletions(-) diff --git a/trustgraph-cli/scripts/tg-load-pdf b/trustgraph-cli/scripts/tg-load-pdf index 0dc8ced6..08ce6f91 100755 --- a/trustgraph-cli/scripts/tg-load-pdf +++ b/trustgraph-cli/scripts/tg-load-pdf @@ -81,9 +81,18 @@ class Loader: id=id, metadata=[ Triple( - s=Value(value=t["s"]["v"], is_uri=t["s"]["e"]), - p=Value(value=t["p"]["v"], is_uri=t["p"]["e"]), - o=Value(value=t["o"]["v"], is_uri=t["o"]["e"]) + s=Value( + value=t["s"], + is_uri=isinstance(t["s"], Uri) + ), + p=Value( + value=t["p"], + is_uri=isinstance(t["p"], Uri) + ), + o=Value( + value=t["o"], + is_uri=isinstance(t["o"], Uri) + ), ) for t in triples ], diff --git a/trustgraph-cli/scripts/tg-load-text b/trustgraph-cli/scripts/tg-load-text index 6ff8d09a..51664a1b 100755 --- a/trustgraph-cli/scripts/tg-load-text +++ b/trustgraph-cli/scripts/tg-load-text @@ -15,7 +15,7 @@ import uuid from trustgraph.schema import TextDocument, text_ingest_queue from trustgraph.schema import Metadata, Triple, Value from trustgraph.log_level import LogLevel -from trustgraph.knowledge import hash, to_uri +from trustgraph.knowledge import hash, to_uri, Literal, Uri from trustgraph.knowledge import PREF_PUBEV, PREF_DOC, PREF_ORG from trustgraph.knowledge import Organization, PublicationEvent from trustgraph.knowledge import DigitalDocument @@ -80,9 
+80,18 @@ class Loader: id=id, metadata=[ Triple( - s=Value(value=t["s"]["v"], is_uri=t["s"]["e"]), - p=Value(value=t["p"]["v"], is_uri=t["p"]["e"]), - o=Value(value=t["o"]["v"], is_uri=t["o"]["e"]) + s=Value( + value=t["s"], + is_uri=isinstance(t["s"], Uri) + ), + p=Value( + value=t["p"], + is_uri=isinstance(t["p"], Uri) + ), + o=Value( + value=t["o"], + is_uri=isinstance(t["o"], Uri) + ), ) for t in triples ], From cf564ed1473dfa100fe3bbab50c95300eaeec855 Mon Sep 17 00:00:00 2001 From: cybermaggedon Date: Thu, 28 Nov 2024 21:26:36 +0000 Subject: [PATCH 16/37] PDF loader symbol error fixed (#183) --- trustgraph-cli/scripts/tg-load-pdf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/trustgraph-cli/scripts/tg-load-pdf b/trustgraph-cli/scripts/tg-load-pdf index 08ce6f91..a0d2b3bc 100755 --- a/trustgraph-cli/scripts/tg-load-pdf +++ b/trustgraph-cli/scripts/tg-load-pdf @@ -16,7 +16,7 @@ import uuid from trustgraph.schema import Document, document_ingest_queue from trustgraph.schema import Metadata, Triple, Value from trustgraph.log_level import LogLevel -from trustgraph.knowledge import hash, to_uri +from trustgraph.knowledge import hash, to_uri, Uri from trustgraph.knowledge import PREF_PUBEV, PREF_DOC, PREF_ORG from trustgraph.knowledge import Organization, PublicationEvent from trustgraph.knowledge import DigitalDocument From c844d805e52c44b6a010244cd3f892497babba4c Mon Sep 17 00:00:00 2001 From: Cyber MacGeddon Date: Fri, 29 Nov 2024 17:03:31 +0000 Subject: [PATCH 17/37] Setup for release 0.17 branch --- .github/workflows/release.yaml | 2 +- trustgraph-bedrock/setup.py | 2 +- trustgraph-cli/setup.py | 2 +- trustgraph-embeddings-hf/setup.py | 4 ++-- trustgraph-flow/setup.py | 2 +- trustgraph-parquet/setup.py | 2 +- trustgraph-vertexai/setup.py | 2 +- trustgraph/setup.py | 14 +++++++------- 8 files changed, 15 insertions(+), 15 deletions(-) diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index 0d6d2d29..fc85a6a8 100644 
--- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -5,7 +5,7 @@ on: workflow_dispatch: push: tags: - - v0.16.* + - v0.17.* permissions: contents: read diff --git a/trustgraph-bedrock/setup.py b/trustgraph-bedrock/setup.py index 1a99e227..d92cc9c7 100644 --- a/trustgraph-bedrock/setup.py +++ b/trustgraph-bedrock/setup.py @@ -34,7 +34,7 @@ setuptools.setup( python_requires='>=3.8', download_url = "https://github.com/trustgraph-ai/trustgraph/archive/refs/tags/v" + version + ".tar.gz", install_requires=[ - "trustgraph-base>=0.16,<0.17", + "trustgraph-base>=0.17,<0.18", "pulsar-client", "prometheus-client", "boto3", diff --git a/trustgraph-cli/setup.py b/trustgraph-cli/setup.py index 1608cfdb..e9de429a 100644 --- a/trustgraph-cli/setup.py +++ b/trustgraph-cli/setup.py @@ -34,7 +34,7 @@ setuptools.setup( python_requires='>=3.8', download_url = "https://github.com/trustgraph-ai/trustgraph/archive/refs/tags/v" + version + ".tar.gz", install_requires=[ - "trustgraph-base>=0.16,<0.17", + "trustgraph-base>=0.17,<0.18", "requests", "pulsar-client", "rdflib", diff --git a/trustgraph-embeddings-hf/setup.py b/trustgraph-embeddings-hf/setup.py index 2fbe079e..25ccfeab 100644 --- a/trustgraph-embeddings-hf/setup.py +++ b/trustgraph-embeddings-hf/setup.py @@ -34,8 +34,8 @@ setuptools.setup( python_requires='>=3.8', download_url = "https://github.com/trustgraph-ai/trustgraph/archive/refs/tags/v" + version + ".tar.gz", install_requires=[ - "trustgraph-base>=0.16,<0.17", - "trustgraph-flow>=0.16,<0.17", + "trustgraph-base>=0.17,<0.18", + "trustgraph-flow>=0.17,<0.18", "torch", "urllib3", "transformers", diff --git a/trustgraph-flow/setup.py b/trustgraph-flow/setup.py index 2cbbdee4..8e81e12c 100644 --- a/trustgraph-flow/setup.py +++ b/trustgraph-flow/setup.py @@ -34,7 +34,7 @@ setuptools.setup( python_requires='>=3.8', download_url = "https://github.com/trustgraph-ai/trustgraph/archive/refs/tags/v" + version + ".tar.gz", install_requires=[ - 
"trustgraph-base>=0.16,<0.17", + "trustgraph-base>=0.17,<0.18", "urllib3", "rdflib", "pymilvus", diff --git a/trustgraph-parquet/setup.py b/trustgraph-parquet/setup.py index 7dab60ac..dfe29653 100644 --- a/trustgraph-parquet/setup.py +++ b/trustgraph-parquet/setup.py @@ -34,7 +34,7 @@ setuptools.setup( python_requires='>=3.8', download_url = "https://github.com/trustgraph-ai/trustgraph/archive/refs/tags/v" + version + ".tar.gz", install_requires=[ - "trustgraph-base>=0.16,<0.17", + "trustgraph-base>=0.17,<0.18", "pulsar-client", "prometheus-client", "pyarrow", diff --git a/trustgraph-vertexai/setup.py b/trustgraph-vertexai/setup.py index d19e8c0d..3ce10305 100644 --- a/trustgraph-vertexai/setup.py +++ b/trustgraph-vertexai/setup.py @@ -34,7 +34,7 @@ setuptools.setup( python_requires='>=3.8', download_url = "https://github.com/trustgraph-ai/trustgraph/archive/refs/tags/v" + version + ".tar.gz", install_requires=[ - "trustgraph-base>=0.16,<0.17", + "trustgraph-base>=0.17,<0.18", "pulsar-client", "google-cloud-aiplatform", "prometheus-client", diff --git a/trustgraph/setup.py b/trustgraph/setup.py index 7bb8dfd3..5f9f1f2c 100644 --- a/trustgraph/setup.py +++ b/trustgraph/setup.py @@ -34,13 +34,13 @@ setuptools.setup( python_requires='>=3.8', download_url = "https://github.com/trustgraph-ai/trustgraph/archive/refs/tags/v" + version + ".tar.gz", install_requires=[ - "trustgraph-base>=0.16,<0.17", - "trustgraph-bedrock>=0.16,<0.17", - "trustgraph-cli>=0.16,<0.17", - "trustgraph-embeddings-hf>=0.16,<0.17", - "trustgraph-flow>=0.16,<0.17", - "trustgraph-parquet>=0.16,<0.17", - "trustgraph-vertexai>=0.16,<0.17", + "trustgraph-base>=0.17,<0.18", + "trustgraph-bedrock>=0.17,<0.18", + "trustgraph-cli>=0.17,<0.18", + "trustgraph-embeddings-hf>=0.17,<0.18", + "trustgraph-flow>=0.17,<0.18", + "trustgraph-parquet>=0.17,<0.18", + "trustgraph-vertexai>=0.17,<0.18", ], scripts=[ ] From 212102c61c485d4dc3a5a0bf1751539e4653e616 Mon Sep 17 00:00:00 2001 From: cybermaggedon Date: Fri, 29 
Nov 2024 17:17:20 +0000 Subject: [PATCH 18/37] Tweak queue names (#184) --- trustgraph-base/trustgraph/schema/documents.py | 2 +- trustgraph-base/trustgraph/schema/graph.py | 4 ++-- trustgraph-base/trustgraph/schema/models.py | 4 ++-- trustgraph-base/trustgraph/schema/prompt.py | 2 +- trustgraph-base/trustgraph/schema/retrieval.py | 4 ++-- 5 files changed, 8 insertions(+), 8 deletions(-) diff --git a/trustgraph-base/trustgraph/schema/documents.py b/trustgraph-base/trustgraph/schema/documents.py index 59aba287..2a3d3d0c 100644 --- a/trustgraph-base/trustgraph/schema/documents.py +++ b/trustgraph-base/trustgraph/schema/documents.py @@ -60,5 +60,5 @@ document_embeddings_request_queue = topic( 'doc-embeddings', kind='non-persistent', namespace='request' ) document_embeddings_response_queue = topic( - 'doc-embeddings-response', kind='non-persistent', namespace='response', + 'doc-embeddings', kind='non-persistent', namespace='response', ) diff --git a/trustgraph-base/trustgraph/schema/graph.py b/trustgraph-base/trustgraph/schema/graph.py index 2d108a30..78c1a99c 100644 --- a/trustgraph-base/trustgraph/schema/graph.py +++ b/trustgraph-base/trustgraph/schema/graph.py @@ -34,7 +34,7 @@ graph_embeddings_request_queue = topic( 'graph-embeddings', kind='non-persistent', namespace='request' ) graph_embeddings_response_queue = topic( - 'graph-embeddings-response', kind='non-persistent', namespace='response', + 'graph-embeddings', kind='non-persistent', namespace='response' ) ############################################################################ @@ -67,5 +67,5 @@ triples_request_queue = topic( 'triples', kind='non-persistent', namespace='request' ) triples_response_queue = topic( - 'triples-response', kind='non-persistent', namespace='response', + 'triples', kind='non-persistent', namespace='response' ) diff --git a/trustgraph-base/trustgraph/schema/models.py b/trustgraph-base/trustgraph/schema/models.py index 70cb2c8f..a634e1c4 100644 --- 
a/trustgraph-base/trustgraph/schema/models.py +++ b/trustgraph-base/trustgraph/schema/models.py @@ -23,7 +23,7 @@ text_completion_request_queue = topic( 'text-completion', kind='non-persistent', namespace='request' ) text_completion_response_queue = topic( - 'text-completion-response', kind='non-persistent', namespace='response', + 'text-completion', kind='non-persistent', namespace='response' ) ############################################################################ @@ -41,5 +41,5 @@ embeddings_request_queue = topic( 'embeddings', kind='non-persistent', namespace='request' ) embeddings_response_queue = topic( - 'embeddings-response', kind='non-persistent', namespace='response' + 'embeddings', kind='non-persistent', namespace='response' ) diff --git a/trustgraph-base/trustgraph/schema/prompt.py b/trustgraph-base/trustgraph/schema/prompt.py index 9bcdf117..15eddea8 100644 --- a/trustgraph-base/trustgraph/schema/prompt.py +++ b/trustgraph-base/trustgraph/schema/prompt.py @@ -59,7 +59,7 @@ prompt_request_queue = topic( 'prompt', kind='non-persistent', namespace='request' ) prompt_response_queue = topic( - 'prompt-response', kind='non-persistent', namespace='response' + 'prompt', kind='non-persistent', namespace='response' ) ############################################################################ diff --git a/trustgraph-base/trustgraph/schema/retrieval.py b/trustgraph-base/trustgraph/schema/retrieval.py index ad860c3c..9c4361a1 100644 --- a/trustgraph-base/trustgraph/schema/retrieval.py +++ b/trustgraph-base/trustgraph/schema/retrieval.py @@ -20,7 +20,7 @@ graph_rag_request_queue = topic( 'graph-rag', kind='non-persistent', namespace='request' ) graph_rag_response_queue = topic( - 'graph-rag-response', kind='non-persistent', namespace='response' + 'graph-rag', kind='non-persistent', namespace='response' ) ############################################################################ @@ -40,5 +40,5 @@ document_rag_request_queue = topic( 'doc-rag', 
kind='non-persistent', namespace='request' ) document_rag_response_queue = topic( - 'doc-rag-response', kind='non-persistent', namespace='response' + 'doc-rag', kind='non-persistent', namespace='response' ) From 6d200c79c5796de8fd9e04b7802c83041335a711 Mon Sep 17 00:00:00 2001 From: cybermaggedon Date: Mon, 2 Dec 2024 17:41:30 +0000 Subject: [PATCH 19/37] Feature/wikipedia ddg (#185) API-side support for Wikipedia, DBpedia and internet search functions This incorporates a refactor of the API code to break it up, separate classes for endpoints to reduce duplication --- templates/components/azure-openai.jsonnet | 2 +- templates/components/azure.jsonnet | 2 +- templates/components/bedrock.jsonnet | 2 +- templates/components/claude.jsonnet | 2 +- templates/components/cohere.jsonnet | 2 +- templates/components/document-rag.jsonnet | 2 +- templates/components/googleaistudio.jsonnet | 2 +- templates/components/graph-rag.jsonnet | 2 +- templates/components/llamafile.jsonnet | 2 +- templates/components/ollama.jsonnet | 2 +- templates/components/openai.jsonnet | 2 +- templates/components/prompt-template.jsonnet | 6 +- templates/components/trustgraph.jsonnet | 2 +- templates/components/vertexai.jsonnet | 2 +- test-api/test-agent2-api | 28 + test-api/test-dbpedia | 30 + test-api/test-encyclopedia | 30 + test-api/test-internet-search | 30 + test-api/test-prompt-api | 1 - test-api/test-prompt2-api | 1 - test-api/test-triples-query-api | 5 +- trustgraph-base/trustgraph/schema/__init__.py | 2 + trustgraph-base/trustgraph/schema/lookup.py | 42 + trustgraph-cli/scripts/tg-load-kg-core | 1 - trustgraph-flow/scripts/wikipedia-lookup | 6 + trustgraph-flow/setup.py | 1 + .../trustgraph/api/gateway/agent.py | 30 + .../trustgraph/api/gateway/dbpedia.py | 29 + .../trustgraph/api/gateway/embeddings.py | 27 + .../trustgraph/api/gateway/encyclopedia.py | 29 + .../trustgraph/api/gateway/endpoint.py | 153 +++ .../api/gateway/graph_embeddings_load.py | 60 ++ 
.../api/gateway/graph_embeddings_stream.py | 56 ++ .../trustgraph/api/gateway/graph_rag.py | 30 + .../trustgraph/api/gateway/internet_search.py | 29 + .../trustgraph/api/gateway/prompt.py | 41 + .../trustgraph/api/gateway/publisher.py | 41 + .../trustgraph/api/gateway/running.py | 5 + .../trustgraph/api/gateway/serialize.py | 57 ++ .../trustgraph/api/gateway/service.py | 873 ++---------------- .../trustgraph/api/gateway/socket.py | 68 ++ .../trustgraph/api/gateway/subscriber.py | 68 ++ .../trustgraph/api/gateway/text_completion.py | 28 + .../trustgraph/api/gateway/triples_load.py | 59 ++ .../trustgraph/api/gateway/triples_query.py | 53 ++ .../trustgraph/api/gateway/triples_stream.py | 56 ++ .../trustgraph/external/__init__.py | 0 .../trustgraph/external/wikipedia/__init__.py | 3 + .../trustgraph/external/wikipedia/__main__.py | 7 + .../trustgraph/external/wikipedia/service.py | 102 ++ 50 files changed, 1287 insertions(+), 826 deletions(-) create mode 100755 test-api/test-agent2-api create mode 100755 test-api/test-dbpedia create mode 100755 test-api/test-encyclopedia create mode 100755 test-api/test-internet-search create mode 100644 trustgraph-base/trustgraph/schema/lookup.py create mode 100755 trustgraph-flow/scripts/wikipedia-lookup create mode 100644 trustgraph-flow/trustgraph/api/gateway/agent.py create mode 100644 trustgraph-flow/trustgraph/api/gateway/dbpedia.py create mode 100644 trustgraph-flow/trustgraph/api/gateway/embeddings.py create mode 100644 trustgraph-flow/trustgraph/api/gateway/encyclopedia.py create mode 100644 trustgraph-flow/trustgraph/api/gateway/endpoint.py create mode 100644 trustgraph-flow/trustgraph/api/gateway/graph_embeddings_load.py create mode 100644 trustgraph-flow/trustgraph/api/gateway/graph_embeddings_stream.py create mode 100644 trustgraph-flow/trustgraph/api/gateway/graph_rag.py create mode 100644 trustgraph-flow/trustgraph/api/gateway/internet_search.py create mode 100644 trustgraph-flow/trustgraph/api/gateway/prompt.py create 
mode 100644 trustgraph-flow/trustgraph/api/gateway/publisher.py create mode 100644 trustgraph-flow/trustgraph/api/gateway/running.py create mode 100644 trustgraph-flow/trustgraph/api/gateway/serialize.py create mode 100644 trustgraph-flow/trustgraph/api/gateway/socket.py create mode 100644 trustgraph-flow/trustgraph/api/gateway/subscriber.py create mode 100644 trustgraph-flow/trustgraph/api/gateway/text_completion.py create mode 100644 trustgraph-flow/trustgraph/api/gateway/triples_load.py create mode 100644 trustgraph-flow/trustgraph/api/gateway/triples_query.py create mode 100644 trustgraph-flow/trustgraph/api/gateway/triples_stream.py create mode 100644 trustgraph-flow/trustgraph/external/__init__.py create mode 100644 trustgraph-flow/trustgraph/external/wikipedia/__init__.py create mode 100644 trustgraph-flow/trustgraph/external/wikipedia/__main__.py create mode 100644 trustgraph-flow/trustgraph/external/wikipedia/service.py diff --git a/templates/components/azure-openai.jsonnet b/templates/components/azure-openai.jsonnet index cc3847c0..8afcaf11 100644 --- a/templates/components/azure-openai.jsonnet +++ b/templates/components/azure-openai.jsonnet @@ -48,7 +48,7 @@ local prompts = import "prompts/mixtral.jsonnet"; "-i", "non-persistent://tg/request/text-completion-rag", "-o", - "non-persistent://tg/response/text-completion-rag-response", + "non-persistent://tg/response/text-completion-rag", ]) .with_env_var_secrets(envSecrets) .with_limits("0.5", "128M") diff --git a/templates/components/azure.jsonnet b/templates/components/azure.jsonnet index 82b79133..cf10dc66 100644 --- a/templates/components/azure.jsonnet +++ b/templates/components/azure.jsonnet @@ -46,7 +46,7 @@ local prompts = import "prompts/mixtral.jsonnet"; "-i", "non-persistent://tg/request/text-completion-rag", "-o", - "non-persistent://tg/response/text-completion-rag-response", + "non-persistent://tg/response/text-completion-rag", ]) .with_env_var_secrets(envSecrets) .with_limits("0.5", "128M") diff 
--git a/templates/components/bedrock.jsonnet b/templates/components/bedrock.jsonnet index 93978a59..6ccaa1c5 100644 --- a/templates/components/bedrock.jsonnet +++ b/templates/components/bedrock.jsonnet @@ -53,7 +53,7 @@ local chunker = import "chunker-recursive.jsonnet"; "-i", "non-persistent://tg/request/text-completion-rag", "-o", - "non-persistent://tg/response/text-completion-rag-response", + "non-persistent://tg/response/text-completion-rag", ]) .with_env_var_secrets(envSecrets) .with_limits("0.5", "128M") diff --git a/templates/components/claude.jsonnet b/templates/components/claude.jsonnet index c6c94e21..00e4ec79 100644 --- a/templates/components/claude.jsonnet +++ b/templates/components/claude.jsonnet @@ -45,7 +45,7 @@ local prompts = import "prompts/mixtral.jsonnet"; "-i", "non-persistent://tg/request/text-completion-rag", "-o", - "non-persistent://tg/response/text-completion-rag-response", + "non-persistent://tg/response/text-completion-rag", ]) .with_env_var_secrets(envSecrets) .with_limits("0.5", "128M") diff --git a/templates/components/cohere.jsonnet b/templates/components/cohere.jsonnet index 11c30fbd..5bc9b39c 100644 --- a/templates/components/cohere.jsonnet +++ b/templates/components/cohere.jsonnet @@ -43,7 +43,7 @@ local prompts = import "prompts/mixtral.jsonnet"; "-i", "non-persistent://tg/request/text-completion-rag", "-o", - "non-persistent://tg/response/text-completion-rag-response", + "non-persistent://tg/response/text-completion-rag", ]) .with_limits("0.5", "128M") .with_reservations("0.1", "128M"); diff --git a/templates/components/document-rag.jsonnet b/templates/components/document-rag.jsonnet index ac5c11ec..0a68dd52 100644 --- a/templates/components/document-rag.jsonnet +++ b/templates/components/document-rag.jsonnet @@ -19,7 +19,7 @@ local prompts = import "prompts/mixtral.jsonnet"; "--prompt-request-queue", "non-persistent://tg/request/prompt-rag", "--prompt-response-queue", - "non-persistent://tg/response/prompt-rag-response", + 
"non-persistent://tg/response/prompt-rag", ]) .with_limits("0.5", "128M") .with_reservations("0.1", "128M"); diff --git a/templates/components/googleaistudio.jsonnet b/templates/components/googleaistudio.jsonnet index b6ee1d85..4088ceef 100644 --- a/templates/components/googleaistudio.jsonnet +++ b/templates/components/googleaistudio.jsonnet @@ -50,7 +50,7 @@ local prompts = import "prompts/mixtral.jsonnet"; "-i", "non-persistent://tg/request/text-completion-rag", "-o", - "non-persistent://tg/response/text-completion-rag-response", + "non-persistent://tg/response/text-completion-rag", ]) .with_env_var_secrets(envSecrets) .with_limits("0.5", "128M") diff --git a/templates/components/graph-rag.jsonnet b/templates/components/graph-rag.jsonnet index c0200d1e..860152c9 100644 --- a/templates/components/graph-rag.jsonnet +++ b/templates/components/graph-rag.jsonnet @@ -112,7 +112,7 @@ local url = import "values/url.jsonnet"; "--prompt-request-queue", "non-persistent://tg/request/prompt-rag", "--prompt-response-queue", - "non-persistent://tg/response/prompt-rag-response", + "non-persistent://tg/response/prompt-rag", "--entity-limit", std.toString($["graph-rag-entity-limit"]), "--triple-limit", diff --git a/templates/components/llamafile.jsonnet b/templates/components/llamafile.jsonnet index d51cda61..bc1a011c 100644 --- a/templates/components/llamafile.jsonnet +++ b/templates/components/llamafile.jsonnet @@ -40,7 +40,7 @@ local prompts = import "prompts/slm.jsonnet"; "-i", "non-persistent://tg/request/text-completion-rag", "-o", - "non-persistent://tg/response/text-completion-rag-response", + "non-persistent://tg/response/text-completion-rag", ]) .with_env_var_secrets(envSecrets) .with_limits("0.5", "128M") diff --git a/templates/components/ollama.jsonnet b/templates/components/ollama.jsonnet index 2ae696b4..8da00848 100644 --- a/templates/components/ollama.jsonnet +++ b/templates/components/ollama.jsonnet @@ -40,7 +40,7 @@ local prompts = import 
"prompts/mixtral.jsonnet"; "-i", "non-persistent://tg/request/text-completion-rag", "-o", - "non-persistent://tg/response/text-completion-rag-response", + "non-persistent://tg/response/text-completion-rag", ]) .with_env_var_secrets(envSecrets) .with_limits("0.5", "128M") diff --git a/templates/components/openai.jsonnet b/templates/components/openai.jsonnet index 83cbd406..27725cb6 100644 --- a/templates/components/openai.jsonnet +++ b/templates/components/openai.jsonnet @@ -50,7 +50,7 @@ local prompts = import "prompts/mixtral.jsonnet"; "-i", "non-persistent://tg/request/text-completion-rag", "-o", - "non-persistent://tg/response/text-completion-rag-response", + "non-persistent://tg/response/text-completion-rag", ]) .with_env_var_secrets(envSecrets) .with_limits("0.5", "128M") diff --git a/templates/components/prompt-template.jsonnet b/templates/components/prompt-template.jsonnet index ac820df6..3dadf337 100644 --- a/templates/components/prompt-template.jsonnet +++ b/templates/components/prompt-template.jsonnet @@ -53,7 +53,7 @@ local default_prompts = import "prompts/default-prompts.jsonnet"; "--text-completion-request-queue", "non-persistent://tg/request/text-completion", "--text-completion-response-queue", - "non-persistent://tg/response/text-completion-response", + "non-persistent://tg/response/text-completion", "--system-prompt", $["prompts"]["system-template"], @@ -92,11 +92,11 @@ local default_prompts = import "prompts/default-prompts.jsonnet"; "-i", "non-persistent://tg/request/prompt-rag", "-o", - "non-persistent://tg/response/prompt-rag-response", + "non-persistent://tg/response/prompt-rag", "--text-completion-request-queue", "non-persistent://tg/request/text-completion-rag", "--text-completion-response-queue", - "non-persistent://tg/response/text-completion-rag-response", + "non-persistent://tg/response/text-completion-rag", "--system-prompt", $["prompts"]["system-template"], diff --git a/templates/components/trustgraph.jsonnet 
b/templates/components/trustgraph.jsonnet index 37c05dae..6c60921c 100644 --- a/templates/components/trustgraph.jsonnet +++ b/templates/components/trustgraph.jsonnet @@ -186,7 +186,7 @@ local prompt = import "prompt-template.jsonnet"; "-p", url.pulsar, "-i", - "non-persistent://tg/response/text-completion-rag-response", + "non-persistent://tg/response/text-completion-rag", ]) .with_limits("0.5", "128M") .with_reservations("0.1", "128M"); diff --git a/templates/components/vertexai.jsonnet b/templates/components/vertexai.jsonnet index 44fe27c6..ef193156 100644 --- a/templates/components/vertexai.jsonnet +++ b/templates/components/vertexai.jsonnet @@ -93,7 +93,7 @@ local prompts = import "prompts/mixtral.jsonnet"; "-i", "non-persistent://tg/request/text-completion-rag", "-o", - "non-persistent://tg/response/text-completion-rag-response", + "non-persistent://tg/response/text-completion-rag", ]) .with_limits("0.5", "256M") .with_reservations("0.1", "256M") diff --git a/test-api/test-agent2-api b/test-api/test-agent2-api new file mode 100755 index 00000000..766b16c9 --- /dev/null +++ b/test-api/test-agent2-api @@ -0,0 +1,28 @@ +#!/usr/bin/env python3 + +import requests +import json +import sys + +url = "http://localhost:8088/api/v1/" + +############################################################################ + +input = { + "question": "What is 14 plus 12. 
Justify your answer.", +} + +resp = requests.post( + f"{url}agent", + json=input, +) + +resp = resp.json() + +if "error" in resp: + print(f"Error: {resp['error']}") + sys.exit(1) + +print(resp["answer"]) + + diff --git a/test-api/test-dbpedia b/test-api/test-dbpedia new file mode 100755 index 00000000..e361f533 --- /dev/null +++ b/test-api/test-dbpedia @@ -0,0 +1,30 @@ +#!/usr/bin/env python3 + +import requests +import json +import sys + +url = "http://localhost:8088/api/v1/" + +############################################################################ + +input = { + "term": "Cornwall", +} + +resp = requests.post( + f"{url}dbpedia", + json=input, +) + +resp = resp.json() + +if "error" in resp: + print(f"Error: {resp['error']}") + sys.exit(1) + +print(resp["text"]) + +sys.exit(0) +############################################################################ + diff --git a/test-api/test-encyclopedia b/test-api/test-encyclopedia new file mode 100755 index 00000000..ad4e5b36 --- /dev/null +++ b/test-api/test-encyclopedia @@ -0,0 +1,30 @@ +#!/usr/bin/env python3 + +import requests +import json +import sys + +url = "http://localhost:8088/api/v1/" + +############################################################################ + +input = { + "term": "Cornwall", +} + +resp = requests.post( + f"{url}encyclopedia", + json=input, +) + +resp = resp.json() + +if "error" in resp: + print(f"Error: {resp['error']}") + sys.exit(1) + +print(resp["text"]) + +sys.exit(0) +############################################################################ + diff --git a/test-api/test-internet-search b/test-api/test-internet-search new file mode 100755 index 00000000..8c854c77 --- /dev/null +++ b/test-api/test-internet-search @@ -0,0 +1,30 @@ +#!/usr/bin/env python3 + +import requests +import json +import sys + +url = "http://localhost:8088/api/v1/" + +############################################################################ + +input = { + "term": "Cornwall", +} + +resp = requests.post( + 
f"{url}internet-search", + json=input, +) + +resp = resp.json() + +if "error" in resp: + print(f"Error: {resp['error']}") + sys.exit(1) + +print(resp["text"]) + +sys.exit(0) +############################################################################ + diff --git a/test-api/test-prompt-api b/test-api/test-prompt-api index 1005bc90..4f69f09a 100755 --- a/test-api/test-prompt-api +++ b/test-api/test-prompt-api @@ -22,7 +22,6 @@ resp = requests.post( resp = resp.json() -print(resp) if "error" in resp: print(f"Error: {resp['error']}") sys.exit(1) diff --git a/test-api/test-prompt2-api b/test-api/test-prompt2-api index f1b80c48..1e641439 100755 --- a/test-api/test-prompt2-api +++ b/test-api/test-prompt2-api @@ -22,7 +22,6 @@ resp = requests.post( resp = resp.json() -print(resp) if "error" in resp: print(f"Error: {resp['error']}") sys.exit(1) diff --git a/test-api/test-triples-query-api b/test-api/test-triples-query-api index e2895a28..1aa8a0b1 100755 --- a/test-api/test-triples-query-api +++ b/test-api/test-triples-query-api @@ -9,7 +9,10 @@ url = "http://localhost:8088/api/v1/" ############################################################################ input = { - "p": "http://www.w3.org/2000/01/rdf-schema#label", + "p": { + "v": "http://www.w3.org/2000/01/rdf-schema#label", + "e": True, + }, "limit": 10 } diff --git a/trustgraph-base/trustgraph/schema/__init__.py b/trustgraph-base/trustgraph/schema/__init__.py index 3196691b..be41b670 100644 --- a/trustgraph-base/trustgraph/schema/__init__.py +++ b/trustgraph-base/trustgraph/schema/__init__.py @@ -9,4 +9,6 @@ from . graph import * from . retrieval import * from . metadata import * from . agent import * +from . lookup import * + diff --git a/trustgraph-base/trustgraph/schema/lookup.py b/trustgraph-base/trustgraph/schema/lookup.py new file mode 100644 index 00000000..d0a0517c --- /dev/null +++ b/trustgraph-base/trustgraph/schema/lookup.py @@ -0,0 +1,42 @@ + +from pulsar.schema import Record, String + +from . 
types import Error, Value, Triple +from . topic import topic +from . metadata import Metadata + +############################################################################ + +# Lookups + +class LookupRequest(Record): + kind = String() + term = String() + +class LookupResponse(Record): + text = String() + error = Error() + +encyclopedia_lookup_request_queue = topic( + 'encyclopedia', kind='non-persistent', namespace='request' +) +encyclopedia_lookup_response_queue = topic( + 'encyclopedia', kind='non-persistent', namespace='response', +) + +dbpedia_lookup_request_queue = topic( + 'dbpedia', kind='non-persistent', namespace='request' +) +dbpedia_lookup_response_queue = topic( + 'dbpedia', kind='non-persistent', namespace='response', +) + +internet_search_request_queue = topic( + 'internet-search', kind='non-persistent', namespace='request' +) +internet_search_response_queue = topic( + 'internet-search', kind='non-persistent', namespace='response', +) + +############################################################################ + diff --git a/trustgraph-cli/scripts/tg-load-kg-core b/trustgraph-cli/scripts/tg-load-kg-core index e2d0a405..4e207cf1 100755 --- a/trustgraph-cli/scripts/tg-load-kg-core +++ b/trustgraph-cli/scripts/tg-load-kg-core @@ -93,7 +93,6 @@ async def loader(ge_queue, t_queue, path, format, user, collection): if collection: unpacked["metadata"]["collection"] = collection - if unpacked[0] == "t": await t_queue.put(unpacked[1]) t_counts += 1 diff --git a/trustgraph-flow/scripts/wikipedia-lookup b/trustgraph-flow/scripts/wikipedia-lookup new file mode 100755 index 00000000..a89b1009 --- /dev/null +++ b/trustgraph-flow/scripts/wikipedia-lookup @@ -0,0 +1,6 @@ +#!/usr/bin/env python3 + +from trustgraph.external.wikipedia import run + +run() + diff --git a/trustgraph-flow/setup.py b/trustgraph-flow/setup.py index 8e81e12c..65bb7326 100644 --- a/trustgraph-flow/setup.py +++ b/trustgraph-flow/setup.py @@ -106,5 +106,6 @@ setuptools.setup( 
"scripts/triples-query-neo4j", "scripts/triples-write-cassandra", "scripts/triples-write-neo4j", + "scripts/wikipedia-lookup", ] ) diff --git a/trustgraph-flow/trustgraph/api/gateway/agent.py b/trustgraph-flow/trustgraph/api/gateway/agent.py new file mode 100644 index 00000000..28a1e185 --- /dev/null +++ b/trustgraph-flow/trustgraph/api/gateway/agent.py @@ -0,0 +1,30 @@ + +from ... schema import AgentRequest, AgentResponse +from ... schema import agent_request_queue +from ... schema import agent_response_queue + +from . endpoint import MultiResponseServiceEndpoint + +class AgentEndpoint(MultiResponseServiceEndpoint): + def __init__(self, pulsar_host, timeout): + + super(AgentEndpoint, self).__init__( + pulsar_host=pulsar_host, + request_queue=agent_request_queue, + response_queue=agent_response_queue, + request_schema=AgentRequest, + response_schema=AgentResponse, + endpoint_path="/api/v1/agent", + timeout=timeout, + ) + + def to_request(self, body): + return AgentRequest( + question=body["question"] + ) + + def from_response(self, message): + if message.answer: + return { "answer": message.answer }, True + else: + return {}, False diff --git a/trustgraph-flow/trustgraph/api/gateway/dbpedia.py b/trustgraph-flow/trustgraph/api/gateway/dbpedia.py new file mode 100644 index 00000000..0ccb3d6b --- /dev/null +++ b/trustgraph-flow/trustgraph/api/gateway/dbpedia.py @@ -0,0 +1,29 @@ + +from ... schema import LookupRequest, LookupResponse +from ... schema import dbpedia_lookup_request_queue +from ... schema import dbpedia_lookup_response_queue + +from . 
endpoint import ServiceEndpoint + +class DbpediaEndpoint(ServiceEndpoint): + def __init__(self, pulsar_host, timeout): + + super(DbpediaEndpoint, self).__init__( + pulsar_host=pulsar_host, + request_queue=dbpedia_lookup_request_queue, + response_queue=dbpedia_lookup_response_queue, + request_schema=LookupRequest, + response_schema=LookupResponse, + endpoint_path="/api/v1/dbpedia", + timeout=timeout, + ) + + def to_request(self, body): + return LookupRequest( + term=body["term"], + kind=body.get("kind", None), + ) + + def from_response(self, message): + return { "text": message.text } + diff --git a/trustgraph-flow/trustgraph/api/gateway/embeddings.py b/trustgraph-flow/trustgraph/api/gateway/embeddings.py new file mode 100644 index 00000000..b5fcc0a4 --- /dev/null +++ b/trustgraph-flow/trustgraph/api/gateway/embeddings.py @@ -0,0 +1,27 @@ + +from ... schema import EmbeddingsRequest, EmbeddingsResponse +from ... schema import embeddings_request_queue +from ... schema import embeddings_response_queue + +from . endpoint import ServiceEndpoint + +class EmbeddingsEndpoint(ServiceEndpoint): + def __init__(self, pulsar_host, timeout): + + super(EmbeddingsEndpoint, self).__init__( + pulsar_host=pulsar_host, + request_queue=embeddings_request_queue, + response_queue=embeddings_response_queue, + request_schema=EmbeddingsRequest, + response_schema=EmbeddingsResponse, + endpoint_path="/api/v1/embeddings", + timeout=timeout, + ) + + def to_request(self, body): + return EmbeddingsRequest( + text=body["text"] + ) + + def from_response(self, message): + return { "vectors": message.vectors } diff --git a/trustgraph-flow/trustgraph/api/gateway/encyclopedia.py b/trustgraph-flow/trustgraph/api/gateway/encyclopedia.py new file mode 100644 index 00000000..e379d7d4 --- /dev/null +++ b/trustgraph-flow/trustgraph/api/gateway/encyclopedia.py @@ -0,0 +1,29 @@ + +from ... schema import LookupRequest, LookupResponse +from ... schema import encyclopedia_lookup_request_queue +from ... 
schema import encyclopedia_lookup_response_queue + +from . endpoint import ServiceEndpoint + +class EncyclopediaEndpoint(ServiceEndpoint): + def __init__(self, pulsar_host, timeout): + + super(EncyclopediaEndpoint, self).__init__( + pulsar_host=pulsar_host, + request_queue=encyclopedia_lookup_request_queue, + response_queue=encyclopedia_lookup_response_queue, + request_schema=LookupRequest, + response_schema=LookupResponse, + endpoint_path="/api/v1/encyclopedia", + timeout=timeout, + ) + + def to_request(self, body): + return LookupRequest( + term=body["term"], + kind=body.get("kind", None), + ) + + def from_response(self, message): + return { "text": message.text } + diff --git a/trustgraph-flow/trustgraph/api/gateway/endpoint.py b/trustgraph-flow/trustgraph/api/gateway/endpoint.py new file mode 100644 index 00000000..075e4a0e --- /dev/null +++ b/trustgraph-flow/trustgraph/api/gateway/endpoint.py @@ -0,0 +1,153 @@ + +import asyncio +from pulsar.schema import JsonSchema +from aiohttp import web +import uuid +import logging + +from . publisher import Publisher +from . 
subscriber import Subscriber + +logger = logging.getLogger("endpoint") +logger.setLevel(logging.INFO) + +class ServiceEndpoint: + + def __init__( + self, + pulsar_host, + request_queue, request_schema, + response_queue, response_schema, + endpoint_path, + subscription="api-gateway", consumer_name="api-gateway", + timeout=600, + ): + + self.pub = Publisher( + pulsar_host, request_queue, + schema=JsonSchema(request_schema) + ) + + self.sub = Subscriber( + pulsar_host, response_queue, + subscription, consumer_name, + JsonSchema(response_schema) + ) + + self.path = endpoint_path + self.timeout = timeout + + async def start(self): + + self.pub_task = asyncio.create_task(self.pub.run()) + self.sub_task = asyncio.create_task(self.sub.run()) + + def add_routes(self, app): + + app.add_routes([ + web.post(self.path, self.handle), + ]) + + def to_request(self, request): + raise RuntimeError("Not defined") + + def from_response(self, response): + raise RuntimeError("Not defined") + + async def handle(self, request): + + id = str(uuid.uuid4()) + + try: + + data = await request.json() + + q = await self.sub.subscribe(id) + + print(data) + + await self.pub.send( + id, + self.to_request(data), + ) + + try: + resp = await asyncio.wait_for(q.get(), self.timeout) + except: + raise RuntimeError("Timeout waiting for response") + + print(resp) + + if resp.error: + return web.json_response( + { "error": resp.error.message } + ) + + return web.json_response( + self.from_response(resp) + ) + + except Exception as e: + logging.error(f"Exception: {e}") + + return web.json_response( + { "error": str(e) } + ) + + finally: + await self.sub.unsubscribe(id) + + +class MultiResponseServiceEndpoint(ServiceEndpoint): + + async def handle(self, request): + + id = str(uuid.uuid4()) + + try: + + data = await request.json() + + q = await self.sub.subscribe(id) + + print(data) + + await self.pub.send( + id, + self.to_request(data), + ) + + # Keeps looking at responses... 
+ + while True: + + try: + resp = await asyncio.wait_for(q.get(), self.timeout) + except: + raise RuntimeError("Timeout waiting for response") + + print(resp) + + if resp.error: + return web.json_response( + { "error": resp.error.message } + ) + + # Until from_response says we have a finished answer + resp, fin = self.from_response(resp) + + + if fin: + return web.json_response(resp) + + # Not finished, so loop round and continue + + except Exception as e: + logging.error(f"Exception: {e}") + + return web.json_response( + { "error": str(e) } + ) + + finally: + await self.sub.unsubscribe(id) diff --git a/trustgraph-flow/trustgraph/api/gateway/graph_embeddings_load.py b/trustgraph-flow/trustgraph/api/gateway/graph_embeddings_load.py new file mode 100644 index 00000000..3cc3f533 --- /dev/null +++ b/trustgraph-flow/trustgraph/api/gateway/graph_embeddings_load.py @@ -0,0 +1,60 @@ + +import asyncio +from pulsar.schema import JsonSchema +import uuid +from aiohttp import WSMsgType + +from ... schema import Metadata +from ... schema import GraphEmbeddings +from ... schema import graph_embeddings_store_queue + +from . publisher import Publisher +from . socket import SocketEndpoint +from . 
serialize import to_subgraph, to_value + +class GraphEmbeddingsLoadEndpoint(SocketEndpoint): + + def __init__(self, pulsar_host, path="/api/v1/load/graph-embeddings"): + + super(GraphEmbeddingsLoadEndpoint, self).__init__( + endpoint_path=path + ) + + self.pulsar_host=pulsar_host + + self.publisher = Publisher( + self.pulsar_host, graph_embeddings_store_queue, + schema=JsonSchema(GraphEmbeddings) + ) + + async def start(self): + + self.task = asyncio.create_task( + self.publisher.run() + ) + + async def listener(self, ws, running): + + async for msg in ws: + # On error, finish + if msg.type == WSMsgType.ERROR: + break + else: + + data = msg.json() + + elt = GraphEmbeddings( + metadata=Metadata( + id=data["metadata"]["id"], + metadata=to_subgraph(data["metadata"]["metadata"]), + user=data["metadata"]["user"], + collection=data["metadata"]["collection"], + ), + entity=to_value(data["entity"]), + vectors=data["vectors"], + ) + + await self.publisher.send(None, elt) + + + running.stop() diff --git a/trustgraph-flow/trustgraph/api/gateway/graph_embeddings_stream.py b/trustgraph-flow/trustgraph/api/gateway/graph_embeddings_stream.py new file mode 100644 index 00000000..978684cf --- /dev/null +++ b/trustgraph-flow/trustgraph/api/gateway/graph_embeddings_stream.py @@ -0,0 +1,56 @@ + +import asyncio +from pulsar.schema import JsonSchema +import uuid + +from ... schema import GraphEmbeddings +from ... schema import graph_embeddings_store_queue + +from . subscriber import Subscriber +from . socket import SocketEndpoint +from . 
serialize import serialize_graph_embeddings + +class GraphEmbeddingsStreamEndpoint(SocketEndpoint): + + def __init__(self, pulsar_host, path="/api/v1/stream/graph-embeddings"): + + super(GraphEmbeddingsStreamEndpoint, self).__init__( + endpoint_path=path + ) + + self.pulsar_host=pulsar_host + + self.subscriber = Subscriber( + self.pulsar_host, graph_embeddings_store_queue, + "api-gateway", "api-gateway", + schema=JsonSchema(GraphEmbeddings) + ) + + async def start(self): + + self.task = asyncio.create_task( + self.subscriber.run() + ) + + async def async_thread(self, ws, running): + + id = str(uuid.uuid4()) + + q = await self.subscriber.subscribe_all(id) + + while running.get(): + try: + resp = await asyncio.wait_for(q.get(), 0.5) + await ws.send_json(serialize_graph_embeddings(resp)) + + except TimeoutError: + continue + + except Exception as e: + print(f"Exception: {str(e)}", flush=True) + break + + await self.subscriber.unsubscribe_all(id) + + running.stop() + diff --git a/trustgraph-flow/trustgraph/api/gateway/graph_rag.py b/trustgraph-flow/trustgraph/api/gateway/graph_rag.py new file mode 100644 index 00000000..1381dc23 --- /dev/null +++ b/trustgraph-flow/trustgraph/api/gateway/graph_rag.py @@ -0,0 +1,30 @@ + +from ... schema import GraphRagQuery, GraphRagResponse +from ... schema import graph_rag_request_queue +from ... schema import graph_rag_response_queue + +from . 
endpoint import ServiceEndpoint + +class GraphRagEndpoint(ServiceEndpoint): + def __init__(self, pulsar_host, timeout): + + super(GraphRagEndpoint, self).__init__( + pulsar_host=pulsar_host, + request_queue=graph_rag_request_queue, + response_queue=graph_rag_response_queue, + request_schema=GraphRagQuery, + response_schema=GraphRagResponse, + endpoint_path="/api/v1/graph-rag", + timeout=timeout, + ) + + def to_request(self, body): + return GraphRagQuery( + query=body["query"], + user=body.get("user", "trustgraph"), + collection=body.get("collection", "default"), + ) + + def from_response(self, message): + return { "response": message.response } + diff --git a/trustgraph-flow/trustgraph/api/gateway/internet_search.py b/trustgraph-flow/trustgraph/api/gateway/internet_search.py new file mode 100644 index 00000000..c84ed82a --- /dev/null +++ b/trustgraph-flow/trustgraph/api/gateway/internet_search.py @@ -0,0 +1,29 @@ + +from ... schema import LookupRequest, LookupResponse +from ... schema import internet_search_request_queue +from ... schema import internet_search_response_queue + +from . endpoint import ServiceEndpoint + +class InternetSearchEndpoint(ServiceEndpoint): + def __init__(self, pulsar_host, timeout): + + super(InternetSearchEndpoint, self).__init__( + pulsar_host=pulsar_host, + request_queue=internet_search_request_queue, + response_queue=internet_search_response_queue, + request_schema=LookupRequest, + response_schema=LookupResponse, + endpoint_path="/api/v1/internet-search", + timeout=timeout, + ) + + def to_request(self, body): + return LookupRequest( + term=body["term"], + kind=body.get("kind", None), + ) + + def from_response(self, message): + return { "text": message.text } + diff --git a/trustgraph-flow/trustgraph/api/gateway/prompt.py b/trustgraph-flow/trustgraph/api/gateway/prompt.py new file mode 100644 index 00000000..e02effb9 --- /dev/null +++ b/trustgraph-flow/trustgraph/api/gateway/prompt.py @@ -0,0 +1,41 @@ + +import json + +from ... 
schema import PromptRequest, PromptResponse +from ... schema import prompt_request_queue +from ... schema import prompt_response_queue + +from . endpoint import ServiceEndpoint + +class PromptEndpoint(ServiceEndpoint): + def __init__(self, pulsar_host, timeout): + + super(PromptEndpoint, self).__init__( + pulsar_host=pulsar_host, + request_queue=prompt_request_queue, + response_queue=prompt_response_queue, + request_schema=PromptRequest, + response_schema=PromptResponse, + endpoint_path="/api/v1/prompt", + timeout=timeout, + ) + + def to_request(self, body): + return PromptRequest( + id=body["id"], + terms={ + k: json.dumps(v) + for k, v in body["variables"].items() + } + ) + + def from_response(self, message): + if message.object: + return { + "object": message.object + } + else: + return { + "text": message.text + } + diff --git a/trustgraph-flow/trustgraph/api/gateway/publisher.py b/trustgraph-flow/trustgraph/api/gateway/publisher.py new file mode 100644 index 00000000..1bff44dd --- /dev/null +++ b/trustgraph-flow/trustgraph/api/gateway/publisher.py @@ -0,0 +1,41 @@ + +import asyncio +import aiopulsar + +class Publisher: + + def __init__(self, pulsar_host, topic, schema=None, max_size=10, + chunking_enabled=False): + self.pulsar_host = pulsar_host + self.topic = topic + self.schema = schema + self.q = asyncio.Queue(maxsize=max_size) + self.chunking_enabled = chunking_enabled + + async def run(self): + + while True: + + try: + async with aiopulsar.connect(self.pulsar_host) as client: + async with client.create_producer( + topic=self.topic, + schema=self.schema, + chunking_enabled=self.chunking_enabled, + ) as producer: + while True: + id, item = await self.q.get() + + if id: + await producer.send(item, { "id": id }) + else: + await producer.send(item) + + except Exception as e: + print("Exception:", e, flush=True) + + # If handler drops out, sleep a retry + await asyncio.sleep(2) + + async def send(self, id, msg): + await self.q.put((id, msg)) diff --git 
a/trustgraph-flow/trustgraph/api/gateway/running.py b/trustgraph-flow/trustgraph/api/gateway/running.py new file mode 100644 index 00000000..e6a91e66 --- /dev/null +++ b/trustgraph-flow/trustgraph/api/gateway/running.py @@ -0,0 +1,5 @@ + +class Running: + def __init__(self): self.running = True + def get(self): return self.running + def stop(self): self.running = False diff --git a/trustgraph-flow/trustgraph/api/gateway/serialize.py b/trustgraph-flow/trustgraph/api/gateway/serialize.py new file mode 100644 index 00000000..2b955645 --- /dev/null +++ b/trustgraph-flow/trustgraph/api/gateway/serialize.py @@ -0,0 +1,57 @@ +from ... schema import Value, Triple + +def to_value(x): + return Value(value=x["v"], is_uri=x["e"]) + +def to_subgraph(x): + return [ + Triple( + s=to_value(t["s"]), + p=to_value(t["p"]), + o=to_value(t["o"]) + ) + for t in x + ] + +def serialize_value(v): + return { + "v": v.value, + "e": v.is_uri, + } + +def serialize_triple(t): + return { + "s": serialize_value(t.s), + "p": serialize_value(t.p), + "o": serialize_value(t.o) + } + +def serialize_subgraph(sg): + return [ + serialize_triple(t) + for t in sg + ] + +def serialize_triples(message): + return { + "metadata": { + "id": message.metadata.id, + "metadata": serialize_subgraph(message.metadata.metadata), + "user": message.metadata.user, + "collection": message.metadata.collection, + }, + "triples": serialize_subgraph(message.triples), + } + +def serialize_graph_embeddings(message): + return { + "metadata": { + "id": message.metadata.id, + "metadata": serialize_subgraph(message.metadata.metadata), + "user": message.metadata.user, + "collection": message.metadata.collection, + }, + "vectors": message.vectors, + "entity": serialize_value(message.entity), + } + diff --git a/trustgraph-flow/trustgraph/api/gateway/service.py b/trustgraph-flow/trustgraph/api/gateway/service.py index 7b12e1a2..dcdd9779 100755 --- a/trustgraph-flow/trustgraph/api/gateway/service.py +++ 
b/trustgraph-flow/trustgraph/api/gateway/service.py @@ -1,4 +1,3 @@ - """ API gateway. Offers HTTP services which are translated to interaction on the Pulsar bus. @@ -14,57 +13,39 @@ module = ".".join(__name__.split(".")[1:-1]) import asyncio import argparse -from aiohttp import web, WSMsgType -import json +from aiohttp import web import logging -import uuid import os import base64 import pulsar -from pulsar.asyncio import Client from pulsar.schema import JsonSchema -import _pulsar -import aiopulsar from prometheus_client import start_http_server from ... log_level import LogLevel -from trustgraph.clients.llm_client import LlmClient -from trustgraph.clients.prompt_client import PromptClient - -from ... schema import Value, Metadata, Document, TextDocument, Triple - -from ... schema import TextCompletionRequest, TextCompletionResponse -from ... schema import text_completion_request_queue -from ... schema import text_completion_response_queue - -from ... schema import PromptRequest, PromptResponse -from ... schema import prompt_request_queue -from ... schema import prompt_response_queue - -from ... schema import GraphRagQuery, GraphRagResponse -from ... schema import graph_rag_request_queue -from ... schema import graph_rag_response_queue - -from ... schema import TriplesQueryRequest, TriplesQueryResponse, Triples -from ... schema import triples_request_queue -from ... schema import triples_response_queue -from ... schema import triples_store_queue - -from ... schema import GraphEmbeddings -from ... schema import graph_embeddings_store_queue - -from ... schema import AgentRequest, AgentResponse -from ... schema import agent_request_queue -from ... schema import agent_response_queue - -from ... schema import EmbeddingsRequest, EmbeddingsResponse -from ... schema import embeddings_request_queue -from ... schema import embeddings_response_queue - +from ... schema import Metadata, Document, TextDocument from ... 
schema import document_ingest_queue, text_ingest_queue +from . serialize import to_subgraph +from . running import Running +from . publisher import Publisher +from . subscriber import Subscriber +from . endpoint import ServiceEndpoint, MultiResponseServiceEndpoint +from . text_completion import TextCompletionEndpoint +from . prompt import PromptEndpoint +from . graph_rag import GraphRagEndpoint +from . triples_query import TriplesQueryEndpoint +from . embeddings import EmbeddingsEndpoint +from . encyclopedia import EncyclopediaEndpoint +from . agent import AgentEndpoint +from . dbpedia import DbpediaEndpoint +from . internet_search import InternetSearchEndpoint +from . triples_stream import TriplesStreamEndpoint +from . graph_embeddings_stream import GraphEmbeddingsStreamEndpoint +from . triples_load import TriplesLoadEndpoint +from . graph_embeddings_load import GraphEmbeddingsLoadEndpoint + logger = logging.getLogger("api") logger.setLevel(logging.INFO) @@ -72,168 +53,6 @@ default_pulsar_host = os.getenv("PULSAR_HOST", "pulsar://pulsar:6650") default_timeout = 600 default_port = 8088 -def to_value(x): - return Value(value=x["v"], is_uri=x["e"]) - -def to_subgraph(x): - return [ - Triple( - s=to_value(t["s"]), - p=to_value(t["p"]), - o=to_value(t["o"]) - ) - for t in x - ] - -class Running: - def __init__(self): self.running = True - def get(self): return self.running - def stop(self): self.running = False - -class Publisher: - - def __init__(self, pulsar_host, topic, schema=None, max_size=10, - chunking_enabled=False): - self.pulsar_host = pulsar_host - self.topic = topic - self.schema = schema - self.q = asyncio.Queue(maxsize=max_size) - self.chunking_enabled = chunking_enabled - - async def run(self): - - while True: - - try: - async with aiopulsar.connect(self.pulsar_host) as client: - async with client.create_producer( - topic=self.topic, - schema=self.schema, - chunking_enabled=self.chunking_enabled, - ) as producer: - while True: - id, item = await 
self.q.get() - - if id: - await producer.send(item, { "id": id }) - else: - await producer.send(item) - - except Exception as e: - print("Exception:", e, flush=True) - - # If handler drops out, sleep a retry - await asyncio.sleep(2) - - async def send(self, id, msg): - await self.q.put((id, msg)) - -class Subscriber: - - def __init__(self, pulsar_host, topic, subscription, consumer_name, - schema=None, max_size=10): - self.pulsar_host = pulsar_host - self.topic = topic - self.subscription = subscription - self.consumer_name = consumer_name - self.schema = schema - self.q = {} - self.full = {} - - async def run(self): - while True: - try: - async with aiopulsar.connect(self.pulsar_host) as client: - async with client.subscribe( - topic=self.topic, - subscription_name=self.subscription, - consumer_name=self.consumer_name, - schema=self.schema, - ) as consumer: - while True: - msg = await consumer.receive() - - # Acknowledge successful reception of the message - await consumer.acknowledge(msg) - - try: - id = msg.properties()["id"] - except: - id = None - - value = msg.value() - if id in self.q: - await self.q[id].put(value) - - for q in self.full.values(): - await q.put(value) - - except Exception as e: - print("Exception:", e, flush=True) - - # If handler drops out, sleep a retry - await asyncio.sleep(2) - - async def subscribe(self, id): - q = asyncio.Queue() - self.q[id] = q - return q - - async def unsubscribe(self, id): - if id in self.q: - del self.q[id] - - async def subscribe_all(self, id): - q = asyncio.Queue() - self.full[id] = q - return q - - async def unsubscribe_all(self, id): - if id in self.full: - del self.full[id] - -def serialize_value(v): - return { - "v": v.value, - "e": v.is_uri, - } - -def serialize_triple(t): - return { - "s": serialize_value(t.s), - "p": serialize_value(t.p), - "o": serialize_value(t.o) - } - -def serialize_subgraph(sg): - return [ - serialize_triple(t) - for t in sg - ] - -def serialize_triples(message): - return { - 
"metadata": { - "id": message.metadata.id, - "metadata": serialize_subgraph(message.metadata.metadata), - "user": message.metadata.user, - "collection": message.metadata.collection, - }, - "triples": serialize_subgraph(message.triples), - } - -def serialize_graph_embeddings(message): - return { - "metadata": { - "id": message.metadata.id, - "metadata": serialize_subgraph(message.metadata.metadata), - "user": message.metadata.user, - "collection": message.metadata.collection, - }, - "vectors": message.vectors, - "entity": message.entity, - } - class Api: def __init__(self, **config): @@ -247,93 +66,47 @@ class Api: self.timeout = int(config.get("timeout", default_timeout)) self.pulsar_host = config.get("pulsar_host", default_pulsar_host) - self.llm_out = Publisher( - self.pulsar_host, text_completion_request_queue, - schema=JsonSchema(TextCompletionRequest) - ) - - self.llm_in = Subscriber( - self.pulsar_host, text_completion_response_queue, - "api-gateway", "api-gateway", - JsonSchema(TextCompletionResponse) - ) - - self.prompt_out = Publisher( - self.pulsar_host, prompt_request_queue, - schema=JsonSchema(PromptRequest) - ) - - self.prompt_in = Subscriber( - self.pulsar_host, prompt_response_queue, - "api-gateway", "api-gateway", - JsonSchema(PromptResponse) - ) - - self.graph_rag_out = Publisher( - self.pulsar_host, graph_rag_request_queue, - schema=JsonSchema(GraphRagQuery) - ) - - self.graph_rag_in = Subscriber( - self.pulsar_host, graph_rag_response_queue, - "api-gateway", "api-gateway", - JsonSchema(GraphRagResponse) - ) - - self.triples_query_out = Publisher( - self.pulsar_host, triples_request_queue, - schema=JsonSchema(TriplesQueryRequest) - ) - - self.triples_query_in = Subscriber( - self.pulsar_host, triples_response_queue, - "api-gateway", "api-gateway", - JsonSchema(TriplesQueryResponse) - ) - - self.agent_out = Publisher( - self.pulsar_host, agent_request_queue, - schema=JsonSchema(AgentRequest) - ) - - self.agent_in = Subscriber( - self.pulsar_host, 
agent_response_queue, - "api-gateway", "api-gateway", - JsonSchema(AgentResponse) - ) - - self.embeddings_out = Publisher( - self.pulsar_host, embeddings_request_queue, - schema=JsonSchema(EmbeddingsRequest) - ) - - self.embeddings_in = Subscriber( - self.pulsar_host, embeddings_response_queue, - "api-gateway", "api-gateway", - JsonSchema(EmbeddingsResponse) - ) - - self.triples_tap = Subscriber( - self.pulsar_host, triples_store_queue, - "api-gateway", "api-gateway", - schema=JsonSchema(Triples) - ) - - self.triples_pub = Publisher( - self.pulsar_host, triples_store_queue, - schema=JsonSchema(Triples) - ) - - self.graph_embeddings_tap = Subscriber( - self.pulsar_host, graph_embeddings_store_queue, - "api-gateway", "api-gateway", - schema=JsonSchema(GraphEmbeddings) - ) - - self.graph_embeddings_pub = Publisher( - self.pulsar_host, graph_embeddings_store_queue, - schema=JsonSchema(GraphEmbeddings) - ) + self.endpoints = [ + TextCompletionEndpoint( + pulsar_host=self.pulsar_host, timeout=self.timeout, + ), + PromptEndpoint( + pulsar_host=self.pulsar_host, timeout=self.timeout, + ), + GraphRagEndpoint( + pulsar_host=self.pulsar_host, timeout=self.timeout, + ), + TriplesQueryEndpoint( + pulsar_host=self.pulsar_host, timeout=self.timeout, + ), + EmbeddingsEndpoint( + pulsar_host=self.pulsar_host, timeout=self.timeout, + ), + AgentEndpoint( + pulsar_host=self.pulsar_host, timeout=self.timeout, + ), + EncyclopediaEndpoint( + pulsar_host=self.pulsar_host, timeout=self.timeout, + ), + DbpediaEndpoint( + pulsar_host=self.pulsar_host, timeout=self.timeout, + ), + InternetSearchEndpoint( + pulsar_host=self.pulsar_host, timeout=self.timeout, + ), + TriplesStreamEndpoint( + pulsar_host=self.pulsar_host + ), + GraphEmbeddingsStreamEndpoint( + pulsar_host=self.pulsar_host + ), + TriplesLoadEndpoint( + pulsar_host=self.pulsar_host + ), + GraphEmbeddingsLoadEndpoint( + pulsar_host=self.pulsar_host + ), + ] self.document_out = Publisher( self.pulsar_host, document_ingest_queue, @@ 
-347,323 +120,14 @@ class Api: chunking_enabled=True, ) + for ep in self.endpoints: + ep.add_routes(self.app) + self.app.add_routes([ - web.post("/api/v1/text-completion", self.llm), - web.post("/api/v1/prompt", self.prompt), - web.post("/api/v1/graph-rag", self.graph_rag), - web.post("/api/v1/triples-query", self.triples_query), - web.post("/api/v1/agent", self.agent), - web.post("/api/v1/embeddings", self.embeddings), web.post("/api/v1/load/document", self.load_document), web.post("/api/v1/load/text", self.load_text), - web.get("/api/v1/ws", self.socket), - - web.get("/api/v1/stream/triples", self.stream_triples), - web.get( - "/api/v1/stream/graph-embeddings", - self.stream_graph_embeddings - ), - - web.get("/api/v1/load/triples", self.load_triples), - web.get( - "/api/v1/load/graph-embeddings", - self.load_graph_embeddings - ), - ]) - async def llm(self, request): - - id = str(uuid.uuid4()) - - try: - - data = await request.json() - - q = await self.llm_in.subscribe(id) - - await self.llm_out.send( - id, - TextCompletionRequest( - system=data["system"], - prompt=data["prompt"] - ) - ) - - try: - resp = await asyncio.wait_for(q.get(), self.timeout) - except: - raise RuntimeError("Timeout waiting for response") - - if resp.error: - return web.json_response( - { "error": resp.error.message } - ) - - return web.json_response( - { "response": resp.response } - ) - - except Exception as e: - logging.error(f"Exception: {e}") - - return web.json_response( - { "error": str(e) } - ) - - finally: - await self.llm_in.unsubscribe(id) - - async def prompt(self, request): - - id = str(uuid.uuid4()) - - try: - - data = await request.json() - - q = await self.prompt_in.subscribe(id) - - terms = { - k: json.dumps(v) - for k, v in data["variables"].items() - } - - await self.prompt_out.send( - id, - PromptRequest( - id=data["id"], - terms=terms - ) - ) - - try: - resp = await asyncio.wait_for(q.get(), self.timeout) - except: - raise RuntimeError("Timeout waiting for response") - 
- if resp.error: - return web.json_response( - { "error": resp.error.message } - ) - - if resp.object: - return web.json_response( - { "object": resp.object } - ) - - return web.json_response( - { "text": resp.text } - ) - - except Exception as e: - logging.error(f"Exception: {e}") - - return web.json_response( - { "error": str(e) } - ) - - finally: - await self.prompt_in.unsubscribe(id) - - async def graph_rag(self, request): - - id = str(uuid.uuid4()) - - try: - - data = await request.json() - - q = await self.graph_rag_in.subscribe(id) - - await self.graph_rag_out.send( - id, - GraphRagQuery( - query=data["query"], - user=data.get("user", "trustgraph"), - collection=data.get("collection", "default"), - ) - ) - - try: - resp = await asyncio.wait_for(q.get(), self.timeout) - except: - raise RuntimeError("Timeout waiting for response") - - if resp.error: - return web.json_response( - { "error": resp.error.message } - ) - - return web.json_response( - { "response": resp.response } - ) - - except Exception as e: - logging.error(f"Exception: {e}") - - return web.json_response( - { "error": str(e) } - ) - - finally: - await self.graph_rag_in.unsubscribe(id) - - async def triples_query(self, request): - - id = str(uuid.uuid4()) - - try: - - data = await request.json() - - q = await self.triples_query_in.subscribe(id) - - if "s" in data: - s = to_value(data["s"]) - else: - s = None - - if "p" in data: - p = to_value(data["p"]) - else: - p = None - - if "o" in data: - o = to_value(data["o"]) - else: - o = None - - limit = int(data.get("limit", 10000)) - - await self.triples_query_out.send( - id, - TriplesQueryRequest( - s = s, p = p, o = o, - limit = limit, - user = data.get("user", "trustgraph"), - collection = data.get("collection", "default"), - ) - ) - - try: - resp = await asyncio.wait_for(q.get(), self.timeout) - except: - raise RuntimeError("Timeout waiting for response") - - if resp.error: - return web.json_response( - { "error": resp.error.message } - ) - - 
return web.json_response( - { - "response": serialize_subgraph(resp.triples), - } - ) - - except Exception as e: - logging.error(f"Exception: {e}") - - return web.json_response( - { "error": str(e) } - ) - - finally: - await self.graph_rag_in.unsubscribe(id) - - async def agent(self, request): - - id = str(uuid.uuid4()) - - try: - - data = await request.json() - - q = await self.agent_in.subscribe(id) - - await self.agent_out.send( - id, - AgentRequest( - question=data["question"], - ) - ) - - while True: - try: - resp = await asyncio.wait_for(q.get(), self.timeout) - except: - raise RuntimeError("Timeout waiting for response") - - if resp.error: - return web.json_response( - { "error": resp.error.message } - ) - - if resp.answer: break - - if resp.thought: print("thought:", resp.thought) - if resp.observation: print("observation:", resp.observation) - - if resp.answer: - return web.json_response( - { "answer": resp.answer } - ) - - # Can't happen, ook at the logic - raise RuntimeError("Strange state") - - except Exception as e: - logging.error(f"Exception: {e}") - - return web.json_response( - { "error": str(e) } - ) - - finally: - await self.agent_in.unsubscribe(id) - - async def embeddings(self, request): - - id = str(uuid.uuid4()) - - try: - - data = await request.json() - - q = await self.embeddings_in.subscribe(id) - - await self.embeddings_out.send( - id, - EmbeddingsRequest( - text=data["text"], - ) - ) - - try: - resp = await asyncio.wait_for(q.get(), self.timeout) - except: - raise RuntimeError("Timeout waiting for response") - - if resp.error: - return web.json_response( - { "error": resp.error.message } - ) - - return web.json_response( - { "vectors": resp.vectors } - ) - - except Exception as e: - logging.error(f"Exception: {e}") - - return web.json_response( - { "error": str(e) } - ) - - finally: - await self.embeddings_in.unsubscribe(id) - async def load_document(self, request): try: @@ -750,215 +214,12 @@ class Api: { "error": str(e) } ) - async def 
socket(self, request): - - ws = web.WebSocketResponse() - await ws.prepare(request) - - async for msg in ws: - if msg.type == WSMsgType.TEXT: - if msg.data == 'close': - await ws.close() - else: - await ws.send_str(msg.data + '/answer') - elif msg.type == WSMsgType.ERROR: - print('ws connection closed with exception %s' % - ws.exception()) - - print('websocket connection closed') - - return ws - - async def stream(self, q, ws, running, fn): - - while running.get(): - try: - resp = await asyncio.wait_for(q.get(), 0.5) - await ws.send_json(fn(resp)) - - except TimeoutError: - continue - - except Exception as e: - print(f"Exception: {str(e)}", flush=True) - - async def stream_triples(self, request): - - id = str(uuid.uuid4()) - - q = await self.triples_tap.subscribe_all(id) - running = Running() - - ws = web.WebSocketResponse() - await ws.prepare(request) - - tsk = asyncio.create_task(self.stream( - q, - ws, - running, - serialize_triples, - )) - - async for msg in ws: - if msg.type == WSMsgType.ERROR: - break - else: - # Ignore incoming messages - pass - - running.stop() - - await self.triples_tap.unsubscribe_all(id) - await tsk - - return ws - - async def stream_graph_embeddings(self, request): - - id = str(uuid.uuid4()) - - q = await self.graph_embeddings_tap.subscribe_all(id) - running = Running() - - ws = web.WebSocketResponse() - await ws.prepare(request) - - tsk = asyncio.create_task(self.stream( - q, - ws, - running, - serialize_graph_embeddings, - )) - - async for msg in ws: - if msg.type == WSMsgType.ERROR: - break - else: - # Ignore incoming messages - pass - - running.stop() - - await self.graph_embeddings_tap.unsubscribe_all(id) - await tsk - - return ws - - async def load_triples(self, request): - - ws = web.WebSocketResponse() - await ws.prepare(request) - - async for msg in ws: - - try: - - if msg.type == WSMsgType.TEXT: - - data = msg.json() - - elt = Triples( - metadata=Metadata( - id=data["metadata"]["id"], - 
metadata=to_subgraph(data["metadata"]["metadata"]), - user=data["metadata"]["user"], - collection=data["metadata"]["collection"], - ), - triples=to_subgraph(data["triples"]), - ) - - await self.triples_pub.send(None, elt) - - elif msg.type == WSMsgType.ERROR: - break - - except Exception as e: - - print("Exception:", e) - - return ws - - async def load_graph_embeddings(self, request): - - ws = web.WebSocketResponse() - await ws.prepare(request) - - async for msg in ws: - - try: - - if msg.type == WSMsgType.TEXT: - - data = msg.json() - - elt = GraphEmbeddings( - metadata=Metadata( - id=data["metadata"]["id"], - metadata=to_subgraph(data["metadata"]["metadata"]), - user=data["metadata"]["user"], - collection=data["metadata"]["collection"], - ), - entity=to_value(data["entity"]), - vectors=data["vectors"], - ) - - await self.graph_embeddings_pub.send(None, elt) - - elif msg.type == WSMsgType.ERROR: - break - - except Exception as e: - - print("Exception:", e) - - return ws - async def app_factory(self): - self.llm_pub_task = asyncio.create_task(self.llm_in.run()) - self.llm_sub_task = asyncio.create_task(self.llm_out.run()) - - self.prompt_pub_task = asyncio.create_task(self.prompt_in.run()) - self.prompt_sub_task = asyncio.create_task(self.prompt_out.run()) - - self.graph_rag_pub_task = asyncio.create_task(self.graph_rag_in.run()) - self.graph_rag_sub_task = asyncio.create_task(self.graph_rag_out.run()) - - self.triples_query_pub_task = asyncio.create_task( - self.triples_query_in.run() - ) - self.triples_query_sub_task = asyncio.create_task( - self.triples_query_out.run() - ) - - self.agent_pub_task = asyncio.create_task(self.agent_in.run()) - self.agent_sub_task = asyncio.create_task(self.agent_out.run()) - - self.embeddings_pub_task = asyncio.create_task( - self.embeddings_in.run() - ) - self.embeddings_sub_task = asyncio.create_task( - self.embeddings_out.run() - ) - - self.triples_tap_task = asyncio.create_task( - self.triples_tap.run() - ) - - 
self.triples_pub_task = asyncio.create_task( - self.triples_pub.run() - ) - - self.graph_embeddings_tap_task = asyncio.create_task( - self.graph_embeddings_tap.run() - ) - - self.graph_embeddings_pub_task = asyncio.create_task( - self.graph_embeddings_pub.run() - ) + for ep in self.endpoints: + await ep.start() self.doc_ingest_pub_task = asyncio.create_task(self.document_out.run()) - self.text_ingest_pub_task = asyncio.create_task(self.text_out.run()) return self.app diff --git a/trustgraph-flow/trustgraph/api/gateway/socket.py b/trustgraph-flow/trustgraph/api/gateway/socket.py new file mode 100644 index 00000000..235bfd21 --- /dev/null +++ b/trustgraph-flow/trustgraph/api/gateway/socket.py @@ -0,0 +1,68 @@ + +import asyncio +from aiohttp import web, WSMsgType +import logging + +from . running import Running + +logger = logging.getLogger("socket") +logger.setLevel(logging.INFO) + +class SocketEndpoint: + + def __init__( + self, + endpoint_path="/api/v1/socket", + ): + + self.path = endpoint_path + + async def listener(self, ws, running): + + async for msg in ws: + # On error, finish + if msg.type == WSMsgType.ERROR: + break + else: + # Ignore incoming messages + pass + + running.stop() + + async def async_thread(self, ws, running): + + while running.get(): + try: + await asyncio.sleep(1) + + except TimeoutError: + continue + + except Exception as e: + print(f"Exception: {str(e)}", flush=True) + + async def handle(self, request): + + running = Running() + ws = web.WebSocketResponse() + await ws.prepare(request) + + task = asyncio.create_task(self.async_thread(ws, running)) + + await self.listener(ws, running) + + await task + + running.stop() + + return ws + + async def start(self): + pass + + def add_routes(self, app): + + app.add_routes([ + web.get(self.path, self.handle), + ]) + diff --git a/trustgraph-flow/trustgraph/api/gateway/subscriber.py b/trustgraph-flow/trustgraph/api/gateway/subscriber.py new file mode 100644 index 00000000..3d8840f6 --- /dev/null +++ 
b/trustgraph-flow/trustgraph/api/gateway/subscriber.py @@ -0,0 +1,68 @@ + +import asyncio +import aiopulsar + +class Subscriber: + + def __init__(self, pulsar_host, topic, subscription, consumer_name, + schema=None, max_size=10): + self.pulsar_host = pulsar_host + self.topic = topic + self.subscription = subscription + self.consumer_name = consumer_name + self.schema = schema + self.q = {} + self.full = {} + + async def run(self): + while True: + try: + async with aiopulsar.connect(self.pulsar_host) as client: + async with client.subscribe( + topic=self.topic, + subscription_name=self.subscription, + consumer_name=self.consumer_name, + schema=self.schema, + ) as consumer: + while True: + msg = await consumer.receive() + + # Acknowledge successful reception of the message + await consumer.acknowledge(msg) + + try: + id = msg.properties()["id"] + except: + id = None + + value = msg.value() + if id in self.q: + await self.q[id].put(value) + + for q in self.full.values(): + await q.put(value) + + except Exception as e: + print("Exception:", e, flush=True) + + # If handler drops out, sleep a retry + await asyncio.sleep(2) + + async def subscribe(self, id): + q = asyncio.Queue() + self.q[id] = q + return q + + async def unsubscribe(self, id): + if id in self.q: + del self.q[id] + + async def subscribe_all(self, id): + q = asyncio.Queue() + self.full[id] = q + return q + + async def unsubscribe_all(self, id): + if id in self.full: + del self.full[id] + diff --git a/trustgraph-flow/trustgraph/api/gateway/text_completion.py b/trustgraph-flow/trustgraph/api/gateway/text_completion.py new file mode 100644 index 00000000..04dbc9c8 --- /dev/null +++ b/trustgraph-flow/trustgraph/api/gateway/text_completion.py @@ -0,0 +1,28 @@ + +from ... schema import TextCompletionRequest, TextCompletionResponse +from ... schema import text_completion_request_queue +from ... schema import text_completion_response_queue + +from . 
endpoint import ServiceEndpoint + +class TextCompletionEndpoint(ServiceEndpoint): + def __init__(self, pulsar_host, timeout): + + super(TextCompletionEndpoint, self).__init__( + pulsar_host=pulsar_host, + request_queue=text_completion_request_queue, + response_queue=text_completion_response_queue, + request_schema=TextCompletionRequest, + response_schema=TextCompletionResponse, + endpoint_path="/api/v1/text-completion", + timeout=timeout, + ) + + def to_request(self, body): + return TextCompletionRequest( + system=body["system"], + prompt=body["prompt"] + ) + + def from_response(self, message): + return { "response": message.response } diff --git a/trustgraph-flow/trustgraph/api/gateway/triples_load.py b/trustgraph-flow/trustgraph/api/gateway/triples_load.py new file mode 100644 index 00000000..d835a363 --- /dev/null +++ b/trustgraph-flow/trustgraph/api/gateway/triples_load.py @@ -0,0 +1,59 @@ + +import asyncio +from pulsar.schema import JsonSchema +import uuid +from aiohttp import WSMsgType + +from ... schema import Metadata +from ... schema import Triples +from ... schema import triples_store_queue + +from . publisher import Publisher +from . socket import SocketEndpoint +from . 
serialize import to_subgraph + +class TriplesLoadEndpoint(SocketEndpoint): + + def __init__(self, pulsar_host, path="/api/v1/load/triples"): + + super(TriplesLoadEndpoint, self).__init__( + endpoint_path=path + ) + + self.pulsar_host=pulsar_host + + self.publisher = Publisher( + self.pulsar_host, triples_store_queue, + schema=JsonSchema(Triples) + ) + + async def start(self): + + self.task = asyncio.create_task( + self.publisher.run() + ) + + async def listener(self, ws, running): + + async for msg in ws: + # On error, finish + if msg.type == WSMsgType.ERROR: + break + else: + + data = msg.json() + + elt = Triples( + metadata=Metadata( + id=data["metadata"]["id"], + metadata=to_subgraph(data["metadata"]["metadata"]), + user=data["metadata"]["user"], + collection=data["metadata"]["collection"], + ), + triples=to_subgraph(data["triples"]), + ) + + await self.publisher.send(None, elt) + + + running.stop() diff --git a/trustgraph-flow/trustgraph/api/gateway/triples_query.py b/trustgraph-flow/trustgraph/api/gateway/triples_query.py new file mode 100644 index 00000000..8b4192d8 --- /dev/null +++ b/trustgraph-flow/trustgraph/api/gateway/triples_query.py @@ -0,0 +1,53 @@ + +from ... schema import TriplesQueryRequest, TriplesQueryResponse, Triples +from ... schema import triples_request_queue +from ... schema import triples_response_queue + +from . endpoint import ServiceEndpoint +from . 
serialize import to_value, serialize_subgraph + +class TriplesQueryEndpoint(ServiceEndpoint): + def __init__(self, pulsar_host, timeout): + + super(TriplesQueryEndpoint, self).__init__( + pulsar_host=pulsar_host, + request_queue=triples_request_queue, + response_queue=triples_response_queue, + request_schema=TriplesQueryRequest, + response_schema=TriplesQueryResponse, + endpoint_path="/api/v1/triples-query", + timeout=timeout, + ) + + def to_request(self, body): + + if "s" in body: + s = to_value(body["s"]) + else: + s = None + + if "p" in body: + p = to_value(body["p"]) + else: + p = None + + if "o" in body: + o = to_value(body["o"]) + else: + o = None + + limit = int(body.get("limit", 10000)) + + return TriplesQueryRequest( + s = s, p = p, o = o, + limit = limit, + user = body.get("user", "trustgraph"), + collection = body.get("collection", "default"), + ) + + def from_response(self, message): + print(message) + return { + "response": serialize_subgraph(message.triples) + } + diff --git a/trustgraph-flow/trustgraph/api/gateway/triples_stream.py b/trustgraph-flow/trustgraph/api/gateway/triples_stream.py new file mode 100644 index 00000000..e8b538a4 --- /dev/null +++ b/trustgraph-flow/trustgraph/api/gateway/triples_stream.py @@ -0,0 +1,56 @@ + +import asyncio +from pulsar.schema import JsonSchema +import uuid + +from ... schema import Triples +from ... schema import triples_store_queue + +from . subscriber import Subscriber +from . socket import SocketEndpoint +from . 
serialize import serialize_triples + +class TriplesStreamEndpoint(SocketEndpoint): + + def __init__(self, pulsar_host, path="/api/v1/stream/triples"): + + super(TriplesStreamEndpoint, self).__init__( + endpoint_path=path + ) + + self.pulsar_host=pulsar_host + + self.subscriber = Subscriber( + self.pulsar_host, triples_store_queue, + "api-gateway", "api-gateway", + schema=JsonSchema(Triples) + ) + + async def start(self): + + self.task = asyncio.create_task( + self.subscriber.run() + ) + + async def async_thread(self, ws, running): + + id = str(uuid.uuid4()) + + q = await self.subscriber.subscribe_all(id) + + while running.get(): + try: + resp = await asyncio.wait_for(q.get(), 0.5) + await ws.send_json(serialize_triples(resp)) + + except TimeoutError: + continue + + except Exception as e: + print(f"Exception: {str(e)}", flush=True) + break + + await self.subscriber.unsubscribe_all(id) + + running.stop() + diff --git a/trustgraph-flow/trustgraph/external/__init__.py b/trustgraph-flow/trustgraph/external/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/trustgraph-flow/trustgraph/external/wikipedia/__init__.py b/trustgraph-flow/trustgraph/external/wikipedia/__init__.py new file mode 100644 index 00000000..ba844705 --- /dev/null +++ b/trustgraph-flow/trustgraph/external/wikipedia/__init__.py @@ -0,0 +1,3 @@ + +from . service import * + diff --git a/trustgraph-flow/trustgraph/external/wikipedia/__main__.py b/trustgraph-flow/trustgraph/external/wikipedia/__main__.py new file mode 100644 index 00000000..e9136855 --- /dev/null +++ b/trustgraph-flow/trustgraph/external/wikipedia/__main__.py @@ -0,0 +1,7 @@ +#!/usr/bin/env python3 + +from . 
service import run
+
+if __name__ == '__main__':
+    run()
+
diff --git a/trustgraph-flow/trustgraph/external/wikipedia/service.py b/trustgraph-flow/trustgraph/external/wikipedia/service.py
new file mode 100644
index 00000000..932e1213
--- /dev/null
+++ b/trustgraph-flow/trustgraph/external/wikipedia/service.py
@@ -0,0 +1,102 @@
+
+"""
+Wikipedia lookup service. Fetches an extract from the Wikipedia page
+using the API.
+"""
+
+from trustgraph.schema import LookupRequest, LookupResponse, Error
+from trustgraph.schema import encyclopedia_lookup_request_queue
+from trustgraph.schema import encyclopedia_lookup_response_queue
+from trustgraph.log_level import LogLevel
+from trustgraph.base import ConsumerProducer
+import requests
+
+module = ".".join(__name__.split(".")[1:-1])
+
+default_input_queue = encyclopedia_lookup_request_queue
+default_output_queue = encyclopedia_lookup_response_queue
+default_subscriber = module
+default_url="https://en.wikipedia.org"
+
+class Processor(ConsumerProducer):
+
+    def __init__(self, **params):
+
+        input_queue = params.get("input_queue", default_input_queue)
+        output_queue = params.get("output_queue", default_output_queue)
+        subscriber = params.get("subscriber", default_subscriber)
+        url = params.get("url", default_url)
+
+        super(Processor, self).__init__(
+            **params | {
+                "input_queue": input_queue,
+                "output_queue": output_queue,
+                "subscriber": subscriber,
+                "input_schema": LookupRequest,
+                "output_schema": LookupResponse,
+            }
+        )
+
+        self.url = url
+
+    def handle(self, msg):
+
+        v = msg.value()
+
+        # Sender-produced ID
+        id = msg.properties()["id"]
+
+        print(f"Handling {v.kind} / {v.term}...", flush=True)
+
+        try:
+
+            url = f"{self.url}/api/rest_v1/page/summary/{v.term}"
+
+            resp = requests.get(url).json()
+            resp = resp["extract"]
+
+            r = LookupResponse(
+                error=None,
+                text=resp
+            )
+
+            self.producer.send(r, properties={"id": id})
+
+            self.consumer.acknowledge(msg)
+
+            return
+
+        except Exception as e:
+
+            r = 
LookupResponse( + error=Error( + type = "lookup-error", + message = str(e), + ), + text=None, + ) + self.producer.send(r, properties={"id": id}) + + self.consumer.acknowledge(msg) + + return + + + @staticmethod + def add_args(parser): + + ConsumerProducer.add_args( + parser, default_input_queue, default_subscriber, + default_output_queue, + ) + + parser.add_argument( + '-u', '--url', + default=default_url, + help=f'LLM model (default: {default_url})' + ) + +def run(): + + Processor.start(module, __doc__) + From 1b9c6be4fc3175e90c11719d820ddc3b146cd33c Mon Sep 17 00:00:00 2001 From: cybermaggedon Date: Mon, 2 Dec 2024 19:57:21 +0000 Subject: [PATCH 20/37] Feature/gateway auth (#186) * Added auth module, just a simple token at this stage * Pass auth token GATEWAY_SECRET through * Auth token not mandatory, can be provided in env var --- templates/components/trustgraph.jsonnet | 5 +++ .../trustgraph/api/gateway/agent.py | 3 +- .../trustgraph/api/gateway/auth.py | 22 +++++++++++ .../trustgraph/api/gateway/dbpedia.py | 3 +- .../trustgraph/api/gateway/embeddings.py | 3 +- .../trustgraph/api/gateway/encyclopedia.py | 3 +- .../trustgraph/api/gateway/endpoint.py | 24 ++++++++---- .../api/gateway/graph_embeddings_load.py | 6 ++- .../api/gateway/graph_embeddings_stream.py | 6 ++- .../trustgraph/api/gateway/graph_rag.py | 3 +- .../trustgraph/api/gateway/internet_search.py | 3 +- .../trustgraph/api/gateway/prompt.py | 3 +- .../trustgraph/api/gateway/service.py | 37 +++++++++++++++++-- .../trustgraph/api/gateway/socket.py | 24 ++++++++++-- .../trustgraph/api/gateway/text_completion.py | 3 +- .../trustgraph/api/gateway/triples_load.py | 4 +- .../trustgraph/api/gateway/triples_query.py | 3 +- .../trustgraph/api/gateway/triples_stream.py | 4 +- 18 files changed, 126 insertions(+), 33 deletions(-) create mode 100644 trustgraph-flow/trustgraph/api/gateway/auth.py diff --git a/templates/components/trustgraph.jsonnet b/templates/components/trustgraph.jsonnet index 6c60921c..31ae420e 
100644 --- a/templates/components/trustgraph.jsonnet +++ b/templates/components/trustgraph.jsonnet @@ -15,6 +15,9 @@ local prompt = import "prompt-template.jsonnet"; create:: function(engine) + local envSecrets = engine.envSecrets("gateway-secret") + .with_env_var("GATEWAY_SECRET", "gateway-secret"); + local port = $["api-gateway-port"]; local container = @@ -29,6 +32,7 @@ local prompt = import "prompt-template.jsonnet"; "--port", std.toString(port), ]) + .with_env_var_secrets(envSecrets) .with_limits("0.5", "256M") .with_reservations("0.1", "256M") .with_port(8000, 8000, "metrics") @@ -44,6 +48,7 @@ local prompt = import "prompt-template.jsonnet"; .with_port(port, port, "api"); engine.resources([ + envSecrets, containerSet, service, ]) diff --git a/trustgraph-flow/trustgraph/api/gateway/agent.py b/trustgraph-flow/trustgraph/api/gateway/agent.py index 28a1e185..40586133 100644 --- a/trustgraph-flow/trustgraph/api/gateway/agent.py +++ b/trustgraph-flow/trustgraph/api/gateway/agent.py @@ -6,7 +6,7 @@ from ... schema import agent_response_queue from . 
endpoint import MultiResponseServiceEndpoint class AgentEndpoint(MultiResponseServiceEndpoint): - def __init__(self, pulsar_host, timeout): + def __init__(self, pulsar_host, timeout, auth): super(AgentEndpoint, self).__init__( pulsar_host=pulsar_host, @@ -16,6 +16,7 @@ class AgentEndpoint(MultiResponseServiceEndpoint): response_schema=AgentResponse, endpoint_path="/api/v1/agent", timeout=timeout, + auth=auth, ) def to_request(self, body): diff --git a/trustgraph-flow/trustgraph/api/gateway/auth.py b/trustgraph-flow/trustgraph/api/gateway/auth.py new file mode 100644 index 00000000..a693ca32 --- /dev/null +++ b/trustgraph-flow/trustgraph/api/gateway/auth.py @@ -0,0 +1,22 @@ + +class Authenticator: + + def __init__(self, token=None, allow_all=False): + + if not allow_all and token is None: + raise RuntimeError("Need a token") + + if not allow_all and token == "": + raise RuntimeError("Need a token") + + self.token = token + self.allow_all = allow_all + + def permitted(self, token, roles): + + if self.allow_all: return True + + if self.token != token: return False + + return True + diff --git a/trustgraph-flow/trustgraph/api/gateway/dbpedia.py b/trustgraph-flow/trustgraph/api/gateway/dbpedia.py index 0ccb3d6b..4fa7336b 100644 --- a/trustgraph-flow/trustgraph/api/gateway/dbpedia.py +++ b/trustgraph-flow/trustgraph/api/gateway/dbpedia.py @@ -6,7 +6,7 @@ from ... schema import dbpedia_lookup_response_queue from . 
endpoint import ServiceEndpoint class DbpediaEndpoint(ServiceEndpoint): - def __init__(self, pulsar_host, timeout): + def __init__(self, pulsar_host, timeout, auth): super(DbpediaEndpoint, self).__init__( pulsar_host=pulsar_host, @@ -16,6 +16,7 @@ class DbpediaEndpoint(ServiceEndpoint): response_schema=LookupResponse, endpoint_path="/api/v1/dbpedia", timeout=timeout, + auth=auth, ) def to_request(self, body): diff --git a/trustgraph-flow/trustgraph/api/gateway/embeddings.py b/trustgraph-flow/trustgraph/api/gateway/embeddings.py index b5fcc0a4..7c4b578d 100644 --- a/trustgraph-flow/trustgraph/api/gateway/embeddings.py +++ b/trustgraph-flow/trustgraph/api/gateway/embeddings.py @@ -6,7 +6,7 @@ from ... schema import embeddings_response_queue from . endpoint import ServiceEndpoint class EmbeddingsEndpoint(ServiceEndpoint): - def __init__(self, pulsar_host, timeout): + def __init__(self, pulsar_host, timeout, auth): super(EmbeddingsEndpoint, self).__init__( pulsar_host=pulsar_host, @@ -16,6 +16,7 @@ class EmbeddingsEndpoint(ServiceEndpoint): response_schema=EmbeddingsResponse, endpoint_path="/api/v1/embeddings", timeout=timeout, + auth=auth, ) def to_request(self, body): diff --git a/trustgraph-flow/trustgraph/api/gateway/encyclopedia.py b/trustgraph-flow/trustgraph/api/gateway/encyclopedia.py index e379d7d4..c6041cb2 100644 --- a/trustgraph-flow/trustgraph/api/gateway/encyclopedia.py +++ b/trustgraph-flow/trustgraph/api/gateway/encyclopedia.py @@ -6,7 +6,7 @@ from ... schema import encyclopedia_lookup_response_queue from . 
endpoint import ServiceEndpoint class EncyclopediaEndpoint(ServiceEndpoint): - def __init__(self, pulsar_host, timeout): + def __init__(self, pulsar_host, timeout, auth): super(EncyclopediaEndpoint, self).__init__( pulsar_host=pulsar_host, @@ -16,6 +16,7 @@ class EncyclopediaEndpoint(ServiceEndpoint): response_schema=LookupResponse, endpoint_path="/api/v1/encyclopedia", timeout=timeout, + auth=auth, ) def to_request(self, body): diff --git a/trustgraph-flow/trustgraph/api/gateway/endpoint.py b/trustgraph-flow/trustgraph/api/gateway/endpoint.py index 075e4a0e..af7a5070 100644 --- a/trustgraph-flow/trustgraph/api/gateway/endpoint.py +++ b/trustgraph-flow/trustgraph/api/gateway/endpoint.py @@ -19,6 +19,7 @@ class ServiceEndpoint: request_queue, request_schema, response_queue, response_schema, endpoint_path, + auth, subscription="api-gateway", consumer_name="api-gateway", timeout=600, ): @@ -36,6 +37,9 @@ class ServiceEndpoint: self.path = endpoint_path self.timeout = timeout + self.auth = auth + + self.operation = "service" async def start(self): @@ -58,14 +62,24 @@ class ServiceEndpoint: id = str(uuid.uuid4()) + try: + ht = request.headers["Authorization"] + tokens = ht.split(" ", 2) + if tokens[0] != "Bearer": + return web.HTTPUnauthorized() + token = tokens[1] + except: + token = "" + + if not self.auth.permitted(token, self.operation): + return web.HTTPUnauthorized() + try: data = await request.json() q = await self.sub.subscribe(id) - print(data) - await self.pub.send( id, self.to_request(data), @@ -76,8 +90,6 @@ class ServiceEndpoint: except: raise RuntimeError("Timeout waiting for response") - print(resp) - if resp.error: return web.json_response( { "error": resp.error.message } @@ -110,8 +122,6 @@ class MultiResponseServiceEndpoint(ServiceEndpoint): q = await self.sub.subscribe(id) - print(data) - await self.pub.send( id, self.to_request(data), @@ -126,8 +136,6 @@ class MultiResponseServiceEndpoint(ServiceEndpoint): except: raise RuntimeError("Timeout waiting 
for response") - print(resp) - if resp.error: return web.json_response( { "error": resp.error.message } diff --git a/trustgraph-flow/trustgraph/api/gateway/graph_embeddings_load.py b/trustgraph-flow/trustgraph/api/gateway/graph_embeddings_load.py index 3cc3f533..15efdf5b 100644 --- a/trustgraph-flow/trustgraph/api/gateway/graph_embeddings_load.py +++ b/trustgraph-flow/trustgraph/api/gateway/graph_embeddings_load.py @@ -14,10 +14,12 @@ from . serialize import to_subgraph, to_value class GraphEmbeddingsLoadEndpoint(SocketEndpoint): - def __init__(self, pulsar_host, path="/api/v1/load/graph-embeddings"): + def __init__( + self, pulsar_host, auth, path="/api/v1/load/graph-embeddings", + ): super(GraphEmbeddingsLoadEndpoint, self).__init__( - endpoint_path=path + endpoint_path=path, auth=auth, ) self.pulsar_host=pulsar_host diff --git a/trustgraph-flow/trustgraph/api/gateway/graph_embeddings_stream.py b/trustgraph-flow/trustgraph/api/gateway/graph_embeddings_stream.py index 978684cf..7f3e5e18 100644 --- a/trustgraph-flow/trustgraph/api/gateway/graph_embeddings_stream.py +++ b/trustgraph-flow/trustgraph/api/gateway/graph_embeddings_stream.py @@ -12,10 +12,12 @@ from . serialize import serialize_graph_embeddings class GraphEmbeddingsStreamEndpoint(SocketEndpoint): - def __init__(self, pulsar_host, path="/api/v1/stream/graph-embeddings"): + def __init__( + self, pulsar_host, auth, path="/api/v1/stream/graph-embeddings" + ): super(GraphEmbeddingsStreamEndpoint, self).__init__( - endpoint_path=path + endpoint_path=path, auth=auth, ) self.pulsar_host=pulsar_host diff --git a/trustgraph-flow/trustgraph/api/gateway/graph_rag.py b/trustgraph-flow/trustgraph/api/gateway/graph_rag.py index 1381dc23..d33090ca 100644 --- a/trustgraph-flow/trustgraph/api/gateway/graph_rag.py +++ b/trustgraph-flow/trustgraph/api/gateway/graph_rag.py @@ -6,7 +6,7 @@ from ... schema import graph_rag_response_queue from . 
endpoint import ServiceEndpoint class GraphRagEndpoint(ServiceEndpoint): - def __init__(self, pulsar_host, timeout): + def __init__(self, pulsar_host, timeout, auth): super(GraphRagEndpoint, self).__init__( pulsar_host=pulsar_host, @@ -16,6 +16,7 @@ class GraphRagEndpoint(ServiceEndpoint): response_schema=GraphRagResponse, endpoint_path="/api/v1/graph-rag", timeout=timeout, + auth=auth, ) def to_request(self, body): diff --git a/trustgraph-flow/trustgraph/api/gateway/internet_search.py b/trustgraph-flow/trustgraph/api/gateway/internet_search.py index c84ed82a..f55a4a3e 100644 --- a/trustgraph-flow/trustgraph/api/gateway/internet_search.py +++ b/trustgraph-flow/trustgraph/api/gateway/internet_search.py @@ -6,7 +6,7 @@ from ... schema import internet_search_response_queue from . endpoint import ServiceEndpoint class InternetSearchEndpoint(ServiceEndpoint): - def __init__(self, pulsar_host, timeout): + def __init__(self, pulsar_host, timeout, auth): super(InternetSearchEndpoint, self).__init__( pulsar_host=pulsar_host, @@ -16,6 +16,7 @@ class InternetSearchEndpoint(ServiceEndpoint): response_schema=LookupResponse, endpoint_path="/api/v1/internet-search", timeout=timeout, + auth=auth, ) def to_request(self, body): diff --git a/trustgraph-flow/trustgraph/api/gateway/prompt.py b/trustgraph-flow/trustgraph/api/gateway/prompt.py index e02effb9..d19005bc 100644 --- a/trustgraph-flow/trustgraph/api/gateway/prompt.py +++ b/trustgraph-flow/trustgraph/api/gateway/prompt.py @@ -8,7 +8,7 @@ from ... schema import prompt_response_queue from . 
endpoint import ServiceEndpoint class PromptEndpoint(ServiceEndpoint): - def __init__(self, pulsar_host, timeout): + def __init__(self, pulsar_host, timeout, auth): super(PromptEndpoint, self).__init__( pulsar_host=pulsar_host, @@ -18,6 +18,7 @@ class PromptEndpoint(ServiceEndpoint): response_schema=PromptResponse, endpoint_path="/api/v1/prompt", timeout=timeout, + auth=auth, ) def to_request(self, body): diff --git a/trustgraph-flow/trustgraph/api/gateway/service.py b/trustgraph-flow/trustgraph/api/gateway/service.py index dcdd9779..a25dd9dc 100755 --- a/trustgraph-flow/trustgraph/api/gateway/service.py +++ b/trustgraph-flow/trustgraph/api/gateway/service.py @@ -45,6 +45,7 @@ from . triples_stream import TriplesStreamEndpoint from . graph_embeddings_stream import GraphEmbeddingsStreamEndpoint from . triples_load import TriplesLoadEndpoint from . graph_embeddings_load import GraphEmbeddingsLoadEndpoint +from . auth import Authenticator logger = logging.getLogger("api") logger.setLevel(logging.INFO) @@ -52,6 +53,7 @@ logger.setLevel(logging.INFO) default_pulsar_host = os.getenv("PULSAR_HOST", "pulsar://pulsar:6650") default_timeout = 600 default_port = 8088 +default_api_token = os.getenv("GATEWAY_SECRET", "") class Api: @@ -66,45 +68,66 @@ class Api: self.timeout = int(config.get("timeout", default_timeout)) self.pulsar_host = config.get("pulsar_host", default_pulsar_host) + api_token = config.get("api_token", default_api_token) + + # Token not set, or token equal empty string means no auth + if api_token: + self.auth = Authenticator(token=api_token) + else: + self.auth = Authenticator(allow_all=True) + self.endpoints = [ TextCompletionEndpoint( pulsar_host=self.pulsar_host, timeout=self.timeout, + auth = self.auth, ), PromptEndpoint( pulsar_host=self.pulsar_host, timeout=self.timeout, + auth = self.auth, ), GraphRagEndpoint( pulsar_host=self.pulsar_host, timeout=self.timeout, + auth = self.auth, ), TriplesQueryEndpoint( pulsar_host=self.pulsar_host, 
timeout=self.timeout, + auth = self.auth, ), EmbeddingsEndpoint( pulsar_host=self.pulsar_host, timeout=self.timeout, + auth = self.auth, ), AgentEndpoint( pulsar_host=self.pulsar_host, timeout=self.timeout, + auth = self.auth, ), EncyclopediaEndpoint( pulsar_host=self.pulsar_host, timeout=self.timeout, + auth = self.auth, ), DbpediaEndpoint( pulsar_host=self.pulsar_host, timeout=self.timeout, + auth = self.auth, ), InternetSearchEndpoint( pulsar_host=self.pulsar_host, timeout=self.timeout, + auth = self.auth, ), TriplesStreamEndpoint( - pulsar_host=self.pulsar_host + pulsar_host=self.pulsar_host, + auth = self.auth, ), GraphEmbeddingsStreamEndpoint( - pulsar_host=self.pulsar_host + pulsar_host=self.pulsar_host, + auth = self.auth, ), TriplesLoadEndpoint( - pulsar_host=self.pulsar_host + pulsar_host=self.pulsar_host, + auth = self.auth, ), GraphEmbeddingsLoadEndpoint( - pulsar_host=self.pulsar_host + pulsar_host=self.pulsar_host, + auth = self.auth, ), ] @@ -254,6 +277,12 @@ def run(): help=f'API request timeout in seconds (default: {default_timeout})', ) + parser.add_argument( + '--api-token', + default=default_api_token, + help=f'Secret API token (default: no auth)', + ) + parser.add_argument( '-l', '--log-level', type=LogLevel, diff --git a/trustgraph-flow/trustgraph/api/gateway/socket.py b/trustgraph-flow/trustgraph/api/gateway/socket.py index 235bfd21..869792b7 100644 --- a/trustgraph-flow/trustgraph/api/gateway/socket.py +++ b/trustgraph-flow/trustgraph/api/gateway/socket.py @@ -11,11 +11,12 @@ logger.setLevel(logging.INFO) class SocketEndpoint: def __init__( - self, - endpoint_path="/api/v1/socket", + self, endpoint_path, auth, ): self.path = endpoint_path + self.auth = auth + self.operation = "socket" async def listener(self, ws, running): @@ -43,18 +44,33 @@ class SocketEndpoint: async def handle(self, request): + try: + token = request.query['token'] + except: + token = "" + + if not self.auth.permitted(token, self.operation): + return 
web.HTTPUnauthorized() + running = Running() ws = web.WebSocketResponse() await ws.prepare(request) task = asyncio.create_task(self.async_thread(ws, running)) - await self.listener(ws, running) + try: - await task + await self.listener(ws, running) + + except Exception as e: + print(e, flush=True) running.stop() + await ws.close() + + await task + return ws async def start(self): diff --git a/trustgraph-flow/trustgraph/api/gateway/text_completion.py b/trustgraph-flow/trustgraph/api/gateway/text_completion.py index 04dbc9c8..d9f69b7e 100644 --- a/trustgraph-flow/trustgraph/api/gateway/text_completion.py +++ b/trustgraph-flow/trustgraph/api/gateway/text_completion.py @@ -6,7 +6,7 @@ from ... schema import text_completion_response_queue from . endpoint import ServiceEndpoint class TextCompletionEndpoint(ServiceEndpoint): - def __init__(self, pulsar_host, timeout): + def __init__(self, pulsar_host, timeout, auth): super(TextCompletionEndpoint, self).__init__( pulsar_host=pulsar_host, @@ -16,6 +16,7 @@ class TextCompletionEndpoint(ServiceEndpoint): response_schema=TextCompletionResponse, endpoint_path="/api/v1/text-completion", timeout=timeout, + auth=auth, ) def to_request(self, body): diff --git a/trustgraph-flow/trustgraph/api/gateway/triples_load.py b/trustgraph-flow/trustgraph/api/gateway/triples_load.py index d835a363..7f4561b1 100644 --- a/trustgraph-flow/trustgraph/api/gateway/triples_load.py +++ b/trustgraph-flow/trustgraph/api/gateway/triples_load.py @@ -14,10 +14,10 @@ from . 
serialize import to_subgraph class TriplesLoadEndpoint(SocketEndpoint): - def __init__(self, pulsar_host, path="/api/v1/load/triples"): + def __init__(self, pulsar_host, auth, path="/api/v1/load/triples"): super(TriplesLoadEndpoint, self).__init__( - endpoint_path=path + endpoint_path=path, auth=auth, ) self.pulsar_host=pulsar_host diff --git a/trustgraph-flow/trustgraph/api/gateway/triples_query.py b/trustgraph-flow/trustgraph/api/gateway/triples_query.py index 8b4192d8..9c5939c8 100644 --- a/trustgraph-flow/trustgraph/api/gateway/triples_query.py +++ b/trustgraph-flow/trustgraph/api/gateway/triples_query.py @@ -7,7 +7,7 @@ from . endpoint import ServiceEndpoint from . serialize import to_value, serialize_subgraph class TriplesQueryEndpoint(ServiceEndpoint): - def __init__(self, pulsar_host, timeout): + def __init__(self, pulsar_host, timeout, auth): super(TriplesQueryEndpoint, self).__init__( pulsar_host=pulsar_host, @@ -17,6 +17,7 @@ class TriplesQueryEndpoint(ServiceEndpoint): response_schema=TriplesQueryResponse, endpoint_path="/api/v1/triples-query", timeout=timeout, + auth=auth, ) def to_request(self, body): diff --git a/trustgraph-flow/trustgraph/api/gateway/triples_stream.py b/trustgraph-flow/trustgraph/api/gateway/triples_stream.py index e8b538a4..6ecd2bdb 100644 --- a/trustgraph-flow/trustgraph/api/gateway/triples_stream.py +++ b/trustgraph-flow/trustgraph/api/gateway/triples_stream.py @@ -12,10 +12,10 @@ from . 
serialize import serialize_triples class TriplesStreamEndpoint(SocketEndpoint): - def __init__(self, pulsar_host, path="/api/v1/stream/triples"): + def __init__(self, pulsar_host, auth, path="/api/v1/stream/triples"): super(TriplesStreamEndpoint, self).__init__( - endpoint_path=path + endpoint_path=path, auth=auth, ) self.pulsar_host=pulsar_host From f24eed3023412a99f2baad6d3a67f7bac5de05af Mon Sep 17 00:00:00 2001 From: cybermaggedon Date: Tue, 3 Dec 2024 09:51:33 +0000 Subject: [PATCH 21/37] Fix/pinecone de (#187) * Fix Goog AI studio settings * Fix pinecone startup params --- templates/components/googleaistudio.jsonnet | 2 +- .../query/doc_embeddings/pinecone/service.py | 11 ++++++++--- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/templates/components/googleaistudio.jsonnet b/templates/components/googleaistudio.jsonnet index 4088ceef..c2a40f2c 100644 --- a/templates/components/googleaistudio.jsonnet +++ b/templates/components/googleaistudio.jsonnet @@ -13,7 +13,7 @@ local prompts = import "prompts/mixtral.jsonnet"; create:: function(engine) - local envSecrets = engine.envSecrets("bedrock-credentials") + local envSecrets = engine.envSecrets("googleaistudio-key") .with_env_var("GOOGLE_AI_STUDIO_KEY", "googleaistudio-key"); local container = diff --git a/trustgraph-flow/trustgraph/query/doc_embeddings/pinecone/service.py b/trustgraph-flow/trustgraph/query/doc_embeddings/pinecone/service.py index 3fcbfb21..b8502143 100755 --- a/trustgraph-flow/trustgraph/query/doc_embeddings/pinecone/service.py +++ b/trustgraph-flow/trustgraph/query/doc_embeddings/pinecone/service.py @@ -131,9 +131,14 @@ class Processor(ConsumerProducer): ) parser.add_argument( - '-t', '--store-uri', - default=default_store_uri, - help=f'Milvus store URI (default: {default_store_uri})' + '-a', '--api-key', + default=default_api_key, + help='Pinecone API key. (default from PINECONE_API_KEY)' + ) + + parser.add_argument( + '-u', '--url', + help='Pinecone URL. 
If unspecified, serverless is used' ) def run(): From df23e29971ceaea0cd198160b27654488631a1e9 Mon Sep 17 00:00:00 2001 From: cybermaggedon Date: Tue, 3 Dec 2024 09:52:00 +0000 Subject: [PATCH 22/37] Add debug to endpoint (#188) --- trustgraph-flow/trustgraph/api/gateway/endpoint.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/trustgraph-flow/trustgraph/api/gateway/endpoint.py b/trustgraph-flow/trustgraph/api/gateway/endpoint.py index af7a5070..dc380f4b 100644 --- a/trustgraph-flow/trustgraph/api/gateway/endpoint.py +++ b/trustgraph-flow/trustgraph/api/gateway/endpoint.py @@ -62,6 +62,8 @@ class ServiceEndpoint: id = str(uuid.uuid4()) + print(request.path, "...") + try: ht = request.headers["Authorization"] tokens = ht.split(" ", 2) @@ -78,23 +80,31 @@ class ServiceEndpoint: data = await request.json() + print(data) + q = await self.sub.subscribe(id) await self.pub.send( id, self.to_request(data), ) + print("Request sent") try: resp = await asyncio.wait_for(q.get(), self.timeout) except: raise RuntimeError("Timeout waiting for response") + print("Response got") + if resp.error: + print("Error") return web.json_response( { "error": resp.error.message } ) + print("Send response") + return web.json_response( self.from_response(resp) ) From 7e78aa6d91aba84fcfc66db157a89b76847b1586 Mon Sep 17 00:00:00 2001 From: cybermaggedon Date: Tue, 3 Dec 2024 14:13:40 +0000 Subject: [PATCH 23/37] Reduc pulsar connections (#189) --- .../trustgraph/api/gateway/endpoint.py | 11 +++-- .../api/gateway/graph_embeddings_load.py | 4 +- .../api/gateway/graph_embeddings_stream.py | 4 +- .../trustgraph/api/gateway/publisher.py | 26 ++++++------ .../trustgraph/api/gateway/service.py | 33 ++++++++++++--- .../trustgraph/api/gateway/socket.py | 6 +++ .../trustgraph/api/gateway/subscriber.py | 42 +++++++++---------- .../trustgraph/api/gateway/triples_load.py | 4 +- .../trustgraph/api/gateway/triples_stream.py | 4 +- 9 files changed, 82 insertions(+), 52 deletions(-) diff --git 
a/trustgraph-flow/trustgraph/api/gateway/endpoint.py b/trustgraph-flow/trustgraph/api/gateway/endpoint.py index dc380f4b..c7cd6b04 100644 --- a/trustgraph-flow/trustgraph/api/gateway/endpoint.py +++ b/trustgraph-flow/trustgraph/api/gateway/endpoint.py @@ -41,10 +41,15 @@ class ServiceEndpoint: self.operation = "service" - async def start(self): + async def start(self, client): - self.pub_task = asyncio.create_task(self.pub.run()) - self.sub_task = asyncio.create_task(self.sub.run()) + self.pub_task = asyncio.create_task(self.pub.run(client)) + self.sub_task = asyncio.create_task(self.sub.run(client)) + + async def join(self): + + await self.pub_task + await self.sub_task def add_routes(self, app): diff --git a/trustgraph-flow/trustgraph/api/gateway/graph_embeddings_load.py b/trustgraph-flow/trustgraph/api/gateway/graph_embeddings_load.py index 15efdf5b..764e7210 100644 --- a/trustgraph-flow/trustgraph/api/gateway/graph_embeddings_load.py +++ b/trustgraph-flow/trustgraph/api/gateway/graph_embeddings_load.py @@ -29,10 +29,10 @@ class GraphEmbeddingsLoadEndpoint(SocketEndpoint): schema=JsonSchema(GraphEmbeddings) ) - async def start(self): + async def start(self, client): self.task = asyncio.create_task( - self.publisher.run() + self.publisher.run(client) ) async def listener(self, ws, running): diff --git a/trustgraph-flow/trustgraph/api/gateway/graph_embeddings_stream.py b/trustgraph-flow/trustgraph/api/gateway/graph_embeddings_stream.py index 7f3e5e18..12647547 100644 --- a/trustgraph-flow/trustgraph/api/gateway/graph_embeddings_stream.py +++ b/trustgraph-flow/trustgraph/api/gateway/graph_embeddings_stream.py @@ -28,10 +28,10 @@ class GraphEmbeddingsStreamEndpoint(SocketEndpoint): schema=JsonSchema(GraphEmbeddings) ) - async def start(self): + async def start(self, client): self.task = asyncio.create_task( - self.subscriber.run() + self.subscriber.run(client) ) async def async_thread(self, ws, running): diff --git 
a/trustgraph-flow/trustgraph/api/gateway/publisher.py b/trustgraph-flow/trustgraph/api/gateway/publisher.py index 1bff44dd..2bbf05d9 100644 --- a/trustgraph-flow/trustgraph/api/gateway/publisher.py +++ b/trustgraph-flow/trustgraph/api/gateway/publisher.py @@ -1,6 +1,5 @@ import asyncio -import aiopulsar class Publisher: @@ -12,24 +11,23 @@ class Publisher: self.q = asyncio.Queue(maxsize=max_size) self.chunking_enabled = chunking_enabled - async def run(self): + async def run(self, client): while True: try: - async with aiopulsar.connect(self.pulsar_host) as client: - async with client.create_producer( - topic=self.topic, - schema=self.schema, - chunking_enabled=self.chunking_enabled, - ) as producer: - while True: - id, item = await self.q.get() + async with client.create_producer( + topic=self.topic, + schema=self.schema, + chunking_enabled=self.chunking_enabled, + ) as producer: + while True: + id, item = await self.q.get() - if id: - await producer.send(item, { "id": id }) - else: - await producer.send(item) + if id: + await producer.send(item, { "id": id }) + else: + await producer.send(item) except Exception as e: print("Exception:", e, flush=True) diff --git a/trustgraph-flow/trustgraph/api/gateway/service.py b/trustgraph-flow/trustgraph/api/gateway/service.py index a25dd9dc..38a86a51 100755 --- a/trustgraph-flow/trustgraph/api/gateway/service.py +++ b/trustgraph-flow/trustgraph/api/gateway/service.py @@ -17,6 +17,7 @@ from aiohttp import web import logging import os import base64 +import aiopulsar import pulsar from pulsar.schema import JsonSchema @@ -237,13 +238,35 @@ class Api: { "error": str(e) } ) + async def run_endpoints(self): + + async with aiopulsar.connect(self.pulsar_host) as client: + + for ep in self.endpoints: + await ep.start(client) + + self.doc_ingest_pub_task = asyncio.create_task( + self.document_out.run(client) + ) + + self.text_ingest_pub_task = asyncio.create_task( + self.text_out.run(client) + ) + + print("Endpoints are running...") + 
+ # They never exit + for ep in self.endpoints: + await ep.join() + + await self.doc_ingest_pub_task + await self.text_ingest_pub_task + + print("Endpoints are stopped.") + async def app_factory(self): - for ep in self.endpoints: - await ep.start() - - self.doc_ingest_pub_task = asyncio.create_task(self.document_out.run()) - self.text_ingest_pub_task = asyncio.create_task(self.text_out.run()) + self.endpoint_task = asyncio.create_task(self.run_endpoints()) return self.app diff --git a/trustgraph-flow/trustgraph/api/gateway/socket.py b/trustgraph-flow/trustgraph/api/gateway/socket.py index 869792b7..a4cb0feb 100644 --- a/trustgraph-flow/trustgraph/api/gateway/socket.py +++ b/trustgraph-flow/trustgraph/api/gateway/socket.py @@ -76,6 +76,12 @@ class SocketEndpoint: async def start(self): pass + async def join(self): + + # Nothing to wait for + while True: + await asyncio.sleep(100) + def add_routes(self, app): app.add_routes([ diff --git a/trustgraph-flow/trustgraph/api/gateway/subscriber.py b/trustgraph-flow/trustgraph/api/gateway/subscriber.py index 3d8840f6..ba53bab6 100644 --- a/trustgraph-flow/trustgraph/api/gateway/subscriber.py +++ b/trustgraph-flow/trustgraph/api/gateway/subscriber.py @@ -1,6 +1,5 @@ import asyncio -import aiopulsar class Subscriber: @@ -14,33 +13,32 @@ class Subscriber: self.q = {} self.full = {} - async def run(self): + async def run(self, client): while True: try: - async with aiopulsar.connect(self.pulsar_host) as client: - async with client.subscribe( - topic=self.topic, - subscription_name=self.subscription, - consumer_name=self.consumer_name, - schema=self.schema, - ) as consumer: - while True: - msg = await consumer.receive() + async with client.subscribe( + topic=self.topic, + subscription_name=self.subscription, + consumer_name=self.consumer_name, + schema=self.schema, + ) as consumer: + while True: + msg = await consumer.receive() - # Acknowledge successful reception of the message - await consumer.acknowledge(msg) + # Acknowledge 
successful reception of the message + await consumer.acknowledge(msg) - try: - id = msg.properties()["id"] - except: - id = None + try: + id = msg.properties()["id"] + except: + id = None - value = msg.value() - if id in self.q: - await self.q[id].put(value) + value = msg.value() + if id in self.q: + await self.q[id].put(value) - for q in self.full.values(): - await q.put(value) + for q in self.full.values(): + await q.put(value) except Exception as e: print("Exception:", e, flush=True) diff --git a/trustgraph-flow/trustgraph/api/gateway/triples_load.py b/trustgraph-flow/trustgraph/api/gateway/triples_load.py index 7f4561b1..0460d1e4 100644 --- a/trustgraph-flow/trustgraph/api/gateway/triples_load.py +++ b/trustgraph-flow/trustgraph/api/gateway/triples_load.py @@ -27,10 +27,10 @@ class TriplesLoadEndpoint(SocketEndpoint): schema=JsonSchema(Triples) ) - async def start(self): + async def start(self, client): self.task = asyncio.create_task( - self.publisher.run() + self.publisher.run(client) ) async def listener(self, ws, running): diff --git a/trustgraph-flow/trustgraph/api/gateway/triples_stream.py b/trustgraph-flow/trustgraph/api/gateway/triples_stream.py index 6ecd2bdb..571d5e61 100644 --- a/trustgraph-flow/trustgraph/api/gateway/triples_stream.py +++ b/trustgraph-flow/trustgraph/api/gateway/triples_stream.py @@ -26,10 +26,10 @@ class TriplesStreamEndpoint(SocketEndpoint): schema=JsonSchema(Triples) ) - async def start(self): + async def start(self, client): self.task = asyncio.create_task( - self.subscriber.run() + self.subscriber.run(client) ) async def async_thread(self, ws, running): From 26865a515caae6a260834dfddbcce1aaea47e4b4 Mon Sep 17 00:00:00 2001 From: cybermaggedon Date: Tue, 3 Dec 2024 18:03:00 +0000 Subject: [PATCH 24/37] Fix/async problem (#190) * Back out previous change * To multithreads * Remove aiopulsar dependency --- trustgraph-flow/setup.py | 1 - .../trustgraph/api/gateway/endpoint.py | 44 +++----- .../api/gateway/graph_embeddings_load.py | 
8 +- .../api/gateway/graph_embeddings_stream.py | 15 ++- .../trustgraph/api/gateway/publisher.py | 48 ++++++--- .../trustgraph/api/gateway/service.py | 39 ++----- .../trustgraph/api/gateway/socket.py | 6 -- .../trustgraph/api/gateway/subscriber.py | 101 +++++++++++++----- .../trustgraph/api/gateway/triples_load.py | 8 +- .../trustgraph/api/gateway/triples_stream.py | 15 ++- 10 files changed, 149 insertions(+), 136 deletions(-) diff --git a/trustgraph-flow/setup.py b/trustgraph-flow/setup.py index 65bb7326..e6c732a3 100644 --- a/trustgraph-flow/setup.py +++ b/trustgraph-flow/setup.py @@ -59,7 +59,6 @@ setuptools.setup( "ibis", "jsonschema", "aiohttp", - "aiopulsar-py", "pinecone[grpc]", ], scripts=[ diff --git a/trustgraph-flow/trustgraph/api/gateway/endpoint.py b/trustgraph-flow/trustgraph/api/gateway/endpoint.py index c7cd6b04..2b246361 100644 --- a/trustgraph-flow/trustgraph/api/gateway/endpoint.py +++ b/trustgraph-flow/trustgraph/api/gateway/endpoint.py @@ -41,15 +41,10 @@ class ServiceEndpoint: self.operation = "service" - async def start(self, client): + async def start(self): - self.pub_task = asyncio.create_task(self.pub.run(client)) - self.sub_task = asyncio.create_task(self.sub.run(client)) - - async def join(self): - - await self.pub_task - await self.sub_task + self.pub.start() + self.sub.start() def add_routes(self, app): @@ -87,20 +82,18 @@ class ServiceEndpoint: print(data) - q = await self.sub.subscribe(id) + q = self.sub.subscribe(id) - await self.pub.send( - id, - self.to_request(data), + await asyncio.to_thread( + self.pub.send, id, self.to_request(data) ) - print("Request sent") try: - resp = await asyncio.wait_for(q.get(), self.timeout) - except: - raise RuntimeError("Timeout waiting for response") + resp = await asyncio.to_thread(q.get, timeout=self.timeout) + except Exception as e: + raise RuntimeError("Timeout") - print("Response got") + print(resp) if resp.error: print("Error") @@ -108,8 +101,6 @@ class ServiceEndpoint: { "error": 
resp.error.message } ) - print("Send response") - return web.json_response( self.from_response(resp) ) @@ -122,7 +113,7 @@ class ServiceEndpoint: ) finally: - await self.sub.unsubscribe(id) + self.sub.unsubscribe(id) class MultiResponseServiceEndpoint(ServiceEndpoint): @@ -135,11 +126,10 @@ class MultiResponseServiceEndpoint(ServiceEndpoint): data = await request.json() - q = await self.sub.subscribe(id) + q = self.sub.subscribe(id) - await self.pub.send( - id, - self.to_request(data), + await asyncio.to_thread( + self.pub.send, id, self.to_request(data) ) # Keeps looking at responses... @@ -147,8 +137,8 @@ class MultiResponseServiceEndpoint(ServiceEndpoint): while True: try: - resp = await asyncio.wait_for(q.get(), self.timeout) - except: + resp = await asyncio.to_thread(q.get, timeout=self.timeout) + except Exception as e: raise RuntimeError("Timeout waiting for response") if resp.error: @@ -173,4 +163,4 @@ class MultiResponseServiceEndpoint(ServiceEndpoint): ) finally: - await self.sub.unsubscribe(id) + self.sub.unsubscribe(id) diff --git a/trustgraph-flow/trustgraph/api/gateway/graph_embeddings_load.py b/trustgraph-flow/trustgraph/api/gateway/graph_embeddings_load.py index 764e7210..81fb6647 100644 --- a/trustgraph-flow/trustgraph/api/gateway/graph_embeddings_load.py +++ b/trustgraph-flow/trustgraph/api/gateway/graph_embeddings_load.py @@ -29,11 +29,9 @@ class GraphEmbeddingsLoadEndpoint(SocketEndpoint): schema=JsonSchema(GraphEmbeddings) ) - async def start(self, client): + async def start(self): - self.task = asyncio.create_task( - self.publisher.run(client) - ) + self.publisher.start() async def listener(self, ws, running): @@ -56,7 +54,7 @@ class GraphEmbeddingsLoadEndpoint(SocketEndpoint): vectors=data["vectors"], ) - await self.publisher.send(None, elt) + self.publisher.send(None, elt) running.stop() diff --git a/trustgraph-flow/trustgraph/api/gateway/graph_embeddings_stream.py b/trustgraph-flow/trustgraph/api/gateway/graph_embeddings_stream.py index 
12647547..3d4efd45 100644 --- a/trustgraph-flow/trustgraph/api/gateway/graph_embeddings_stream.py +++ b/trustgraph-flow/trustgraph/api/gateway/graph_embeddings_stream.py @@ -1,5 +1,6 @@ import asyncio +import queue from pulsar.schema import JsonSchema import uuid @@ -28,31 +29,29 @@ class GraphEmbeddingsStreamEndpoint(SocketEndpoint): schema=JsonSchema(GraphEmbeddings) ) - async def start(self, client): + async def start(self): - self.task = asyncio.create_task( - self.subscriber.run(client) - ) + self.subscriber.start() async def async_thread(self, ws, running): id = str(uuid.uuid4()) - q = await self.subscriber.subscribe_all(id) + q = self.subscriber.subscribe_all(id) while running.get(): try: - resp = await asyncio.wait_for(q.get(), 0.5) + resp = await asyncio.to_thread(q.get, timeout=0.5) await ws.send_json(serialize_graph_embeddings(resp)) - except TimeoutError: + except queue.Empty: continue except Exception as e: print(f"Exception: {str(e)}", flush=True) break - await self.subscriber.unsubscribe_all(id) + self.subscriber.unsubscribe_all(id) running.stop() diff --git a/trustgraph-flow/trustgraph/api/gateway/publisher.py b/trustgraph-flow/trustgraph/api/gateway/publisher.py index 2bbf05d9..89c612ce 100644 --- a/trustgraph-flow/trustgraph/api/gateway/publisher.py +++ b/trustgraph-flow/trustgraph/api/gateway/publisher.py @@ -1,5 +1,8 @@ -import asyncio +import queue +import time +import pulsar +import threading class Publisher: @@ -8,32 +11,43 @@ class Publisher: self.pulsar_host = pulsar_host self.topic = topic self.schema = schema - self.q = asyncio.Queue(maxsize=max_size) + self.q = queue.Queue(maxsize=max_size) self.chunking_enabled = chunking_enabled - async def run(self, client): + def start(self): + self.task = threading.Thread(target=self.run) + self.task.start() + + def run(self): while True: try: - async with client.create_producer( - topic=self.topic, - schema=self.schema, - chunking_enabled=self.chunking_enabled, - ) as producer: - while True: - id, 
item = await self.q.get() - if id: - await producer.send(item, { "id": id }) - else: - await producer.send(item) + client = pulsar.Client( + self.pulsar_host, + ) + + producer = client.create_producer( + topic=self.topic, + schema=self.schema, + chunking_enabled=self.chunking_enabled, + ) + + while True: + + id, item = self.q.get() + + if id: + producer.send(item, { "id": id }) + else: + producer.send(item) except Exception as e: print("Exception:", e, flush=True) # If handler drops out, sleep a retry - await asyncio.sleep(2) + time.sleep(2) - async def send(self, id, msg): - await self.q.put((id, msg)) + def send(self, id, msg): + self.q.put((id, msg)) diff --git a/trustgraph-flow/trustgraph/api/gateway/service.py b/trustgraph-flow/trustgraph/api/gateway/service.py index 38a86a51..38ff8291 100755 --- a/trustgraph-flow/trustgraph/api/gateway/service.py +++ b/trustgraph-flow/trustgraph/api/gateway/service.py @@ -17,7 +17,6 @@ from aiohttp import web import logging import os import base64 -import aiopulsar import pulsar from pulsar.schema import JsonSchema @@ -167,7 +166,8 @@ class Api: # content is valid base64 doc = base64.b64decode(data["data"]) - resp = await self.document_out.send( + resp = await asyncio.to_thread( + self.document_out.send, None, Document( metadata=Metadata( @@ -212,7 +212,8 @@ class Api: # Text is base64 encoded text = base64.b64decode(data["text"]).decode(charset) - resp = await self.text_out.send( + resp = asyncio.to_thread( + self.text_out.send, None, TextDocument( metadata=Metadata( @@ -238,35 +239,13 @@ class Api: { "error": str(e) } ) - async def run_endpoints(self): - - async with aiopulsar.connect(self.pulsar_host) as client: - - for ep in self.endpoints: - await ep.start(client) - - self.doc_ingest_pub_task = asyncio.create_task( - self.document_out.run(client) - ) - - self.text_ingest_pub_task = asyncio.create_task( - self.text_out.run(client) - ) - - print("Endpoints are running...") - - # They never exit - for ep in self.endpoints: 
- await ep.join() - - await self.doc_ingest_pub_task - await self.text_ingest_pub_task - - print("Endpoints are stopped.") - async def app_factory(self): - self.endpoint_task = asyncio.create_task(self.run_endpoints()) + for ep in self.endpoints: + await ep.start() + + self.document_out.start() + self.text_out.start() return self.app diff --git a/trustgraph-flow/trustgraph/api/gateway/socket.py b/trustgraph-flow/trustgraph/api/gateway/socket.py index a4cb0feb..869792b7 100644 --- a/trustgraph-flow/trustgraph/api/gateway/socket.py +++ b/trustgraph-flow/trustgraph/api/gateway/socket.py @@ -76,12 +76,6 @@ class SocketEndpoint: async def start(self): pass - async def join(self): - - # Nothing to wait for - while True: - await asyncio.sleep(100) - def add_routes(self, app): app.add_routes([ diff --git a/trustgraph-flow/trustgraph/api/gateway/subscriber.py b/trustgraph-flow/trustgraph/api/gateway/subscriber.py index ba53bab6..cccfc5b4 100644 --- a/trustgraph-flow/trustgraph/api/gateway/subscriber.py +++ b/trustgraph-flow/trustgraph/api/gateway/subscriber.py @@ -1,10 +1,13 @@ -import asyncio +import queue +import pulsar +import threading +import time class Subscriber: def __init__(self, pulsar_host, topic, subscription, consumer_name, - schema=None, max_size=10): + schema=None, max_size=100): self.pulsar_host = pulsar_host self.topic = topic self.subscription = subscription @@ -12,55 +15,95 @@ class Subscriber: self.schema = schema self.q = {} self.full = {} + self.max_size = max_size + self.lock = threading.Lock() + + def start(self): + self.task = threading.Thread(target=self.run) + self.task.start() + + def run(self): - async def run(self, client): while True: + try: - async with client.subscribe( + + client = pulsar.Client( + self.pulsar_host, + ) + + consumer = client.subscribe( topic=self.topic, subscription_name=self.subscription, consumer_name=self.consumer_name, schema=self.schema, - ) as consumer: - while True: - msg = await consumer.receive() + ) - # 
Acknowledge successful reception of the message - await consumer.acknowledge(msg) + while True: - try: - id = msg.properties()["id"] - except: - id = None + msg = consumer.receive() + + # Acknowledge successful reception of the message + consumer.acknowledge(msg) + + try: + id = msg.properties()["id"] + except: + id = None + + value = msg.value() + + with self.lock: - value = msg.value() if id in self.q: - await self.q[id].put(value) + try: + self.q[id].put(value, timeout=0.5) + except: + pass for q in self.full.values(): - await q.put(value) + try: + q.put(value, timeout=0.5) + except: + pass except Exception as e: print("Exception:", e, flush=True) # If handler drops out, sleep a retry - await asyncio.sleep(2) + time.sleep(2) + + def subscribe(self, id): + + with self.lock: + + q = queue.Queue(maxsize=self.max_size) + self.q[id] = q - async def subscribe(self, id): - q = asyncio.Queue() - self.q[id] = q return q - async def unsubscribe(self, id): - if id in self.q: - del self.q[id] + def unsubscribe(self, id): + + with self.lock: + + if id in self.q: +# self.q[id].shutdown(immediate=True) + del self.q[id] - async def subscribe_all(self, id): - q = asyncio.Queue() - self.full[id] = q + def subscribe_all(self, id): + + with self.lock: + + q = queue.Queue(maxsize=self.max_size) + self.full[id] = q + return q - async def unsubscribe_all(self, id): - if id in self.full: - del self.full[id] + def unsubscribe_all(self, id): + + with self.lock: + + if id in self.full: +# self.full[id].shutdown(immediate=True) + del self.full[id] diff --git a/trustgraph-flow/trustgraph/api/gateway/triples_load.py b/trustgraph-flow/trustgraph/api/gateway/triples_load.py index 0460d1e4..dbb3e617 100644 --- a/trustgraph-flow/trustgraph/api/gateway/triples_load.py +++ b/trustgraph-flow/trustgraph/api/gateway/triples_load.py @@ -27,11 +27,9 @@ class TriplesLoadEndpoint(SocketEndpoint): schema=JsonSchema(Triples) ) - async def start(self, client): + async def start(self): - self.task = 
asyncio.create_task( - self.publisher.run(client) - ) + self.publisher.start() async def listener(self, ws, running): @@ -53,7 +51,7 @@ class TriplesLoadEndpoint(SocketEndpoint): triples=to_subgraph(data["triples"]), ) - await self.publisher.send(None, elt) + self.publisher.send(None, elt) running.stop() diff --git a/trustgraph-flow/trustgraph/api/gateway/triples_stream.py b/trustgraph-flow/trustgraph/api/gateway/triples_stream.py index 571d5e61..4638e08d 100644 --- a/trustgraph-flow/trustgraph/api/gateway/triples_stream.py +++ b/trustgraph-flow/trustgraph/api/gateway/triples_stream.py @@ -1,5 +1,6 @@ import asyncio +import queue from pulsar.schema import JsonSchema import uuid @@ -26,31 +27,29 @@ class TriplesStreamEndpoint(SocketEndpoint): schema=JsonSchema(Triples) ) - async def start(self, client): + async def start(self): - self.task = asyncio.create_task( - self.subscriber.run(client) - ) + self.subscriber.start() async def async_thread(self, ws, running): id = str(uuid.uuid4()) - q = await self.subscriber.subscribe_all(id) + q = self.subscriber.subscribe_all(id) while running.get(): try: - resp = await asyncio.wait_for(q.get(), 0.5) + resp = await asyncio.to_thread(q.get, timeout=0.5) await ws.send_json(serialize_triples(resp)) - except TimeoutError: + except queue.Empty: continue except Exception as e: print(f"Exception: {str(e)}", flush=True) break - await self.subscriber.unsubscribe_all(id) + self.subscriber.unsubscribe_all(id) running.stop() From 5770af51ef61d5464cd275bba503de6a6cd901c0 Mon Sep 17 00:00:00 2001 From: cybermaggedon Date: Tue, 3 Dec 2024 21:30:14 +0000 Subject: [PATCH 25/37] Fix async problem on text load (#191) --- trustgraph-flow/trustgraph/api/gateway/service.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/trustgraph-flow/trustgraph/api/gateway/service.py b/trustgraph-flow/trustgraph/api/gateway/service.py index 38ff8291..faa250dc 100755 --- a/trustgraph-flow/trustgraph/api/gateway/service.py +++ 
b/trustgraph-flow/trustgraph/api/gateway/service.py @@ -212,7 +212,7 @@ class Api: # Text is base64 encoded text = base64.b64decode(data["text"]).decode(charset) - resp = asyncio.to_thread( + resp = await asyncio.to_thread( self.text_out.send, None, TextDocument( From e3d06ab80b075ab28f699f45ac61521dbf71001f Mon Sep 17 00:00:00 2001 From: cybermaggedon Date: Wed, 4 Dec 2024 14:42:55 +0000 Subject: [PATCH 26/37] Fix isinstance test on null values (#192) Co-authored-by: Mark Adams --- trustgraph-base/trustgraph/api/api.py | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/trustgraph-base/trustgraph/api/api.py b/trustgraph-base/trustgraph/api/api.py index 7942e081..de96499c 100644 --- a/trustgraph-base/trustgraph/api/api.py +++ b/trustgraph-base/trustgraph/api/api.py @@ -205,16 +205,20 @@ class Api: "limit": limit } - if not isinstance(s, Uri): - raise RuntimeError("s must be Uri") - if not isinstance(p, Uri): - raise RuntimeError("p must be Uri") - if not isinstance(o, Uri) and not isinstance(o, Literal): - raise RuntimeError("o must be Uri or Literal") + if s: + if not isinstance(s, Uri): + raise RuntimeError("s must be Uri") + input["s"] = { "v": str(s), "e": isinstance(s, Uri), } + + if p: + if not isinstance(p, Uri): + raise RuntimeError("p must be Uri") + input["p"] = { "v": str(p), "e": isinstance(p, Uri), } - if s: input["s"] = { "v": str(s), "e": isinstance(s, Uri), } - if p: input["p"] = { "v": str(p), "e": isinstance(p, Uri), } - if o: input["o"] = { "v": str(o), "e": isinstance(o, Uri), } + if o: + if not isinstance(o, Uri) and not isinstance(o, Literal): + raise RuntimeError("o must be Uri or Literal") + input["o"] = { "v": str(o), "e": isinstance(o, Uri), } url = f"{self.url}triples-query" From bffaf62490c5b8339995b14e02e1a53215358337 Mon Sep 17 00:00:00 2001 From: cybermaggedon Date: Fri, 6 Dec 2024 00:12:49 +0000 Subject: [PATCH 27/37] Feature/memgraph optim (#193) * Separate memgraph query/write modules to 
optimise for memgraph * Used 1GB memory for Memgraph * Deployed specialised memgraph query/write processors, created memgraph indexes * One triple is loaded as a single transaction * Fixed index creation --- templates/components/memgraph.jsonnet | 4 +- templates/stores/memgraph.jsonnet | 3 + .../scripts/triples-query-memgraph | 6 + .../scripts/triples-write-memgraph | 6 + trustgraph-flow/setup.py | 2 + .../query/triples/memgraph/__init__.py | 3 + .../query/triples/memgraph/__main__.py | 7 + .../query/triples/memgraph/service.py | 357 ++++++++++++++++++ .../trustgraph/query/triples/neo4j/service.py | 5 +- .../storage/triples/memgraph/__init__.py | 3 + .../storage/triples/memgraph/__main__.py | 7 + .../storage/triples/memgraph/write.py | 252 +++++++++++++ 12 files changed, 651 insertions(+), 4 deletions(-) create mode 100755 trustgraph-flow/scripts/triples-query-memgraph create mode 100755 trustgraph-flow/scripts/triples-write-memgraph create mode 100644 trustgraph-flow/trustgraph/query/triples/memgraph/__init__.py create mode 100755 trustgraph-flow/trustgraph/query/triples/memgraph/__main__.py create mode 100755 trustgraph-flow/trustgraph/query/triples/memgraph/service.py create mode 100644 trustgraph-flow/trustgraph/storage/triples/memgraph/__init__.py create mode 100755 trustgraph-flow/trustgraph/storage/triples/memgraph/__main__.py create mode 100755 trustgraph-flow/trustgraph/storage/triples/memgraph/write.py diff --git a/templates/components/memgraph.jsonnet b/templates/components/memgraph.jsonnet index 5ec0a76e..609da3a2 100644 --- a/templates/components/memgraph.jsonnet +++ b/templates/components/memgraph.jsonnet @@ -16,7 +16,7 @@ memgraph + { engine.container("store-triples") .with_image(images.trustgraph) .with_command([ - "triples-write-neo4j", + "triples-write-memgraph", "-p", url.pulsar, "-g", @@ -50,7 +50,7 @@ memgraph + { engine.container("query-triples") .with_image(images.trustgraph) .with_command([ - "triples-query-neo4j", + 
"triples-query-memgraph", "-p", url.pulsar, "-g", diff --git a/templates/stores/memgraph.jsonnet b/templates/stores/memgraph.jsonnet index 8f8b6216..75faf5f0 100644 --- a/templates/stores/memgraph.jsonnet +++ b/templates/stores/memgraph.jsonnet @@ -10,6 +10,9 @@ local images = import "values/images.jsonnet"; local container = engine.container("memgraph") .with_image(images.memgraph_mage) + .with_environment({ + MEMGRAPH: "--storage-properties-on-edges=true --storage-enable-edges-metadata=true" + }) .with_limits("1.0", "1000M") .with_reservations("0.5", "1000M") .with_port(7474, 7474, "api") diff --git a/trustgraph-flow/scripts/triples-query-memgraph b/trustgraph-flow/scripts/triples-query-memgraph new file mode 100755 index 00000000..443929e4 --- /dev/null +++ b/trustgraph-flow/scripts/triples-query-memgraph @@ -0,0 +1,6 @@ +#!/usr/bin/env python3 + +from trustgraph.query.triples.memgraph import run + +run() + diff --git a/trustgraph-flow/scripts/triples-write-memgraph b/trustgraph-flow/scripts/triples-write-memgraph new file mode 100755 index 00000000..3d94a576 --- /dev/null +++ b/trustgraph-flow/scripts/triples-write-memgraph @@ -0,0 +1,6 @@ +#!/usr/bin/env python3 + +from trustgraph.storage.triples.memgraph import run + +run() + diff --git a/trustgraph-flow/setup.py b/trustgraph-flow/setup.py index e6c732a3..c53f96e7 100644 --- a/trustgraph-flow/setup.py +++ b/trustgraph-flow/setup.py @@ -103,8 +103,10 @@ setuptools.setup( "scripts/text-completion-openai", "scripts/triples-query-cassandra", "scripts/triples-query-neo4j", + "scripts/triples-query-memgraph", "scripts/triples-write-cassandra", "scripts/triples-write-neo4j", + "scripts/triples-write-memgraph", "scripts/wikipedia-lookup", ] ) diff --git a/trustgraph-flow/trustgraph/query/triples/memgraph/__init__.py b/trustgraph-flow/trustgraph/query/triples/memgraph/__init__.py new file mode 100644 index 00000000..ba844705 --- /dev/null +++ b/trustgraph-flow/trustgraph/query/triples/memgraph/__init__.py @@ -0,0 +1,3 
@@ + +from . service import * + diff --git a/trustgraph-flow/trustgraph/query/triples/memgraph/__main__.py b/trustgraph-flow/trustgraph/query/triples/memgraph/__main__.py new file mode 100755 index 00000000..89684e3e --- /dev/null +++ b/trustgraph-flow/trustgraph/query/triples/memgraph/__main__.py @@ -0,0 +1,7 @@ +#!/usr/bin/env python3 + +from . hf import run + +if __name__ == '__main__': + run() + diff --git a/trustgraph-flow/trustgraph/query/triples/memgraph/service.py b/trustgraph-flow/trustgraph/query/triples/memgraph/service.py new file mode 100755 index 00000000..5144f781 --- /dev/null +++ b/trustgraph-flow/trustgraph/query/triples/memgraph/service.py @@ -0,0 +1,357 @@ + +""" +Triples query service for memgraph. +Input is a (s, p, o) triple, some values may be null. Output is a list of +triples. +""" + +from neo4j import GraphDatabase + +from .... schema import TriplesQueryRequest, TriplesQueryResponse, Error +from .... schema import Value, Triple +from .... schema import triples_request_queue +from .... schema import triples_response_queue +from .... 
base import ConsumerProducer + +module = ".".join(__name__.split(".")[1:-1]) + +default_input_queue = triples_request_queue +default_output_queue = triples_response_queue +default_subscriber = module + +default_graph_host = 'bolt://memgraph:7687' +default_username = 'memgraph' +default_password = 'password' +default_database = 'memgraph' + +class Processor(ConsumerProducer): + + def __init__(self, **params): + + input_queue = params.get("input_queue", default_input_queue) + output_queue = params.get("output_queue", default_output_queue) + subscriber = params.get("subscriber", default_subscriber) + graph_host = params.get("graph_host", default_graph_host) + username = params.get("username", default_username) + password = params.get("password", default_password) + database = params.get("database", default_database) + + super(Processor, self).__init__( + **params | { + "input_queue": input_queue, + "output_queue": output_queue, + "subscriber": subscriber, + "input_schema": TriplesQueryRequest, + "output_schema": TriplesQueryResponse, + "graph_host": graph_host, + } + ) + + self.db = database + + self.io = GraphDatabase.driver(graph_host, auth=(username, password)) + + def create_value(self, ent): + + if ent.startswith("http://") or ent.startswith("https://"): + return Value(value=ent, is_uri=True) + else: + return Value(value=ent, is_uri=False) + + def handle(self, msg): + + try: + + v = msg.value() + + # Sender-produced ID + id = msg.properties()["id"] + + print(f"Handling input {id}...", flush=True) + + triples = [] + + if v.s is not None: + if v.p is not None: + if v.o is not None: + + # SPO + + records, summary, keys = self.io.execute_query( + "MATCH (src:Node {uri: $src})-[rel:Rel {uri: $rel}]->(dest:Literal {value: $value}) " + "RETURN $src as src", + src=v.s.value, rel=v.p.value, value=v.o.value, + database_=self.db, + ) + + for rec in records: + triples.append((v.s.value, v.p.value, v.o.value)) + + records, summary, keys = self.io.execute_query( + "MATCH 
(src:Node {uri: $src})-[rel:Rel {uri: $rel}]->(dest:Node {uri: $uri}) " + "RETURN $src as src", + src=v.s.value, rel=v.p.value, uri=v.o.value, + database_=self.db, + ) + + for rec in records: + triples.append((v.s.value, v.p.value, v.o.value)) + + else: + + # SP + + records, summary, keys = self.io.execute_query( + "MATCH (src:Node {uri: $src})-[rel:Rel {uri: $rel}]->(dest:Literal) " + "RETURN dest.value as dest", + src=v.s.value, rel=v.p.value, + database_=self.db, + ) + + for rec in records: + data = rec.data() + triples.append((v.s.value, v.p.value, data["dest"])) + + records, summary, keys = self.io.execute_query( + "MATCH (src:Node {uri: $src})-[rel:Rel {uri: $rel}]->(dest:Node) " + "RETURN dest.uri as dest", + src=v.s.value, rel=v.p.value, + database_=self.db, + ) + + for rec in records: + data = rec.data() + triples.append((v.s.value, v.p.value, data["dest"])) + + else: + + if v.o is not None: + + # SO + + records, summary, keys = self.io.execute_query( + "MATCH (src:Node {uri: $src})-[rel:Rel]->(dest:Literal {value: $value}) " + "RETURN rel.uri as rel", + src=v.s.value, value=v.o.value, + database_=self.db, + ) + + for rec in records: + data = rec.data() + triples.append((v.s.value, data["rel"], v.o.value)) + + records, summary, keys = self.io.execute_query( + "MATCH (src:Node {uri: $src})-[rel:Rel]->(dest:Node {uri: $uri}) " + "RETURN rel.uri as rel", + src=v.s.value, uri=v.o.value, + database_=self.db, + ) + + for rec in records: + data = rec.data() + triples.append((v.s.value, data["rel"], v.o.value)) + + else: + + # S + + records, summary, keys = self.io.execute_query( + "MATCH (src:Node {uri: $src})-[rel:Rel]->(dest:Literal) " + "RETURN rel.uri as rel, dest.value as dest", + src=v.s.value, + database_=self.db, + ) + + for rec in records: + data = rec.data() + triples.append((v.s.value, data["rel"], data["dest"])) + + records, summary, keys = self.io.execute_query( + "MATCH (src:Node {uri: $src})-[rel:Rel]->(dest:Node) " + "RETURN rel.uri as rel, 
dest.uri as dest", + src=v.s.value, + database_=self.db, + ) + + for rec in records: + data = rec.data() + triples.append((v.s.value, data["rel"], data["dest"])) + + + else: + + if v.p is not None: + + if v.o is not None: + + # PO + + records, summary, keys = self.io.execute_query( + "MATCH (src:Node)-[rel:Rel {uri: $uri}]->(dest:Literal {value: $value}) " + "RETURN src.uri as src", + uri=v.p.value, value=v.o.value, + database_=self.db, + ) + + for rec in records: + data = rec.data() + triples.append((data["src"], v.p.value, v.o.value)) + + records, summary, keys = self.io.execute_query( + "MATCH (src:Node)-[rel:Rel {uri: $uri}]->(dest:Node {uri: $uri}) " + "RETURN src.uri as src", + uri=v.p.value, dest=v.o.value, + database_=self.db, + ) + + for rec in records: + data = rec.data() + triples.append((data["src"], v.p.value, v.o.value)) + + else: + + # P + + records, summary, keys = self.io.execute_query( + "MATCH (src:Node)-[rel:Rel {uri: $uri}]->(dest:Literal) " + "RETURN src.uri as src, dest.value as dest", + uri=v.p.value, + database_=self.db, + ) + + for rec in records: + data = rec.data() + triples.append((data["src"], v.p.value, data["dest"])) + + records, summary, keys = self.io.execute_query( + "MATCH (src:Node)-[rel:Rel {uri: $uri}]->(dest:Node) " + "RETURN src.uri as src, dest.uri as dest", + uri=v.p.value, + database_=self.db, + ) + + for rec in records: + data = rec.data() + triples.append((data["src"], v.p.value, data["dest"])) + + else: + + if v.o is not None: + + # O + + records, summary, keys = self.io.execute_query( + "MATCH (src:Node)-[rel:Rel]->(dest:Literal {value: $value}) " + "RETURN src.uri as src, rel.uri as rel", + value=v.o.value, + database_=self.db, + ) + + for rec in records: + data = rec.data() + triples.append((data["src"], data["rel"], v.o.value)) + + records, summary, keys = self.io.execute_query( + "MATCH (src:Node)-[rel:Rel]->(dest:Node {uri: $uri}) " + "RETURN src.uri as src, rel.uri as rel", + uri=v.o.value, + database_=self.db, 
+ ) + + for rec in records: + data = rec.data() + triples.append((data["src"], data["rel"], v.o.value)) + + else: + + # * + + records, summary, keys = self.io.execute_query( + "MATCH (src:Node)-[rel:Rel]->(dest:Literal) " + "RETURN src.uri as src, rel.uri as rel, dest.value as dest", + database_=self.db, + ) + + for rec in records: + data = rec.data() + triples.append((data["src"], data["rel"], data["dest"])) + + records, summary, keys = self.io.execute_query( + "MATCH (src:Node)-[rel:Rel]->(dest:Node) " + "RETURN src.uri as src, rel.uri as rel, dest.uri as dest", + database_=self.db, + ) + + for rec in records: + data = rec.data() + triples.append((data["src"], data["rel"], data["dest"])) + + triples = [ + Triple( + s=self.create_value(t[0]), + p=self.create_value(t[1]), + o=self.create_value(t[2]) + ) + for t in triples + ] + + print("Send response...", flush=True) + r = TriplesQueryResponse(triples=triples, error=None) + self.producer.send(r, properties={"id": id}) + + print("Done.", flush=True) + + except Exception as e: + + print(f"Exception: {e}") + + print("Send error response...", flush=True) + + r = TriplesQueryResponse( + error=Error( + type = "llm-error", + message = str(e), + ), + response=None, + ) + + self.producer.send(r, properties={"id": id}) + + self.consumer.acknowledge(msg) + + @staticmethod + def add_args(parser): + + ConsumerProducer.add_args( + parser, default_input_queue, default_subscriber, + default_output_queue, + ) + + parser.add_argument( + '-g', '--graph-host', + default=default_graph_host, + help=f'Graph host (default: {default_graph_host})' + ) + + parser.add_argument( + '--username', + default=default_username, + help=f'Memgraph username (default: {default_username})' + ) + + parser.add_argument( + '--password', + default=default_password, + help=f'Memgraph password (default: {default_password})' + ) + + parser.add_argument( + '--database', + default=default_database, + help=f'Memgraph database (default: {default_database})' + ) + 
+def run(): + + Processor.start(module, __doc__) + diff --git a/trustgraph-flow/trustgraph/query/triples/neo4j/service.py b/trustgraph-flow/trustgraph/query/triples/neo4j/service.py index 2caa0193..d60bc4f4 100755 --- a/trustgraph-flow/trustgraph/query/triples/neo4j/service.py +++ b/trustgraph-flow/trustgraph/query/triples/neo4j/service.py @@ -1,7 +1,8 @@ """ -Triples query service. Input is a (s, p, o) triple, some values may be -null. Output is a list of triples. +Triples query service for neo4j. +Input is a (s, p, o) triple, some values may be null. Output is a list of +triples. """ from neo4j import GraphDatabase diff --git a/trustgraph-flow/trustgraph/storage/triples/memgraph/__init__.py b/trustgraph-flow/trustgraph/storage/triples/memgraph/__init__.py new file mode 100644 index 00000000..d891d55f --- /dev/null +++ b/trustgraph-flow/trustgraph/storage/triples/memgraph/__init__.py @@ -0,0 +1,3 @@ + +from . write import * + diff --git a/trustgraph-flow/trustgraph/storage/triples/memgraph/__main__.py b/trustgraph-flow/trustgraph/storage/triples/memgraph/__main__.py new file mode 100755 index 00000000..c05d8c6d --- /dev/null +++ b/trustgraph-flow/trustgraph/storage/triples/memgraph/__main__.py @@ -0,0 +1,7 @@ +#!/usr/bin/env python3 + +from . write import run + +if __name__ == '__main__': + run() + diff --git a/trustgraph-flow/trustgraph/storage/triples/memgraph/write.py b/trustgraph-flow/trustgraph/storage/triples/memgraph/write.py new file mode 100755 index 00000000..17e8c67e --- /dev/null +++ b/trustgraph-flow/trustgraph/storage/triples/memgraph/write.py @@ -0,0 +1,252 @@ + +""" +Graph writer. Input is graph edge. Writes edges to Cassandra graph. +""" + +import pulsar +import base64 +import os +import argparse +import time + +from neo4j import GraphDatabase + +from .... schema import Triples +from .... schema import triples_store_queue +from .... log_level import LogLevel +from .... 
base import Consumer + +module = ".".join(__name__.split(".")[1:-1]) + +default_input_queue = triples_store_queue +default_subscriber = module + +default_graph_host = 'bolt://memgraph:7687' +default_username = 'memgraph' +default_password = 'password' +default_database = 'memgraph' + +class Processor(Consumer): + + def __init__(self, **params): + + input_queue = params.get("input_queue", default_input_queue) + subscriber = params.get("subscriber", default_subscriber) + graph_host = params.get("graph_host", default_graph_host) + username = params.get("username", default_username) + password = params.get("password", default_password) + database = params.get("database", default_database) + + super(Processor, self).__init__( + **params | { + "input_queue": input_queue, + "subscriber": subscriber, + "input_schema": Triples, + "graph_host": graph_host, + } + ) + + self.db = database + + self.io = GraphDatabase.driver(graph_host, auth=(username, password)) + + with self.io.session(database=self.db) as session: + self.create_indexes(session) + + def create_indexes(self, session): + + print("Create indexes...", flush=True) + + try: + session.run( + "CREATE INDEX ON :Node", + ) + except Exception as e: + print(e, flush=True) + # Maybe index already exists + print("Index create failure ignored", flush=True) + + try: + session.run( + "CREATE INDEX ON :Node(uri)" + ) + except Exception as e: + print(e, flush=True) + # Maybe index already exists + print("Index create failure ignored", flush=True) + + try: + session.run( + "CREATE INDEX ON :Literal", + ) + except Exception as e: + print(e, flush=True) + # Maybe index already exists + print("Index create failure ignored", flush=True) + + try: + session.run( + "CREATE INDEX ON :Literal(value)" + ) + except Exception as e: + print(e, flush=True) + # Maybe index already exists + print("Index create failure ignored", flush=True) + + print("Index creation done", flush=True) + + def create_node(self, uri): + + print("Create node", uri) 
+ + summary = self.io.execute_query( + "MERGE (n:Node {uri: $uri})", + uri=uri, + database_=self.db, + ).summary + + print("Created {nodes_created} nodes in {time} ms.".format( + nodes_created=summary.counters.nodes_created, + time=summary.result_available_after + )) + + def create_literal(self, value): + + print("Create literal", value) + + summary = self.io.execute_query( + "MERGE (n:Literal {value: $value})", + value=value, + database_=self.db, + ).summary + + print("Created {nodes_created} nodes in {time} ms.".format( + nodes_created=summary.counters.nodes_created, + time=summary.result_available_after + )) + + def relate_node(self, src, uri, dest): + + print("Create node rel", src, uri, dest) + + summary = self.io.execute_query( + "MATCH (src:Node {uri: $src}) " + "MATCH (dest:Node {uri: $dest}) " + "MERGE (src)-[:Rel {uri: $uri}]->(dest)", + src=src, dest=dest, uri=uri, + database_=self.db, + ).summary + + print("Created {nodes_created} nodes in {time} ms.".format( + nodes_created=summary.counters.nodes_created, + time=summary.result_available_after + )) + + def relate_literal(self, src, uri, dest): + + print("Create literal rel", src, uri, dest) + + summary = self.io.execute_query( + "MATCH (src:Node {uri: $src}) " + "MATCH (dest:Literal {value: $dest}) " + "MERGE (src)-[:Rel {uri: $uri}]->(dest)", + src=src, dest=dest, uri=uri, + database_=self.db, + ).summary + + print("Created {nodes_created} nodes in {time} ms.".format( + nodes_created=summary.counters.nodes_created, + time=summary.result_available_after + )) + + def create_triple(self, tx, t): + + # Create new s node with given uri, if not exists + result = tx.run( + "MERGE (n:Node {uri: $uri})", + uri=t.s.value + ) + + if t.o.is_uri: + + # Create new o node with given uri, if not exists + result = tx.run( + "MERGE (n:Node {uri: $uri})", + uri=t.o.value + ) + + result = tx.run( + "MATCH (src:Node {uri: $src}) " + "MATCH (dest:Node {uri: $dest}) " + "MERGE (src)-[:Rel {uri: $uri}]->(dest)", + 
src=t.s.value, dest=t.o.value, uri=t.p.value, + ) + + else: + + # Create new o literal with given uri, if not exists + result = tx.run( + "MERGE (n:Literal {value: $value})", + value=t.o.value + ) + + result = tx.run( + "MATCH (src:Node {uri: $src}) " + "MATCH (dest:Literal {value: $dest}) " + "MERGE (src)-[:Rel {uri: $uri}]->(dest)", + src=t.s.value, dest=t.o.value, uri=t.p.value, + ) + + def handle(self, msg): + + v = msg.value() + + for t in v.triples: + + # self.create_node(t.s.value) + + # if t.o.is_uri: + # self.create_node(t.o.value) + # self.relate_node(t.s.value, t.p.value, t.o.value) + # else: + # self.create_literal(t.o.value) + # self.relate_literal(t.s.value, t.p.value, t.o.value) + + with self.io.session(database=self.db) as session: + session.execute_write(self.create_triple, t) + + @staticmethod + def add_args(parser): + + Consumer.add_args( + parser, default_input_queue, default_subscriber, + ) + + parser.add_argument( + '-g', '--graph_host', + default=default_graph_host, + help=f'Graph host (default: {default_graph_host})' + ) + + parser.add_argument( + '--username', + default=default_username, + help=f'Memgraph username (default: {default_username})' + ) + + parser.add_argument( + '--password', + default=default_password, + help=f'Memgraph password (default: {default_password})' + ) + + parser.add_argument( + '--database', + default=default_database, + help=f'Memgraph database (default: {default_database})' + ) + +def run(): + + Processor.start(module, __doc__) + From 2818ec9f236ed9373f639a3c809c38cf3bd688b6 Mon Sep 17 00:00:00 2001 From: cybermaggedon Date: Fri, 6 Dec 2024 08:50:49 +0000 Subject: [PATCH 28/37] Fix header (#194) --- trustgraph-cli/scripts/tg-dump-msgpack | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/trustgraph-cli/scripts/tg-dump-msgpack b/trustgraph-cli/scripts/tg-dump-msgpack index dc4a8139..2be950db 100755 --- a/trustgraph-cli/scripts/tg-dump-msgpack +++ b/trustgraph-cli/scripts/tg-dump-msgpack @@ -1,6 +1,6 
@@ #!/usr/bin/env python3 -"" +""" This utility reads a knowledge core in msgpack format and outputs its contents in JSON form to standard output. This is useful only as a diagnostic utility. From 7df7843dad25bf643734522245b5c7eb5c59976e Mon Sep 17 00:00:00 2001 From: cybermaggedon Date: Fri, 6 Dec 2024 08:51:10 +0000 Subject: [PATCH 29/37] Main/remove parquet (#195) * Remove Parquet code, and package build --- .github/workflows/release.yaml | 24 +-- Containerfile | 5 +- Makefile | 3 - trustgraph-parquet/README.md | 1 - trustgraph-parquet/scripts/concat-parquet | 45 ----- trustgraph-parquet/scripts/dump-parquet | 24 --- trustgraph-parquet/scripts/ge-dump-parquet | 6 - .../scripts/load-graph-embeddings | 170 ----------------- trustgraph-parquet/scripts/load-triples | 180 ------------------ .../scripts/triples-dump-parquet | 6 - trustgraph-parquet/setup.py | 51 ----- .../trustgraph/dump/__init__.py | 0 .../dump/graph_embeddings/__init__.py | 0 .../dump/graph_embeddings/parquet/__init__.py | 3 - .../dump/graph_embeddings/parquet/__main__.py | 7 - .../graph_embeddings/parquet/processor.py | 85 --------- .../dump/graph_embeddings/parquet/writer.py | 94 --------- .../trustgraph/dump/triples/__init__.py | 0 .../dump/triples/parquet/__init__.py | 3 - .../dump/triples/parquet/__main__.py | 7 - .../dump/triples/parquet/processor.py | 87 --------- .../trustgraph/dump/triples/parquet/writer.py | 96 ---------- 22 files changed, 11 insertions(+), 886 deletions(-) delete mode 100644 trustgraph-parquet/README.md delete mode 100755 trustgraph-parquet/scripts/concat-parquet delete mode 100755 trustgraph-parquet/scripts/dump-parquet delete mode 100755 trustgraph-parquet/scripts/ge-dump-parquet delete mode 100755 trustgraph-parquet/scripts/load-graph-embeddings delete mode 100755 trustgraph-parquet/scripts/load-triples delete mode 100755 trustgraph-parquet/scripts/triples-dump-parquet delete mode 100644 trustgraph-parquet/setup.py delete mode 100644 
trustgraph-parquet/trustgraph/dump/__init__.py delete mode 100644 trustgraph-parquet/trustgraph/dump/graph_embeddings/__init__.py delete mode 100644 trustgraph-parquet/trustgraph/dump/graph_embeddings/parquet/__init__.py delete mode 100755 trustgraph-parquet/trustgraph/dump/graph_embeddings/parquet/__main__.py delete mode 100755 trustgraph-parquet/trustgraph/dump/graph_embeddings/parquet/processor.py delete mode 100644 trustgraph-parquet/trustgraph/dump/graph_embeddings/parquet/writer.py delete mode 100644 trustgraph-parquet/trustgraph/dump/triples/__init__.py delete mode 100644 trustgraph-parquet/trustgraph/dump/triples/parquet/__init__.py delete mode 100755 trustgraph-parquet/trustgraph/dump/triples/parquet/__main__.py delete mode 100755 trustgraph-parquet/trustgraph/dump/triples/parquet/processor.py delete mode 100644 trustgraph-parquet/trustgraph/dump/triples/parquet/writer.py diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index fc85a6a8..30fc70ff 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -48,20 +48,6 @@ jobs: - name: Publish release distributions to PyPI uses: pypa/gh-action-pypi-publish@release/v1 - - name: Create deploy bundle - run: templates/generate-all deploy.zip ${{ steps.version.outputs.VERSION }} - - - uses: ncipollo/release-action@v1 - with: - artifacts: deploy.zip - generateReleaseNotes: true - makeLatest: false - prerelease: true - skipIfReleaseExists: true - - - name: Build container - run: make container VERSION=${{ steps.version.outputs.VERSION }} - - name: Extract metadata for container id: meta uses: docker/metadata-action@v4 @@ -84,3 +70,13 @@ jobs: tags: ${{ steps.meta.outputs.tags }} labels: ${{ steps.meta.outputs.labels }} + - name: Create deploy bundle + run: templates/generate-all deploy.zip ${{ steps.version.outputs.VERSION }} + + - uses: ncipollo/release-action@v1 + with: + artifacts: deploy.zip + generateReleaseNotes: true + makeLatest: false + prerelease: true + 
skipIfReleaseExists: true diff --git a/Containerfile b/Containerfile index 0d6d357b..c2735feb 100644 --- a/Containerfile +++ b/Containerfile @@ -16,7 +16,7 @@ RUN pip3 install torch --index-url https://download.pytorch.org/whl/cpu RUN pip3 install anthropic boto3 cohere openai google-cloud-aiplatform ollama google-generativeai \ langchain langchain-core langchain-huggingface langchain-text-splitters \ langchain-community pymilvus sentence-transformers transformers \ - huggingface-hub pulsar-client cassandra-driver pyarrow pyyaml \ + huggingface-hub pulsar-client cassandra-driver pyyaml \ neo4j tiktoken && \ pip3 cache purge @@ -32,7 +32,6 @@ COPY trustgraph-base/ /root/build/trustgraph-base/ COPY trustgraph-flow/ /root/build/trustgraph-flow/ COPY trustgraph-vertexai/ /root/build/trustgraph-vertexai/ COPY trustgraph-bedrock/ /root/build/trustgraph-bedrock/ -COPY trustgraph-parquet/ /root/build/trustgraph-parquet/ COPY trustgraph-embeddings-hf/ /root/build/trustgraph-embeddings-hf/ COPY trustgraph-cli/ /root/build/trustgraph-cli/ @@ -42,7 +41,6 @@ RUN pip3 wheel -w /root/wheels/ --no-deps ./trustgraph-base/ RUN pip3 wheel -w /root/wheels/ --no-deps ./trustgraph-flow/ RUN pip3 wheel -w /root/wheels/ --no-deps ./trustgraph-vertexai/ RUN pip3 wheel -w /root/wheels/ --no-deps ./trustgraph-bedrock/ -RUN pip3 wheel -w /root/wheels/ --no-deps ./trustgraph-parquet/ RUN pip3 wheel -w /root/wheels/ --no-deps ./trustgraph-embeddings-hf/ RUN pip3 wheel -w /root/wheels/ --no-deps ./trustgraph-cli/ @@ -61,7 +59,6 @@ RUN \ pip3 install /root/wheels/trustgraph_flow-* && \ pip3 install /root/wheels/trustgraph_vertexai-* && \ pip3 install /root/wheels/trustgraph_bedrock-* && \ - pip3 install /root/wheels/trustgraph_parquet-* && \ pip3 install /root/wheels/trustgraph_embeddings_hf-* && \ pip3 install /root/wheels/trustgraph_cli-* && \ pip3 cache purge && \ diff --git a/Makefile b/Makefile index 0fb4b175..72d144a9 100644 --- a/Makefile +++ b/Makefile @@ -14,7 +14,6 @@ wheels: pip3 wheel 
--no-deps --wheel-dir dist trustgraph-flow/ pip3 wheel --no-deps --wheel-dir dist trustgraph-vertexai/ pip3 wheel --no-deps --wheel-dir dist trustgraph-bedrock/ - pip3 wheel --no-deps --wheel-dir dist trustgraph-parquet/ pip3 wheel --no-deps --wheel-dir dist trustgraph-embeddings-hf/ pip3 wheel --no-deps --wheel-dir dist trustgraph-cli/ @@ -25,7 +24,6 @@ packages: update-package-versions cd trustgraph-flow && python3 setup.py sdist --dist-dir ../dist/ cd trustgraph-vertexai && python3 setup.py sdist --dist-dir ../dist/ cd trustgraph-bedrock && python3 setup.py sdist --dist-dir ../dist/ - cd trustgraph-parquet && python3 setup.py sdist --dist-dir ../dist/ cd trustgraph-embeddings-hf && python3 setup.py sdist --dist-dir ../dist/ cd trustgraph-cli && python3 setup.py sdist --dist-dir ../dist/ @@ -41,7 +39,6 @@ update-package-versions: echo __version__ = \"${VERSION}\" > trustgraph-flow/trustgraph/flow_version.py echo __version__ = \"${VERSION}\" > trustgraph-vertexai/trustgraph/vertexai_version.py echo __version__ = \"${VERSION}\" > trustgraph-bedrock/trustgraph/bedrock_version.py - echo __version__ = \"${VERSION}\" > trustgraph-parquet/trustgraph/parquet_version.py echo __version__ = \"${VERSION}\" > trustgraph-embeddings-hf/trustgraph/embeddings_hf_version.py echo __version__ = \"${VERSION}\" > trustgraph-cli/trustgraph/cli_version.py echo __version__ = \"${VERSION}\" > trustgraph/trustgraph/trustgraph_version.py diff --git a/trustgraph-parquet/README.md b/trustgraph-parquet/README.md deleted file mode 100644 index 7a2ce130..00000000 --- a/trustgraph-parquet/README.md +++ /dev/null @@ -1 +0,0 @@ -See https://trustgraph.ai/ diff --git a/trustgraph-parquet/scripts/concat-parquet b/trustgraph-parquet/scripts/concat-parquet deleted file mode 100755 index 7943d436..00000000 --- a/trustgraph-parquet/scripts/concat-parquet +++ /dev/null @@ -1,45 +0,0 @@ -#!/usr/bin/env python3 - -""" -Concatenates multiple parquet files into a single parquet output -""" - -import pyarrow 
as pa -import pyarrow.parquet as pq -import pandas as pd -import sys -import argparse - -parser = argparse.ArgumentParser( - prog="combine-parquet", - description=__doc__ -) - -parser.add_argument( - '-i', '--input', - nargs='*', - help=f'Input files' -) - -parser.add_argument( - '-o', '--output', - help=f'Output files' -) - -args = parser.parse_args() - -df = None - -for file in args.input: - - part = pq.read_table(file).to_pandas() - - if df is None: - df = part - else: - df = pd.concat([df, part], ignore_index=True) - -if df is not None: - - table = pa.Table.from_pandas(df) - pq.write_table(table, args.output) diff --git a/trustgraph-parquet/scripts/dump-parquet b/trustgraph-parquet/scripts/dump-parquet deleted file mode 100755 index 62b28998..00000000 --- a/trustgraph-parquet/scripts/dump-parquet +++ /dev/null @@ -1,24 +0,0 @@ -#!/usr/bin/env python3 - -import pyarrow as pa -import pyarrow.csv as pc -import pyarrow.parquet as pq -import pandas as pd -import sys - -df = None - -for file in sys.argv[1:]: - - part = pq.read_table(file).to_pandas() - - if df is None: - df = part - else: - df = pd.concat([df, part], ignore_index=True) - -if df is not None: - - table = pa.Table.from_pandas(df) - pc.write_csv(table, sys.stdout.buffer) - diff --git a/trustgraph-parquet/scripts/ge-dump-parquet b/trustgraph-parquet/scripts/ge-dump-parquet deleted file mode 100755 index c2b29c51..00000000 --- a/trustgraph-parquet/scripts/ge-dump-parquet +++ /dev/null @@ -1,6 +0,0 @@ -#!/usr/bin/env python3 - -from trustgraph.dump.graph_embeddings.parquet import run - -run() - diff --git a/trustgraph-parquet/scripts/load-graph-embeddings b/trustgraph-parquet/scripts/load-graph-embeddings deleted file mode 100755 index 0e6ecf93..00000000 --- a/trustgraph-parquet/scripts/load-graph-embeddings +++ /dev/null @@ -1,170 +0,0 @@ -#!/usr/bin/env python3 - -""" -Loads Graph embeddings into TrustGraph processing. 
-""" - -import pulsar -from pulsar.schema import JsonSchema -from trustgraph.schema import GraphEmbeddings, Value, Metadata -from trustgraph.schema import graph_embeddings_store_queue -import argparse -import os -import time -import pyarrow as pa -import pyarrow.parquet as pq - -from trustgraph.log_level import LogLevel - -class Loader: - - def __init__( - self, - pulsar_host, - output_queue, - log_level, - file, - user, - collection, - ): - - self.client = pulsar.Client( - pulsar_host, - logger=pulsar.ConsoleLogger(log_level.to_pulsar()) - ) - - self.producer = self.client.create_producer( - topic=output_queue, - schema=JsonSchema(GraphEmbeddings), - chunking_enabled=True, - ) - - self.file = file - self.user = user - self.collection = collection - - def run(self): - - try: - - path = self.file - - print("Reading file...") - table = pq.read_table(path) - print("Loaded.") - - names = set(table.column_names) - - if "embeddings" not in names: - print("No 'embeddings' column") - - if "entity" not in names: - print("No 'entity' column") - - embc = table.column("embeddings") - entc = table.column("entity") - - for emb, ent in zip(embc, entc): - - b = emb.as_py() - n = ent.as_py() - - r = GraphEmbeddings( - metadata=Metadata( - metadata=[], - user=self.user, - collection=self.collection, - ), - vectors=b, - entity=Value( - value=n, - is_uri=n.startswith("https:") - ), - ) - - self.producer.send(r) - - except Exception as e: - print(e, flush=True) - - def __del__(self): - self.client.close() - -def main(): - - parser = argparse.ArgumentParser( - prog='loader', - description=__doc__, - ) - - default_pulsar_host = os.getenv("PULSAR_HOST", 'pulsar://localhost:6650') - default_output_queue = graph_embeddings_store_queue - default_user = 'trustgraph' - default_collection = 'default' - - parser.add_argument( - '-p', '--pulsar-host', - default=default_pulsar_host, - help=f'Pulsar host (default: {default_pulsar_host})', - ) - - parser.add_argument( - '-o', '--output-queue', - 
default=default_output_queue, - help=f'Output queue (default: {default_output_queue})' - ) - - parser.add_argument( - '-u', '--user', - default=default_user, - help=f'User ID (default: {default_user})' - ) - - parser.add_argument( - '-c', '--collection', - default=default_collection, - help=f'Collection ID (default: {default_collection})' - ) - - parser.add_argument( - '-l', '--log-level', - type=LogLevel, - default=LogLevel.ERROR, - choices=list(LogLevel), - help=f'Output queue (default: info)' - ) - - parser.add_argument( - '-f', '--file', - required=True, - help=f'File to load' - ) - - args = parser.parse_args() - - while True: - - try: - p = Loader( - pulsar_host=args.pulsar_host, - output_queue=args.output_queue, - log_level=args.log_level, - file=args.file, - user=args.user, - collection=args.collection, - ) - - p.run() - - print("File loaded.") - break - - except Exception as e: - - print("Exception:", e, flush=True) - print("Will retry...", flush=True) - - time.sleep(10) - -main() - diff --git a/trustgraph-parquet/scripts/load-triples b/trustgraph-parquet/scripts/load-triples deleted file mode 100755 index e6bb0ff7..00000000 --- a/trustgraph-parquet/scripts/load-triples +++ /dev/null @@ -1,180 +0,0 @@ -#!/usr/bin/env python3 - -""" -Loads Graph embeddings into TrustGraph processing. 
-""" - -import pulsar -from pulsar.schema import JsonSchema -from trustgraph.schema import Triples, Triple, Value, Metadata -from trustgraph.schema import triples_store_queue -import argparse -import os -import time -import pyarrow as pa -import pyarrow.parquet as pq - -from trustgraph.log_level import LogLevel - -class Loader: - - def __init__( - self, - pulsar_host, - output_queue, - log_level, - file, - user, - collection, - ): - - self.client = pulsar.Client( - pulsar_host, - logger=pulsar.ConsoleLogger(log_level.to_pulsar()) - ) - - self.producer = self.client.create_producer( - topic=output_queue, - schema=JsonSchema(Triples), - chunking_enabled=True, - ) - - self.file = file - self.user = user - self.collection = collection - - def run(self): - - try: - - path = self.file - - print("Reading file...") - table = pq.read_table(path) - print("Loaded.") - - names = set(table.column_names) - - if "s" not in names: - print("No 's' column") - - if "p" not in names: - print("No 'p' column") - - if "o" not in names: - print("No 'o' column") - - sc = table.column("s") - pc = table.column("p") - oc = table.column("o") - - for s, p, o in zip(sc, pc, oc): - - r = Triples( - metadata=Metadata( - metadata=[], - user=self.user, - collection=self.collection, - ), - triples=[ - Triple( - s=Value( - value=s.as_py(), is_uri=True - ), - p=Value( - value=p.as_py(), is_uri=True - ), - o=Value( - value=o.as_py(), - is_uri=o.as_py().startswith("https:") - ) - ) - ] - ) - - self.producer.send(r) - - except Exception as e: - print(e, flush=True) - - def __del__(self): - self.client.close() - -def main(): - - parser = argparse.ArgumentParser( - prog='loader', - description=__doc__, - ) - - default_pulsar_host = os.getenv("PULSAR_HOST", 'pulsar://localhost:6650') - default_output_queue = triples_store_queue - default_user = 'trustgraph' - default_collection = 'default' - - parser.add_argument( - '-p', '--pulsar-host', - default=default_pulsar_host, - help=f'Pulsar host (default: 
{default_pulsar_host})', - ) - - parser.add_argument( - '-o', '--output-queue', - default=default_output_queue, - help=f'Output queue (default: {default_output_queue})' - ) - - parser.add_argument( - '-u', '--user', - default=default_user, - help=f'User ID (default: {default_user})' - ) - - parser.add_argument( - '-c', '--collection', - default=default_collection, - help=f'Collection ID (default: {default_collection})' - ) - - parser.add_argument( - '-l', '--log-level', - type=LogLevel, - default=LogLevel.ERROR, - choices=list(LogLevel), - help=f'Output queue (default: info)' - ) - - parser.add_argument( - '-f', '--file', - required=True, - help=f'File to load' - ) - - args = parser.parse_args() - - while True: - - try: - p = Loader( - pulsar_host=args.pulsar_host, - output_queue=args.output_queue, - log_level=args.log_level, - file=args.file, - user=args.user, - collection=args.collection, - ) - - p.run() - - print("File loaded.") - break - - except Exception as e: - - print("Exception:", e, flush=True) - print("Will retry...", flush=True) - - time.sleep(10) - -main() - diff --git a/trustgraph-parquet/scripts/triples-dump-parquet b/trustgraph-parquet/scripts/triples-dump-parquet deleted file mode 100755 index 78d79196..00000000 --- a/trustgraph-parquet/scripts/triples-dump-parquet +++ /dev/null @@ -1,6 +0,0 @@ -#!/usr/bin/env python3 - -from trustgraph.dump.triples.parquet import run - -run() - diff --git a/trustgraph-parquet/setup.py b/trustgraph-parquet/setup.py deleted file mode 100644 index dfe29653..00000000 --- a/trustgraph-parquet/setup.py +++ /dev/null @@ -1,51 +0,0 @@ -import setuptools -import os -import importlib - -with open("README.md", "r") as fh: - long_description = fh.read() - -# Load a version number module -spec = importlib.util.spec_from_file_location( - 'version', 'trustgraph/parquet_version.py' -) -version_module = importlib.util.module_from_spec(spec) -spec.loader.exec_module(version_module) - -version = version_module.__version__ - 
-setuptools.setup( - name="trustgraph-parquet", - version=version, - author="trustgraph.ai", - author_email="security@trustgraph.ai", - description="TrustGraph provides a means to run a pipeline of flexible AI processing components in a flexible means to achieve a processing pipeline.", - long_description=long_description, - long_description_content_type="text/markdown", - url="https://github.com/trustgraph-ai/trustgraph", - packages=setuptools.find_namespace_packages( - where='./', - ), - classifiers=[ - "Programming Language :: Python :: 3", - "License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)", - "Operating System :: OS Independent", - ], - python_requires='>=3.8', - download_url = "https://github.com/trustgraph-ai/trustgraph/archive/refs/tags/v" + version + ".tar.gz", - install_requires=[ - "trustgraph-base>=0.17,<0.18", - "pulsar-client", - "prometheus-client", - "pyarrow", - "pandas", - ], - scripts=[ - "scripts/concat-parquet", - "scripts/dump-parquet", - "scripts/ge-dump-parquet", - "scripts/triples-dump-parquet", - "scripts/load-graph-embeddings", - "scripts/load-triples", - ] -) diff --git a/trustgraph-parquet/trustgraph/dump/__init__.py b/trustgraph-parquet/trustgraph/dump/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/trustgraph-parquet/trustgraph/dump/graph_embeddings/__init__.py b/trustgraph-parquet/trustgraph/dump/graph_embeddings/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/trustgraph-parquet/trustgraph/dump/graph_embeddings/parquet/__init__.py b/trustgraph-parquet/trustgraph/dump/graph_embeddings/parquet/__init__.py deleted file mode 100644 index 9d16af90..00000000 --- a/trustgraph-parquet/trustgraph/dump/graph_embeddings/parquet/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ - -from . 
processor import * - diff --git a/trustgraph-parquet/trustgraph/dump/graph_embeddings/parquet/__main__.py b/trustgraph-parquet/trustgraph/dump/graph_embeddings/parquet/__main__.py deleted file mode 100755 index c05d8c6d..00000000 --- a/trustgraph-parquet/trustgraph/dump/graph_embeddings/parquet/__main__.py +++ /dev/null @@ -1,7 +0,0 @@ -#!/usr/bin/env python3 - -from . write import run - -if __name__ == '__main__': - run() - diff --git a/trustgraph-parquet/trustgraph/dump/graph_embeddings/parquet/processor.py b/trustgraph-parquet/trustgraph/dump/graph_embeddings/parquet/processor.py deleted file mode 100755 index 795f3351..00000000 --- a/trustgraph-parquet/trustgraph/dump/graph_embeddings/parquet/processor.py +++ /dev/null @@ -1,85 +0,0 @@ - -""" -Write graph embeddings to parquet files in a directory. -""" - -import pulsar -import base64 -import os -import argparse -import time - -from .... schema import GraphEmbeddings -from .... schema import graph_embeddings_store_queue -from .... base import Consumer - -from . writer import ParquetWriter - -module = ".".join(__name__.split(".")[1:-1]) - -default_input_queue = graph_embeddings_store_queue -default_subscriber = module -default_graph_host='localhost' -default_directory = "." 
-default_file_template = "graph-embeds-{id}.parquet" -default_rotation_time = 60 - -class Processor(Consumer): - - def __init__(self, **params): - - input_queue = params.get("input_queue", default_input_queue) - subscriber = params.get("subscriber", default_subscriber) - directory = params.get("directory", default_directory) - file_template = params.get("file_template", default_file_template) - rotation_time = params.get("rotation_time", default_rotation_time) - - super(Processor, self).__init__( - **params | { - "input_queue": input_queue, - "subscriber": subscriber, - "input_schema": GraphEmbeddings, - } - ) - - self.writer = ParquetWriter(directory, file_template, rotation_time) - - def __del__(self): - if hasattr(self, "writer"): - del self.writer - - def handle(self, msg): - - v = msg.value() - self.writer.write(v.vectors, v.entity.value) - - @staticmethod - def add_args(parser): - - Consumer.add_args( - parser, default_input_queue, default_subscriber, - ) - - parser.add_argument( - '-d', '--directory', - default=default_directory, - help=f'Directory to write to (default: {default_directory})' - ) - - parser.add_argument( - '-f', '--file-template', - default=default_file_template, - help=f'Directory to write to (default: {default_file_template})' - ) - - parser.add_argument( - '-t', '--rotation-time', - type=int, - default=default_rotation_time, - help=f'Rotation time / seconds (default: {default_rotation_time})' - ) - -def run(): - - Processor.start(module, __doc__) - diff --git a/trustgraph-parquet/trustgraph/dump/graph_embeddings/parquet/writer.py b/trustgraph-parquet/trustgraph/dump/graph_embeddings/parquet/writer.py deleted file mode 100644 index 1844cdd1..00000000 --- a/trustgraph-parquet/trustgraph/dump/graph_embeddings/parquet/writer.py +++ /dev/null @@ -1,94 +0,0 @@ - -import threading -import queue -import time -import uuid -import pyarrow as pa -import pyarrow.parquet as pq - -class ParquetWriter: - - def __init__(self, directory, file_template, 
rotation_time): - self.directory = directory - self.file_template = file_template - self.rotation_time = rotation_time - - self.q = queue.Queue() - - self.running = True - - self.thread = threading.Thread(target=(self.writer_thread)) - self.thread.start() - - def writer_thread(self): - - items = [] - - timeout = None - - while self.running: - - try: - - item = self.q.get(timeout=1) - - if timeout == None: - timeout = time.time() + self.rotation_time - - items.append(item) - - except queue.Empty: - pass - - if timeout: - if time.time() > timeout: - - self.write_file(items) - timeout = None - items = [] - - def write_file(self, items): - - try: - - schema = pa.schema([ - pa.field('embeddings', pa.list_(pa.list_(pa.float64()))), - pa.field('entity', pa.string()), - ]) - - fname = self.file_template.format(id=str(uuid.uuid4())) - path = f"{self.directory}/{fname}" - - writer = pq.ParquetWriter(path, schema) - - batch = pa.record_batch( - [ - [i[0] for i in items], - [i[1] for i in items], - ], - names=['embeddings', 'entity'] - ) - - writer.write_batch(batch) - - writer.close() - - print(f"Wrote {path}.") - - except Exception as e: - - print("Parquet write:", e) - - def write(self, embeds, ent): - self.q.put((embeds, ent)) - - def __del__(self): - - self.running = False - - if hasattr(self, "q"): - self.thread.join() - - - - diff --git a/trustgraph-parquet/trustgraph/dump/triples/__init__.py b/trustgraph-parquet/trustgraph/dump/triples/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/trustgraph-parquet/trustgraph/dump/triples/parquet/__init__.py b/trustgraph-parquet/trustgraph/dump/triples/parquet/__init__.py deleted file mode 100644 index 9d16af90..00000000 --- a/trustgraph-parquet/trustgraph/dump/triples/parquet/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ - -from . 
processor import * - diff --git a/trustgraph-parquet/trustgraph/dump/triples/parquet/__main__.py b/trustgraph-parquet/trustgraph/dump/triples/parquet/__main__.py deleted file mode 100755 index c05d8c6d..00000000 --- a/trustgraph-parquet/trustgraph/dump/triples/parquet/__main__.py +++ /dev/null @@ -1,7 +0,0 @@ -#!/usr/bin/env python3 - -from . write import run - -if __name__ == '__main__': - run() - diff --git a/trustgraph-parquet/trustgraph/dump/triples/parquet/processor.py b/trustgraph-parquet/trustgraph/dump/triples/parquet/processor.py deleted file mode 100755 index dc15d8a9..00000000 --- a/trustgraph-parquet/trustgraph/dump/triples/parquet/processor.py +++ /dev/null @@ -1,87 +0,0 @@ - -""" -Write graphs triples to parquet files in a directory. -""" - -import pulsar -import base64 -import os -import argparse -import time - -from .... schema import Triples -from .... schema import triples_store_queue -from .... base import Consumer - -from . writer import ParquetWriter - -module = ".".join(__name__.split(".")[1:-1]) - -default_input_queue = triples_store_queue -default_subscriber = module -default_graph_host='localhost' -default_directory = "." 
-default_file_template = "triples-{id}.parquet" -default_rotation_time = 60 - -class Processor(Consumer): - - def __init__(self, **params): - - input_queue = params.get("input_queue", default_input_queue) - subscriber = params.get("subscriber", default_subscriber) - directory = params.get("directory", default_directory) - file_template = params.get("file_template", default_file_template) - rotation_time = params.get("rotation_time", default_rotation_time) - - super(Processor, self).__init__( - **params | { - "input_queue": input_queue, - "subscriber": subscriber, - "input_schema": Triples, - } - ) - - self.writer = ParquetWriter(directory, file_template, rotation_time) - - def __del__(self): - if hasattr(self, "writer"): - del self.writer - - def handle(self, msg): - - v = msg.value() - - for t in v.triples: - self.writer.write(t.s.value, t.p.value, t.o.value) - - @staticmethod - def add_args(parser): - - Consumer.add_args( - parser, default_input_queue, default_subscriber, - ) - - parser.add_argument( - '-d', '--directory', - default=default_directory, - help=f'Directory to write to (default: {default_directory})' - ) - - parser.add_argument( - '-f', '--file-template', - default=default_file_template, - help=f'Directory to write to (default: {default_file_template})' - ) - - parser.add_argument( - '-t', '--rotation-time', - type=int, - default=default_rotation_time, - help=f'Rotation time / seconds (default: {default_rotation_time})' - ) - -def run(): - - Processor.start(module, __doc__) - diff --git a/trustgraph-parquet/trustgraph/dump/triples/parquet/writer.py b/trustgraph-parquet/trustgraph/dump/triples/parquet/writer.py deleted file mode 100644 index e68bf342..00000000 --- a/trustgraph-parquet/trustgraph/dump/triples/parquet/writer.py +++ /dev/null @@ -1,96 +0,0 @@ - -import threading -import queue -import time -import uuid -import pyarrow as pa -import pyarrow.parquet as pq - -class ParquetWriter: - - def __init__(self, directory, file_template, 
rotation_time): - self.directory = directory - self.file_template = file_template - self.rotation_time = rotation_time - - self.q = queue.Queue() - - self.running = True - - self.thread = threading.Thread(target=(self.writer_thread)) - self.thread.start() - - def writer_thread(self): - - triples = [] - - timeout = None - - while self.running: - - try: - - item = self.q.get(timeout=1) - - if timeout == None: - timeout = time.time() + self.rotation_time - - triples.append(item) - - except queue.Empty: - pass - - if timeout: - if time.time() > timeout: - - self.write_file(triples) - timeout = None - triples = [] - - def write_file(self, triples): - - try: - - schema = pa.schema([ - pa.field('s', pa.string()), - pa.field('p', pa.string()), - pa.field('o', pa.string()), - ]) - - fname = self.file_template.format(id=str(uuid.uuid4())) - path = f"{self.directory}/{fname}" - - writer = pq.ParquetWriter(path, schema) - - batch = pa.record_batch( - [ - [tpl[0] for tpl in triples], - [tpl[1] for tpl in triples], - [tpl[2] for tpl in triples], - ], - names=['s', 'p', 'o'] - ) - - writer.write_batch(batch) - - writer.close() - - print(f"Wrote {path}.") - - except Exception as e: - - print("Parquet write:", e) - - def write(self, s, p, o): - self.q.put((s, p, o)) - - def __del__(self): - - self.running = False - - if hasattr(self, "q"): - self.thread.join() - - - - From 67d69b5285d437577d30b487977672fcb0b48834 Mon Sep 17 00:00:00 2001 From: cybermaggedon Date: Fri, 6 Dec 2024 13:05:56 +0000 Subject: [PATCH 30/37] Fixed a problem with the packages, api/__init__.py appeared in both (#196) trustgraph-flow and trustgraph-base, moved the gateway stuff into a different directory. 
--- trustgraph-flow/scripts/api-gateway | 2 +- trustgraph-flow/trustgraph/api/__init__.py | 0 trustgraph-flow/trustgraph/{api => }/gateway/__init__.py | 0 trustgraph-flow/trustgraph/{api => }/gateway/__main__.py | 0 trustgraph-flow/trustgraph/{api => }/gateway/agent.py | 0 trustgraph-flow/trustgraph/{api => }/gateway/auth.py | 0 trustgraph-flow/trustgraph/{api => }/gateway/dbpedia.py | 0 trustgraph-flow/trustgraph/{api => }/gateway/embeddings.py | 0 trustgraph-flow/trustgraph/{api => }/gateway/encyclopedia.py | 0 trustgraph-flow/trustgraph/{api => }/gateway/endpoint.py | 0 .../trustgraph/{api => }/gateway/graph_embeddings_load.py | 0 .../trustgraph/{api => }/gateway/graph_embeddings_stream.py | 0 trustgraph-flow/trustgraph/{api => }/gateway/graph_rag.py | 0 trustgraph-flow/trustgraph/{api => }/gateway/internet_search.py | 0 trustgraph-flow/trustgraph/{api => }/gateway/prompt.py | 0 trustgraph-flow/trustgraph/{api => }/gateway/publisher.py | 0 trustgraph-flow/trustgraph/{api => }/gateway/running.py | 0 trustgraph-flow/trustgraph/{api => }/gateway/serialize.py | 0 trustgraph-flow/trustgraph/{api => }/gateway/service.py | 0 trustgraph-flow/trustgraph/{api => }/gateway/socket.py | 0 trustgraph-flow/trustgraph/{api => }/gateway/subscriber.py | 0 trustgraph-flow/trustgraph/{api => }/gateway/text_completion.py | 0 trustgraph-flow/trustgraph/{api => }/gateway/triples_load.py | 0 trustgraph-flow/trustgraph/{api => }/gateway/triples_query.py | 0 trustgraph-flow/trustgraph/{api => }/gateway/triples_stream.py | 0 25 files changed, 1 insertion(+), 1 deletion(-) delete mode 100644 trustgraph-flow/trustgraph/api/__init__.py rename trustgraph-flow/trustgraph/{api => }/gateway/__init__.py (100%) rename trustgraph-flow/trustgraph/{api => }/gateway/__main__.py (100%) rename trustgraph-flow/trustgraph/{api => }/gateway/agent.py (100%) rename trustgraph-flow/trustgraph/{api => }/gateway/auth.py (100%) rename trustgraph-flow/trustgraph/{api => }/gateway/dbpedia.py (100%) rename 
trustgraph-flow/trustgraph/{api => }/gateway/embeddings.py (100%) rename trustgraph-flow/trustgraph/{api => }/gateway/encyclopedia.py (100%) rename trustgraph-flow/trustgraph/{api => }/gateway/endpoint.py (100%) rename trustgraph-flow/trustgraph/{api => }/gateway/graph_embeddings_load.py (100%) rename trustgraph-flow/trustgraph/{api => }/gateway/graph_embeddings_stream.py (100%) rename trustgraph-flow/trustgraph/{api => }/gateway/graph_rag.py (100%) rename trustgraph-flow/trustgraph/{api => }/gateway/internet_search.py (100%) rename trustgraph-flow/trustgraph/{api => }/gateway/prompt.py (100%) rename trustgraph-flow/trustgraph/{api => }/gateway/publisher.py (100%) rename trustgraph-flow/trustgraph/{api => }/gateway/running.py (100%) rename trustgraph-flow/trustgraph/{api => }/gateway/serialize.py (100%) rename trustgraph-flow/trustgraph/{api => }/gateway/service.py (100%) rename trustgraph-flow/trustgraph/{api => }/gateway/socket.py (100%) rename trustgraph-flow/trustgraph/{api => }/gateway/subscriber.py (100%) rename trustgraph-flow/trustgraph/{api => }/gateway/text_completion.py (100%) rename trustgraph-flow/trustgraph/{api => }/gateway/triples_load.py (100%) rename trustgraph-flow/trustgraph/{api => }/gateway/triples_query.py (100%) rename trustgraph-flow/trustgraph/{api => }/gateway/triples_stream.py (100%) diff --git a/trustgraph-flow/scripts/api-gateway b/trustgraph-flow/scripts/api-gateway index d28a5b8a..f7ba0fda 100755 --- a/trustgraph-flow/scripts/api-gateway +++ b/trustgraph-flow/scripts/api-gateway @@ -1,6 +1,6 @@ #!/usr/bin/env python3 -from trustgraph.api.gateway import run +from trustgraph.gateway import run run() diff --git a/trustgraph-flow/trustgraph/api/__init__.py b/trustgraph-flow/trustgraph/api/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/trustgraph-flow/trustgraph/api/gateway/__init__.py b/trustgraph-flow/trustgraph/gateway/__init__.py similarity index 100% rename from 
trustgraph-flow/trustgraph/api/gateway/__init__.py rename to trustgraph-flow/trustgraph/gateway/__init__.py diff --git a/trustgraph-flow/trustgraph/api/gateway/__main__.py b/trustgraph-flow/trustgraph/gateway/__main__.py similarity index 100% rename from trustgraph-flow/trustgraph/api/gateway/__main__.py rename to trustgraph-flow/trustgraph/gateway/__main__.py diff --git a/trustgraph-flow/trustgraph/api/gateway/agent.py b/trustgraph-flow/trustgraph/gateway/agent.py similarity index 100% rename from trustgraph-flow/trustgraph/api/gateway/agent.py rename to trustgraph-flow/trustgraph/gateway/agent.py diff --git a/trustgraph-flow/trustgraph/api/gateway/auth.py b/trustgraph-flow/trustgraph/gateway/auth.py similarity index 100% rename from trustgraph-flow/trustgraph/api/gateway/auth.py rename to trustgraph-flow/trustgraph/gateway/auth.py diff --git a/trustgraph-flow/trustgraph/api/gateway/dbpedia.py b/trustgraph-flow/trustgraph/gateway/dbpedia.py similarity index 100% rename from trustgraph-flow/trustgraph/api/gateway/dbpedia.py rename to trustgraph-flow/trustgraph/gateway/dbpedia.py diff --git a/trustgraph-flow/trustgraph/api/gateway/embeddings.py b/trustgraph-flow/trustgraph/gateway/embeddings.py similarity index 100% rename from trustgraph-flow/trustgraph/api/gateway/embeddings.py rename to trustgraph-flow/trustgraph/gateway/embeddings.py diff --git a/trustgraph-flow/trustgraph/api/gateway/encyclopedia.py b/trustgraph-flow/trustgraph/gateway/encyclopedia.py similarity index 100% rename from trustgraph-flow/trustgraph/api/gateway/encyclopedia.py rename to trustgraph-flow/trustgraph/gateway/encyclopedia.py diff --git a/trustgraph-flow/trustgraph/api/gateway/endpoint.py b/trustgraph-flow/trustgraph/gateway/endpoint.py similarity index 100% rename from trustgraph-flow/trustgraph/api/gateway/endpoint.py rename to trustgraph-flow/trustgraph/gateway/endpoint.py diff --git a/trustgraph-flow/trustgraph/api/gateway/graph_embeddings_load.py 
b/trustgraph-flow/trustgraph/gateway/graph_embeddings_load.py similarity index 100% rename from trustgraph-flow/trustgraph/api/gateway/graph_embeddings_load.py rename to trustgraph-flow/trustgraph/gateway/graph_embeddings_load.py diff --git a/trustgraph-flow/trustgraph/api/gateway/graph_embeddings_stream.py b/trustgraph-flow/trustgraph/gateway/graph_embeddings_stream.py similarity index 100% rename from trustgraph-flow/trustgraph/api/gateway/graph_embeddings_stream.py rename to trustgraph-flow/trustgraph/gateway/graph_embeddings_stream.py diff --git a/trustgraph-flow/trustgraph/api/gateway/graph_rag.py b/trustgraph-flow/trustgraph/gateway/graph_rag.py similarity index 100% rename from trustgraph-flow/trustgraph/api/gateway/graph_rag.py rename to trustgraph-flow/trustgraph/gateway/graph_rag.py diff --git a/trustgraph-flow/trustgraph/api/gateway/internet_search.py b/trustgraph-flow/trustgraph/gateway/internet_search.py similarity index 100% rename from trustgraph-flow/trustgraph/api/gateway/internet_search.py rename to trustgraph-flow/trustgraph/gateway/internet_search.py diff --git a/trustgraph-flow/trustgraph/api/gateway/prompt.py b/trustgraph-flow/trustgraph/gateway/prompt.py similarity index 100% rename from trustgraph-flow/trustgraph/api/gateway/prompt.py rename to trustgraph-flow/trustgraph/gateway/prompt.py diff --git a/trustgraph-flow/trustgraph/api/gateway/publisher.py b/trustgraph-flow/trustgraph/gateway/publisher.py similarity index 100% rename from trustgraph-flow/trustgraph/api/gateway/publisher.py rename to trustgraph-flow/trustgraph/gateway/publisher.py diff --git a/trustgraph-flow/trustgraph/api/gateway/running.py b/trustgraph-flow/trustgraph/gateway/running.py similarity index 100% rename from trustgraph-flow/trustgraph/api/gateway/running.py rename to trustgraph-flow/trustgraph/gateway/running.py diff --git a/trustgraph-flow/trustgraph/api/gateway/serialize.py b/trustgraph-flow/trustgraph/gateway/serialize.py similarity index 100% rename from 
trustgraph-flow/trustgraph/api/gateway/serialize.py rename to trustgraph-flow/trustgraph/gateway/serialize.py diff --git a/trustgraph-flow/trustgraph/api/gateway/service.py b/trustgraph-flow/trustgraph/gateway/service.py similarity index 100% rename from trustgraph-flow/trustgraph/api/gateway/service.py rename to trustgraph-flow/trustgraph/gateway/service.py diff --git a/trustgraph-flow/trustgraph/api/gateway/socket.py b/trustgraph-flow/trustgraph/gateway/socket.py similarity index 100% rename from trustgraph-flow/trustgraph/api/gateway/socket.py rename to trustgraph-flow/trustgraph/gateway/socket.py diff --git a/trustgraph-flow/trustgraph/api/gateway/subscriber.py b/trustgraph-flow/trustgraph/gateway/subscriber.py similarity index 100% rename from trustgraph-flow/trustgraph/api/gateway/subscriber.py rename to trustgraph-flow/trustgraph/gateway/subscriber.py diff --git a/trustgraph-flow/trustgraph/api/gateway/text_completion.py b/trustgraph-flow/trustgraph/gateway/text_completion.py similarity index 100% rename from trustgraph-flow/trustgraph/api/gateway/text_completion.py rename to trustgraph-flow/trustgraph/gateway/text_completion.py diff --git a/trustgraph-flow/trustgraph/api/gateway/triples_load.py b/trustgraph-flow/trustgraph/gateway/triples_load.py similarity index 100% rename from trustgraph-flow/trustgraph/api/gateway/triples_load.py rename to trustgraph-flow/trustgraph/gateway/triples_load.py diff --git a/trustgraph-flow/trustgraph/api/gateway/triples_query.py b/trustgraph-flow/trustgraph/gateway/triples_query.py similarity index 100% rename from trustgraph-flow/trustgraph/api/gateway/triples_query.py rename to trustgraph-flow/trustgraph/gateway/triples_query.py diff --git a/trustgraph-flow/trustgraph/api/gateway/triples_stream.py b/trustgraph-flow/trustgraph/gateway/triples_stream.py similarity index 100% rename from trustgraph-flow/trustgraph/api/gateway/triples_stream.py rename to trustgraph-flow/trustgraph/gateway/triples_stream.py From 
55c5c398b60c4bbfa7b8d563e8d205a886b8edba Mon Sep 17 00:00:00 2001 From: cybermaggedon Date: Fri, 6 Dec 2024 13:37:44 +0000 Subject: [PATCH 31/37] Fix relative imports (#197) --- trustgraph-flow/trustgraph/gateway/agent.py | 6 +++--- trustgraph-flow/trustgraph/gateway/dbpedia.py | 6 +++--- trustgraph-flow/trustgraph/gateway/embeddings.py | 6 +++--- trustgraph-flow/trustgraph/gateway/encyclopedia.py | 6 +++--- trustgraph-flow/trustgraph/gateway/graph_embeddings_load.py | 6 +++--- .../trustgraph/gateway/graph_embeddings_stream.py | 4 ++-- trustgraph-flow/trustgraph/gateway/graph_rag.py | 6 +++--- trustgraph-flow/trustgraph/gateway/internet_search.py | 6 +++--- trustgraph-flow/trustgraph/gateway/prompt.py | 6 +++--- trustgraph-flow/trustgraph/gateway/serialize.py | 2 +- trustgraph-flow/trustgraph/gateway/service.py | 6 +++--- trustgraph-flow/trustgraph/gateway/text_completion.py | 6 +++--- trustgraph-flow/trustgraph/gateway/triples_load.py | 6 +++--- trustgraph-flow/trustgraph/gateway/triples_query.py | 6 +++--- trustgraph-flow/trustgraph/gateway/triples_stream.py | 4 ++-- 15 files changed, 41 insertions(+), 41 deletions(-) diff --git a/trustgraph-flow/trustgraph/gateway/agent.py b/trustgraph-flow/trustgraph/gateway/agent.py index 40586133..e8fd0e72 100644 --- a/trustgraph-flow/trustgraph/gateway/agent.py +++ b/trustgraph-flow/trustgraph/gateway/agent.py @@ -1,7 +1,7 @@ -from ... schema import AgentRequest, AgentResponse -from ... schema import agent_request_queue -from ... schema import agent_response_queue +from .. schema import AgentRequest, AgentResponse +from .. schema import agent_request_queue +from .. schema import agent_response_queue from . endpoint import MultiResponseServiceEndpoint diff --git a/trustgraph-flow/trustgraph/gateway/dbpedia.py b/trustgraph-flow/trustgraph/gateway/dbpedia.py index 4fa7336b..a61292a6 100644 --- a/trustgraph-flow/trustgraph/gateway/dbpedia.py +++ b/trustgraph-flow/trustgraph/gateway/dbpedia.py @@ -1,7 +1,7 @@ -from ... 
schema import LookupRequest, LookupResponse -from ... schema import dbpedia_lookup_request_queue -from ... schema import dbpedia_lookup_response_queue +from .. schema import LookupRequest, LookupResponse +from .. schema import dbpedia_lookup_request_queue +from .. schema import dbpedia_lookup_response_queue from . endpoint import ServiceEndpoint diff --git a/trustgraph-flow/trustgraph/gateway/embeddings.py b/trustgraph-flow/trustgraph/gateway/embeddings.py index 7c4b578d..6d3a9fe6 100644 --- a/trustgraph-flow/trustgraph/gateway/embeddings.py +++ b/trustgraph-flow/trustgraph/gateway/embeddings.py @@ -1,7 +1,7 @@ -from ... schema import EmbeddingsRequest, EmbeddingsResponse -from ... schema import embeddings_request_queue -from ... schema import embeddings_response_queue +from .. schema import EmbeddingsRequest, EmbeddingsResponse +from .. schema import embeddings_request_queue +from .. schema import embeddings_response_queue from . endpoint import ServiceEndpoint diff --git a/trustgraph-flow/trustgraph/gateway/encyclopedia.py b/trustgraph-flow/trustgraph/gateway/encyclopedia.py index c6041cb2..32eb5cd1 100644 --- a/trustgraph-flow/trustgraph/gateway/encyclopedia.py +++ b/trustgraph-flow/trustgraph/gateway/encyclopedia.py @@ -1,7 +1,7 @@ -from ... schema import LookupRequest, LookupResponse -from ... schema import encyclopedia_lookup_request_queue -from ... schema import encyclopedia_lookup_response_queue +from .. schema import LookupRequest, LookupResponse +from .. schema import encyclopedia_lookup_request_queue +from .. schema import encyclopedia_lookup_response_queue from . 
endpoint import ServiceEndpoint diff --git a/trustgraph-flow/trustgraph/gateway/graph_embeddings_load.py b/trustgraph-flow/trustgraph/gateway/graph_embeddings_load.py index 81fb6647..18a2e6fe 100644 --- a/trustgraph-flow/trustgraph/gateway/graph_embeddings_load.py +++ b/trustgraph-flow/trustgraph/gateway/graph_embeddings_load.py @@ -4,9 +4,9 @@ from pulsar.schema import JsonSchema import uuid from aiohttp import WSMsgType -from ... schema import Metadata -from ... schema import GraphEmbeddings -from ... schema import graph_embeddings_store_queue +from .. schema import Metadata +from .. schema import GraphEmbeddings +from .. schema import graph_embeddings_store_queue from . publisher import Publisher from . socket import SocketEndpoint diff --git a/trustgraph-flow/trustgraph/gateway/graph_embeddings_stream.py b/trustgraph-flow/trustgraph/gateway/graph_embeddings_stream.py index 3d4efd45..f0b4dd86 100644 --- a/trustgraph-flow/trustgraph/gateway/graph_embeddings_stream.py +++ b/trustgraph-flow/trustgraph/gateway/graph_embeddings_stream.py @@ -4,8 +4,8 @@ import queue from pulsar.schema import JsonSchema import uuid -from ... schema import GraphEmbeddings -from ... schema import graph_embeddings_store_queue +from .. schema import GraphEmbeddings +from .. schema import graph_embeddings_store_queue from . subscriber import Subscriber from . socket import SocketEndpoint diff --git a/trustgraph-flow/trustgraph/gateway/graph_rag.py b/trustgraph-flow/trustgraph/gateway/graph_rag.py index d33090ca..58679004 100644 --- a/trustgraph-flow/trustgraph/gateway/graph_rag.py +++ b/trustgraph-flow/trustgraph/gateway/graph_rag.py @@ -1,7 +1,7 @@ -from ... schema import GraphRagQuery, GraphRagResponse -from ... schema import graph_rag_request_queue -from ... schema import graph_rag_response_queue +from .. schema import GraphRagQuery, GraphRagResponse +from .. schema import graph_rag_request_queue +from .. schema import graph_rag_response_queue from . 
endpoint import ServiceEndpoint diff --git a/trustgraph-flow/trustgraph/gateway/internet_search.py b/trustgraph-flow/trustgraph/gateway/internet_search.py index f55a4a3e..5a5dc948 100644 --- a/trustgraph-flow/trustgraph/gateway/internet_search.py +++ b/trustgraph-flow/trustgraph/gateway/internet_search.py @@ -1,7 +1,7 @@ -from ... schema import LookupRequest, LookupResponse -from ... schema import internet_search_request_queue -from ... schema import internet_search_response_queue +from .. schema import LookupRequest, LookupResponse +from .. schema import internet_search_request_queue +from .. schema import internet_search_response_queue from . endpoint import ServiceEndpoint diff --git a/trustgraph-flow/trustgraph/gateway/prompt.py b/trustgraph-flow/trustgraph/gateway/prompt.py index d19005bc..f09a0e0e 100644 --- a/trustgraph-flow/trustgraph/gateway/prompt.py +++ b/trustgraph-flow/trustgraph/gateway/prompt.py @@ -1,9 +1,9 @@ import json -from ... schema import PromptRequest, PromptResponse -from ... schema import prompt_request_queue -from ... schema import prompt_response_queue +from .. schema import PromptRequest, PromptResponse +from .. schema import prompt_request_queue +from .. schema import prompt_response_queue from . endpoint import ServiceEndpoint diff --git a/trustgraph-flow/trustgraph/gateway/serialize.py b/trustgraph-flow/trustgraph/gateway/serialize.py index 2b955645..35932382 100644 --- a/trustgraph-flow/trustgraph/gateway/serialize.py +++ b/trustgraph-flow/trustgraph/gateway/serialize.py @@ -1,4 +1,4 @@ -from ... schema import Value, Triple +from .. 
schema import Value, Triple def to_value(x): return Value(value=x["v"], is_uri=x["e"]) diff --git a/trustgraph-flow/trustgraph/gateway/service.py b/trustgraph-flow/trustgraph/gateway/service.py index faa250dc..e927ecf6 100755 --- a/trustgraph-flow/trustgraph/gateway/service.py +++ b/trustgraph-flow/trustgraph/gateway/service.py @@ -22,10 +22,10 @@ import pulsar from pulsar.schema import JsonSchema from prometheus_client import start_http_server -from ... log_level import LogLevel +from .. log_level import LogLevel -from ... schema import Metadata, Document, TextDocument -from ... schema import document_ingest_queue, text_ingest_queue +from .. schema import Metadata, Document, TextDocument +from .. schema import document_ingest_queue, text_ingest_queue from . serialize import to_subgraph from . running import Running diff --git a/trustgraph-flow/trustgraph/gateway/text_completion.py b/trustgraph-flow/trustgraph/gateway/text_completion.py index d9f69b7e..d59737f0 100644 --- a/trustgraph-flow/trustgraph/gateway/text_completion.py +++ b/trustgraph-flow/trustgraph/gateway/text_completion.py @@ -1,7 +1,7 @@ -from ... schema import TextCompletionRequest, TextCompletionResponse -from ... schema import text_completion_request_queue -from ... schema import text_completion_response_queue +from .. schema import TextCompletionRequest, TextCompletionResponse +from .. schema import text_completion_request_queue +from .. schema import text_completion_response_queue from . endpoint import ServiceEndpoint diff --git a/trustgraph-flow/trustgraph/gateway/triples_load.py b/trustgraph-flow/trustgraph/gateway/triples_load.py index dbb3e617..2689f3ad 100644 --- a/trustgraph-flow/trustgraph/gateway/triples_load.py +++ b/trustgraph-flow/trustgraph/gateway/triples_load.py @@ -4,9 +4,9 @@ from pulsar.schema import JsonSchema import uuid from aiohttp import WSMsgType -from ... schema import Metadata -from ... schema import Triples -from ... schema import triples_store_queue +from .. 
schema import Metadata +from .. schema import Triples +from .. schema import triples_store_queue from . publisher import Publisher from . socket import SocketEndpoint diff --git a/trustgraph-flow/trustgraph/gateway/triples_query.py b/trustgraph-flow/trustgraph/gateway/triples_query.py index 9c5939c8..5a0cfff8 100644 --- a/trustgraph-flow/trustgraph/gateway/triples_query.py +++ b/trustgraph-flow/trustgraph/gateway/triples_query.py @@ -1,7 +1,7 @@ -from ... schema import TriplesQueryRequest, TriplesQueryResponse, Triples -from ... schema import triples_request_queue -from ... schema import triples_response_queue +from .. schema import TriplesQueryRequest, TriplesQueryResponse, Triples +from .. schema import triples_request_queue +from .. schema import triples_response_queue from . endpoint import ServiceEndpoint from . serialize import to_value, serialize_subgraph diff --git a/trustgraph-flow/trustgraph/gateway/triples_stream.py b/trustgraph-flow/trustgraph/gateway/triples_stream.py index 4638e08d..92ada132 100644 --- a/trustgraph-flow/trustgraph/gateway/triples_stream.py +++ b/trustgraph-flow/trustgraph/gateway/triples_stream.py @@ -4,8 +4,8 @@ import queue from pulsar.schema import JsonSchema import uuid -from ... schema import Triples -from ... schema import triples_store_queue +from .. schema import Triples +from .. schema import triples_store_queue from . subscriber import Subscriber from . 
socket import SocketEndpoint From fd3db3c925bf7ef28ef795291627ebc885edf7d2 Mon Sep 17 00:00:00 2001 From: cybermaggedon Date: Fri, 6 Dec 2024 15:16:09 +0000 Subject: [PATCH 32/37] Feature/tidy kg load save (#198) * Clean exit on ctrl-C * More functionality in dump * Dump some metadata --- test-api/test-llm-api | 3 + trustgraph-cli/scripts/tg-dump-msgpack | 56 +++++++++++++++++- trustgraph-cli/scripts/tg-load-kg-core | 77 +++++++++++++++++++------ trustgraph-cli/scripts/tg-save-kg-core | 78 ++++++++++++++++++++------ 4 files changed, 177 insertions(+), 37 deletions(-) diff --git a/test-api/test-llm-api b/test-api/test-llm-api index c33c6634..6bee2048 100755 --- a/test-api/test-llm-api +++ b/test-api/test-llm-api @@ -19,6 +19,9 @@ resp = requests.post( json=input, ) +if resp.status_code != 200: + raise RuntimeError(f"Status code: {resp.status_code}") + resp = resp.json() if "error" in resp: diff --git a/trustgraph-cli/scripts/tg-dump-msgpack b/trustgraph-cli/scripts/tg-dump-msgpack index 2be950db..18819649 100755 --- a/trustgraph-cli/scripts/tg-dump-msgpack +++ b/trustgraph-cli/scripts/tg-dump-msgpack @@ -10,7 +10,7 @@ import msgpack import sys import argparse -def run(input_file): +def dump(input_file, action): with open(input_file, 'rb') as f: @@ -19,6 +19,43 @@ def run(input_file): for unpacked in unpacker: print(unpacked) +def summary(input_file, action): + + vector_dim = None + + triples = set() + + max_records = 1000000 + + with open(input_file, 'rb') as f: + + unpacker = msgpack.Unpacker(f, raw=False) + + rec_count = 0 + + for msg in unpacker: + + if msg[0] == "ge": + vector_dim = len(msg[1]["v"][0]) + + if msg[0] == "t": + + for elt in msg[1]["m"]["m"]: + triples.add(( + elt["s"]["v"], + elt["p"]["v"], + elt["o"]["v"], + )) + + if rec_count > max_records: break + rec_count += 1 + + print("Vector dimension:", vector_dim) + + for t in triples: + if t[1] == "http://www.w3.org/2000/01/rdf-schema#label": + print("-", t[2]) + def main(): parser = 
argparse.ArgumentParser( @@ -32,9 +69,24 @@ def main(): help=f'Input file' ) + parser.add_argument( + '-s', '--summary', action="store_const", const="summary", + dest="action", + help=f'Show a summary' + ) + + parser.add_argument( + '-r', '--records', action="store_const", const="records", + dest="action", + help=f'Dump individual records' + ) + args = parser.parse_args() - run(**vars(args)) + if args.action == "summary": + summary(**vars(args)) + else: + dump(**vars(args)) main() diff --git a/trustgraph-cli/scripts/tg-load-kg-core b/trustgraph-cli/scripts/tg-load-kg-core index 4e207cf1..5c2ae140 100755 --- a/trustgraph-cli/scripts/tg-load-kg-core +++ b/trustgraph-cli/scripts/tg-load-kg-core @@ -12,16 +12,25 @@ import json import sys import argparse import os +import signal -async def load_ge(queue, url): +class Running: + def __init__(self): self.running = True + def get(self): return self.running + def stop(self): self.running = False + +async def load_ge(running, queue, url): async with aiohttp.ClientSession() as session: async with session.ws_connect(f"{url}load/graph-embeddings") as ws: - while True: + while running.get(): - msg = await queue.get() + try: + msg = await asyncio.wait_for(queue.get(), 1) + except TimeoutError: + continue msg = { "metadata": { @@ -36,13 +45,18 @@ async def load_ge(queue, url): await ws.send_json(msg) -async def load_triples(queue, url): +async def load_triples(running, queue, url): + async with aiohttp.ClientSession() as session: + async with session.ws_connect(f"{url}load/triples") as ws: - while True: + while running.get(): - msg = await queue.get() + try: + msg = await asyncio.wait_for(queue.get(), 1) + except TimeoutError: + continue msg ={ "metadata": { @@ -59,18 +73,18 @@ async def load_triples(queue, url): ge_counts = 0 t_counts = 0 -async def stats(): +async def stats(running): global t_counts global ge_counts - while True: - await asyncio.sleep(5) + while running.get(): + await asyncio.sleep(2) print( f"Graph embeddings: 
{ge_counts:10d} Triples: {t_counts:10d}" ) -async def loader(ge_queue, t_queue, path, format, user, collection): +async def loader(running, ge_queue, t_queue, path, format, user, collection): global t_counts global ge_counts @@ -85,7 +99,12 @@ async def loader(ge_queue, t_queue, path, format, user, collection): unpacker = msgpack.Unpacker(f, raw=False) - for unpacked in unpacker: + while running.get(): + + try: + unpacked = unpacker.unpack() + except: + break if user: unpacked["metadata"]["user"] = user @@ -94,14 +113,25 @@ async def loader(ge_queue, t_queue, path, format, user, collection): unpacked["metadata"]["collection"] = collection if unpacked[0] == "t": - await t_queue.put(unpacked[1]) + qtype = t_queue t_counts += 1 else: if unpacked[0] == "ge": - await ge_queue.put(unpacked[1]) + qtype = ge_queue ge_counts += 1 -async def run(**args): + while running.get(): + + try: + await asyncio.wait_for(qtype.put(unpacked[1]), 0.5) + except TimeoutError: + continue + + if not running.get(): break + + running.stop() + +async def run(running, **args): # Maxsize on queues reduces back-pressure so tg-load-kg-core doesn't # grow to eat all memory @@ -110,6 +140,7 @@ async def run(**args): load_task = asyncio.create_task( loader( + running=running, ge_queue=ge_q, t_queue=t_q, path=args["input_file"], format=args["format"], user=args["user"], collection=args["collection"], @@ -119,24 +150,26 @@ async def run(**args): ge_task = asyncio.create_task( load_ge( + running=running, queue=ge_q, url=args["url"] + "api/v1/" ) ) triples_task = asyncio.create_task( load_triples( + running=running, queue=t_q, url=args["url"] + "api/v1/" ) ) - stats_task = asyncio.create_task(stats()) + stats_task = asyncio.create_task(stats(running)) await load_task await triples_task await ge_task await stats_task -async def main(): +async def main(running): parser = argparse.ArgumentParser( prog='tg-load-kg-core', @@ -179,7 +212,15 @@ async def main(): args = parser.parse_args() - await 
run(**vars(args)) + await run(running, **vars(args)) -asyncio.run(main()) +running = Running() + +def interrupt(sig, frame): + running.stop() + print('Interrupt') + +signal.signal(signal.SIGINT, interrupt) + +asyncio.run(main(running)) diff --git a/trustgraph-cli/scripts/tg-save-kg-core b/trustgraph-cli/scripts/tg-save-kg-core index 3c03383f..f2509dba 100755 --- a/trustgraph-cli/scripts/tg-save-kg-core +++ b/trustgraph-cli/scripts/tg-save-kg-core @@ -16,11 +16,26 @@ import json import sys import argparse import os +import signal + +class Running: + def __init__(self): self.running = True + def get(self): return self.running + def stop(self): self.running = False + +async def fetch_ge(running, queue, user, collection, url): -async def fetch_ge(queue, user, collection, url): async with aiohttp.ClientSession() as session: + async with session.ws_connect(f"{url}stream/graph-embeddings") as ws: - async for msg in ws: + + while running.get(): + + try: + msg = await asyncio.wait_for(ws.receive(), 1) + except: + continue + if msg.type == aiohttp.WSMsgType.TEXT: data = msg.json() @@ -50,10 +65,19 @@ async def fetch_ge(queue, user, collection, url): print("Error") break -async def fetch_triples(queue, user, collection, url): +async def fetch_triples(running, queue, user, collection, url): + async with aiohttp.ClientSession() as session: + async with session.ws_connect(f"{url}stream/triples") as ws: - async for msg in ws: + + while running.get(): + + try: + msg = await asyncio.wait_for(ws.receive(), 1) + except: + continue + if msg.type == aiohttp.WSMsgType.TEXT: data = msg.json() @@ -85,27 +109,32 @@ async def fetch_triples(queue, user, collection, url): ge_counts = 0 t_counts = 0 -async def stats(): +async def stats(running): global t_counts global ge_counts - while True: - await asyncio.sleep(5) + while running.get(): + + await asyncio.sleep(2) + print( f"Graph embeddings: {ge_counts:10d} Triples: {t_counts:10d}" ) -async def output(queue, path, format): +async def 
output(running, queue, path, format): global t_counts global ge_counts with open(path, "wb") as f: - while True: + while running.get(): - msg = await queue.get() + try: + msg = await asyncio.wait_for(queue.get(), 0.5) + except TimeoutError: + continue if format == "msgpack": f.write(msgpack.packb(msg, use_bin_type=True)) @@ -118,12 +147,15 @@ async def output(queue, path, format): if msg[0] == "ge": ge_counts += 1 -async def run(**args): + print("Output file closed") + +async def run(running, **args): q = asyncio.Queue() ge_task = asyncio.create_task( fetch_ge( + running=running, queue=q, user=args["user"], collection=args["collection"], url=args["url"] + "api/v1/" ) @@ -131,26 +163,30 @@ async def run(**args): triples_task = asyncio.create_task( fetch_triples( - queue=q, user=args["user"], collection=args["collection"], + running=running, queue=q, + user=args["user"], collection=args["collection"], url=args["url"] + "api/v1/" ) ) output_task = asyncio.create_task( output( - queue=q, path=args["output_file"], format=args["format"], + running=running, queue=q, + path=args["output_file"], format=args["format"], ) ) - stats_task = asyncio.create_task(stats()) + stats_task = asyncio.create_task(stats(running)) await output_task await triples_task await ge_task await stats_task -async def main(): + print("Exiting") + +async def main(running): parser = argparse.ArgumentParser( prog='tg-save-kg-core', @@ -193,7 +229,15 @@ async def main(): args = parser.parse_args() - await run(**vars(args)) + await run(running, **vars(args)) -asyncio.run(main()) +running = Running() + +def interrupt(sig, frame): + running.stop() + print('Interrupt') + +signal.signal(signal.SIGINT, interrupt) + +asyncio.run(main(running)) From 656dcb22a92a77a1effda9530d3cb1c72c13199d Mon Sep 17 00:00:00 2001 From: cybermaggedon Date: Fri, 6 Dec 2024 23:56:10 +0000 Subject: [PATCH 33/37] Feature/general websocket (#199) * Split API endpoint into endpoint and requestor * Service/endpoint separation * Call 
out to multiple services working * Add ID field * Add mux service on websocket, calls out to all services --- trustgraph-flow/trustgraph/gateway/agent.py | 27 ++-- trustgraph-flow/trustgraph/gateway/dbpedia.py | 9 +- .../trustgraph/gateway/embeddings.py | 11 +- .../trustgraph/gateway/encyclopedia.py | 9 +- .../trustgraph/gateway/endpoint.py | 115 ++--------------- .../trustgraph/gateway/graph_rag.py | 9 +- .../trustgraph/gateway/internet_search.py | 9 +- trustgraph-flow/trustgraph/gateway/mux.py | 94 ++++++++++++++ trustgraph-flow/trustgraph/gateway/prompt.py | 11 +- .../trustgraph/gateway/requestor.py | 88 +++++++++++++ trustgraph-flow/trustgraph/gateway/service.py | 122 ++++++++++++------ .../trustgraph/gateway/text_completion.py | 10 +- .../trustgraph/gateway/triples_query.py | 9 +- 13 files changed, 330 insertions(+), 193 deletions(-) create mode 100644 trustgraph-flow/trustgraph/gateway/mux.py create mode 100644 trustgraph-flow/trustgraph/gateway/requestor.py diff --git a/trustgraph-flow/trustgraph/gateway/agent.py b/trustgraph-flow/trustgraph/gateway/agent.py index e8fd0e72..c7af947b 100644 --- a/trustgraph-flow/trustgraph/gateway/agent.py +++ b/trustgraph-flow/trustgraph/gateway/agent.py @@ -3,20 +3,19 @@ from .. schema import AgentRequest, AgentResponse from .. schema import agent_request_queue from .. schema import agent_response_queue -from . endpoint import MultiResponseServiceEndpoint +from . endpoint import ServiceEndpoint +from . 
requestor import ServiceRequestor -class AgentEndpoint(MultiResponseServiceEndpoint): +class AgentRequestor(ServiceRequestor): def __init__(self, pulsar_host, timeout, auth): - super(AgentEndpoint, self).__init__( + super(AgentRequestor, self).__init__( pulsar_host=pulsar_host, request_queue=agent_request_queue, response_queue=agent_response_queue, request_schema=AgentRequest, response_schema=AgentResponse, - endpoint_path="/api/v1/agent", timeout=timeout, - auth=auth, ) def to_request(self, body): @@ -25,7 +24,19 @@ class AgentEndpoint(MultiResponseServiceEndpoint): ) def from_response(self, message): + resp = { + } + if message.answer: - return { "answer": message.answer }, True - else: - return {}, False + resp["answer"] = message.answer + + if message.thought: + resp["thought"] = message.thought + + if message.observation: + resp["observation"] = message.observation + + # The 2nd boolean expression indicates whether we're done responding + return resp, (message.answer is not None) + + diff --git a/trustgraph-flow/trustgraph/gateway/dbpedia.py b/trustgraph-flow/trustgraph/gateway/dbpedia.py index a61292a6..8ae4f695 100644 --- a/trustgraph-flow/trustgraph/gateway/dbpedia.py +++ b/trustgraph-flow/trustgraph/gateway/dbpedia.py @@ -4,19 +4,18 @@ from .. schema import dbpedia_lookup_request_queue from .. schema import dbpedia_lookup_response_queue from . endpoint import ServiceEndpoint +from . 
requestor import ServiceRequestor -class DbpediaEndpoint(ServiceEndpoint): +class DbpediaRequestor(ServiceRequestor): def __init__(self, pulsar_host, timeout, auth): - super(DbpediaEndpoint, self).__init__( + super(DbpediaRequestor, self).__init__( pulsar_host=pulsar_host, request_queue=dbpedia_lookup_request_queue, response_queue=dbpedia_lookup_response_queue, request_schema=LookupRequest, response_schema=LookupResponse, - endpoint_path="/api/v1/dbpedia", timeout=timeout, - auth=auth, ) def to_request(self, body): @@ -26,5 +25,5 @@ class DbpediaEndpoint(ServiceEndpoint): ) def from_response(self, message): - return { "text": message.text } + return { "text": message.text }, True diff --git a/trustgraph-flow/trustgraph/gateway/embeddings.py b/trustgraph-flow/trustgraph/gateway/embeddings.py index 6d3a9fe6..d0f3e1ef 100644 --- a/trustgraph-flow/trustgraph/gateway/embeddings.py +++ b/trustgraph-flow/trustgraph/gateway/embeddings.py @@ -4,19 +4,18 @@ from .. schema import embeddings_request_queue from .. schema import embeddings_response_queue from . endpoint import ServiceEndpoint +from . 
requestor import ServiceRequestor -class EmbeddingsEndpoint(ServiceEndpoint): +class EmbeddingsRequestor(ServiceRequestor): def __init__(self, pulsar_host, timeout, auth): - super(EmbeddingsEndpoint, self).__init__( + super(EmbeddingsRequestor, self).__init__( pulsar_host=pulsar_host, request_queue=embeddings_request_queue, response_queue=embeddings_response_queue, request_schema=EmbeddingsRequest, response_schema=EmbeddingsResponse, - endpoint_path="/api/v1/embeddings", timeout=timeout, - auth=auth, ) def to_request(self, body): @@ -25,4 +24,6 @@ class EmbeddingsEndpoint(ServiceEndpoint): ) def from_response(self, message): - return { "vectors": message.vectors } + return { "vectors": message.vectors }, True + + diff --git a/trustgraph-flow/trustgraph/gateway/encyclopedia.py b/trustgraph-flow/trustgraph/gateway/encyclopedia.py index 32eb5cd1..3f4dad79 100644 --- a/trustgraph-flow/trustgraph/gateway/encyclopedia.py +++ b/trustgraph-flow/trustgraph/gateway/encyclopedia.py @@ -4,19 +4,18 @@ from .. schema import encyclopedia_lookup_request_queue from .. schema import encyclopedia_lookup_response_queue from . endpoint import ServiceEndpoint +from . 
requestor import ServiceRequestor -class EncyclopediaEndpoint(ServiceEndpoint): +class EncyclopediaRequestor(ServiceRequestor): def __init__(self, pulsar_host, timeout, auth): - super(EncyclopediaEndpoint, self).__init__( + super(EncyclopediaRequestor, self).__init__( pulsar_host=pulsar_host, request_queue=encyclopedia_lookup_request_queue, response_queue=encyclopedia_lookup_response_queue, request_schema=LookupRequest, response_schema=LookupResponse, - endpoint_path="/api/v1/encyclopedia", timeout=timeout, - auth=auth, ) def to_request(self, body): @@ -26,5 +25,5 @@ class EncyclopediaEndpoint(ServiceEndpoint): ) def from_response(self, message): - return { "text": message.text } + return { "text": message.text }, True diff --git a/trustgraph-flow/trustgraph/gateway/endpoint.py b/trustgraph-flow/trustgraph/gateway/endpoint.py index 2b246361..6d6ca8d5 100644 --- a/trustgraph-flow/trustgraph/gateway/endpoint.py +++ b/trustgraph-flow/trustgraph/gateway/endpoint.py @@ -13,38 +13,17 @@ logger.setLevel(logging.INFO) class ServiceEndpoint: - def __init__( - self, - pulsar_host, - request_queue, request_schema, - response_queue, response_schema, - endpoint_path, - auth, - subscription="api-gateway", consumer_name="api-gateway", - timeout=600, - ): - - self.pub = Publisher( - pulsar_host, request_queue, - schema=JsonSchema(request_schema) - ) - - self.sub = Subscriber( - pulsar_host, response_queue, - subscription, consumer_name, - JsonSchema(response_schema) - ) + def __init__(self, endpoint_path, auth, requestor): self.path = endpoint_path - self.timeout = timeout - self.auth = auth + self.auth = auth self.operation = "service" - async def start(self): + self.requestor = requestor - self.pub.start() - self.sub.start() + async def start(self): + await self.requestor.start() def add_routes(self, app): @@ -52,16 +31,8 @@ class ServiceEndpoint: web.post(self.path, self.handle), ]) - def to_request(self, request): - raise RuntimeError("Not defined") - - def from_response(self, 
response): - raise RuntimeError("Not defined") - async def handle(self, request): - id = str(uuid.uuid4()) - print(request.path, "...") try: @@ -82,28 +53,12 @@ class ServiceEndpoint: print(data) - q = self.sub.subscribe(id) + def responder(x, fin): + print(x) - await asyncio.to_thread( - self.pub.send, id, self.to_request(data) - ) + resp, fin = await self.requestor.process(data, responder) - try: - resp = await asyncio.to_thread(q.get, timeout=self.timeout) - except Exception as e: - raise RuntimeError("Timeout") - - print(resp) - - if resp.error: - print("Error") - return web.json_response( - { "error": resp.error.message } - ) - - return web.json_response( - self.from_response(resp) - ) + return web.json_response(resp) except Exception as e: logging.error(f"Exception: {e}") @@ -112,55 +67,3 @@ class ServiceEndpoint: { "error": str(e) } ) - finally: - self.sub.unsubscribe(id) - - -class MultiResponseServiceEndpoint(ServiceEndpoint): - - async def handle(self, request): - - id = str(uuid.uuid4()) - - try: - - data = await request.json() - - q = self.sub.subscribe(id) - - await asyncio.to_thread( - self.pub.send, id, self.to_request(data) - ) - - # Keeps looking at responses... 
- - while True: - - try: - resp = await asyncio.to_thread(q.get, timeout=self.timeout) - except Exception as e: - raise RuntimeError("Timeout waiting for response") - - if resp.error: - return web.json_response( - { "error": resp.error.message } - ) - - # Until from_response says we have a finished answer - resp, fin = self.from_response(resp) - - - if fin: - return web.json_response(resp) - - # Not finished, so loop round and continue - - except Exception as e: - logging.error(f"Exception: {e}") - - return web.json_response( - { "error": str(e) } - ) - - finally: - self.sub.unsubscribe(id) diff --git a/trustgraph-flow/trustgraph/gateway/graph_rag.py b/trustgraph-flow/trustgraph/gateway/graph_rag.py index 58679004..55fd5d2f 100644 --- a/trustgraph-flow/trustgraph/gateway/graph_rag.py +++ b/trustgraph-flow/trustgraph/gateway/graph_rag.py @@ -4,19 +4,18 @@ from .. schema import graph_rag_request_queue from .. schema import graph_rag_response_queue from . endpoint import ServiceEndpoint +from . 
requestor import ServiceRequestor -class GraphRagEndpoint(ServiceEndpoint): +class GraphRagRequestor(ServiceRequestor): def __init__(self, pulsar_host, timeout, auth): - super(GraphRagEndpoint, self).__init__( + super(GraphRagRequestor, self).__init__( pulsar_host=pulsar_host, request_queue=graph_rag_request_queue, response_queue=graph_rag_response_queue, request_schema=GraphRagQuery, response_schema=GraphRagResponse, - endpoint_path="/api/v1/graph-rag", timeout=timeout, - auth=auth, ) def to_request(self, body): @@ -27,5 +26,5 @@ class GraphRagEndpoint(ServiceEndpoint): ) def from_response(self, message): - return { "response": message.response } + return { "response": message.response }, True diff --git a/trustgraph-flow/trustgraph/gateway/internet_search.py b/trustgraph-flow/trustgraph/gateway/internet_search.py index 5a5dc948..127cd5d1 100644 --- a/trustgraph-flow/trustgraph/gateway/internet_search.py +++ b/trustgraph-flow/trustgraph/gateway/internet_search.py @@ -4,19 +4,18 @@ from .. schema import internet_search_request_queue from .. schema import internet_search_response_queue from . endpoint import ServiceEndpoint +from . 
requestor import ServiceRequestor -class InternetSearchEndpoint(ServiceEndpoint): +class InternetSearchRequestor(ServiceRequestor): def __init__(self, pulsar_host, timeout, auth): - super(InternetSearchEndpoint, self).__init__( + super(InternetSearchRequestor, self).__init__( pulsar_host=pulsar_host, request_queue=internet_search_request_queue, response_queue=internet_search_response_queue, request_schema=LookupRequest, response_schema=LookupResponse, - endpoint_path="/api/v1/internet-search", timeout=timeout, - auth=auth, ) def to_request(self, body): @@ -26,5 +25,5 @@ class InternetSearchEndpoint(ServiceEndpoint): ) def from_response(self, message): - return { "text": message.text } + return { "text": message.text }, True diff --git a/trustgraph-flow/trustgraph/gateway/mux.py b/trustgraph-flow/trustgraph/gateway/mux.py new file mode 100644 index 00000000..cd5ddfba --- /dev/null +++ b/trustgraph-flow/trustgraph/gateway/mux.py @@ -0,0 +1,94 @@ + +import asyncio +import queue +from pulsar.schema import JsonSchema +import uuid +from aiohttp import web, WSMsgType + +from . socket import SocketEndpoint +from . 
text_completion import TextCompletionRequestor + +class MuxEndpoint(SocketEndpoint): + + def __init__( + self, pulsar_host, auth, + services, + path="/api/v1/mux", + ): + + super(MuxEndpoint, self).__init__( + endpoint_path=path, auth=auth, + ) + + self.q = asyncio.Queue(maxsize=10) + + self.services = services + + async def start(self): + pass + + async def async_thread(self, ws, running): + + while running.get(): + + try: + id, svc, request = await asyncio.wait_for(self.q.get(), 1) + except TimeoutError: + continue + except Exception as e: + await ws.send_json({"id": id, "error": str(e)}) + + try: + + print(svc, request) + + requestor = self.services[svc] + + async def responder(resp, fin): + await ws.send_json({ + "id": id, + "response": resp, + "complete": fin, + }) + + resp = await requestor.process(request, responder) + + except Exception as e: + + await ws.send_json({"error": str(e)}) + + running.stop() + + async def listener(self, ws, running): + + async for msg in ws: + + # On error, finish + if msg.type == WSMsgType.ERROR: + break + else: + + try: + + data = msg.json() + + if data["service"] not in self.services: + raise RuntimeError("Bad service") + + if "request" not in data: + raise RuntimeError("Bad message") + + if "id" not in data: + raise RuntimeError("Bad message") + + await self.q.put( + (data["id"], data["service"], data["request"]) + ) + + except Exception as e: + + await ws.send_json({"error": str(e)}) + continue + + running.stop() + diff --git a/trustgraph-flow/trustgraph/gateway/prompt.py b/trustgraph-flow/trustgraph/gateway/prompt.py index f09a0e0e..080d5618 100644 --- a/trustgraph-flow/trustgraph/gateway/prompt.py +++ b/trustgraph-flow/trustgraph/gateway/prompt.py @@ -6,19 +6,18 @@ from .. schema import prompt_request_queue from .. schema import prompt_response_queue from . endpoint import ServiceEndpoint +from . 
requestor import ServiceRequestor -class PromptEndpoint(ServiceEndpoint): +class PromptRequestor(ServiceRequestor): def __init__(self, pulsar_host, timeout, auth): - super(PromptEndpoint, self).__init__( + super(PromptRequestor, self).__init__( pulsar_host=pulsar_host, request_queue=prompt_request_queue, response_queue=prompt_response_queue, request_schema=PromptRequest, response_schema=PromptResponse, - endpoint_path="/api/v1/prompt", timeout=timeout, - auth=auth, ) def to_request(self, body): @@ -34,9 +33,9 @@ class PromptEndpoint(ServiceEndpoint): if message.object: return { "object": message.object - } + }, True else: return { "text": message.text - } + }, True diff --git a/trustgraph-flow/trustgraph/gateway/requestor.py b/trustgraph-flow/trustgraph/gateway/requestor.py new file mode 100644 index 00000000..5f6e2692 --- /dev/null +++ b/trustgraph-flow/trustgraph/gateway/requestor.py @@ -0,0 +1,88 @@ + +import asyncio +from pulsar.schema import JsonSchema +import uuid +import logging + +from . publisher import Publisher +from . 
subscriber import Subscriber + +logger = logging.getLogger("requestor") +logger.setLevel(logging.INFO) + +class ServiceRequestor: + + def __init__( + self, + pulsar_host, + request_queue, request_schema, + response_queue, response_schema, + subscription="api-gateway", consumer_name="api-gateway", + timeout=600, + ): + + self.pub = Publisher( + pulsar_host, request_queue, + schema=JsonSchema(request_schema) + ) + + self.sub = Subscriber( + pulsar_host, response_queue, + subscription, consumer_name, + JsonSchema(response_schema) + ) + + self.timeout = timeout + + async def start(self): + + self.pub.start() + self.sub.start() + + def to_request(self, request): + raise RuntimeError("Not defined") + + def from_response(self, response): + raise RuntimeError("Not defined") + + async def process(self, request, responder=None): + + id = str(uuid.uuid4()) + + try: + + q = self.sub.subscribe(id) + + await asyncio.to_thread( + self.pub.send, id, self.to_request(request) + ) + + while True: + + try: + resp = await asyncio.to_thread(q.get, timeout=self.timeout) + except Exception as e: + raise RuntimeError("Timeout") + + if resp.error: + return { "error": resp.error.message } + + resp, fin = self.from_response(resp) + + print(resp, fin) + + if responder: + await responder(resp, fin) + + if fin: + return resp + + except Exception as e: + + logging.error(f"Exception: {e}") + + return { "error": str(e) } + + finally: + self.sub.unsubscribe(id) + diff --git a/trustgraph-flow/trustgraph/gateway/service.py b/trustgraph-flow/trustgraph/gateway/service.py index e927ecf6..6a8a62eb 100755 --- a/trustgraph-flow/trustgraph/gateway/service.py +++ b/trustgraph-flow/trustgraph/gateway/service.py @@ -31,20 +31,22 @@ from . serialize import to_subgraph from . running import Running from . publisher import Publisher from . subscriber import Subscriber -from . endpoint import ServiceEndpoint, MultiResponseServiceEndpoint -from . text_completion import TextCompletionEndpoint -from . 
prompt import PromptEndpoint -from . graph_rag import GraphRagEndpoint -from . triples_query import TriplesQueryEndpoint -from . embeddings import EmbeddingsEndpoint -from . encyclopedia import EncyclopediaEndpoint -from . agent import AgentEndpoint -from . dbpedia import DbpediaEndpoint -from . internet_search import InternetSearchEndpoint +from . text_completion import TextCompletionRequestor +from . prompt import PromptRequestor +from . graph_rag import GraphRagRequestor +from . triples_query import TriplesQueryRequestor +from . embeddings import EmbeddingsRequestor +from . encyclopedia import EncyclopediaRequestor +from . agent import AgentRequestor +from . dbpedia import DbpediaRequestor +from . internet_search import InternetSearchRequestor from . triples_stream import TriplesStreamEndpoint from . graph_embeddings_stream import GraphEmbeddingsStreamEndpoint from . triples_load import TriplesLoadEndpoint from . graph_embeddings_load import GraphEmbeddingsLoadEndpoint +from . mux import MuxEndpoint + +from . endpoint import ServiceEndpoint from . 
auth import Authenticator logger = logging.getLogger("api") @@ -76,42 +78,81 @@ class Api: else: self.auth = Authenticator(allow_all=True) + self.services = { + "text-completion": TextCompletionRequestor( + pulsar_host=self.pulsar_host, timeout=self.timeout, + auth = self.auth, + ), + "prompt": PromptRequestor( + pulsar_host=self.pulsar_host, timeout=self.timeout, + auth = self.auth, + ), + "graph-rag": GraphRagRequestor( + pulsar_host=self.pulsar_host, timeout=self.timeout, + auth = self.auth, + ), + "triples-query": TriplesQueryRequestor( + pulsar_host=self.pulsar_host, timeout=self.timeout, + auth = self.auth, + ), + "embeddings": EmbeddingsRequestor( + pulsar_host=self.pulsar_host, timeout=self.timeout, + auth = self.auth, + ), + "agent": AgentRequestor( + pulsar_host=self.pulsar_host, timeout=self.timeout, + auth = self.auth, + ), + "encyclopedia": EncyclopediaRequestor( + pulsar_host=self.pulsar_host, timeout=self.timeout, + auth = self.auth, + ), + "dbpedia": DbpediaRequestor( + pulsar_host=self.pulsar_host, timeout=self.timeout, + auth = self.auth, + ), + "internet-search": InternetSearchRequestor( + pulsar_host=self.pulsar_host, timeout=self.timeout, + auth = self.auth, + ), + } + self.endpoints = [ - TextCompletionEndpoint( - pulsar_host=self.pulsar_host, timeout=self.timeout, - auth = self.auth, + ServiceEndpoint( + endpoint_path = "/api/v1/text-completion", auth=self.auth, + requestor = self.services["text-completion"], ), - PromptEndpoint( - pulsar_host=self.pulsar_host, timeout=self.timeout, - auth = self.auth, + ServiceEndpoint( + endpoint_path = "/api/v1/prompt", auth=self.auth, + requestor = self.services["prompt"], ), - GraphRagEndpoint( - pulsar_host=self.pulsar_host, timeout=self.timeout, - auth = self.auth, + ServiceEndpoint( + endpoint_path = "/api/v1/graph-rag", auth=self.auth, + requestor = self.services["graph-rag"], ), - TriplesQueryEndpoint( - pulsar_host=self.pulsar_host, timeout=self.timeout, - auth = self.auth, + ServiceEndpoint( + 
endpoint_path = "/api/v1/triples-query", auth=self.auth, + requestor = self.services["triples-query"], ), - EmbeddingsEndpoint( - pulsar_host=self.pulsar_host, timeout=self.timeout, - auth = self.auth, + ServiceEndpoint( + endpoint_path = "/api/v1/embeddings", auth=self.auth, + requestor = self.services["embeddings"], ), - AgentEndpoint( - pulsar_host=self.pulsar_host, timeout=self.timeout, - auth = self.auth, + ServiceEndpoint( + endpoint_path = "/api/v1/agent", auth=self.auth, + requestor = self.services["agent"], ), - EncyclopediaEndpoint( - pulsar_host=self.pulsar_host, timeout=self.timeout, - auth = self.auth, + ServiceEndpoint( + endpoint_path = "/api/v1/encyclopedia", auth=self.auth, + requestor = self.services["encyclopedia"], ), - DbpediaEndpoint( - pulsar_host=self.pulsar_host, timeout=self.timeout, - auth = self.auth, + ServiceEndpoint( + endpoint_path = "/api/v1/dbpedia", auth=self.auth, + requestor = self.services["dbpedia"], ), - InternetSearchEndpoint( - pulsar_host=self.pulsar_host, timeout=self.timeout, - auth = self.auth, + ServiceEndpoint( + endpoint_path = "/api/v1/internet-search", auth=self.auth, + requestor = self.services["internet-search"], ), TriplesStreamEndpoint( pulsar_host=self.pulsar_host, @@ -129,6 +170,11 @@ pulsar_host=self.pulsar_host, auth = self.auth, ), + MuxEndpoint( + pulsar_host=self.pulsar_host, + auth = self.auth, + services = self.services, + ), ] self.document_out = Publisher( @@ -162,7 +208,7 @@ else: metadata = [] - # Doing a base64 decode/encode here to make sure the + # Doing a base64 decode/encode here to make sure the content is valid base64 doc = base64.b64decode(data["data"]) diff --git a/trustgraph-flow/trustgraph/gateway/text_completion.py b/trustgraph-flow/trustgraph/gateway/text_completion.py index d59737f0..7291fc88 100644 --- a/trustgraph-flow/trustgraph/gateway/text_completion.py +++ b/trustgraph-flow/trustgraph/gateway/text_completion.py @@ -4,19 +4,18 @@ from ..
schema import text_completion_request_queue from .. schema import text_completion_response_queue from . endpoint import ServiceEndpoint +from . requestor import ServiceRequestor -class TextCompletionEndpoint(ServiceEndpoint): +class TextCompletionRequestor(ServiceRequestor): def __init__(self, pulsar_host, timeout, auth): - super(TextCompletionEndpoint, self).__init__( + super(TextCompletionRequestor, self).__init__( pulsar_host=pulsar_host, request_queue=text_completion_request_queue, response_queue=text_completion_response_queue, request_schema=TextCompletionRequest, response_schema=TextCompletionResponse, - endpoint_path="/api/v1/text-completion", timeout=timeout, - auth=auth, ) def to_request(self, body): @@ -26,4 +25,5 @@ class TextCompletionEndpoint(ServiceEndpoint): ) def from_response(self, message): - return { "response": message.response } + return { "response": message.response }, True + diff --git a/trustgraph-flow/trustgraph/gateway/triples_query.py b/trustgraph-flow/trustgraph/gateway/triples_query.py index 5a0cfff8..0ea7cd8d 100644 --- a/trustgraph-flow/trustgraph/gateway/triples_query.py +++ b/trustgraph-flow/trustgraph/gateway/triples_query.py @@ -4,20 +4,19 @@ from .. schema import triples_request_queue from .. schema import triples_response_queue from . endpoint import ServiceEndpoint +from . requestor import ServiceRequestor from . 
serialize import to_value, serialize_subgraph -class TriplesQueryEndpoint(ServiceEndpoint): +class TriplesQueryRequestor(ServiceRequestor): def __init__(self, pulsar_host, timeout, auth): - super(TriplesQueryEndpoint, self).__init__( + super(TriplesQueryRequestor, self).__init__( pulsar_host=pulsar_host, request_queue=triples_request_queue, response_queue=triples_response_queue, request_schema=TriplesQueryRequest, response_schema=TriplesQueryResponse, - endpoint_path="/api/v1/triples-query", timeout=timeout, - auth=auth, ) def to_request(self, body): @@ -50,5 +49,5 @@ class TriplesQueryEndpoint(ServiceEndpoint): print(message) return { "response": serialize_subgraph(message.triples) - } + }, True From a714221b2291286b51bc612cd005c0125c445e2d Mon Sep 17 00:00:00 2001 From: cybermaggedon Date: Sat, 7 Dec 2024 00:16:52 +0000 Subject: [PATCH 34/37] Add memgraph cypher LIMIT support (#200) --- .../query/triples/memgraph/service.py | 50 ++++++++++++------- 1 file changed, 33 insertions(+), 17 deletions(-) diff --git a/trustgraph-flow/trustgraph/query/triples/memgraph/service.py b/trustgraph-flow/trustgraph/query/triples/memgraph/service.py index 5144f781..46dd19e3 100755 --- a/trustgraph-flow/trustgraph/query/triples/memgraph/service.py +++ b/trustgraph-flow/trustgraph/query/triples/memgraph/service.py @@ -79,7 +79,8 @@ class Processor(ConsumerProducer): records, summary, keys = self.io.execute_query( "MATCH (src:Node {uri: $src})-[rel:Rel {uri: $rel}]->(dest:Literal {value: $value}) " - "RETURN $src as src", + "RETURN $src as src " + "LIMIT " + str(v.limit), src=v.s.value, rel=v.p.value, value=v.o.value, database_=self.db, ) @@ -89,7 +90,8 @@ class Processor(ConsumerProducer): records, summary, keys = self.io.execute_query( "MATCH (src:Node {uri: $src})-[rel:Rel {uri: $rel}]->(dest:Node {uri: $uri}) " - "RETURN $src as src", + "RETURN $src as src " + "LIMIT " + str(v.limit), src=v.s.value, rel=v.p.value, uri=v.o.value, database_=self.db, ) @@ -103,7 +105,8 @@ class 
Processor(ConsumerProducer): records, summary, keys = self.io.execute_query( "MATCH (src:Node {uri: $src})-[rel:Rel {uri: $rel}]->(dest:Literal) " - "RETURN dest.value as dest", + "RETURN dest.value as dest " + "LIMIT " + str(v.limit), src=v.s.value, rel=v.p.value, database_=self.db, ) @@ -114,7 +117,8 @@ class Processor(ConsumerProducer): records, summary, keys = self.io.execute_query( "MATCH (src:Node {uri: $src})-[rel:Rel {uri: $rel}]->(dest:Node) " - "RETURN dest.uri as dest", + "RETURN dest.uri as dest " + "LIMIT " + str(v.limit), src=v.s.value, rel=v.p.value, database_=self.db, ) @@ -131,7 +135,8 @@ class Processor(ConsumerProducer): records, summary, keys = self.io.execute_query( "MATCH (src:Node {uri: $src})-[rel:Rel]->(dest:Literal {value: $value}) " - "RETURN rel.uri as rel", + "RETURN rel.uri as rel " + "LIMIT " + str(v.limit), src=v.s.value, value=v.o.value, database_=self.db, ) @@ -142,7 +147,8 @@ class Processor(ConsumerProducer): records, summary, keys = self.io.execute_query( "MATCH (src:Node {uri: $src})-[rel:Rel]->(dest:Node {uri: $uri}) " - "RETURN rel.uri as rel", + "RETURN rel.uri as rel " + "LIMIT " + str(v.limit), src=v.s.value, uri=v.o.value, database_=self.db, ) @@ -157,7 +163,8 @@ class Processor(ConsumerProducer): records, summary, keys = self.io.execute_query( "MATCH (src:Node {uri: $src})-[rel:Rel]->(dest:Literal) " - "RETURN rel.uri as rel, dest.value as dest", + "RETURN rel.uri as rel, dest.value as dest " + "LIMIT " + str(v.limit), src=v.s.value, database_=self.db, ) @@ -168,7 +175,8 @@ class Processor(ConsumerProducer): records, summary, keys = self.io.execute_query( "MATCH (src:Node {uri: $src})-[rel:Rel]->(dest:Node) " - "RETURN rel.uri as rel, dest.uri as dest", + "RETURN rel.uri as rel, dest.uri as dest " + "LIMIT " + str(v.limit), src=v.s.value, database_=self.db, ) @@ -188,7 +196,8 @@ class Processor(ConsumerProducer): records, summary, keys = self.io.execute_query( "MATCH (src:Node)-[rel:Rel {uri: $uri}]->(dest:Literal 
{value: $value}) " - "RETURN src.uri as src", + "RETURN src.uri as src " + "LIMIT " + str(v.limit), uri=v.p.value, value=v.o.value, database_=self.db, ) @@ -199,7 +208,8 @@ class Processor(ConsumerProducer): records, summary, keys = self.io.execute_query( "MATCH (src:Node)-[rel:Rel {uri: $uri}]->(dest:Node {uri: $uri}) " - "RETURN src.uri as src", + "RETURN src.uri as src " + "LIMIT " + str(v.limit), uri=v.p.value, dest=v.o.value, database_=self.db, ) @@ -214,7 +224,8 @@ class Processor(ConsumerProducer): records, summary, keys = self.io.execute_query( "MATCH (src:Node)-[rel:Rel {uri: $uri}]->(dest:Literal) " - "RETURN src.uri as src, dest.value as dest", + "RETURN src.uri as src, dest.value as dest " + "LIMIT " + str(v.limit), uri=v.p.value, database_=self.db, ) @@ -225,7 +236,8 @@ class Processor(ConsumerProducer): records, summary, keys = self.io.execute_query( "MATCH (src:Node)-[rel:Rel {uri: $uri}]->(dest:Node) " - "RETURN src.uri as src, dest.uri as dest", + "RETURN src.uri as src, dest.uri as dest " + "LIMIT " + str(v.limit), uri=v.p.value, database_=self.db, ) @@ -242,7 +254,8 @@ class Processor(ConsumerProducer): records, summary, keys = self.io.execute_query( "MATCH (src:Node)-[rel:Rel]->(dest:Literal {value: $value}) " - "RETURN src.uri as src, rel.uri as rel", + "RETURN src.uri as src, rel.uri as rel " + "LIMIT " + str(v.limit), value=v.o.value, database_=self.db, ) @@ -253,7 +266,8 @@ class Processor(ConsumerProducer): records, summary, keys = self.io.execute_query( "MATCH (src:Node)-[rel:Rel]->(dest:Node {uri: $uri}) " - "RETURN src.uri as src, rel.uri as rel", + "RETURN src.uri as src, rel.uri as rel " + "LIMIT " + str(v.limit), uri=v.o.value, database_=self.db, ) @@ -268,7 +282,8 @@ class Processor(ConsumerProducer): records, summary, keys = self.io.execute_query( "MATCH (src:Node)-[rel:Rel]->(dest:Literal) " - "RETURN src.uri as src, rel.uri as rel, dest.value as dest", + "RETURN src.uri as src, rel.uri as rel, dest.value as dest " + "LIMIT " + 
str(v.limit), database_=self.db, ) @@ -278,7 +293,8 @@ class Processor(ConsumerProducer): records, summary, keys = self.io.execute_query( "MATCH (src:Node)-[rel:Rel]->(dest:Node) " - "RETURN src.uri as src, rel.uri as rel, dest.uri as dest", + "RETURN src.uri as src, rel.uri as rel, dest.uri as dest " + "LIMIT " + str(v.limit), database_=self.db, ) @@ -292,7 +308,7 @@ class Processor(ConsumerProducer): p=self.create_value(t[1]), o=self.create_value(t[2]) ) - for t in triples + for t in triples[:v.limit] ] print("Send response...", flush=True) From cf8c76b5c60d2f45dc8bc848be0f01727b1fd46e Mon Sep 17 00:00:00 2001 From: cybermaggedon Date: Mon, 9 Dec 2024 00:01:01 +0000 Subject: [PATCH 35/37] Fix/save core hang (#201) * Working around an exception class change in Python 3.11 --- trustgraph-cli/scripts/tg-load-kg-core | 8 ++++++-- trustgraph-cli/scripts/tg-save-kg-core | 4 +++- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/trustgraph-cli/scripts/tg-load-kg-core b/trustgraph-cli/scripts/tg-load-kg-core index 5c2ae140..cada13a7 100755 --- a/trustgraph-cli/scripts/tg-load-kg-core +++ b/trustgraph-cli/scripts/tg-load-kg-core @@ -29,7 +29,9 @@ async def load_ge(running, queue, url): try: msg = await asyncio.wait_for(queue.get(), 1) - except TimeoutError: + except: + # Hopefully it's TimeoutError. Annoying to match since + # it changed in 3.11. continue msg = { @@ -55,7 +57,9 @@ async def load_triples(running, queue, url): try: msg = await asyncio.wait_for(queue.get(), 1) - except TimeoutError: + except: + # Hopefully it's TimeoutError. Annoying to match since + # it changed in 3.11. 
continue msg ={ diff --git a/trustgraph-cli/scripts/tg-save-kg-core b/trustgraph-cli/scripts/tg-save-kg-core index f2509dba..e52cd7dc 100755 --- a/trustgraph-cli/scripts/tg-save-kg-core +++ b/trustgraph-cli/scripts/tg-save-kg-core @@ -133,7 +133,9 @@ async def output(running, queue, path, format): try: msg = await asyncio.wait_for(queue.get(), 0.5) - except TimeoutError: + except: + # Hopefully it's TimeoutError. Annoying to match since + # it changed in 3.11. continue if format == "msgpack": From 803f11089107a1bcf3fd702a0f05eec50032c122 Mon Sep 17 00:00:00 2001 From: cybermaggedon Date: Mon, 9 Dec 2024 00:31:03 +0000 Subject: [PATCH 36/37] Timeout alias error (#202) --- trustgraph-cli/scripts/tg-load-kg-core | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/trustgraph-cli/scripts/tg-load-kg-core b/trustgraph-cli/scripts/tg-load-kg-core index cada13a7..3d31dd25 100755 --- a/trustgraph-cli/scripts/tg-load-kg-core +++ b/trustgraph-cli/scripts/tg-load-kg-core @@ -128,7 +128,9 @@ async def loader(running, ge_queue, t_queue, path, format, user, collection): try: await asyncio.wait_for(qtype.put(unpacked[1]), 0.5) - except TimeoutError: + except: + # Hopefully it's TimeoutError. Annoying to match since + # it changed in 3.11. 
continue if not running.get(): break From 61031270e4cddd5615a00ff17fb8847d2d932db9 Mon Sep 17 00:00:00 2001 From: cybermaggedon Date: Mon, 9 Dec 2024 12:44:30 +0000 Subject: [PATCH 37/37] Fix loop logic flaws in loader (#203) --- trustgraph-cli/scripts/tg-load-kg-core | 82 +++++++++++++++++++++----- 1 file changed, 68 insertions(+), 14 deletions(-) diff --git a/trustgraph-cli/scripts/tg-load-kg-core b/trustgraph-cli/scripts/tg-load-kg-core index 3d31dd25..4e76e525 100755 --- a/trustgraph-cli/scripts/tg-load-kg-core +++ b/trustgraph-cli/scripts/tg-load-kg-core @@ -19,8 +19,13 @@ class Running: def get(self): return self.running def stop(self): self.running = False +ge_counts = 0 +t_counts = 0 + async def load_ge(running, queue, url): + global ge_counts + async with aiohttp.ClientSession() as session: async with session.ws_connect(f"{url}load/graph-embeddings") as ws: @@ -29,6 +34,11 @@ async def load_ge(running, queue, url): try: msg = await asyncio.wait_for(queue.get(), 1) + + # End of load + if msg is None: + break + except: # Hopefully it's TimeoutError. Annoying to match since # it changed in 3.11. @@ -45,10 +55,17 @@ async def load_ge(running, queue, url): "entity": msg["e"], } - await ws.send_json(msg) + try: + await ws.send_json(msg) + except Exception as e: + print(e) + + ge_counts += 1 async def load_triples(running, queue, url): + global t_counts + async with aiohttp.ClientSession() as session: async with session.ws_connect(f"{url}load/triples") as ws: @@ -57,6 +74,11 @@ async def load_triples(running, queue, url): try: msg = await asyncio.wait_for(queue.get(), 1) + + # End of load + if msg is None: + break + except: # Hopefully it's TimeoutError. Annoying to match since # it changed in 3.11. 
@@ -72,10 +94,12 @@ async def load_triples(running, queue, url): "triples": msg["t"], } - await ws.send_json(msg) + try: + await ws.send_json(msg) + except Exception as e: + print(e) -ge_counts = 0 -t_counts = 0 + t_counts += 1 async def stats(running): @@ -83,16 +107,15 @@ async def stats(running): global ge_counts while running.get(): + await asyncio.sleep(2) + print( f"Graph embeddings: {ge_counts:10d} Triples: {t_counts:10d}" ) async def loader(running, ge_queue, t_queue, path, format, user, collection): - global t_counts - global ge_counts - if format == "json": raise RuntimeError("Not implemented") @@ -118,16 +141,18 @@ async def loader(running, ge_queue, t_queue, path, format, user, collection): if unpacked[0] == "t": qtype = t_queue - t_counts += 1 else: if unpacked[0] == "ge": qtype = ge_queue - ge_counts += 1 while running.get(): try: await asyncio.wait_for(qtype.put(unpacked[1]), 0.5) + + # Successful put message, move on + break + except: # Hopefully it's TimeoutError. Annoying to match since # it changed in 3.11. @@ -135,14 +160,40 @@ async def loader(running, ge_queue, t_queue, path, format, user, collection): if not running.get(): break - running.stop() - + # Put 'None' on end of queue to finish + while running.get(): + + try: + await asyncio.wait_for(t_queue.put(None), 1) + + # Successful put message, move on + break + + except: + # Hopefully it's TimeoutError. Annoying to match since + # it changed in 3.11. + continue + + # Put 'None' on end of queue to finish + while running.get(): + + try: + await asyncio.wait_for(ge_queue.put(None), 1) + + # Successful put message, move on + break + + except: + # Hopefully it's TimeoutError. Annoying to match since + # it changed in 3.11. 
+ continue + async def run(running, **args): # Maxsize on queues reduces back-pressure so tg-load-kg-core doesn't # grow to eat all memory - ge_q = asyncio.Queue(maxsize=500) - t_q = asyncio.Queue(maxsize=500) + ge_q = asyncio.Queue(maxsize=10) + t_q = asyncio.Queue(maxsize=10) load_task = asyncio.create_task( loader( @@ -170,9 +221,12 @@ async def run(running, **args): stats_task = asyncio.create_task(stats(running)) - await load_task await triples_task await ge_task + + running.stop() + + await load_task await stats_task async def main(running):