Maint/rename pkg (#88)

* Rename trustgraph-utils -> trustgraph-cli
* Update YAMLs
This commit is contained in:
cybermaggedon 2024-09-30 22:20:26 +01:00 committed by GitHub
parent 771d9fc2c7
commit 88a7dfa126
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
32 changed files with 361 additions and 361 deletions

View file

@ -0,0 +1,46 @@
#!/usr/bin/env python3
"""
Connects to the graph query service and dumps all graph edges.
"""
import argparse
import os
from trustgraph.clients.triples_query_client import TriplesQueryClient
default_pulsar_host = os.getenv("PULSAR_HOST", 'pulsar://localhost:6650')
def show_graph(pulsar):
tq = TriplesQueryClient(pulsar_host=pulsar)
rows = tq.request(None, None, None, limit=10_000_000)
for row in rows:
print(row.s.value, row.p.value, row.o.value)
def main():
parser = argparse.ArgumentParser(
prog='graph-show',
description=__doc__,
)
parser.add_argument(
'-p', '--pulsar-host',
default=default_pulsar_host,
help=f'Pulsar host (default: {default_pulsar_host})',
)
args = parser.parse_args()
try:
show_graph(args.pulsar_host)
except Exception as e:
print("Exception:", e, flush=True)
main()

View file

@ -0,0 +1,74 @@
#!/usr/bin/env python3
"""
Connects to the graph query service and dumps all graph edges.
"""
import argparse
import os
from trustgraph.clients.triples_query_client import TriplesQueryClient
import rdflib
import io
import sys
default_pulsar_host = os.getenv("PULSAR_HOST", 'pulsar://localhost:6650')
def show_graph(pulsar):
tq = TriplesQueryClient(pulsar_host=pulsar)
rows = tq.request(None, None, None, limit=10_000_000)
g = rdflib.Graph()
for row in rows:
sv = rdflib.term.URIRef(row.s.value)
pv = rdflib.term.URIRef(row.p.value)
if row.o.is_uri:
# Skip malformed URLs with spaces in
if " " in row.o.value:
continue
ov = rdflib.term.URIRef(row.o.value)
else:
ov = rdflib.term.Literal(row.o.value)
g.add((sv, pv, ov))
g.serialize(destination="output.ttl", format="turtle")
buf = io.BytesIO()
g.serialize(destination=buf, format="turtle")
sys.stdout.write(buf.getvalue().decode("utf-8"))
def main():
parser = argparse.ArgumentParser(
prog='graph-show',
description=__doc__,
)
parser.add_argument(
'-p', '--pulsar-host',
default=default_pulsar_host,
help=f'Pulsar host (default: {default_pulsar_host})',
)
args = parser.parse_args()
try:
show_graph(args.pulsar_host)
except Exception as e:
print("Exception:", e, flush=True)
main()

View file

@ -0,0 +1,119 @@
#!/usr/bin/env python3
"""
Initialises Pulsar with Trustgraph tenant / namespaces & policy
"""
import requests
import time
import argparse
default_pulsar_admin_url = "http://pulsar:8080"
def get_clusters(url):
print("Get clusters...", flush=True)
resp = requests.get(f"{url}/admin/v2/clusters")
if resp.status_code != 200: raise RuntimeError("Could not fetch clusters")
return resp.json()
def ensure_tenant(url, tenant, clusters):
resp = requests.get(f"{url}/admin/v2/tenants/{tenant}")
if resp.status_code == 200:
print(f"Tenant {tenant} already exists.", flush=True)
return
resp = requests.put(
f"{url}/admin/v2/tenants/{tenant}",
json={
"adminRoles": [],
"allowedClusters": clusters,
}
)
if resp.status_code != 204:
print(resp.text, flush=True)
raise RuntimeError("Tenant creation failed.")
print(f"Tenant {tenant} created.", flush=True)
def ensure_namespace(url, tenant, namespace, config):
resp = requests.get(f"{url}/admin/v2/namespaces/{tenant}/{namespace}")
if resp.status_code == 200:
print(f"Namespace {tenant}/{namespace} already exists.", flush=True)
return
resp = requests.put(
f"{url}/admin/v2/namespaces/{tenant}/{namespace}",
json=config,
)
if resp.status_code != 204:
print(resp.status_code, flush=True)
print(resp.text, flush=True)
raise RuntimeError(f"Namespace {tenant}/{namespace} creation failed.")
print(f"Namespace {tenant}/{namespace} created.", flush=True)
def init(url, tenant="tg"):
clusters = get_clusters(url)
ensure_tenant(url, tenant, clusters)
ensure_namespace(url, tenant, "flow", {})
ensure_namespace(url, tenant, "request", {})
ensure_namespace(url, tenant, "response", {
"retention_policies": {
"retentionSizeInMB": -1,
"retentionTimeInMinutes": 3,
}
})
def main():
parser = argparse.ArgumentParser(
prog='tg-init-pulsar',
description=__doc__,
)
parser.add_argument(
'-p', '--pulsar-admin-url',
default=default_pulsar_admin_url,
help=f'Pulsar admin URL (default: {default_pulsar_admin_url})',
)
args = parser.parse_args()
while True:
try:
print(flush=True)
print(
f"Initialising with Pulsar {args.pulsar_admin_url}...",
flush=True
)
init(args.pulsar_admin_url, "tg")
print("Initialisation complete.", flush=True)
break
except Exception as e:
print("Exception:", e, flush=True)
print("Sleeping...", flush=True)
time.sleep(2)
print("Will retry...", flush=True)
main()

View file

@ -0,0 +1,11 @@
#!/usr/bin/env bash
CSRF_TOKEN=$(curl http://localhost:7750/pulsar-manager/csrf-token)
curl \
-H "X-XSRF-TOKEN: $CSRF_TOKEN" \
-H "Cookie: XSRF-TOKEN=$CSRF_TOKEN;" \
-H 'Content-Type: application/json' \
-X PUT \
http://localhost:7750/pulsar-manager/users/superuser \
-d '{"name": "admin", "password": "apachepulsar", "description": "test", "email": "username@test.org"}'

View file

@ -0,0 +1,128 @@
#!/usr/bin/env python3
"""
Loads a PDF document into TrustGraph processing.
"""
import pulsar
from pulsar.schema import JsonSchema
from trustgraph.schema import Document, Source, document_ingest_queue
import base64
import hashlib
import argparse
import os
import time
from trustgraph.log_level import LogLevel
class Loader:
def __init__(
self,
pulsar_host,
output_queue,
log_level,
file,
):
self.client = pulsar.Client(
pulsar_host,
logger=pulsar.ConsoleLogger(log_level.to_pulsar())
)
self.producer = self.client.create_producer(
topic=output_queue,
schema=JsonSchema(Document),
chunking_enabled=True,
)
self.file = file
def run(self):
try:
path = self.file
data = open(path, "rb").read()
id = hashlib.sha256(path.encode("utf-8")).hexdigest()[0:8]
r = Document(
source=Source(
source=path,
title=path,
id=id,
),
data=base64.b64encode(data),
)
self.producer.send(r)
except Exception as e:
print(e, flush=True)
def __del__(self):
self.client.close()
def main():
parser = argparse.ArgumentParser(
prog='loader',
description=__doc__,
)
default_pulsar_host = os.getenv("PULSAR_HOST", 'pulsar://localhost:6650')
default_output_queue = document_ingest_queue
parser.add_argument(
'-p', '--pulsar-host',
default=default_pulsar_host,
help=f'Pulsar host (default: {default_pulsar_host})',
)
parser.add_argument(
'-o', '--output-queue',
default=default_output_queue,
help=f'Output queue (default: {default_output_queue})'
)
parser.add_argument(
'-l', '--log-level',
type=LogLevel,
default=LogLevel.ERROR,
choices=list(LogLevel),
help=f'Output queue (default: info)'
)
parser.add_argument(
'-f', '--file',
required=True,
help=f'File to load'
)
args = parser.parse_args()
while True:
try:
p = Loader(
pulsar_host=args.pulsar_host,
output_queue=args.output_queue,
log_level=args.log_level,
file=args.file,
)
p.run()
print("File loaded.")
break
except Exception as e:
print("Exception:", e, flush=True)
print("Will retry...", flush=True)
time.sleep(10)
main()

View file

@ -0,0 +1,128 @@
#!/usr/bin/env python3
"""
Loads a text document into TrustGraph processing.
"""
import pulsar
from pulsar.schema import JsonSchema
from trustgraph.schema import TextDocument, Source, text_ingest_queue
import base64
import hashlib
import argparse
import os
import time
from trustgraph.log_level import LogLevel
class Loader:
def __init__(
self,
pulsar_host,
output_queue,
log_level,
file,
):
self.client = pulsar.Client(
pulsar_host,
logger=pulsar.ConsoleLogger(log_level.to_pulsar())
)
self.producer = self.client.create_producer(
topic=output_queue,
schema=JsonSchema(TextDocument),
chunking_enabled=True,
)
self.file = file
def run(self):
try:
path = self.file
data = open(path, "rb").read()
id = hashlib.sha256(path.encode("utf-8")).hexdigest()[0:8]
r = TextDocument(
source=Source(
source=path,
title=path,
id=id,
),
text=data,
)
self.producer.send(r)
except Exception as e:
print(e, flush=True)
def __del__(self):
self.client.close()
def main():
parser = argparse.ArgumentParser(
prog='loader',
description=__doc__,
)
default_pulsar_host = os.getenv("PULSAR_HOST", 'pulsar://localhost:6650')
default_output_queue = text_ingest_queue
parser.add_argument(
'-p', '--pulsar-host',
default=default_pulsar_host,
help=f'Pulsar host (default: {default_pulsar_host})',
)
parser.add_argument(
'-o', '--output-queue',
default=default_output_queue,
help=f'Output queue (default: {default_output_queue})'
)
parser.add_argument(
'-l', '--log-level',
type=LogLevel,
default=LogLevel.ERROR,
choices=list(LogLevel),
help=f'Output queue (default: info)'
)
parser.add_argument(
'-f', '--file',
required=True,
help=f'File to load'
)
args = parser.parse_args()
while True:
try:
p = Loader(
pulsar_host=args.pulsar_host,
output_queue=args.output_queue,
log_level=args.log_level,
file=args.file,
)
p.run()
print("File loaded.")
break
except Exception as e:
print("Exception:", e, flush=True)
print("Will retry...", flush=True)
time.sleep(10)
main()

View file

@ -0,0 +1,24 @@
#!/usr/bin/env python3
import requests
import tabulate
url = 'http://localhost:9090/api/v1/query?query=processor_state%7Bprocessor_state%3D%22running%22%7D'
resp = requests.get(url)
obj = resp.json()
tbl = [
[
m["metric"]["job"],
"running" if int(m["value"][1]) > 0 else "down"
]
for m in obj["data"]["result"]
]
print(tabulate.tabulate(
tbl, tablefmt="pretty", headers=["processor", "state"],
stralign="left"
))

View file

@ -0,0 +1,49 @@
#!/usr/bin/env python3
"""
Uses the Document RAG service to answer a query
"""
import argparse
import os
from trustgraph.clients.document_rag_client import DocumentRagClient
default_pulsar_host = os.getenv("PULSAR_HOST", 'pulsar://localhost:6650')
def query(pulsar, query):
rag = DocumentRagClient(pulsar_host=pulsar)
resp = rag.request(query)
print(resp)
def main():
parser = argparse.ArgumentParser(
prog='graph-show',
description=__doc__,
)
parser.add_argument(
'-p', '--pulsar-host',
default=default_pulsar_host,
help=f'Pulsar host (default: {default_pulsar_host})',
)
parser.add_argument(
'-q', '--query',
required=True,
help=f'Query to execute',
)
args = parser.parse_args()
try:
query(args.pulsar_host, args.query)
except Exception as e:
print("Exception:", e, flush=True)
main()

View file

@ -0,0 +1,49 @@
#!/usr/bin/env python3
"""
Uses the GraphRAG service to answer a query
"""
import argparse
import os
from trustgraph.clients.graph_rag_client import GraphRagClient
default_pulsar_host = os.getenv("PULSAR_HOST", 'pulsar://localhost:6650')
def query(pulsar, query):
rag = GraphRagClient(pulsar_host=pulsar)
resp = rag.request(query)
print(resp)
def main():
parser = argparse.ArgumentParser(
prog='graph-show',
description=__doc__,
)
parser.add_argument(
'-p', '--pulsar-host',
default=default_pulsar_host,
help=f'Pulsar host (default: {default_pulsar_host})',
)
parser.add_argument(
'-q', '--query',
required=True,
help=f'Query to execute',
)
args = parser.parse_args()
try:
query(args.pulsar_host, args.query)
except Exception as e:
print("Exception:", e, flush=True)
main()