mirror of
https://github.com/trustgraph-ai/trustgraph.git
synced 2026-04-25 00:16:23 +02:00
Maint/rename pkg (#88)
* Rename trustgraph-utils -> trustgraph-cli * Update YAMLs
This commit is contained in:
parent
771d9fc2c7
commit
88a7dfa126
32 changed files with 361 additions and 361 deletions
1
trustgraph-cli/README.md
Normal file
1
trustgraph-cli/README.md
Normal file
|
|
@ -0,0 +1 @@
|
|||
See https://trustgraph.ai/
|
||||
46
trustgraph-cli/scripts/tg-graph-show
Executable file
46
trustgraph-cli/scripts/tg-graph-show
Executable file
|
|
@ -0,0 +1,46 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
"""
|
||||
Connects to the graph query service and dumps all graph edges.
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import os
|
||||
from trustgraph.clients.triples_query_client import TriplesQueryClient
|
||||
|
||||
default_pulsar_host = os.getenv("PULSAR_HOST", 'pulsar://localhost:6650')
|
||||
|
||||
def show_graph(pulsar):
    """Print every triple returned by the triples query service.

    `pulsar` is handed to TriplesQueryClient as its `pulsar_host`.  Each
    returned row is printed as "subject predicate object" on its own line.
    """
    client = TriplesQueryClient(pulsar_host=pulsar)

    # No subject/predicate/object is specified (all None); presumably this
    # matches every triple, up to the given limit.
    for triple in client.request(None, None, None, limit=10_000_000):
        print(triple.s.value, triple.p.value, triple.o.value)
|
||||
|
||||
def main():
    """Parse command-line arguments and dump the graph to stdout.

    Any exception raised while querying is reported on stdout as
    "Exception: ..." rather than propagated.
    """
    parser = argparse.ArgumentParser(
        # Use the installed script name so usage/help output is accurate.
        prog='tg-graph-show',
        description=__doc__,
    )

    parser.add_argument(
        '-p', '--pulsar-host',
        default=default_pulsar_host,
        help=f'Pulsar host (default: {default_pulsar_host})',
    )

    args = parser.parse_args()

    try:
        show_graph(args.pulsar_host)
    except Exception as e:
        print("Exception:", e, flush=True)


# Only run when executed as a script, so the file can be imported safely.
if __name__ == "__main__":
    main()
|
||||
|
||||
74
trustgraph-cli/scripts/tg-graph-to-turtle
Executable file
74
trustgraph-cli/scripts/tg-graph-to-turtle
Executable file
|
|
@ -0,0 +1,74 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
"""
|
||||
Connects to the graph query service and dumps all graph edges.
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import os
|
||||
from trustgraph.clients.triples_query_client import TriplesQueryClient
|
||||
import rdflib
|
||||
import io
|
||||
import sys
|
||||
|
||||
default_pulsar_host = os.getenv("PULSAR_HOST", 'pulsar://localhost:6650')
|
||||
|
||||
def show_graph(pulsar):
    """Fetch triples from the query service and emit them as Turtle.

    `pulsar` is handed to TriplesQueryClient as its `pulsar_host`.  The
    triples are loaded into an rdflib graph, which is then serialized twice:
    once to the file "output.ttl" in the current directory and once to
    standard output.
    """
    client = TriplesQueryClient(pulsar_host=pulsar)

    triples = client.request(None, None, None, limit=10_000_000)

    graph = rdflib.Graph()

    for triple in triples:

        subject = rdflib.term.URIRef(triple.s.value)
        predicate = rdflib.term.URIRef(triple.p.value)

        if not triple.o.is_uri:
            # Non-URI objects are stored as literals.
            obj = rdflib.term.Literal(triple.o.value)
        elif " " in triple.o.value:
            # Skip malformed URLs with spaces in
            continue
        else:
            obj = rdflib.term.URIRef(triple.o.value)

        graph.add((subject, predicate, obj))

    graph.serialize(destination="output.ttl", format="turtle")

    # Serialize into an in-memory byte buffer, then decode for stdout.
    out = io.BytesIO()
    graph.serialize(destination=out, format="turtle")
    sys.stdout.write(out.getvalue().decode("utf-8"))
|
||||
|
||||
|
||||
def main():
    """Parse command-line arguments and write the graph out as Turtle.

    Any exception raised while querying or serializing is reported on
    stdout as "Exception: ..." rather than propagated.
    """
    parser = argparse.ArgumentParser(
        # Was 'graph-show' (copied from another script); use this script's
        # own name so usage/help output is accurate.
        prog='tg-graph-to-turtle',
        description=__doc__,
    )

    parser.add_argument(
        '-p', '--pulsar-host',
        default=default_pulsar_host,
        help=f'Pulsar host (default: {default_pulsar_host})',
    )

    args = parser.parse_args()

    try:
        show_graph(args.pulsar_host)
    except Exception as e:
        print("Exception:", e, flush=True)


# Only run when executed as a script, so the file can be imported safely.
if __name__ == "__main__":
    main()
|
||||
|
||||
119
trustgraph-cli/scripts/tg-init-pulsar
Executable file
119
trustgraph-cli/scripts/tg-init-pulsar
Executable file
|
|
@ -0,0 +1,119 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
"""
|
||||
Initialises Pulsar with Trustgraph tenant / namespaces & policy
|
||||
"""
|
||||
|
||||
import requests
|
||||
import time
|
||||
import argparse
|
||||
|
||||
default_pulsar_admin_url = "http://pulsar:8080"
|
||||
|
||||
def get_clusters(url):
    """Return the decoded JSON body of GET {url}/admin/v2/clusters.

    Raises RuntimeError if the response status is anything other than 200.
    """
    print("Get clusters...", flush=True)

    endpoint = f"{url}/admin/v2/clusters"
    response = requests.get(endpoint)

    if response.status_code == 200:
        return response.json()

    raise RuntimeError("Could not fetch clusters")
|
||||
|
||||
def ensure_tenant(url, tenant, clusters):
    """Create `tenant` through the admin API unless it already exists.

    A GET returning 200 is taken to mean the tenant exists.  Otherwise the
    tenant is created with a PUT listing `clusters` as its allowed clusters;
    any PUT status other than 204 prints the response body and raises
    RuntimeError.
    """
    tenant_url = f"{url}/admin/v2/tenants/{tenant}"

    if requests.get(tenant_url).status_code == 200:
        print(f"Tenant {tenant} already exists.", flush=True)
        return

    payload = {
        "adminRoles": [],
        "allowedClusters": clusters,
    }
    created = requests.put(tenant_url, json=payload)

    if created.status_code != 204:
        print(created.text, flush=True)
        raise RuntimeError("Tenant creation failed.")

    print(f"Tenant {tenant} created.", flush=True)
|
||||
|
||||
def ensure_namespace(url, tenant, namespace, config):
    """Create namespace `tenant`/`namespace` unless it already exists.

    A GET returning 200 is taken to mean the namespace exists.  Otherwise it
    is created with a PUT whose JSON body is `config`; any PUT status other
    than 204 prints the status code and response body and raises
    RuntimeError.
    """
    qualified = f"{tenant}/{namespace}"
    namespace_url = f"{url}/admin/v2/namespaces/{qualified}"

    if requests.get(namespace_url).status_code == 200:
        print(f"Namespace {qualified} already exists.", flush=True)
        return

    created = requests.put(namespace_url, json=config)

    if created.status_code != 204:
        print(created.status_code, flush=True)
        print(created.text, flush=True)
        raise RuntimeError(f"Namespace {qualified} creation failed.")

    print(f"Namespace {qualified} created.", flush=True)
|
||||
|
||||
def init(url, tenant="tg"):
    """Ensure the tenant and its flow/request/response namespaces exist.

    The tenant is allowed on every cluster reported by the admin API at
    `url`.  The "flow" and "request" namespaces are created with an empty
    configuration; "response" is created with a retention policy.
    """
    ensure_tenant(url, tenant, get_clusters(url))

    # Namespaces in creation order, each paired with its creation config.
    namespace_configs = (
        ("flow", {}),
        ("request", {}),
        ("response", {
            "retention_policies": {
                "retentionSizeInMB": -1,
                "retentionTimeInMinutes": 3,
            }
        }),
    )

    for namespace, config in namespace_configs:
        ensure_namespace(url, tenant, namespace, config)
|
||||
|
||||
def main():
    """Parse arguments, then retry initialisation until it succeeds.

    Every failed attempt is reported on stdout and followed by a 2 second
    pause before the next attempt; the function returns after the first
    successful attempt.
    """
    parser = argparse.ArgumentParser(
        prog='tg-init-pulsar',
        description=__doc__,
    )

    parser.add_argument(
        '-p', '--pulsar-admin-url',
        default=default_pulsar_admin_url,
        help=f'Pulsar admin URL (default: {default_pulsar_admin_url})',
    )

    admin_url = parser.parse_args().pulsar_admin_url

    while True:

        try:
            print(flush=True)
            print(f"Initialising with Pulsar {admin_url}...", flush=True)
            init(admin_url, "tg")
            print("Initialisation complete.", flush=True)
            return

        except Exception as e:
            print("Exception:", e, flush=True)

        # Only reached after a failed attempt: back off, then go round again.
        print("Sleeping...", flush=True)
        time.sleep(2)
        print("Will retry...", flush=True)
|
||||
|
||||
main()
|
||||
|
||||
11
trustgraph-cli/scripts/tg-init-pulsar-manager
Executable file
11
trustgraph-cli/scripts/tg-init-pulsar-manager
Executable file
|
|
@ -0,0 +1,11 @@
|
|||
#!/usr/bin/env bash
|
||||
|
||||
# Base URL shared by both requests below.
MANAGER_URL=http://localhost:7750/pulsar-manager

# Fetch a CSRF token; it is sent back both as a header and as a cookie.
CSRF_TOKEN=$(curl "${MANAGER_URL}/csrf-token")

# Submit the superuser account definition.
curl \
    -H "X-XSRF-TOKEN: $CSRF_TOKEN" \
    -H "Cookie: XSRF-TOKEN=$CSRF_TOKEN;" \
    -H 'Content-Type: application/json' \
    -X PUT \
    "${MANAGER_URL}/users/superuser" \
    -d '{"name": "admin", "password": "apachepulsar", "description": "test", "email": "username@test.org"}'
|
||||
128
trustgraph-cli/scripts/tg-load-pdf
Executable file
128
trustgraph-cli/scripts/tg-load-pdf
Executable file
|
|
@ -0,0 +1,128 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
"""
|
||||
Loads a PDF document into TrustGraph processing.
|
||||
"""
|
||||
|
||||
import pulsar
|
||||
from pulsar.schema import JsonSchema
|
||||
from trustgraph.schema import Document, Source, document_ingest_queue
|
||||
import base64
|
||||
import hashlib
|
||||
import argparse
|
||||
import os
|
||||
import time
|
||||
|
||||
from trustgraph.log_level import LogLevel
|
||||
|
||||
class Loader:
    """Publishes one file to a Pulsar topic as a base64-encoded Document."""

    def __init__(
            self,
            pulsar_host,
            output_queue,
            log_level,
            file,
    ):
        """Connect to Pulsar and create the producer.

        pulsar_host: address passed to pulsar.Client.
        output_queue: topic the Document message is produced to.
        log_level: object whose to_pulsar() result configures the Pulsar
            console logger.
        file: path of the file that run() will load.
        """
        self.client = pulsar.Client(
            pulsar_host,
            logger=pulsar.ConsoleLogger(log_level.to_pulsar())
        )

        self.producer = self.client.create_producer(
            topic=output_queue,
            schema=JsonSchema(Document),
            chunking_enabled=True,
        )

        self.file = file

    def run(self):
        """Read the file and send it as a single Document message.

        Errors (unreadable file, send failure, ...) are printed and
        swallowed, so this method always returns None.
        """
        try:

            path = self.file

            # Context manager closes the handle even if read() fails.
            with open(path, "rb") as f:
                data = f.read()

            # Short identifier: first 8 hex digits of the SHA-256 of the
            # path string (not of the file contents).
            doc_id = hashlib.sha256(path.encode("utf-8")).hexdigest()[0:8]

            r = Document(
                source=Source(
                    source=path,
                    title=path,
                    id=doc_id,
                ),
                data=base64.b64encode(data),
            )

            self.producer.send(r)

        except Exception as e:
            print(e, flush=True)

    def __del__(self):
        """Close the Pulsar client, if one was created."""
        # __del__ also runs when __init__ raised before self.client was
        # assigned; guard so that case does not raise AttributeError.
        client = getattr(self, "client", None)
        if client is not None:
            client.close()
|
||||
|
||||
def main():
    """Parse command-line arguments and load the PDF file into Pulsar.

    If constructing or running the Loader raises, the error is printed and
    the whole attempt is retried after 10 seconds.
    """
    parser = argparse.ArgumentParser(
        # Use the installed script name so usage/help output is accurate.
        prog='tg-load-pdf',
        description=__doc__,
    )

    default_pulsar_host = os.getenv("PULSAR_HOST", 'pulsar://localhost:6650')
    default_output_queue = document_ingest_queue
    default_log_level = LogLevel.ERROR

    parser.add_argument(
        '-p', '--pulsar-host',
        default=default_pulsar_host,
        help=f'Pulsar host (default: {default_pulsar_host})',
    )

    parser.add_argument(
        '-o', '--output-queue',
        default=default_output_queue,
        help=f'Output queue (default: {default_output_queue})'
    )

    parser.add_argument(
        '-l', '--log-level',
        type=LogLevel,
        default=default_log_level,
        choices=list(LogLevel),
        # Help text previously read "Output queue (default: info)", which
        # described the wrong option and the wrong default.
        help=f'Log level (default: {default_log_level})'
    )

    parser.add_argument(
        '-f', '--file',
        required=True,
        help='File to load'
    )

    args = parser.parse_args()

    while True:

        try:
            p = Loader(
                pulsar_host=args.pulsar_host,
                output_queue=args.output_queue,
                log_level=args.log_level,
                file=args.file,
            )

            # NOTE(review): Loader.run() prints and swallows its own errors,
            # so the message below is printed even when the send failed.
            p.run()

            print("File loaded.")
            break

        except Exception as e:

            print("Exception:", e, flush=True)
            print("Will retry...", flush=True)

            time.sleep(10)


# Only run when executed as a script, so the file can be imported safely.
if __name__ == "__main__":
    main()
|
||||
|
||||
128
trustgraph-cli/scripts/tg-load-text
Executable file
128
trustgraph-cli/scripts/tg-load-text
Executable file
|
|
@ -0,0 +1,128 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
"""
|
||||
Loads a text document into TrustGraph processing.
|
||||
"""
|
||||
|
||||
import pulsar
|
||||
from pulsar.schema import JsonSchema
|
||||
from trustgraph.schema import TextDocument, Source, text_ingest_queue
|
||||
import base64
|
||||
import hashlib
|
||||
import argparse
|
||||
import os
|
||||
import time
|
||||
|
||||
from trustgraph.log_level import LogLevel
|
||||
|
||||
class Loader:
    """Publishes one file to a Pulsar topic as a TextDocument message."""

    def __init__(
            self,
            pulsar_host,
            output_queue,
            log_level,
            file,
    ):
        """Connect to Pulsar and create the producer.

        pulsar_host: address passed to pulsar.Client.
        output_queue: topic the TextDocument message is produced to.
        log_level: object whose to_pulsar() result configures the Pulsar
            console logger.
        file: path of the file that run() will load.
        """
        self.client = pulsar.Client(
            pulsar_host,
            logger=pulsar.ConsoleLogger(log_level.to_pulsar())
        )

        self.producer = self.client.create_producer(
            topic=output_queue,
            schema=JsonSchema(TextDocument),
            chunking_enabled=True,
        )

        self.file = file

    def run(self):
        """Read the file and send it as a single TextDocument message.

        Errors (unreadable file, send failure, ...) are printed and
        swallowed, so this method always returns None.
        """
        try:

            path = self.file

            # Read as raw bytes; the context manager closes the handle even
            # if read() fails.
            with open(path, "rb") as f:
                data = f.read()

            # Short identifier: first 8 hex digits of the SHA-256 of the
            # path string (not of the file contents).
            doc_id = hashlib.sha256(path.encode("utf-8")).hexdigest()[0:8]

            r = TextDocument(
                source=Source(
                    source=path,
                    title=path,
                    id=doc_id,
                ),
                text=data,
            )

            self.producer.send(r)

        except Exception as e:
            print(e, flush=True)

    def __del__(self):
        """Close the Pulsar client, if one was created."""
        # __del__ also runs when __init__ raised before self.client was
        # assigned; guard so that case does not raise AttributeError.
        client = getattr(self, "client", None)
        if client is not None:
            client.close()
|
||||
|
||||
def main():
    """Parse command-line arguments and load the text file into Pulsar.

    If constructing or running the Loader raises, the error is printed and
    the whole attempt is retried after 10 seconds.
    """
    parser = argparse.ArgumentParser(
        # Use the installed script name so usage/help output is accurate.
        prog='tg-load-text',
        description=__doc__,
    )

    default_pulsar_host = os.getenv("PULSAR_HOST", 'pulsar://localhost:6650')
    default_output_queue = text_ingest_queue
    default_log_level = LogLevel.ERROR

    parser.add_argument(
        '-p', '--pulsar-host',
        default=default_pulsar_host,
        help=f'Pulsar host (default: {default_pulsar_host})',
    )

    parser.add_argument(
        '-o', '--output-queue',
        default=default_output_queue,
        help=f'Output queue (default: {default_output_queue})'
    )

    parser.add_argument(
        '-l', '--log-level',
        type=LogLevel,
        default=default_log_level,
        choices=list(LogLevel),
        # Help text previously read "Output queue (default: info)", which
        # described the wrong option and the wrong default.
        help=f'Log level (default: {default_log_level})'
    )

    parser.add_argument(
        '-f', '--file',
        required=True,
        help='File to load'
    )

    args = parser.parse_args()

    while True:

        try:
            p = Loader(
                pulsar_host=args.pulsar_host,
                output_queue=args.output_queue,
                log_level=args.log_level,
                file=args.file,
            )

            # NOTE(review): Loader.run() prints and swallows its own errors,
            # so the message below is printed even when the send failed.
            p.run()

            print("File loaded.")
            break

        except Exception as e:

            print("Exception:", e, flush=True)
            print("Will retry...", flush=True)

            time.sleep(10)


# Only run when executed as a script, so the file can be imported safely.
if __name__ == "__main__":
    main()
|
||||
|
||||
24
trustgraph-cli/scripts/tg-processor-state
Executable file
24
trustgraph-cli/scripts/tg-processor-state
Executable file
|
|
@ -0,0 +1,24 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
import requests
|
||||
import tabulate
|
||||
|
||||
# Query URL; the URL-encoded expression decodes to
#   processor_state{processor_state="running"}
# NOTE(review): the /api/v1/query path looks like the Prometheus HTTP API.
url = 'http://localhost:9090/api/v1/query?query=processor_state%7Bprocessor_state%3D%22running%22%7D'

resp = requests.get(url)

obj = resp.json()

# One [job, state] row per result entry; the sample value is the second
# element of each entry's "value" pair.
tbl = []
for m in obj["data"]["result"]:
    if int(m["value"][1]) > 0:
        state = "running"
    else:
        state = "down"
    tbl.append([m["metric"]["job"], state])

print(tabulate.tabulate(
    tbl,
    tablefmt="pretty",
    headers=["processor", "state"],
    stralign="left",
))
|
||||
|
||||
49
trustgraph-cli/scripts/tg-query-document-rag
Executable file
49
trustgraph-cli/scripts/tg-query-document-rag
Executable file
|
|
@ -0,0 +1,49 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
"""
|
||||
Uses the Document RAG service to answer a query
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import os
|
||||
from trustgraph.clients.document_rag_client import DocumentRagClient
|
||||
|
||||
default_pulsar_host = os.getenv("PULSAR_HOST", 'pulsar://localhost:6650')
|
||||
|
||||
def query(pulsar, query):
    """Send `query` through a DocumentRagClient and print the response.

    `pulsar` is handed to the client as its `pulsar_host`.
    """
    print(DocumentRagClient(pulsar_host=pulsar).request(query))
|
||||
|
||||
def main():
    """Parse command-line arguments and run one Document RAG query.

    Any exception raised while querying is reported on stdout as
    "Exception: ..." rather than propagated.
    """
    parser = argparse.ArgumentParser(
        # Was 'graph-show' (copied from another script); use this script's
        # own name so usage/help output is accurate.
        prog='tg-query-document-rag',
        description=__doc__,
    )

    parser.add_argument(
        '-p', '--pulsar-host',
        default=default_pulsar_host,
        help=f'Pulsar host (default: {default_pulsar_host})',
    )

    parser.add_argument(
        '-q', '--query',
        required=True,
        help='Query to execute',
    )

    args = parser.parse_args()

    try:
        query(args.pulsar_host, args.query)
    except Exception as e:
        print("Exception:", e, flush=True)


# Only run when executed as a script, so the file can be imported safely.
if __name__ == "__main__":
    main()
|
||||
|
||||
49
trustgraph-cli/scripts/tg-query-graph-rag
Executable file
49
trustgraph-cli/scripts/tg-query-graph-rag
Executable file
|
|
@ -0,0 +1,49 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
"""
|
||||
Uses the GraphRAG service to answer a query
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import os
|
||||
from trustgraph.clients.graph_rag_client import GraphRagClient
|
||||
|
||||
default_pulsar_host = os.getenv("PULSAR_HOST", 'pulsar://localhost:6650')
|
||||
|
||||
def query(pulsar, query):
    """Send `query` through a GraphRagClient and print the response.

    `pulsar` is handed to the client as its `pulsar_host`.
    """
    print(GraphRagClient(pulsar_host=pulsar).request(query))
|
||||
|
||||
def main():
    """Parse command-line arguments and run one GraphRAG query.

    Any exception raised while querying is reported on stdout as
    "Exception: ..." rather than propagated.
    """
    parser = argparse.ArgumentParser(
        # Was 'graph-show' (copied from another script); use this script's
        # own name so usage/help output is accurate.
        prog='tg-query-graph-rag',
        description=__doc__,
    )

    parser.add_argument(
        '-p', '--pulsar-host',
        default=default_pulsar_host,
        help=f'Pulsar host (default: {default_pulsar_host})',
    )

    parser.add_argument(
        '-q', '--query',
        required=True,
        help='Query to execute',
    )

    args = parser.parse_args()

    try:
        query(args.pulsar_host, args.query)
    except Exception as e:
        print("Exception:", e, flush=True)


# Only run when executed as a script, so the file can be imported safely.
if __name__ == "__main__":
    main()
|
||||
|
||||
53
trustgraph-cli/setup.py
Normal file
53
trustgraph-cli/setup.py
Normal file
|
|
@ -0,0 +1,53 @@
|
|||
# importlib.util is a submodule: it must be imported explicitly, a bare
# "import importlib" does not guarantee that importlib.util is available.
import importlib.util
import os

import setuptools

# The README doubles as the long description; read it with an explicit
# encoding so the result does not depend on the build machine's locale.
with open("README.md", "r", encoding="utf-8") as fh:
    long_description = fh.read()

# Load a version number module
spec = importlib.util.spec_from_file_location(
    'version', 'trustgraph/cli_version.py'
)
version_module = importlib.util.module_from_spec(spec)
spec.loader.exec_module(version_module)

version = version_module.__version__

setuptools.setup(
    name="trustgraph-cli",
    version=version,
    author="trustgraph.ai",
    author_email="security@trustgraph.ai",
    description="TrustGraph provides a means to run a pipeline of flexible AI processing components in a flexible means to achieve a processing pipeline.",
    long_description=long_description,
    long_description_content_type="text/markdown",
    url="https://github.com/trustgraph-ai/trustgraph",
    packages=setuptools.find_namespace_packages(
        where='./',
    ),
    classifiers=[
        "Programming Language :: Python :: 3",
        "License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)",
        "Operating System :: OS Independent",
    ],
    python_requires='>=3.8',
    download_url="https://github.com/trustgraph-ai/trustgraph/archive/refs/tags/v" + version + ".tar.gz",
    install_requires=[
        "trustgraph-base",
        "requests",
        "pulsar-client",
        "tabulate",
    ],
    # Command-line tools installed as-is from the scripts/ directory.
    scripts=[
        "scripts/tg-graph-show",
        "scripts/tg-graph-to-turtle",
        "scripts/tg-init-pulsar-manager",
        "scripts/tg-load-pdf",
        "scripts/tg-load-text",
        "scripts/tg-query-document-rag",
        "scripts/tg-query-graph-rag",
        "scripts/tg-init-pulsar",
        "scripts/tg-processor-state",
    ]
)
|
||||
Loading…
Add table
Add a link
Reference in a new issue