mirror of
https://github.com/trustgraph-ai/trustgraph.git
synced 2026-05-01 11:26:22 +02:00
Feature/subpackages (#80)
* Renaming what will become the core package * Tweaking to get package build working * Fix metering merge * Rename to core directory * Bump version. Use namespace searching for packaging trustgraph-core * Change references to trustgraph-core * Forming embeddings-hf package * Reference modules in core package. * Build both packages to one container, bump version * Update YAMLs
This commit is contained in:
parent
14d79ef9f1
commit
f081933217
303 changed files with 681 additions and 624 deletions
6
trustgraph-core/scripts/chunker-recursive
Executable file
6
trustgraph-core/scripts/chunker-recursive
Executable file
|
|
@ -0,0 +1,6 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
from trustgraph.core.chunking.recursive import run
|
||||
|
||||
run()
|
||||
|
||||
6
trustgraph-core/scripts/chunker-token
Executable file
6
trustgraph-core/scripts/chunker-token
Executable file
|
|
@ -0,0 +1,6 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
from trustgraph.core.chunking.token import run
|
||||
|
||||
run()
|
||||
|
||||
45
trustgraph-core/scripts/concat-parquet
Executable file
45
trustgraph-core/scripts/concat-parquet
Executable file
|
|
@ -0,0 +1,45 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
"""
|
||||
Concatenates multiple parquet files into a single parquet output
|
||||
"""
|
||||
|
||||
import pyarrow as pa
|
||||
import pyarrow.parquet as pq
|
||||
import pandas as pd
|
||||
import sys
|
||||
import argparse
|
||||
|
||||
# Command-line interface: any number of input parquet files, one output file.
parser = argparse.ArgumentParser(
    prog="concat-parquet",
    description=__doc__
)

parser.add_argument(
    '-i', '--input',
    nargs='*',
    # Without an explicit default argparse yields None when -i is omitted,
    # which would make the read loop below fail with a TypeError.
    default=[],
    help='Input files'
)

parser.add_argument(
    '-o', '--output',
    help='Output file'
)

args = parser.parse_args()

# Reject a missing output path before doing any (potentially slow) reads.
# With no inputs nothing is written, so no output path is needed.
if args.input and args.output is None:
    parser.error("-o/--output is required when input files are given")

# Load every input as a DataFrame, then concatenate once: concatenating
# pairwise inside the loop re-copies the accumulated rows for every file.
parts = [pq.read_table(file).to_pandas() for file in args.input]

df = None
if len(parts) == 1:
    # A single input is passed through untouched (no re-indexing).
    df = parts[0]
elif parts:
    df = pd.concat(parts, ignore_index=True)

if df is not None:
    table = pa.Table.from_pandas(df)
    pq.write_table(table, args.output)
|
||||
6
trustgraph-core/scripts/de-query-milvus
Executable file
6
trustgraph-core/scripts/de-query-milvus
Executable file
|
|
@ -0,0 +1,6 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
from trustgraph.core.query.doc_embeddings.milvus import run
|
||||
|
||||
run()
|
||||
|
||||
6
trustgraph-core/scripts/de-query-qdrant
Normal file
6
trustgraph-core/scripts/de-query-qdrant
Normal file
|
|
@ -0,0 +1,6 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
from trustgraph.core.query.doc_embeddings.qdrant import run
|
||||
|
||||
run()
|
||||
|
||||
6
trustgraph-core/scripts/de-write-milvus
Executable file
6
trustgraph-core/scripts/de-write-milvus
Executable file
|
|
@ -0,0 +1,6 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
from trustgraph.core.storage.doc_embeddings.milvus import run
|
||||
|
||||
run()
|
||||
|
||||
6
trustgraph-core/scripts/de-write-qdrant
Normal file
6
trustgraph-core/scripts/de-write-qdrant
Normal file
|
|
@ -0,0 +1,6 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
from trustgraph.core.storage.doc_embeddings.qdrant import run
|
||||
|
||||
run()
|
||||
|
||||
6
trustgraph-core/scripts/document-rag
Executable file
6
trustgraph-core/scripts/document-rag
Executable file
|
|
@ -0,0 +1,6 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
from trustgraph.core.retrieval.document_rag import run
|
||||
|
||||
run()
|
||||
|
||||
24
trustgraph-core/scripts/dump-parquet
Executable file
24
trustgraph-core/scripts/dump-parquet
Executable file
|
|
@ -0,0 +1,24 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
import pyarrow as pa
|
||||
import pyarrow.csv as pc
|
||||
import pyarrow.parquet as pq
|
||||
import pandas as pd
|
||||
import sys
|
||||
|
||||
# Load every parquet file named on the command line as a DataFrame, then
# concatenate once: concatenating pairwise inside the loop re-copies the
# accumulated rows for every additional file.
parts = [pq.read_table(file).to_pandas() for file in sys.argv[1:]]

df = None
if len(parts) == 1:
    # A single input is passed through untouched (no re-indexing).
    df = parts[0]
elif parts:
    df = pd.concat(parts, ignore_index=True)

# With no input files there is nothing to dump and nothing is written.
if df is not None:
    table = pa.Table.from_pandas(df)
    # CSV is written as raw bytes straight to standard output.
    pc.write_csv(table, sys.stdout.buffer)
|
||||
|
||||
6
trustgraph-core/scripts/embeddings-ollama
Executable file
6
trustgraph-core/scripts/embeddings-ollama
Executable file
|
|
@ -0,0 +1,6 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
from trustgraph.core.embeddings.ollama import run
|
||||
|
||||
run()
|
||||
|
||||
6
trustgraph-core/scripts/embeddings-vectorize
Executable file
6
trustgraph-core/scripts/embeddings-vectorize
Executable file
|
|
@ -0,0 +1,6 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
from trustgraph.core.embeddings.vectorize import run
|
||||
|
||||
run()
|
||||
|
||||
6
trustgraph-core/scripts/ge-dump-parquet
Executable file
6
trustgraph-core/scripts/ge-dump-parquet
Executable file
|
|
@ -0,0 +1,6 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
from trustgraph.core.dump.graph_embeddings.parquet import run
|
||||
|
||||
run()
|
||||
|
||||
6
trustgraph-core/scripts/ge-query-milvus
Executable file
6
trustgraph-core/scripts/ge-query-milvus
Executable file
|
|
@ -0,0 +1,6 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
from trustgraph.core.query.graph_embeddings.milvus import run
|
||||
|
||||
run()
|
||||
|
||||
6
trustgraph-core/scripts/ge-query-qdrant
Executable file
6
trustgraph-core/scripts/ge-query-qdrant
Executable file
|
|
@ -0,0 +1,6 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
from trustgraph.core.query.graph_embeddings.qdrant import run
|
||||
|
||||
run()
|
||||
|
||||
6
trustgraph-core/scripts/ge-write-milvus
Executable file
6
trustgraph-core/scripts/ge-write-milvus
Executable file
|
|
@ -0,0 +1,6 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
from trustgraph.core.storage.graph_embeddings.milvus import run
|
||||
|
||||
run()
|
||||
|
||||
6
trustgraph-core/scripts/ge-write-qdrant
Executable file
6
trustgraph-core/scripts/ge-write-qdrant
Executable file
|
|
@ -0,0 +1,6 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
from trustgraph.core.storage.graph_embeddings.qdrant import run
|
||||
|
||||
run()
|
||||
|
||||
6
trustgraph-core/scripts/graph-rag
Executable file
6
trustgraph-core/scripts/graph-rag
Executable file
|
|
@ -0,0 +1,6 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
from trustgraph.core.retrieval.graph_rag import run
|
||||
|
||||
run()
|
||||
|
||||
46
trustgraph-core/scripts/graph-show
Executable file
46
trustgraph-core/scripts/graph-show
Executable file
|
|
@ -0,0 +1,46 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
"""
|
||||
Connects to the graph query service and dumps all graph edges.
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import os
|
||||
from trustgraph.core.clients.triples_query_client import TriplesQueryClient
|
||||
|
||||
default_pulsar_host = os.getenv("PULSAR_HOST", 'pulsar://localhost:6650')
|
||||
|
||||
def show_graph(pulsar):
    """Print every triple returned by the triples query service.

    pulsar is passed to TriplesQueryClient as its pulsar_host.  The request
    is made with None for subject, predicate and object (presumably "match
    anything" -- confirm against the client) and a limit of 10,000,000 rows.
    Each row is printed as "subject predicate object" on its own line.
    """

    client = TriplesQueryClient(pulsar_host=pulsar)

    for triple in client.request(None, None, None, limit=10_000_000):
        print(triple.s.value, triple.p.value, triple.o.value)
|
||||
|
||||
def main():
    """Command-line entry point: parse arguments and dump the graph.

    Any exception raised while dumping is reported on standard output
    rather than propagated.
    """

    cli = argparse.ArgumentParser(prog='graph-show', description=__doc__)

    cli.add_argument(
        '-p', '--pulsar-host',
        default=default_pulsar_host,
        help=f'Pulsar host (default: {default_pulsar_host})',
    )

    options = cli.parse_args()

    try:
        show_graph(options.pulsar_host)
    except Exception as err:
        print("Exception:", err, flush=True)
|
||||
|
||||
main()
|
||||
|
||||
74
trustgraph-core/scripts/graph-to-turtle
Executable file
74
trustgraph-core/scripts/graph-to-turtle
Executable file
|
|
@ -0,0 +1,74 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
"""
|
||||
Connects to the graph query service and dumps all graph edges in Turtle format.
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import os
|
||||
from trustgraph.core.clients.triples_query_client import TriplesQueryClient
|
||||
import rdflib
|
||||
import io
|
||||
import sys
|
||||
|
||||
default_pulsar_host = os.getenv("PULSAR_HOST", 'pulsar://localhost:6650')
|
||||
|
||||
def show_graph(pulsar):
    """Fetch triples from the query service and emit them as Turtle.

    pulsar is passed to TriplesQueryClient as its pulsar_host.  The triples
    are collected into an rdflib graph, which is serialised twice: once to
    the file "output.ttl" in the current directory and once to standard
    output.
    """

    client = TriplesQueryClient(pulsar_host=pulsar)

    triples = client.request(None, None, None, limit=10_000_000)

    graph = rdflib.Graph()

    for triple in triples:

        # Subject and predicate are always treated as URIs.
        subject = rdflib.term.URIRef(triple.s.value)
        predicate = rdflib.term.URIRef(triple.p.value)

        if not triple.o.is_uri:
            obj = rdflib.term.Literal(triple.o.value)
        elif " " in triple.o.value:
            # Skip malformed URLs with spaces in
            continue
        else:
            obj = rdflib.term.URIRef(triple.o.value)

        graph.add((subject, predicate, obj))

    graph.serialize(destination="output.ttl", format="turtle")

    # Serialise into memory so the result can be written to stdout as text.
    out = io.BytesIO()
    graph.serialize(destination=out, format="turtle")

    sys.stdout.write(out.getvalue().decode("utf-8"))
|
||||
|
||||
|
||||
def main():
    """Command-line entry point: parse arguments and dump the graph as Turtle.

    Any exception raised while dumping is reported on standard output
    rather than propagated.
    """

    parser = argparse.ArgumentParser(
        # Was 'graph-show' (copied from the sibling script), which made the
        # usage/help output name the wrong program.
        prog='graph-to-turtle',
        description=__doc__,
    )

    parser.add_argument(
        '-p', '--pulsar-host',
        default=default_pulsar_host,
        help=f'Pulsar host (default: {default_pulsar_host})',
    )

    args = parser.parse_args()

    try:

        show_graph(args.pulsar_host)

    except Exception as e:

        print("Exception:", e, flush=True)
|
||||
|
||||
main()
|
||||
|
||||
11
trustgraph-core/scripts/init-pulsar-manager
Executable file
11
trustgraph-core/scripts/init-pulsar-manager
Executable file
|
|
@ -0,0 +1,11 @@
|
|||
#!/usr/bin/env bash
|
||||
|
||||
# Creates the Pulsar Manager superuser account through its REST API on
# localhost:7750.

# Stop at the first failing command, unset variable or failed pipeline stage
# instead of carrying on with bad state.
set -euo pipefail

# --fail makes curl exit non-zero on an HTTP error response, so the PUT below
# is never sent with an empty or bogus token.
CSRF_TOKEN=$(curl --fail http://localhost:7750/pulsar-manager/csrf-token)

# NOTE(review): the account name and password are hard-coded; confirm this
# script is only used for local/test deployments.
curl \
    -H "X-XSRF-TOKEN: $CSRF_TOKEN" \
    -H "Cookie: XSRF-TOKEN=$CSRF_TOKEN;" \
    -H 'Content-Type: application/json' \
    -X PUT \
    http://localhost:7750/pulsar-manager/users/superuser \
    -d '{"name": "admin", "password": "apachepulsar", "description": "test", "email": "username@test.org"}'
|
||||
6
trustgraph-core/scripts/kg-extract-definitions
Executable file
6
trustgraph-core/scripts/kg-extract-definitions
Executable file
|
|
@ -0,0 +1,6 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
from trustgraph.core.extract.kg.definitions import run
|
||||
|
||||
run()
|
||||
|
||||
6
trustgraph-core/scripts/kg-extract-relationships
Executable file
6
trustgraph-core/scripts/kg-extract-relationships
Executable file
|
|
@ -0,0 +1,6 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
from trustgraph.core.extract.kg.relationships import run
|
||||
|
||||
run()
|
||||
|
||||
6
trustgraph-core/scripts/kg-extract-topics
Executable file
6
trustgraph-core/scripts/kg-extract-topics
Executable file
|
|
@ -0,0 +1,6 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
from trustgraph.core.extract.kg.topics import run
|
||||
|
||||
run()
|
||||
|
||||
145
trustgraph-core/scripts/load-graph-embeddings
Executable file
145
trustgraph-core/scripts/load-graph-embeddings
Executable file
|
|
@ -0,0 +1,145 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
"""
|
||||
Loads Graph embeddings into TrustGraph processing.
|
||||
"""
|
||||
|
||||
import pulsar
|
||||
from pulsar.schema import JsonSchema
|
||||
from trustgraph.core.schema import GraphEmbeddings, Value
|
||||
from trustgraph.core.schema import graph_embeddings_store_queue
|
||||
import argparse
|
||||
import os
|
||||
import time
|
||||
import pyarrow as pa
|
||||
import pyarrow.parquet as pq
|
||||
|
||||
from trustgraph.core.log_level import LogLevel
|
||||
|
||||
class Loader:
    """Publishes graph embeddings read from a parquet file to a Pulsar queue.

    The parquet file must contain an "embeddings" column and an "entity"
    column; one GraphEmbeddings message is sent per row.
    """

    def __init__(
            self,
            pulsar_host,
            output_queue,
            log_level,
            file,
    ):
        """Connect to Pulsar and create the producer.

        pulsar_host  -- Pulsar service URL handed to pulsar.Client
        output_queue -- topic the producer publishes to
        log_level    -- object whose to_pulsar() gives the Pulsar log level
        file         -- path of the parquet file to load
        """

        self.client = pulsar.Client(
            pulsar_host,
            logger=pulsar.ConsoleLogger(log_level.to_pulsar())
        )

        self.producer = self.client.create_producer(
            topic=output_queue,
            schema=JsonSchema(GraphEmbeddings),
            chunking_enabled=True,
        )

        self.file = file

    def run(self):
        """Read the parquet file and send one message per row.

        Errors are printed, not raised.  Returns True when every row was
        sent and False when the load was abandoned.
        """

        try:

            path = self.file

            print("Reading file...")
            table = pq.read_table(path)
            print("Loaded.")

            names = set(table.column_names)

            # Report every missing column, then stop: continuing would only
            # fail with a KeyError on the column lookup below.
            missing = [
                name for name in ("embeddings", "entity")
                if name not in names
            ]

            for name in missing:
                print(f"No '{name}' column")

            if missing:
                return False

            embc = table.column("embeddings")
            entc = table.column("entity")

            for emb, ent in zip(embc, entc):

                b = emb.as_py()
                n = ent.as_py()

                r = GraphEmbeddings(
                    vectors=b,
                    entity=Value(
                        value=n,
                        # Only "https:" values are flagged as URIs.
                        is_uri=n.startswith("https:")
                    )
                )

                self.producer.send(r)

        except Exception as e:
            print(e, flush=True)
            return False

        return True

    def __del__(self):
        # __init__ may have failed before self.client was assigned; in that
        # case there is nothing to close.
        client = getattr(self, "client", None)
        if client is not None:
            client.close()
|
||||
|
||||
def main():
    """Command-line entry point: parse arguments and load the file.

    Keeps retrying every 10 seconds for as long as creating or running the
    Loader raises an exception.
    """

    parser = argparse.ArgumentParser(
        prog='loader',
        description=__doc__,
    )

    default_pulsar_host = os.getenv("PULSAR_HOST", 'pulsar://localhost:6650')
    default_output_queue = graph_embeddings_store_queue

    parser.add_argument(
        '-p', '--pulsar-host',
        default=default_pulsar_host,
        help=f'Pulsar host (default: {default_pulsar_host})',
    )

    parser.add_argument(
        '-o', '--output-queue',
        default=default_output_queue,
        help=f'Output queue (default: {default_output_queue})'
    )

    parser.add_argument(
        '-l', '--log-level',
        type=LogLevel,
        default=LogLevel.ERROR,
        choices=list(LogLevel),
        # Previously read 'Output queue (default: info)', which described
        # neither the option nor its default.
        help='Log level (default: error)'
    )

    parser.add_argument(
        '-f', '--file',
        required=True,
        help='File to load'
    )

    args = parser.parse_args()

    while True:

        try:
            p = Loader(
                pulsar_host=args.pulsar_host,
                output_queue=args.output_queue,
                log_level=args.log_level,
                file=args.file,
            )

            # An explicit False from run() means the load was abandoned;
            # any other result (including None) is treated as success.
            if p.run() is False:
                print("File not loaded.", flush=True)
            else:
                print("File loaded.")

            break

        except Exception as e:

            print("Exception:", e, flush=True)
            print("Will retry...", flush=True)

        time.sleep(10)
|
||||
|
||||
main()
|
||||
|
||||
128
trustgraph-core/scripts/load-pdf
Executable file
128
trustgraph-core/scripts/load-pdf
Executable file
|
|
@ -0,0 +1,128 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
"""
|
||||
Loads a PDF document into TrustGraph processing.
|
||||
"""
|
||||
|
||||
import pulsar
|
||||
from pulsar.schema import JsonSchema
|
||||
from trustgraph.core.schema import Document, Source, document_ingest_queue
|
||||
import base64
|
||||
import hashlib
|
||||
import argparse
|
||||
import os
|
||||
import time
|
||||
|
||||
from trustgraph.core.log_level import LogLevel
|
||||
|
||||
class Loader:
    """Publishes a single PDF file to a Pulsar queue as a Document message."""

    def __init__(
            self,
            pulsar_host,
            output_queue,
            log_level,
            file,
    ):
        """Connect to Pulsar and create the producer.

        pulsar_host  -- Pulsar service URL handed to pulsar.Client
        output_queue -- topic the producer publishes to
        log_level    -- object whose to_pulsar() gives the Pulsar log level
        file         -- path of the PDF file to load
        """

        self.client = pulsar.Client(
            pulsar_host,
            logger=pulsar.ConsoleLogger(log_level.to_pulsar())
        )

        self.producer = self.client.create_producer(
            topic=output_queue,
            schema=JsonSchema(Document),
            chunking_enabled=True,
        )

        self.file = file

    def run(self):
        """Read the file and send it as one base64-encoded Document.

        Errors are printed, not raised.  Returns True when the message was
        sent and False when the load was abandoned.
        """

        try:

            path = self.file

            # Context manager so the handle is closed even if read() fails.
            with open(path, "rb") as f:
                data = f.read()

            # Document id: first 8 hex digits of the SHA-256 of the *path*
            # (not of the file content).
            doc_id = hashlib.sha256(path.encode("utf-8")).hexdigest()[0:8]

            r = Document(
                source=Source(
                    source=path,
                    title=path,
                    id=doc_id,
                ),
                data=base64.b64encode(data),
            )

            self.producer.send(r)

        except Exception as e:
            print(e, flush=True)
            return False

        return True

    def __del__(self):
        # __init__ may have failed before self.client was assigned; in that
        # case there is nothing to close.
        client = getattr(self, "client", None)
        if client is not None:
            client.close()
|
||||
|
||||
def main():
    """Command-line entry point: parse arguments and load the PDF file.

    Keeps retrying every 10 seconds for as long as creating or running the
    Loader raises an exception.
    """

    parser = argparse.ArgumentParser(
        prog='loader',
        description=__doc__,
    )

    default_pulsar_host = os.getenv("PULSAR_HOST", 'pulsar://localhost:6650')
    default_output_queue = document_ingest_queue

    parser.add_argument(
        '-p', '--pulsar-host',
        default=default_pulsar_host,
        help=f'Pulsar host (default: {default_pulsar_host})',
    )

    parser.add_argument(
        '-o', '--output-queue',
        default=default_output_queue,
        help=f'Output queue (default: {default_output_queue})'
    )

    parser.add_argument(
        '-l', '--log-level',
        type=LogLevel,
        default=LogLevel.ERROR,
        choices=list(LogLevel),
        # Previously read 'Output queue (default: info)', which described
        # neither the option nor its default.
        help='Log level (default: error)'
    )

    parser.add_argument(
        '-f', '--file',
        required=True,
        help='File to load'
    )

    args = parser.parse_args()

    while True:

        try:
            p = Loader(
                pulsar_host=args.pulsar_host,
                output_queue=args.output_queue,
                log_level=args.log_level,
                file=args.file,
            )

            # An explicit False from run() means the load was abandoned;
            # any other result (including None) is treated as success.
            if p.run() is False:
                print("File not loaded.", flush=True)
            else:
                print("File loaded.")

            break

        except Exception as e:

            print("Exception:", e, flush=True)
            print("Will retry...", flush=True)

        time.sleep(10)
|
||||
|
||||
main()
|
||||
|
||||
128
trustgraph-core/scripts/load-text
Executable file
128
trustgraph-core/scripts/load-text
Executable file
|
|
@ -0,0 +1,128 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
"""
|
||||
Loads a text document into TrustGraph processing.
|
||||
"""
|
||||
|
||||
import pulsar
|
||||
from pulsar.schema import JsonSchema
|
||||
from trustgraph.core.schema import TextDocument, Source, text_ingest_queue
|
||||
import base64
|
||||
import hashlib
|
||||
import argparse
|
||||
import os
|
||||
import time
|
||||
|
||||
from trustgraph.core.log_level import LogLevel
|
||||
|
||||
class Loader:
    """Publishes a single text file to a Pulsar queue as a TextDocument."""

    def __init__(
            self,
            pulsar_host,
            output_queue,
            log_level,
            file,
    ):
        """Connect to Pulsar and create the producer.

        pulsar_host  -- Pulsar service URL handed to pulsar.Client
        output_queue -- topic the producer publishes to
        log_level    -- object whose to_pulsar() gives the Pulsar log level
        file         -- path of the text file to load
        """

        self.client = pulsar.Client(
            pulsar_host,
            logger=pulsar.ConsoleLogger(log_level.to_pulsar())
        )

        self.producer = self.client.create_producer(
            topic=output_queue,
            schema=JsonSchema(TextDocument),
            chunking_enabled=True,
        )

        self.file = file

    def run(self):
        """Read the file and send its raw bytes as one TextDocument.

        Errors are printed, not raised.  Returns True when the message was
        sent and False when the load was abandoned.
        """

        try:

            path = self.file

            # Context manager so the handle is closed even if read() fails.
            with open(path, "rb") as f:
                data = f.read()

            # Document id: first 8 hex digits of the SHA-256 of the *path*
            # (not of the file content).
            doc_id = hashlib.sha256(path.encode("utf-8")).hexdigest()[0:8]

            r = TextDocument(
                source=Source(
                    source=path,
                    title=path,
                    id=doc_id,
                ),
                # The undecoded bytes are passed through as read.
                text=data,
            )

            self.producer.send(r)

        except Exception as e:
            print(e, flush=True)
            return False

        return True

    def __del__(self):
        # __init__ may have failed before self.client was assigned; in that
        # case there is nothing to close.
        client = getattr(self, "client", None)
        if client is not None:
            client.close()
|
||||
|
||||
def main():
    """Command-line entry point: parse arguments and load the text file.

    Keeps retrying every 10 seconds for as long as creating or running the
    Loader raises an exception.
    """

    parser = argparse.ArgumentParser(
        prog='loader',
        description=__doc__,
    )

    default_pulsar_host = os.getenv("PULSAR_HOST", 'pulsar://localhost:6650')
    default_output_queue = text_ingest_queue

    parser.add_argument(
        '-p', '--pulsar-host',
        default=default_pulsar_host,
        help=f'Pulsar host (default: {default_pulsar_host})',
    )

    parser.add_argument(
        '-o', '--output-queue',
        default=default_output_queue,
        help=f'Output queue (default: {default_output_queue})'
    )

    parser.add_argument(
        '-l', '--log-level',
        type=LogLevel,
        default=LogLevel.ERROR,
        choices=list(LogLevel),
        # Previously read 'Output queue (default: info)', which described
        # neither the option nor its default.
        help='Log level (default: error)'
    )

    parser.add_argument(
        '-f', '--file',
        required=True,
        help='File to load'
    )

    args = parser.parse_args()

    while True:

        try:
            p = Loader(
                pulsar_host=args.pulsar_host,
                output_queue=args.output_queue,
                log_level=args.log_level,
                file=args.file,
            )

            # An explicit False from run() means the load was abandoned;
            # any other result (including None) is treated as success.
            if p.run() is False:
                print("File not loaded.", flush=True)
            else:
                print("File loaded.")

            break

        except Exception as e:

            print("Exception:", e, flush=True)
            print("Will retry...", flush=True)

        time.sleep(10)
|
||||
|
||||
main()
|
||||
|
||||
144
trustgraph-core/scripts/load-triples
Executable file
144
trustgraph-core/scripts/load-triples
Executable file
|
|
@ -0,0 +1,144 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
"""
|
||||
Loads triples into TrustGraph processing.
|
||||
"""
|
||||
|
||||
import pulsar
|
||||
from pulsar.schema import JsonSchema
|
||||
from trustgraph.core.schema import Triple, Value
|
||||
from trustgraph.core.schema import triples_store_queue
|
||||
import argparse
|
||||
import os
|
||||
import time
|
||||
import pyarrow as pa
|
||||
import pyarrow.parquet as pq
|
||||
|
||||
from trustgraph.core.log_level import LogLevel
|
||||
|
||||
class Loader:
    """Publishes triples read from a parquet file to a Pulsar queue.

    The parquet file must contain "s", "p" and "o" columns; one Triple
    message is sent per row.
    """

    def __init__(
            self,
            pulsar_host,
            output_queue,
            log_level,
            file,
    ):
        """Connect to Pulsar and create the producer.

        pulsar_host  -- Pulsar service URL handed to pulsar.Client
        output_queue -- topic the producer publishes to
        log_level    -- object whose to_pulsar() gives the Pulsar log level
        file         -- path of the parquet file to load
        """

        self.client = pulsar.Client(
            pulsar_host,
            logger=pulsar.ConsoleLogger(log_level.to_pulsar())
        )

        self.producer = self.client.create_producer(
            topic=output_queue,
            schema=JsonSchema(Triple),
            chunking_enabled=True,
        )

        self.file = file

    def run(self):
        """Read the parquet file and send one Triple per row.

        Errors are printed, not raised.  Returns True when every row was
        sent and False when the load was abandoned.
        """

        try:

            path = self.file

            print("Reading file...")
            table = pq.read_table(path)
            print("Loaded.")

            names = set(table.column_names)

            # Report every missing column, then stop: continuing would only
            # fail with a KeyError on the column lookup below.
            missing = [
                name for name in ("s", "p", "o")
                if name not in names
            ]

            for name in missing:
                print(f"No '{name}' column")

            if missing:
                return False

            sc = table.column("s")
            pc = table.column("p")
            oc = table.column("o")

            for s, p, o in zip(sc, pc, oc):

                # Convert the object once; it is needed for both fields.
                ov = o.as_py()

                r = Triple(
                    # Subject and predicate are always flagged as URIs.
                    s=Value(value=s.as_py(), is_uri=True),
                    p=Value(value=p.as_py(), is_uri=True),
                    # Only "https:" objects are flagged as URIs.
                    o=Value(value=ov, is_uri=ov.startswith("https:"))
                )

                self.producer.send(r)

        except Exception as e:
            print(e, flush=True)
            return False

        return True

    def __del__(self):
        # __init__ may have failed before self.client was assigned; in that
        # case there is nothing to close.
        client = getattr(self, "client", None)
        if client is not None:
            client.close()
|
||||
|
||||
def main():
    """Command-line entry point: parse arguments and load the triples file.

    Keeps retrying every 10 seconds for as long as creating or running the
    Loader raises an exception.
    """

    parser = argparse.ArgumentParser(
        prog='loader',
        description=__doc__,
    )

    default_pulsar_host = os.getenv("PULSAR_HOST", 'pulsar://localhost:6650')
    default_output_queue = triples_store_queue

    parser.add_argument(
        '-p', '--pulsar-host',
        default=default_pulsar_host,
        help=f'Pulsar host (default: {default_pulsar_host})',
    )

    parser.add_argument(
        '-o', '--output-queue',
        default=default_output_queue,
        help=f'Output queue (default: {default_output_queue})'
    )

    parser.add_argument(
        '-l', '--log-level',
        type=LogLevel,
        default=LogLevel.ERROR,
        choices=list(LogLevel),
        # Previously read 'Output queue (default: info)', which described
        # neither the option nor its default.
        help='Log level (default: error)'
    )

    parser.add_argument(
        '-f', '--file',
        required=True,
        help='File to load'
    )

    args = parser.parse_args()

    while True:

        try:
            p = Loader(
                pulsar_host=args.pulsar_host,
                output_queue=args.output_queue,
                log_level=args.log_level,
                file=args.file,
            )

            # An explicit False from run() means the load was abandoned;
            # any other result (including None) is treated as success.
            if p.run() is False:
                print("File not loaded.", flush=True)
            else:
                print("File loaded.")

            break

        except Exception as e:

            print("Exception:", e, flush=True)
            print("Will retry...", flush=True)

        time.sleep(10)
|
||||
|
||||
main()
|
||||
|
||||
5
trustgraph-core/scripts/metering
Executable file
5
trustgraph-core/scripts/metering
Executable file
|
|
@ -0,0 +1,5 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
from trustgraph.core.metering import run
|
||||
|
||||
run()
|
||||
6
trustgraph-core/scripts/object-extract-row
Executable file
6
trustgraph-core/scripts/object-extract-row
Executable file
|
|
@ -0,0 +1,6 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
from trustgraph.core.extract.object.row import run
|
||||
|
||||
run()
|
||||
|
||||
6
trustgraph-core/scripts/oe-write-milvus
Executable file
6
trustgraph-core/scripts/oe-write-milvus
Executable file
|
|
@ -0,0 +1,6 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
from trustgraph.core.storage.object_embeddings.milvus import run
|
||||
|
||||
run()
|
||||
|
||||
6
trustgraph-core/scripts/pdf-decoder
Executable file
6
trustgraph-core/scripts/pdf-decoder
Executable file
|
|
@ -0,0 +1,6 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
from trustgraph.core.decoding.pdf import run
|
||||
|
||||
run()
|
||||
|
||||
6
trustgraph-core/scripts/prompt-generic
Executable file
6
trustgraph-core/scripts/prompt-generic
Executable file
|
|
@ -0,0 +1,6 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
from trustgraph.core.model.prompt.generic import run
|
||||
|
||||
run()
|
||||
|
||||
6
trustgraph-core/scripts/prompt-template
Executable file
6
trustgraph-core/scripts/prompt-template
Executable file
|
|
@ -0,0 +1,6 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
from trustgraph.core.model.prompt.template import run
|
||||
|
||||
run()
|
||||
|
||||
49
trustgraph-core/scripts/query-document-rag
Executable file
49
trustgraph-core/scripts/query-document-rag
Executable file
|
|
@ -0,0 +1,49 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
"""
|
||||
Uses the Document RAG service to answer a query
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import os
|
||||
from trustgraph.core.clients.document_rag_client import DocumentRagClient
|
||||
|
||||
default_pulsar_host = os.getenv("PULSAR_HOST", 'pulsar://localhost:6650')
|
||||
|
||||
def query(pulsar, query):
    """Send query to the Document RAG service and print the response.

    pulsar is passed to DocumentRagClient as its pulsar_host.
    """

    client = DocumentRagClient(pulsar_host=pulsar)
    print(client.request(query))
|
||||
|
||||
def main():
    """Command-line entry point: parse arguments and run one Document RAG query.

    Any exception raised by the query is reported on standard output rather
    than propagated.
    """

    parser = argparse.ArgumentParser(
        # Was 'graph-show' (copied from another script), which made the
        # usage/help output name the wrong program.
        prog='query-document-rag',
        description=__doc__,
    )

    parser.add_argument(
        '-p', '--pulsar-host',
        default=default_pulsar_host,
        help=f'Pulsar host (default: {default_pulsar_host})',
    )

    parser.add_argument(
        '-q', '--query',
        required=True,
        help='Query to execute',
    )

    args = parser.parse_args()

    try:

        query(args.pulsar_host, args.query)

    except Exception as e:

        print("Exception:", e, flush=True)
|
||||
|
||||
main()
|
||||
|
||||
49
trustgraph-core/scripts/query-graph-rag
Executable file
49
trustgraph-core/scripts/query-graph-rag
Executable file
|
|
@ -0,0 +1,49 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
"""
|
||||
Uses the GraphRAG service to answer a query
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import os
|
||||
from trustgraph.core.clients.graph_rag_client import GraphRagClient
|
||||
|
||||
default_pulsar_host = os.getenv("PULSAR_HOST", 'pulsar://localhost:6650')
|
||||
|
||||
def query(pulsar, query):
    """Send query to the GraphRAG service and print the response.

    pulsar is passed to GraphRagClient as its pulsar_host.
    """

    client = GraphRagClient(pulsar_host=pulsar)
    print(client.request(query))
|
||||
|
||||
def main():
    """Command-line entry point: parse arguments and run one GraphRAG query.

    Any exception raised by the query is reported on standard output rather
    than propagated.
    """

    parser = argparse.ArgumentParser(
        # Was 'graph-show' (copied from another script), which made the
        # usage/help output name the wrong program.
        prog='query-graph-rag',
        description=__doc__,
    )

    parser.add_argument(
        '-p', '--pulsar-host',
        default=default_pulsar_host,
        help=f'Pulsar host (default: {default_pulsar_host})',
    )

    parser.add_argument(
        '-q', '--query',
        required=True,
        help='Query to execute',
    )

    args = parser.parse_args()

    try:

        query(args.pulsar_host, args.query)

    except Exception as e:

        print("Exception:", e, flush=True)
|
||||
|
||||
main()
|
||||
|
||||
6
trustgraph-core/scripts/rows-write-cassandra
Executable file
6
trustgraph-core/scripts/rows-write-cassandra
Executable file
|
|
@ -0,0 +1,6 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
from trustgraph.core.storage.rows.cassandra import run
|
||||
|
||||
run()
|
||||
|
||||
6
trustgraph-core/scripts/run-processing
Executable file
6
trustgraph-core/scripts/run-processing
Executable file
|
|
@ -0,0 +1,6 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
from trustgraph.core.processing import run
|
||||
|
||||
run()
|
||||
|
||||
6
trustgraph-core/scripts/text-completion-azure
Executable file
6
trustgraph-core/scripts/text-completion-azure
Executable file
|
|
@ -0,0 +1,6 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
from trustgraph.core.model.text_completion.azure import run
|
||||
|
||||
run()
|
||||
|
||||
6
trustgraph-core/scripts/text-completion-bedrock
Executable file
6
trustgraph-core/scripts/text-completion-bedrock
Executable file
|
|
@ -0,0 +1,6 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
from trustgraph.core.model.text_completion.bedrock import run
|
||||
|
||||
run()
|
||||
|
||||
6
trustgraph-core/scripts/text-completion-claude
Executable file
6
trustgraph-core/scripts/text-completion-claude
Executable file
|
|
@ -0,0 +1,6 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
from trustgraph.core.model.text_completion.claude import run
|
||||
|
||||
run()
|
||||
|
||||
6
trustgraph-core/scripts/text-completion-cohere
Executable file
6
trustgraph-core/scripts/text-completion-cohere
Executable file
|
|
@ -0,0 +1,6 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
from trustgraph.core.model.text_completion.cohere import run
|
||||
|
||||
run()
|
||||
|
||||
6
trustgraph-core/scripts/text-completion-llamafile
Executable file
6
trustgraph-core/scripts/text-completion-llamafile
Executable file
|
|
@ -0,0 +1,6 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
from trustgraph.core.model.text_completion.llamafile import run
|
||||
|
||||
run()
|
||||
|
||||
6
trustgraph-core/scripts/text-completion-ollama
Executable file
6
trustgraph-core/scripts/text-completion-ollama
Executable file
|
|
@ -0,0 +1,6 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
from trustgraph.core.model.text_completion.ollama import run
|
||||
|
||||
run()
|
||||
|
||||
6
trustgraph-core/scripts/text-completion-openai
Executable file
6
trustgraph-core/scripts/text-completion-openai
Executable file
|
|
@ -0,0 +1,6 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
from trustgraph.core.model.text_completion.openai import run
|
||||
|
||||
run()
|
||||
|
||||
6
trustgraph-core/scripts/text-completion-vertexai
Executable file
6
trustgraph-core/scripts/text-completion-vertexai
Executable file
|
|
@ -0,0 +1,6 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
from trustgraph.core.model.text_completion.vertexai import run
|
||||
|
||||
run()
|
||||
|
||||
119
trustgraph-core/scripts/tg-init-pulsar
Executable file
119
trustgraph-core/scripts/tg-init-pulsar
Executable file
|
|
@ -0,0 +1,119 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
"""
|
||||
Initialises Pulsar with Trustgraph tenant / namespaces & policy
|
||||
"""
|
||||
|
||||
import requests
|
||||
import time
|
||||
import argparse
|
||||
|
||||
default_pulsar_admin_url = "http://pulsar:8080"
|
||||
|
||||
def get_clusters(url):
    """Return the decoded JSON body of GET {url}/admin/v2/clusters.

    Raises RuntimeError when the response status is not 200.
    """

    print("Get clusters...", flush=True)

    response = requests.get(f"{url}/admin/v2/clusters")

    if response.status_code == 200:
        return response.json()

    raise RuntimeError("Could not fetch clusters")
|
||||
|
||||
def ensure_tenant(url, tenant, clusters):
    """Create the tenant through the admin API unless it already exists.

    A 200 response to a GET on the tenant is taken to mean it exists.
    Otherwise the tenant is PUT with no admin roles and clusters as its
    allowed clusters; any status other than 204 raises RuntimeError after
    the response text has been printed.
    """

    endpoint = f"{url}/admin/v2/tenants/{tenant}"

    if requests.get(endpoint).status_code == 200:
        print(f"Tenant {tenant} already exists.", flush=True)
        return

    payload = {
        "adminRoles": [],
        "allowedClusters": clusters,
    }

    created = requests.put(endpoint, json=payload)

    if created.status_code != 204:
        print(created.text, flush=True)
        raise RuntimeError("Tenant creation failed.")

    print(f"Tenant {tenant} created.", flush=True)
|
||||
|
||||
def ensure_namespace(url, tenant, namespace, config, timeout=10):
    """
    Create a Pulsar namespace under a tenant unless it already exists.

    url: base URL of the Pulsar admin service.
    tenant: tenant that owns the namespace.
    namespace: namespace name within the tenant.
    config: JSON-serialisable body sent with the creation request.
    timeout: seconds to wait for each HTTP request before giving up.

    Returns None.  Raises RuntimeError when the creation request does not
    answer 204.  Exceptions raised by requests propagate unchanged.
    """

    namespace_url = f"{url}/admin/v2/namespaces/{tenant}/{namespace}"

    # A 200 on the lookup means there is nothing to do.
    resp = requests.get(namespace_url, timeout=timeout)

    if resp.status_code == 200:
        print(f"Namespace {tenant}/{namespace} already exists.", flush=True)
        return

    resp = requests.put(
        namespace_url,
        json=config,
        timeout=timeout,
    )

    if resp.status_code != 204:
        # Surface both status and body so the failure can be diagnosed.
        print(resp.status_code, flush=True)
        print(resp.text, flush=True)
        raise RuntimeError(f"Namespace {tenant}/{namespace} creation failed.")

    print(f"Namespace {tenant}/{namespace} created.", flush=True)
|
||||
|
||||
def init(url, tenant="tg"):
    """
    Set up the tenant and its namespaces on a Pulsar admin service.

    url: base URL of the Pulsar admin service.
    tenant: tenant name to create (default "tg").

    Fetches the cluster list, ensures the tenant exists with those
    clusters allowed, then ensures the "flow", "request" and "response"
    namespaces exist, in that order.
    """

    allowed_clusters = get_clusters(url)
    ensure_tenant(url, tenant, allowed_clusters)

    # Namespace name -> creation body.  Only "response" carries a
    # retention policy.
    # NOTE(review): -1 presumably means "no size limit" -- confirm
    # against the Pulsar retention documentation.
    namespace_configs = (
        ("flow", {}),
        ("request", {}),
        ("response", {
            "retention_policies": {
                "retentionSizeInMB": -1,
                "retentionTimeInMinutes": 3,
            }
        }),
    )

    for name, body in namespace_configs:
        ensure_namespace(url, tenant, name, body)
|
||||
|
||||
def main():
    """
    Command-line entry point: parse arguments, then initialise Pulsar,
    retrying every two seconds until initialisation succeeds.
    """

    arg_parser = argparse.ArgumentParser(
        prog='tg-init-pulsar',
        description=__doc__,
    )
    arg_parser.add_argument(
        '-p', '--pulsar-admin-url',
        default=default_pulsar_admin_url,
        help=f'Pulsar admin URL (default: {default_pulsar_admin_url})',
    )
    admin_url = arg_parser.parse_args().pulsar_admin_url

    # Retry forever: any failure is reported, then the whole
    # initialisation is attempted again after a short pause.
    while True:

        try:
            print(flush=True)
            print(f"Initialising with Pulsar {admin_url}...", flush=True)
            init(admin_url, "tg")
            print("Initialisation complete.", flush=True)
            return
        except Exception as err:
            print("Exception:", err, flush=True)

        print("Sleeping...", flush=True)
        time.sleep(2)
        print("Will retry...", flush=True)
|
||||
|
||||
# Script entry: run the initialisation loop as soon as the file executes.
main()
|
||||
|
||||
24
trustgraph-core/scripts/tg-processor-state
Executable file
24
trustgraph-core/scripts/tg-processor-state
Executable file
|
|
@ -0,0 +1,24 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
import requests
|
||||
import tabulate
|
||||
|
||||
# Prometheus instant query, URL-encoded:
#   processor_state{processor_state="running"}
url = 'http://localhost:9090/api/v1/query?query=processor_state%7Bprocessor_state%3D%22running%22%7D'

# Bound the wait and fail with a clear HTTP error instead of a KeyError
# further down when the server answers with a non-success status.
resp = requests.get(url, timeout=10)
resp.raise_for_status()

obj = resp.json()

# One row per returned series: the job label and a running/down flag.
# Sample values arrive as strings and are not guaranteed to be integer
# literals (e.g. "NaN"), so parse them as floats.
tbl = [
    [
        m["metric"]["job"],
        "running" if float(m["value"][1]) > 0 else "down"
    ]
    for m in obj["data"]["result"]
]

print(tabulate.tabulate(
    tbl, tablefmt="pretty", headers=["processor", "state"],
    stralign="left"
))
|
||||
|
||||
6
trustgraph-core/scripts/triples-dump-parquet
Executable file
6
trustgraph-core/scripts/triples-dump-parquet
Executable file
|
|
@ -0,0 +1,6 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
from trustgraph.core.dump.triples.parquet import run
|
||||
|
||||
# Thin launcher: call the `run` entry point imported from
# trustgraph.core.dump.triples.parquet.
run()
|
||||
|
||||
6
trustgraph-core/scripts/triples-query-cassandra
Executable file
6
trustgraph-core/scripts/triples-query-cassandra
Executable file
|
|
@ -0,0 +1,6 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
from trustgraph.core.query.triples.cassandra import run
|
||||
|
||||
# Thin launcher: call the `run` entry point imported from
# trustgraph.core.query.triples.cassandra.
run()
|
||||
|
||||
6
trustgraph-core/scripts/triples-query-neo4j
Executable file
6
trustgraph-core/scripts/triples-query-neo4j
Executable file
|
|
@ -0,0 +1,6 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
from trustgraph.core.query.triples.neo4j import run
|
||||
|
||||
# Thin launcher: call the `run` entry point imported from
# trustgraph.core.query.triples.neo4j.
run()
|
||||
|
||||
6
trustgraph-core/scripts/triples-write-cassandra
Executable file
6
trustgraph-core/scripts/triples-write-cassandra
Executable file
|
|
@ -0,0 +1,6 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
from trustgraph.core.storage.triples.cassandra import run
|
||||
|
||||
# Thin launcher: call the `run` entry point imported from
# trustgraph.core.storage.triples.cassandra.
run()
|
||||
|
||||
6
trustgraph-core/scripts/triples-write-neo4j
Executable file
6
trustgraph-core/scripts/triples-write-neo4j
Executable file
|
|
@ -0,0 +1,6 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
from trustgraph.core.storage.triples.neo4j import run
|
||||
|
||||
# Thin launcher: call the `run` entry point imported from
# trustgraph.core.storage.triples.neo4j.
run()
|
||||
|
||||
Loading…
Add table
Add a link
Reference in a new issue