Trustgraph, first drop of code

This commit is contained in:
Cyber MacGeddon 2024-07-10 17:04:24 +01:00
commit 299332dd4e
120 changed files with 12493 additions and 0 deletions

6
scripts/chunker-recursive Executable file
View file

@ -0,0 +1,6 @@
#!/usr/bin/env python3
"""Executable wrapper: calls run() from trustgraph.chunker.recursive."""

from trustgraph.chunker.recursive import run as main

main()

6
scripts/embeddings-hf Executable file
View file

@ -0,0 +1,6 @@
#!/usr/bin/env python3
"""Executable wrapper: calls run() from trustgraph.embeddings.hf."""

from trustgraph.embeddings.hf import run as main

main()

6
scripts/embeddings-vectorize Executable file
View file

@ -0,0 +1,6 @@
#!/usr/bin/env python3
"""Executable wrapper: calls run() from trustgraph.embeddings.vectorize."""

from trustgraph.embeddings.vectorize import run as main

main()

6
scripts/graph-rag Executable file
View file

@ -0,0 +1,6 @@
#!/usr/bin/env python3
"""Executable wrapper: calls run() from trustgraph.rag.graph."""

from trustgraph.rag.graph import run as main

main()

10
scripts/graph-show Executable file
View file

@ -0,0 +1,10 @@
#!/usr/bin/env python3
"""Print every row returned by TrustGraph.get_all(), one row per line.

Each row is unpacked into three values which are printed separated by
spaces (presumably subject, predicate and object -- verify against
TrustGraph.get_all).
"""

from trustgraph.trustgraph import TrustGraph

graph = TrustGraph()

# The very large limit is passed through unchanged to get_all().
for subj, pred, obj in graph.get_all(limit=100_000_000):
    print(subj, pred, obj)

37
scripts/graph-to-turtle Executable file
View file

@ -0,0 +1,37 @@
#!/usr/bin/env python3
"""Dump the rows held by TrustGraph as a Turtle document.

Rows from TrustGraph.get_all() are loaded into an rdflib graph, which is
then serialized as Turtle.  The result is written both to the file
``output.ttl`` (relative to the current working directory) and to
standard output.
"""

from trustgraph.trustgraph import TrustGraph
import rdflib
import sys
import io

# Object values starting with one of these prefixes become URI references;
# every other object value becomes a literal.
URL_PREFIXES = ("https://", "http://")

t = TrustGraph()
g = rdflib.Graph()

rows = t.get_all(limit=100_000_000)

for s, p, o in rows:

    # NOTE(review): subjects and predicates are always turned into URI
    # references and, unlike objects, are not checked for spaces --
    # confirm that they can never contain any.
    sv = rdflib.term.URIRef(s)
    pv = rdflib.term.URIRef(p)

    if o.startswith(URL_PREFIXES):

        # Skip malformed URLs with spaces in
        if " " in o:
            continue

        ov = rdflib.term.URIRef(o)

    else:
        ov = rdflib.term.Literal(o)

    g.add((sv, pv, ov))

# Serialize once and reuse the encoded bytes for both outputs instead of
# running the serializer a second time.
buf = io.BytesIO()
g.serialize(destination=buf, format="turtle")
turtle_bytes = buf.getvalue()

with open("output.ttl", "wb") as out:
    out.write(turtle_bytes)

sys.stdout.write(turtle_bytes.decode("utf-8"))

6
scripts/graph-write-cassandra Executable file
View file

@ -0,0 +1,6 @@
#!/usr/bin/env python3
"""Executable wrapper: calls run() from trustgraph.graph.cassandra_write."""

from trustgraph.graph.cassandra_write import run as main

main()

11
scripts/init-pulsar-manager Executable file
View file

@ -0,0 +1,11 @@
#!/usr/bin/env bash
#
# Send the "admin" account details to the Pulsar Manager users/superuser
# endpoint on localhost:7750.
#
# A CSRF token is fetched first and then supplied on the PUT request both as
# the X-XSRF-TOKEN header and as the XSRF-TOKEN cookie.
#
# NOTE(review): the account name and password are hard-coded below; this
# looks intended for local set-up only -- confirm before using elsewhere.

# Stop on the first failing command, on unset variables and on failures
# inside pipelines, so a failed token fetch never reaches the PUT.
set -euo pipefail

BASE_URL="http://localhost:7750/pulsar-manager"

# --fail turns an HTTP error response into a non-zero exit status instead of
# capturing the error page as if it were a token.
CSRF_TOKEN=$(curl --fail --silent --show-error "${BASE_URL}/csrf-token")

curl \
    -H "X-XSRF-TOKEN: ${CSRF_TOKEN}" \
    -H "Cookie: XSRF-TOKEN=${CSRF_TOKEN};" \
    -H 'Content-Type: application/json' \
    -X PUT \
    "${BASE_URL}/users/superuser" \
    -d '{"name": "admin", "password": "apachepulsar", "description": "test", "email": "username@test.org"}'

6
scripts/kg-extract-definitions Executable file
View file

@ -0,0 +1,6 @@
#!/usr/bin/env python3
"""Executable wrapper: calls run() from trustgraph.kg.extract_definitions."""

from trustgraph.kg.extract_definitions import run as main

main()

6
scripts/kg-extract-relationships Executable file
View file

@ -0,0 +1,6 @@
#!/usr/bin/env python3
"""Executable wrapper: calls run() from trustgraph.kg.extract_relationships."""

from trustgraph.kg.extract_relationships import run as main

main()

6
scripts/llm-azure-text Executable file
View file

@ -0,0 +1,6 @@
#!/usr/bin/env python3
"""Executable wrapper: calls run() from trustgraph.llm.azure_text."""

from trustgraph.llm.azure_text import run as main

main()

6
scripts/llm-claude-text Executable file
View file

@ -0,0 +1,6 @@
#!/usr/bin/env python3
"""Executable wrapper: calls run() from trustgraph.llm.claude_text."""

from trustgraph.llm.claude_text import run as main

main()

6
scripts/llm-ollama-text Executable file
View file

@ -0,0 +1,6 @@
#!/usr/bin/env python3
"""Executable wrapper: calls run() from trustgraph.llm.ollama_text."""

from trustgraph.llm.ollama_text import run as main

main()

6
scripts/llm-vertexai-text Executable file
View file

@ -0,0 +1,6 @@
#!/usr/bin/env python3
"""Executable wrapper: calls run() from trustgraph.llm.vertexai_text."""

from trustgraph.llm.vertexai_text import run as main

main()

47
scripts/loader Executable file
View file

@ -0,0 +1,47 @@
#!/usr/bin/env python3
"""Publish source documents to the ``document-load`` Pulsar topic.

Every file named in FILES is read from the ``sources/`` directory,
base64-encoded, wrapped in a Document and sent through a Pulsar producer.
The value returned by each send is printed.
"""

import pulsar
from pulsar.schema import JsonSchema, Bytes, String
from trustgraph.schema import Document, Source
import base64
import hashlib

# Host name of the Pulsar broker; the client connects to port 6650 on it.
PULSAR_HOST = "localhost"

# File names, relative to the sources/ directory, to be loaded.
FILES = [
    "Challenger-Report-Vol1.pdf",
    # "columbia-accident-investigation-board-report-volume-1.pdf",
    # "Proposed_CIRCIA_Rules.pdf",
]


def build_document(path):
    """Return a Document holding the base64-encoded content of *path*.

    The document ID is the first 8 hex digits of the SHA-256 of the path
    string (not of the file content); the path is also used as both the
    source and the title.
    """

    # Context manager so the file handle is closed promptly.
    with open(path, "rb") as f:
        data = f.read()

    doc_id = hashlib.sha256(path.encode("utf-8")).hexdigest()[0:8]

    return Document(
        source=Source(
            source=path,
            title=path,
            id=doc_id,
        ),
        data=base64.b64encode(data),
    )


def main():
    """Send every file in FILES to the document-load topic."""

    client = pulsar.Client(f"pulsar://{PULSAR_HOST}:6650")

    # Close the client even when creating the producer, reading a file or
    # sending a message raises.
    try:

        producer = client.create_producer(
            topic='document-load',
            schema=JsonSchema(Document),
            chunking_enabled=True,
        )

        for name in FILES:
            path = "sources/" + name
            resp = producer.send(build_document(path))
            print(resp)

    finally:
        client.close()


if __name__ == "__main__":
    main()

6
scripts/pdf-decoder Executable file
View file

@ -0,0 +1,6 @@
#!/usr/bin/env python3
"""Executable wrapper: calls run() from trustgraph.decoder.pdf."""

from trustgraph.decoder.pdf import run as main

main()

16
scripts/query Executable file
View file

@ -0,0 +1,16 @@
#!/usr/bin/env python3
"""Run a GraphRag query built from the command line and print the response.

The command-line arguments are joined with single spaces to form the query.
When that yields an empty string, a built-in default query is used instead.
"""

import sys

from trustgraph.graph_rag import GraphRag

# Query used when nothing is supplied on the command line.
DEFAULT_QUERY = """This knowledge graph describes the Space Shuttle disaster.
Present 20 facts which are present in the knowledge graph."""

# An empty joined string is falsy, so ``or`` selects the default.
question = " ".join(sys.argv[1:]) or DEFAULT_QUERY

rag = GraphRag(verbose=True)

print(rag.query(question))

6
scripts/vector-write-milvus Executable file
View file

@ -0,0 +1,6 @@
#!/usr/bin/env python3
"""Executable wrapper: calls run() from trustgraph.vector.milvus_write."""

from trustgraph.vector.milvus_write import run as main

main()