Tidied scripts, added 2 query scripts (#53)

This commit is contained in:
cybermaggedon 2024-09-05 16:45:22 +01:00 committed by GitHub
parent 65d7f6d261
commit 6e4534e35c
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 167 additions and 29 deletions

View file

@ -268,7 +268,7 @@ curl -o sources/Challenger-Report-Vol1.pdf https://sma.nasa.gov/SignificantIncid
Load the file for knowledge extraction:
```
scripts/loader -f sources/Challenger-Report-Vol1.pdf
scripts/load-pdf -f sources/Challenger-Report-Vol1.pdf
```
The console output `File loaded.` indicates the PDF has been successfully loaded to the processing queues and extraction will begin.
@ -391,9 +391,9 @@ scripts/graph-show | wc -l
The Challenger report has a long introduction with quite a bit of administrative text commonly found in official reports. The first few hundred graph edges mostly capture this document formatting knowledge. To fully test the ability to extract complex knowledge, wait until at least `1000` graph edges have been extracted. The full extraction for this PDF will extract many thousands of graph edges.
### RAG Test Script
### RAG Test
```
tests/test-graph-rag
scripts/query-graph-rag -q 'Give me 20 facts about the space shuttle Challenger'
```
This script forms an LM prompt asking for 20 facts regarding the Challenger disaster. Depending on how many graph edges have been extracted, the response will be similar to:
@ -428,7 +428,8 @@ docker logs -f trustgraph-graph-rag-1
```
### More RAG Test Queries
If you want to try different RAG queries, modify the `query` in the [test script](https://github.com/trustgraph-ai/trustgraph/blob/master/tests/test-graph-rag).
If you want to try different RAG queries, modify the parameter to the `-q`
option.
### Shutting Down TrustGraph

View file

@ -1,37 +1,74 @@
#!/usr/bin/env python3
from trustgraph.trustgraph import TrustGraph
"""
Connects to the graph query service and dumps all graph edges.
"""
import argparse
import os
from trustgraph.clients.triples_query_client import TriplesQueryClient
import rdflib
import sys
import io
import sys
# NOTE(review): this span appears to interleave the removed (TrustGraph-based)
# and the added (TriplesQueryClient-based) versions of the script line by
# line, with no +/- markers and no indentation -- confirm against the
# repository before changing any code here.
t = TrustGraph()
# Default Pulsar endpoint; the PULSAR_HOST environment variable overrides it.
default_pulsar_host = os.getenv("PULSAR_HOST", 'pulsar://pulsar:6650')
g = rdflib.Graph()
# show_graph(pulsar): fetch triples from the query service, build an rdflib
# graph from them and write the graph out in Turtle format.
def show_graph(pulsar):
rows = t.get_all(limit=100_000_000)
for s, p, o in rows:
# NOTE(review): the host is hard-coded here, so the `pulsar` argument of
# show_graph() never reaches the client -- presumably this should be
# pulsar_host=pulsar; confirm.
tq = TriplesQueryClient(pulsar_host="pulsar://localhost:6650")
# print(s, p, o)
sv = rdflib.term.URIRef(s)
pv = rdflib.term.URIRef(p)
# Request triples with subject, predicate and object all None (presumably
# a wildcard match -- confirm), capped at 10 million rows.
rows = tq.request(None, None, None, limit=10_000_000)
if o.startswith("https://") or o.startswith("http://"):
g = rdflib.Graph()
# Skip malformed URLs with spaces in
if " " in o:
continue
for row in rows:
ov = rdflib.term.URIRef(o)
else:
ov = rdflib.term.Literal(o)
# Subject and predicate are always added as URI references.
sv = rdflib.term.URIRef(row.s.value)
pv = rdflib.term.URIRef(row.p.value)
g.add((sv, pv, ov))
# The object becomes a URIRef when flagged as a URI, otherwise a Literal.
if row.o.is_uri:
g.serialize(destination="output.ttl", format="turtle")
# Skip malformed URLs with spaces in
if " " in row.o.value:
continue
buf = io.BytesIO()
ov = rdflib.term.URIRef(row.o.value)
else:
ov = rdflib.term.Literal(row.o.value)
g.serialize(destination=buf, format="turtle")
g.add((sv, pv, ov))
# NOTE(review): the graph is written to output.ttl in the current directory
# in addition to being written to stdout below.
g.serialize(destination="output.ttl", format="turtle")
# Serialize into an in-memory buffer, then decode as UTF-8 for stdout.
buf = io.BytesIO()
g.serialize(destination=buf, format="turtle")
sys.stdout.write(buf.getvalue().decode("utf-8"))
def main():
    """Parse the command line and run show_graph() against the chosen host.

    Options:
        -p / --pulsar-host: Pulsar host URL; defaults to
            ``default_pulsar_host``.

    Any exception raised by show_graph() is reported on stderr and the
    process exits with status 1.
    """
    parser = argparse.ArgumentParser(
        prog='graph-show',
        description=__doc__,
    )

    parser.add_argument(
        '-p', '--pulsar-host',
        default=default_pulsar_host,
        help=f'Pulsar host (default: {default_pulsar_host})',
    )

    args = parser.parse_args()

    try:
        show_graph(args.pulsar_host)
    except Exception as e:
        # stdout carries the serialized graph (and is typically piped into
        # other tools), so diagnostics go to stderr and the failure is
        # signalled through the exit status.
        print("Exception:", e, file=sys.stderr, flush=True)
        sys.exit(1)


if __name__ == "__main__":
    main()
sys.stdout.write(buf.getvalue().decode("utf-8"))

49
scripts/query-document-rag Executable file
View file

@ -0,0 +1,49 @@
#!/usr/bin/env python3
"""
Uses the Document RAG service to answer a query
"""
import argparse
import os
from trustgraph.clients.document_rag_client import DocumentRagClient
# Default Pulsar endpoint; the PULSAR_HOST environment variable overrides it.
default_pulsar_host = os.getenv("PULSAR_HOST", 'pulsar://localhost:6650')
def query(pulsar, query):
    """Send `query` to the Document RAG service and print the response.

    Args:
        pulsar: Pulsar host URL handed to DocumentRagClient.
        query: The query text to submit.
    """
    client = DocumentRagClient(pulsar_host=pulsar)
    answer = client.request(query)
    print(answer)
def main():
    """Parse the command line and submit the query to Document RAG.

    Options:
        -p / --pulsar-host: Pulsar host URL; defaults to
            ``default_pulsar_host``.
        -q / --query: The query to execute (required).

    Any exception raised by query() is reported on stderr and the process
    exits with status 1.
    """
    # Local import: the script's top-level imports do not include sys.
    import sys

    parser = argparse.ArgumentParser(
        # Must match this script's name so usage/help output is correct.
        prog='query-document-rag',
        description=__doc__,
    )

    parser.add_argument(
        '-p', '--pulsar-host',
        default=default_pulsar_host,
        help=f'Pulsar host (default: {default_pulsar_host})',
    )

    parser.add_argument(
        '-q', '--query',
        required=True,
        help='Query to execute',
    )

    args = parser.parse_args()

    try:
        query(args.pulsar_host, args.query)
    except Exception as e:
        # Keep stdout for the answer only; report failures on stderr and
        # signal them through the exit status.
        print("Exception:", e, file=sys.stderr, flush=True)
        sys.exit(1)


if __name__ == "__main__":
    main()

49
scripts/query-graph-rag Executable file
View file

@ -0,0 +1,49 @@
#!/usr/bin/env python3
"""
Uses the GraphRAG service to answer a query
"""
import argparse
import os
from trustgraph.clients.graph_rag_client import GraphRagClient
# Default Pulsar endpoint; the PULSAR_HOST environment variable overrides it.
default_pulsar_host = os.getenv("PULSAR_HOST", 'pulsar://localhost:6650')
def query(pulsar, query):
    """Send `query` to the GraphRAG service and print the response.

    Args:
        pulsar: Pulsar host URL handed to GraphRagClient.
        query: The query text to submit.
    """
    client = GraphRagClient(pulsar_host=pulsar)
    answer = client.request(query)
    print(answer)
def main():
    """Parse the command line and submit the query to GraphRAG.

    Options:
        -p / --pulsar-host: Pulsar host URL; defaults to
            ``default_pulsar_host``.
        -q / --query: The query to execute (required).

    Any exception raised by query() is reported on stderr and the process
    exits with status 1.
    """
    # Local import: the script's top-level imports do not include sys.
    import sys

    parser = argparse.ArgumentParser(
        # Must match this script's name so usage/help output is correct.
        prog='query-graph-rag',
        description=__doc__,
    )

    parser.add_argument(
        '-p', '--pulsar-host',
        default=default_pulsar_host,
        help=f'Pulsar host (default: {default_pulsar_host})',
    )

    parser.add_argument(
        '-q', '--query',
        required=True,
        help='Query to execute',
    )

    args = parser.parse_args()

    try:
        query(args.pulsar_host, args.query)
    except Exception as e:
        # Keep stdout for the answer only; report failures on stderr and
        # signal them through the exit status.
        print("Exception:", e, file=sys.stderr, flush=True)
        sys.exit(1)


if __name__ == "__main__":
    main()

View file

@ -57,8 +57,8 @@ setuptools.setup(
"scripts/chunker-token",
"scripts/concat-parquet",
"scripts/de-query-milvus",
"scripts/de-write-milvus",
"scripts/de-query-qdrant",
"scripts/de-write-milvus",
"scripts/de-write-qdrant",
"scripts/document-rag",
"scripts/dump-parquet",
@ -67,8 +67,8 @@ setuptools.setup(
"scripts/embeddings-vectorize",
"scripts/ge-dump-parquet",
"scripts/ge-query-milvus",
"scripts/ge-write-milvus",
"scripts/ge-query-qdrant",
"scripts/ge-write-milvus",
"scripts/ge-write-qdrant",
"scripts/graph-rag",
"scripts/graph-show",
@ -77,14 +77,16 @@ setuptools.setup(
"scripts/kg-extract-definitions",
"scripts/kg-extract-relationships",
"scripts/load-graph-embeddings",
"scripts/load-pdf",
"scripts/load-text",
"scripts/load-triples",
"scripts/loader",
"scripts/object-extract-row",
"scripts/oe-write-milvus",
"scripts/pdf-decoder",
"scripts/prompt-generic",
"scripts/prompt-template",
"scripts/query",
"scripts/query-document-rag",
"scripts/query-graph-rag",
"scripts/rows-write-cassandra",
"scripts/run-processing",
"scripts/text-completion-azure",