mirror of
https://github.com/trustgraph-ai/trustgraph.git
synced 2026-04-25 00:16:23 +02:00
Tidied scripts, added 2 query scripts (#53)
This commit is contained in:
parent
65d7f6d261
commit
6e4534e35c
7 changed files with 167 additions and 29 deletions
|
|
@ -268,7 +268,7 @@ curl -o sources/Challenger-Report-Vol1.pdf https://sma.nasa.gov/SignificantIncid
|
|||
Load the file for knowledge extraction:
|
||||
|
||||
```
|
||||
scripts/loader -f sources/Challenger-Report-Vol1.pdf
|
||||
scripts/load-pdf -f sources/Challenger-Report-Vol1.pdf
|
||||
```
|
||||
|
||||
The console output `File loaded.` indicates the PDF has been successfully loaded to the processing queues and extraction will begin.
|
||||
|
|
@ -391,9 +391,9 @@ scripts/graph-show | wc -l
|
|||
|
||||
The Challenger report has a long introduction with quite a bit of administrative text commonly found in official reports. The first few hundred graph edges mostly capture this document formatting knowledge. To fully test the ability to extract complex knowledge, wait until at least `1000` graph edges have been extracted. The full extraction for this PDF will extract many thousands of graph edges.
|
||||
|
||||
### RAG Test Script
|
||||
### RAG Test
|
||||
```
|
||||
tests/test-graph-rag
|
||||
scripts/query-graph-rag -q 'Give me 20 facts about the space shuttle Challenger'
|
||||
```
|
||||
This script forms an LM prompt asking for 20 facts regarding the Challenger disaster. Depending on how many graph edges have been extracted, the response will be similar to:
|
||||
|
||||
|
|
@ -428,7 +428,8 @@ docker logs -f trustgraph-graph-rag-1
|
|||
```
|
||||
### More RAG Test Queries
|
||||
|
||||
If you want to try different RAG queries, modify the `query` in the [test script](https://github.com/trustgraph-ai/trustgraph/blob/master/tests/test-graph-rag).
|
||||
If you want to try different RAG queries, modify the parameter to the `-q`
|
||||
option.
|
||||
|
||||
### Shutting Down TrustGraph
|
||||
|
||||
|
|
|
|||
|
|
@ -1,37 +1,74 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
from trustgraph.trustgraph import TrustGraph
|
||||
"""
|
||||
Connects to the graph query service and dumps all graph edges.
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import os
|
||||
from trustgraph.clients.triples_query_client import TriplesQueryClient
|
||||
import rdflib
|
||||
import sys
|
||||
import io
|
||||
import sys
|
||||
|
||||
t = TrustGraph()
|
||||
default_pulsar_host = os.getenv("PULSAR_HOST", 'pulsar://pulsar:6650')
|
||||
|
||||
g = rdflib.Graph()
|
||||
def show_graph(pulsar):
|
||||
|
||||
rows = t.get_all(limit=100_000_000)
|
||||
for s, p, o in rows:
|
||||
tq = TriplesQueryClient(pulsar_host="pulsar://localhost:6650")
|
||||
|
||||
# print(s, p, o)
|
||||
sv = rdflib.term.URIRef(s)
|
||||
pv = rdflib.term.URIRef(p)
|
||||
rows = tq.request(None, None, None, limit=10_000_000)
|
||||
|
||||
if o.startswith("https://") or o.startswith("http://"):
|
||||
g = rdflib.Graph()
|
||||
|
||||
# Skip malformed URLs with spaces in
|
||||
if " " in o:
|
||||
continue
|
||||
for row in rows:
|
||||
|
||||
ov = rdflib.term.URIRef(o)
|
||||
else:
|
||||
ov = rdflib.term.Literal(o)
|
||||
sv = rdflib.term.URIRef(row.s.value)
|
||||
pv = rdflib.term.URIRef(row.p.value)
|
||||
|
||||
g.add((sv, pv, ov))
|
||||
if row.o.is_uri:
|
||||
|
||||
g.serialize(destination="output.ttl", format="turtle")
|
||||
# Skip malformed URLs with spaces in
|
||||
if " " in row.o.value:
|
||||
continue
|
||||
|
||||
buf = io.BytesIO()
|
||||
ov = rdflib.term.URIRef(row.o.value)
|
||||
else:
|
||||
ov = rdflib.term.Literal(row.o.value)
|
||||
|
||||
g.serialize(destination=buf, format="turtle")
|
||||
g.add((sv, pv, ov))
|
||||
|
||||
g.serialize(destination="output.ttl", format="turtle")
|
||||
|
||||
buf = io.BytesIO()
|
||||
|
||||
g.serialize(destination=buf, format="turtle")
|
||||
|
||||
sys.stdout.write(buf.getvalue().decode("utf-8"))
|
||||
|
||||
|
||||
def main():
    """Command-line entry point for graph-show.

    Reads the Pulsar host from ``-p/--pulsar-host`` (falling back to
    ``default_pulsar_host``) and passes it to ``show_graph``.  Any
    exception raised by ``show_graph`` is printed rather than propagated.
    """

    cli = argparse.ArgumentParser(prog='graph-show', description=__doc__)

    cli.add_argument(
        '-p', '--pulsar-host',
        help=f'Pulsar host (default: {default_pulsar_host})',
        default=default_pulsar_host,
    )

    options = cli.parse_args()

    try:
        show_graph(options.pulsar_host)
    except Exception as err:
        # Report the failure as a single line instead of a traceback.
        print("Exception:", err, flush=True)
|
||||
|
||||
main()
|
||||
|
||||
sys.stdout.write(buf.getvalue().decode("utf-8"))
|
||||
|
|
|
|||
49
scripts/query-document-rag
Executable file
49
scripts/query-document-rag
Executable file
|
|
@ -0,0 +1,49 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
"""
|
||||
Uses the Document RAG service to answer a query
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import os
|
||||
from trustgraph.clients.document_rag_client import DocumentRagClient
|
||||
|
||||
default_pulsar_host = os.getenv("PULSAR_HOST", 'pulsar://localhost:6650')
|
||||
|
||||
def query(pulsar, query):
    """Send ``query`` to the Document RAG service and print the response.

    pulsar: Pulsar host URL handed to ``DocumentRagClient``.
    query: the question text passed to the client's ``request`` call.
    """

    client = DocumentRagClient(pulsar_host=pulsar)
    print(client.request(query))
|
||||
|
||||
def main():
    """Command-line entry point for query-document-rag.

    Options:
      -p/--pulsar-host  Pulsar host; defaults to ``default_pulsar_host``.
      -q/--query        Query to execute (required).

    The parsed values are passed to ``query``.  Any exception raised by
    ``query`` is printed rather than propagated.
    """

    parser = argparse.ArgumentParser(
        # Was 'graph-show' (copy-paste); usage/help must name this script.
        prog='query-document-rag',
        description=__doc__,
    )

    parser.add_argument(
        '-p', '--pulsar-host',
        default=default_pulsar_host,
        help=f'Pulsar host (default: {default_pulsar_host})',
    )

    parser.add_argument(
        '-q', '--query',
        required=True,
        help='Query to execute',
    )

    args = parser.parse_args()

    try:

        query(args.pulsar_host, args.query)

    except Exception as e:

        # Report the failure as a single line instead of a traceback.
        print("Exception:", e, flush=True)
|
||||
|
||||
main()
|
||||
|
||||
49
scripts/query-graph-rag
Executable file
49
scripts/query-graph-rag
Executable file
|
|
@ -0,0 +1,49 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
"""
|
||||
Uses the GraphRAG service to answer a query
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import os
|
||||
from trustgraph.clients.graph_rag_client import GraphRagClient
|
||||
|
||||
default_pulsar_host = os.getenv("PULSAR_HOST", 'pulsar://localhost:6650')
|
||||
|
||||
def query(pulsar, query):
    """Send ``query`` to the GraphRAG service and print the response.

    pulsar: Pulsar host URL handed to ``GraphRagClient``.
    query: the question text passed to the client's ``request`` call.
    """

    client = GraphRagClient(pulsar_host=pulsar)
    print(client.request(query))
|
||||
|
||||
def main():
    """Command-line entry point for query-graph-rag.

    Options:
      -p/--pulsar-host  Pulsar host; defaults to ``default_pulsar_host``.
      -q/--query        Query to execute (required).

    The parsed values are passed to ``query``.  Any exception raised by
    ``query`` is printed rather than propagated.
    """

    parser = argparse.ArgumentParser(
        # Was 'graph-show' (copy-paste); usage/help must name this script.
        prog='query-graph-rag',
        description=__doc__,
    )

    parser.add_argument(
        '-p', '--pulsar-host',
        default=default_pulsar_host,
        help=f'Pulsar host (default: {default_pulsar_host})',
    )

    parser.add_argument(
        '-q', '--query',
        required=True,
        help='Query to execute',
    )

    args = parser.parse_args()

    try:

        query(args.pulsar_host, args.query)

    except Exception as e:

        # Report the failure as a single line instead of a traceback.
        print("Exception:", e, flush=True)
|
||||
|
||||
main()
|
||||
|
||||
10
setup.py
10
setup.py
|
|
@ -57,8 +57,8 @@ setuptools.setup(
|
|||
"scripts/chunker-token",
|
||||
"scripts/concat-parquet",
|
||||
"scripts/de-query-milvus",
|
||||
"scripts/de-write-milvus",
|
||||
"scripts/de-query-qdrant",
|
||||
"scripts/de-write-milvus",
|
||||
"scripts/de-write-qdrant",
|
||||
"scripts/document-rag",
|
||||
"scripts/dump-parquet",
|
||||
|
|
@ -67,8 +67,8 @@ setuptools.setup(
|
|||
"scripts/embeddings-vectorize",
|
||||
"scripts/ge-dump-parquet",
|
||||
"scripts/ge-query-milvus",
|
||||
"scripts/ge-write-milvus",
|
||||
"scripts/ge-query-qdrant",
|
||||
"scripts/ge-write-milvus",
|
||||
"scripts/ge-write-qdrant",
|
||||
"scripts/graph-rag",
|
||||
"scripts/graph-show",
|
||||
|
|
@ -77,14 +77,16 @@ setuptools.setup(
|
|||
"scripts/kg-extract-definitions",
|
||||
"scripts/kg-extract-relationships",
|
||||
"scripts/load-graph-embeddings",
|
||||
"scripts/load-pdf",
|
||||
"scripts/load-text",
|
||||
"scripts/load-triples",
|
||||
"scripts/loader",
|
||||
"scripts/object-extract-row",
|
||||
"scripts/oe-write-milvus",
|
||||
"scripts/pdf-decoder",
|
||||
"scripts/prompt-generic",
|
||||
"scripts/prompt-template",
|
||||
"scripts/query",
|
||||
"scripts/query-document-rag",
|
||||
"scripts/query-graph-rag",
|
||||
"scripts/rows-write-cassandra",
|
||||
"scripts/run-processing",
|
||||
"scripts/text-completion-azure",
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue