Sample docs loader (#365)

This commit is contained in:
cybermaggedon 2025-05-06 13:43:17 +01:00 committed by GitHub
parent 9e4eb634a4
commit 54e475fa3a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
34 changed files with 818 additions and 56 deletions

View file

@ -1,7 +1,7 @@
#!/usr/bin/env python3
"""
Loads a PDF document into the library
Loads a document into the library
"""
import hashlib

View file

@ -1,6 +1,7 @@
#!/usr/bin/env python3
"""
Deletes a flow class
"""
import argparse

View file

@ -1,7 +1,7 @@
#!/usr/bin/env python3
"""
Dumps out the current configuration
Outputs a flow class definition in JSON format.
"""
import argparse

View file

@ -162,7 +162,7 @@ def init(pulsar_admin_url, pulsar_host, pulsar_api_key, config, tenant):
def main():
parser = argparse.ArgumentParser(
prog='tg-init-pulsar',
prog='tg-init-trustgraph',
description=__doc__,
)

View file

@ -1,7 +1,7 @@
#!/usr/bin/env python3
"""
Uses the GraphRAG service to answer a question
Uses the agent service to answer a question
"""
import argparse

View file

@ -1,7 +1,7 @@
#!/usr/bin/env python3
"""
Uses the GraphRAG service to answer a question
Uses the DocumentRAG service to answer a question
"""
import argparse

View file

@ -60,12 +60,6 @@ def main():
help='''Prompt template terms of the form variable=value, can be
specified multiple times''',
)
# parser.add_argument(
# '--pulsar-api-key',
# default=default_pulsar_api_key,
# help=f'Pulsar API key',
# )
args = parser.parse_args()

View file

@ -1,8 +1,9 @@
#!/usr/bin/env python3
"""This utility takes a knowledge core and loads it into a running TrustGraph
through the API. The knowledge core should be in msgpack format, which is the
default format produce by tg-save-kg-core.
"""
This utility takes a document embeddings core and loads it into a running
TrustGraph through the API. The document embeddings core should be in msgpack
format, which is the default format produce by tg-save-doc-embeds.
"""
import aiohttp
@ -174,7 +175,7 @@ async def run(running, **args):
async def main(running):
parser = argparse.ArgumentParser(
prog='tg-load-kg-core',
prog='tg-load-doc-embeds',
description=__doc__,
)

View file

@ -1,6 +1,7 @@
#!/usr/bin/env python3
"""This utility takes a knowledge core and loads it into a running TrustGraph
"""
This utility takes a knowledge core and loads it into a running TrustGraph
through the API. The knowledge core should be in msgpack format, which is the
default format produce by tg-save-kg-core.
"""

View file

@ -1,7 +1,10 @@
#!/usr/bin/env python3
"""
Loads a PDF document into TrustGraph processing.
Loads a PDF document into TrustGraph processing by directing to
the pdf-decoder queue.
Consider using tg-add-library-document to load
a document, followed by tg-start-library-processing to initiate processing.
"""
import hashlib

View file

@ -0,0 +1,736 @@
#!/usr/bin/env python3
"""
Loads a PDF document into the library
"""
import argparse
import os
import uuid
import datetime
import requests
from trustgraph.api import Api
from trustgraph.api.types import Uri, Literal, Triple
default_url = os.getenv("TRUSTGRAPH_URL", 'http://localhost:8088/')
default_user = 'trustgraph'
from requests.adapters import HTTPAdapter
from urllib3.response import HTTPResponse
class FileAdapter(HTTPAdapter):
def send(self, request, *args, **kwargs):
resp = HTTPResponse(body=open(request.url[7:], 'rb'), status=200, preload_content=False)
return self.build_response(request, resp)
session = requests.session()
session.mount('file://', FileAdapter())
documents = [
{
"id": "https://trustgraph.ai/doc/challenger-report-vol-1",
"title": "Report of the Presidential Commission on the Space Shuttle Challenger Accident, Volume 1",
"comments": "The findings of the Commission regarding the circumstances surrounding the Challenger accident are reported and recommendations for corrective action are outlined",
# "url": "https://ntrs.nasa.gov/api/citations/19860015255/downloads/19860015255.pdf",
"url": "file://19860015255.pdf",
"kind": "application/pdf",
"date": datetime.datetime.now().date(),
"tags": ["nasa", "safety-engineering", "space-shuttle"],
"metadata": [
Triple(
s = Uri("https://trustgraph.ai/doc/challenger-report-vol-1"),
p = Uri("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"),
o = Uri("https://schema.org/DigitalDocument")
),
Triple(
s = Uri("https://trustgraph.ai/doc/challenger-report-vol-1"),
p = Uri("http://www.w3.org/2000/01/rdf-schema#label"),
o = Literal("Report of the Presidential Commission on the Space Shuttle Challenger Accident, Volume 1")
),
Triple(
s = Uri("https://trustgraph.ai/doc/challenger-report-vol-1"),
p = Uri("https://schema.org/name"),
o = Literal("Report of the Presidential Commission on the Space Shuttle Challenger Accident, Volume 1")
),
Triple(
s = Uri("https://trustgraph.ai/doc/challenger-report-vol-1"),
p = Uri("https://schema.org/description"),
o = Literal("The findings of the Commission regarding the circumstances surrounding the Challenger accident are reported and recommendations for corrective action are outlined")
),
Triple(
s = Uri("https://trustgraph.ai/doc/challenger-report-vol-1"),
p = Uri("https://schema.org/copyrightNotice"),
o = Literal("Work of the US Gov. Public Use Permitted")
),
Triple(
s = Uri("https://trustgraph.ai/doc/challenger-report-vol-1"),
p = Uri("https://schema.org/copyrightHolder"),
o = Literal("US Gov.")
),
Triple(
s = Uri("https://trustgraph.ai/doc/challenger-report-vol-1"),
p = Uri("https://schema.org/copyrightYear"),
o = Literal("1986")
),
Triple(
s = Uri("https://trustgraph.ai/doc/challenger-report-vol-1"),
p = Uri("https://schema.org/keywords"),
o = Literal("nasa")
),
Triple(
s = Uri("https://trustgraph.ai/doc/challenger-report-vol-1"),
p = Uri("https://schema.org/keywords"),
o = Literal("space-shuttle")
),
Triple(
s = Uri("https://trustgraph.ai/doc/challenger-report-vol-1"),
p = Uri("https://schema.org/keywords"),
o = Literal("safety-engineering")
),
Triple(
s = Uri("https://trustgraph.ai/doc/challenger-report-vol-1"),
p = Uri("https://schema.org/keywords"),
o = Literal("challenger")
),
Triple(
s = Uri("https://trustgraph.ai/doc/challenger-report-vol-1"),
p = Uri("https://schema.org/keywords"),
o = Literal("space-transportation")
),
Triple(
s = Uri("https://trustgraph.ai/doc/challenger-report-vol-1"),
p = Uri("https://schema.org/publication"),
o = Uri("https://trustgraph.ai/pubev/d946c320-0432-48c8-a015-26b0af3cedae")
),
Triple(
s = Uri("https://trustgraph.ai/pubev/d946c320-0432-48c8-a015-26b0af3cedae"),
p = Uri("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"),
o = Uri("https://schema.org/PublicationEvent")
),
Triple(
s = Uri("https://trustgraph.ai/pubev/d946c320-0432-48c8-a015-26b0af3cedae"),
p = Uri("https://schema.org/description"),
o = Literal("The findings of the Commission regarding the circumstances surrounding the Challenger accident are reported and recommendations for corrective action are outlined")
),
Triple(
s = Uri("https://trustgraph.ai/pubev/d946c320-0432-48c8-a015-26b0af3cedae"),
p = Uri("https://schema.org/publishedBy"),
o = Uri("https://trustgraph.ai/org/nasa")
),
Triple(
s = Uri("https://trustgraph.ai/org/nasa"),
p = Uri("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"),
o = Uri("https://schema.org/Organization")
),
Triple(
s = Uri("https://trustgraph.ai/org/nasa"),
p = Uri("http://www.w3.org/2000/01/rdf-schema#label"),
o = Literal("NASA")
),
Triple(
s = Uri("https://trustgraph.ai/org/nasa"),
p = Uri("https://schema.org/name"),
o = Literal("NASA")
),
Triple(
s = Uri("https://trustgraph.ai/pubev/d946c320-0432-48c8-a015-26b0af3cedae"),
p = Uri("https://schema.org/startDate"),
o = Literal("1986-06-06")
),
Triple(
s = Uri("https://trustgraph.ai/pubev/d946c320-0432-48c8-a015-26b0af3cedae"),
p = Uri("https://schema.org/endDate"),
o = Literal("1986-06-06")
),
Triple(
s = Uri("https://trustgraph.ai/doc/challenger-report-vol-1"),
p = Uri("https://schema.org/url"),
o = Uri("https://ntrs.nasa.gov/api/citations/19860015255/downloads/19860015255.pdf")
)
]
},
{
"id": "https://trustgraph.ai/doc/icelandic-dictionary",
"title": "A Concise Dictionary of Old Icelandic",
"comments": "A Concise Dictionary of Old Icelandic, published in 1910, is a 551-page dictionary that offers a comprehensive overview of the Old Norse language, particularly Old Icelandic.",
"url": "https://css4.pub/2015/icelandic/dictionary.pdf",
"kind": "application/pdf",
"date": datetime.datetime.now().date(),
"tags": ["old-icelandic", "dictionary", "language", "grammar", "old-norse", "icelandic"],
"metadata": [
Triple(
s = Uri("https://trustgraph.ai/doc/icelandic-dictionary"),
p = Uri("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"),
o = Uri("https://schema.org/DigitalDocument")
),
Triple(
s = Uri("https://trustgraph.ai/doc/icelandic-dictionary"),
p = Uri("http://www.w3.org/2000/01/rdf-schema#label"),
o = Literal("A Concise Dictionary of Old Icelandic"),
),
Triple(
s = Uri("https://trustgraph.ai/doc/icelandic-dictionary"),
p = Uri("https://schema.org/name"),
o = Literal("A Concise Dictionary of Old Icelandic"),
),
Triple(
s = Uri("https://trustgraph.ai/doc/icelandic-dictionary"),
p = Uri("https://schema.org/description"),
o = Literal("A Concise Dictionary of Old Icelandic, published in 1910, is a 551-page dictionary that offers a comprehensive overview of the Old Norse language, particularly Old Icelandic."),
),
Triple(
s = Uri("https://trustgraph.ai/doc/icelandic-dictionary"),
p = Uri("https://schema.org/copyrightNotice"),
o = Literal("Copyright expired, public domain")
),
Triple(
s = Uri("https://trustgraph.ai/doc/icelandic-dictionary"),
p = Uri("https://schema.org/copyrightHolder"),
o = Literal("Geir Zoëga, Clarendon Press")
),
Triple(
s = Uri("https://trustgraph.ai/doc/icelandic-dictionary"),
p = Uri("https://schema.org/copyrightYear"),
o = Literal("1910")
),
Triple(
s = Uri("https://trustgraph.ai/doc/icelandic-dictionary"),
p = Uri("https://schema.org/keywords"),
o = Literal("icelandic")
),
Triple(
s = Uri("https://trustgraph.ai/doc/icelandic-dictionary"),
p = Uri("https://schema.org/keywords"),
o = Literal("old-norse")
),
Triple(
s = Uri("https://trustgraph.ai/doc/icelandic-dictionary"),
p = Uri("https://schema.org/keywords"),
o = Literal("dictionary")
),
Triple(
s = Uri("https://trustgraph.ai/doc/icelandic-dictionary"),
p = Uri("https://schema.org/keywords"),
o = Literal("grammar")
),
Triple(
s = Uri("https://trustgraph.ai/doc/icelandic-dictionary"),
p = Uri("https://schema.org/keywords"),
o = Literal("old-icelandic")
),
Triple(
s = Uri("https://trustgraph.ai/doc/icelandic-dictionary"),
p = Uri("https://schema.org/publication"),
o = Uri("https://trustgraph.ai/pubev/11a78156-3aea-4263-9f1b-0c63cbde69d7")
),
Triple(
s = Uri("https://trustgraph.ai/pubev/11a78156-3aea-4263-9f1b-0c63cbde69d7"),
p = Uri("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"),
o = Uri("https://schema.org/PublicationEvent")
),
Triple(
s = Uri("https://trustgraph.ai/pubev/11a78156-3aea-4263-9f1b-0c63cbde69d7"),
p = Uri("https://schema.org/description"),
o = Literal("Published by Clarendon Press in 1910"),
),
Triple(
s = Uri("https://trustgraph.ai/pubev/11a78156-3aea-4263-9f1b-0c63cbde69d7"),
p = Uri("https://schema.org/publishedBy"),
o = Uri("https://trustgraph.ai/org/clarendon-press")
),
Triple(
s = Uri("https://trustgraph.ai/org/clarendon-press"),
p = Uri("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"),
o = Uri("https://schema.org/Organization")
),
Triple(
s = Uri("https://trustgraph.ai/org/clarendon-press"),
p = Uri("http://www.w3.org/2000/01/rdf-schema#label"),
o = Literal("NASA")
),
Triple(
s = Uri("https://trustgraph.ai/org/clarendon-press"),
p = Uri("https://schema.org/name"),
o = Literal("Clarendon Press")
),
Triple(
s = Uri("https://trustgraph.ai/pubev/11a78156-3aea-4263-9f1b-0c63cbde69d7"),
p = Uri("https://schema.org/startDate"),
o = Literal("1910-01-01")
),
Triple(
s = Uri("https://trustgraph.ai/pubev/11a78156-3aea-4263-9f1b-0c63cbde69d7"),
p = Uri("https://schema.org/endDate"),
o = Literal("1910-01-01")
),
Triple(
s = Uri("https://trustgraph.ai/doc/icelandic-dictionary"),
p = Uri("https://schema.org/url"),
o = Uri("https://digital-research-books-beta.nypl.org/edition/10476341")
)
]
},
{
"id": "https://trustgraph.ai/doc/annual-threat-assessment-us-dni-march-2025",
"title": "Annual threat assessment of the U.S. intelligence community - March 2025",
"comments": "The report reflects the collective insights of the Intelligence Community (IC), which is committed to providing the nuanced, independent, and unvarnished intelligence that policymakers, warfighters, and domestic law enforcement personnel need to protect American lives and Americas interests anywhere in the world.",
"url": "https://www.intelligence.senate.gov/sites/default/files/2025%20Annual%20Threat%20Assessment%20of%20the%20U.S.%20Intelligence%20Community.pdf",
"kind": "application/pdf",
"date": datetime.datetime.now().date(),
"tags": ["adversary-cooperation", "cyberthreats", "supply-chain-vulnerabilities", "economic-competition", "national-security", "data-privacy"],
"metadata": [
Triple(
s = Uri("https://trustgraph.ai/doc/annual-threat-assessment-us-dni-march-2025"),
p = Uri("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"),
o = Uri("https://schema.org/DigitalDocument")
),
Triple(
s = Uri("https://trustgraph.ai/doc/annual-threat-assessment-us-dni-march-2025"),
p = Uri("http://www.w3.org/2000/01/rdf-schema#label"),
o = Literal("Annual threat assessment of the U.S. intelligence community - March 2025"),
),
Triple(
s = Uri("https://trustgraph.ai/doc/annual-threat-assessment-us-dni-march-2025"),
p = Uri("https://schema.org/name"),
o = Literal("Annual threat assessment of the U.S. intelligence community - March 2025"),
),
Triple(
s = Uri("https://trustgraph.ai/doc/annual-threat-assessment-us-dni-march-2025"),
p = Uri("https://schema.org/description"),
o = Literal("The report reflects the collective insights of the Intelligence Community (IC), which is committed to providing the nuanced, independent, and unvarnished intelligence that policymakers, warfighters, and domestic law enforcement personnel need to protect American lives and Americas interests anywhere in the world."),
),
Triple(
s = Uri("https://trustgraph.ai/doc/annual-threat-assessment-us-dni-march-2025"),
p = Uri("https://schema.org/copyrightNotice"),
o = Literal("Not copyright")
),
Triple(
s = Uri("https://trustgraph.ai/doc/annual-threat-assessment-us-dni-march-2025"),
p = Uri("https://schema.org/copyrightHolder"),
o = Literal("US Government")
),
Triple(
s = Uri("https://trustgraph.ai/doc/annual-threat-assessment-us-dni-march-2025"),
p = Uri("https://schema.org/copyrightYear"),
o = Literal("2025")
),
Triple(
s = Uri("https://trustgraph.ai/doc/annual-threat-assessment-us-dni-march-2025"),
p = Uri("https://schema.org/keywords"),
o = Literal("adversary-cooperation")
),
Triple(
s = Uri("https://trustgraph.ai/doc/annual-threat-assessment-us-dni-march-2025"),
p = Uri("https://schema.org/keywords"),
o = Literal("cyberthreats")
),
Triple(
s = Uri("https://trustgraph.ai/doc/annual-threat-assessment-us-dni-march-2025"),
p = Uri("https://schema.org/keywords"),
o = Literal("supply-chain-vulnerabilities")
),
Triple(
s = Uri("https://trustgraph.ai/doc/annual-threat-assessment-us-dni-march-2025"),
p = Uri("https://schema.org/keywords"),
o = Literal("economic-competition")
),
Triple(
s = Uri("https://trustgraph.ai/doc/annual-threat-assessment-us-dni-march-2025"),
p = Uri("https://schema.org/keywords"),
o = Literal("national-security")
),
Triple(
s = Uri("https://trustgraph.ai/doc/annual-threat-assessment-us-dni-march-2025"),
p = Uri("https://schema.org/publication"),
o = Uri("https://trustgraph.ai/pubev/0f1cfbe2-ce64-403b-8327-799aa8ba3cec")
),
Triple(
s = Uri("https://trustgraph.ai/pubev/0f1cfbe2-ce64-403b-8327-799aa8ba3cec"),
p = Uri("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"),
o = Uri("https://schema.org/PublicationEvent")
),
Triple(
s = Uri("https://trustgraph.ai/pubev/0f1cfbe2-ce64-403b-8327-799aa8ba3cec"),
p = Uri("https://schema.org/description"),
o = Literal("Published by the Director of National Intelligence (DNI)"),
),
Triple(
s = Uri("https://trustgraph.ai/pubev/0f1cfbe2-ce64-403b-8327-799aa8ba3cec"),
p = Uri("https://schema.org/publishedBy"),
o = Uri("https://trustgraph.ai/org/us-gov-dni")
),
Triple(
s = Uri("https://trustgraph.ai/org/us-gov-dni"),
p = Uri("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"),
o = Uri("https://schema.org/Organization")
),
Triple(
s = Uri("https://trustgraph.ai/org/us-gov-dni"),
p = Uri("http://www.w3.org/2000/01/rdf-schema#label"),
o = Literal("The Director of National Intelligence")
),
Triple(
s = Uri("https://trustgraph.ai/org/us-gov-dni"),
p = Uri("https://schema.org/name"),
o = Literal("The Director of National Intelligence")
),
Triple(
s = Uri("https://trustgraph.ai/pubev/0f1cfbe2-ce64-403b-8327-799aa8ba3cec"),
p = Uri("https://schema.org/startDate"),
o = Literal("2025-03-18")
),
Triple(
s = Uri("https://trustgraph.ai/pubev/0f1cfbe2-ce64-403b-8327-799aa8ba3cec"),
p = Uri("https://schema.org/endDate"),
o = Literal("2025-03-18")
),
Triple(
s = Uri("https://trustgraph.ai/doc/annual-threat-assessment-us-dni-march-2025"),
p = Uri("https://schema.org/url"),
o = Uri("https://www.dni.gov/index.php/newsroom/reports-publications/reports-publications-2025/4058-2025-annual-threat-assessment")
)
]
},
{
"id": "https://trustgraph.ai/doc/intelligence-and-state",
"title": "The Role of Intelligence and State Policies in International Security",
"comments": "A volume by Mehmet Emin Erendor, published by Cambridge Scholars Publishing (2021). It is well-known that the understanding of security has changed since the end of the Cold War. This, in turn, has impacted the characteristics of intelligence, as states have needed to improve their security policies with new intelligence tactics. This volume investigates this new state of play in the international arena.",
"url": "https://www.cambridgescholars.com/resources/pdfs/978-1-5275-7604-9-sample.pdf",
"kind": "application/pdf",
"date": "2025-05-06",
"tags": ["intelligence", "state-policy", "international-security", "national-security", "geopolitics", "foreign-policy", "security-studies", "military", "crime"],
"metadata": [
Triple(
s = Uri("https://trustgraph.ai/doc/intelligence-and-state"),
p = Uri("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"),
o = Uri("https://schema.org/Book")
),
Triple(
s = Uri("https://trustgraph.ai/doc/intelligence-and-state"),
p = Uri("http://www.w3.org/2000/01/rdf-schema#label"),
o = Literal("The Role of Intelligence and State Policies in International Security")
),
Triple(
s = Uri("https://trustgraph.ai/doc/intelligence-and-state"),
p = Uri("https://schema.org/name"),
o = Literal("The Role of Intelligence and State Policies in International Security")
),
Triple(
s = Uri("https://trustgraph.ai/doc/intelligence-and-state"),
p = Uri("https://schema.org/description"),
o = Literal("A volume by Mehmet Emin Erendor. It is well-known that the understanding of security has changed since the end of the Cold War. This, in turn, has impacted the characteristics of intelligence, as states have needed to improve their security policies with new intelligence tactics. This volume investigates this new state of play in the international arena.")
),
Triple(
s = Uri("https://trustgraph.ai/doc/intelligence-and-state"),
p = Uri("https://schema.org/author"),
o = Literal("Mehmet Emin Erendor")
),
Triple(
s = Uri("https://trustgraph.ai/doc/intelligence-and-state"),
p = Uri("https://schema.org/isbn"),
o = Literal("9781527576049")
),
Triple(
s = Uri("https://trustgraph.ai/doc/intelligence-and-state"),
p = Uri("https://schema.org/numberOfPages"),
o = Literal("220")
),
Triple(
s = Uri("https://trustgraph.ai/doc/intelligence-and-state"),
p = Uri("https://schema.org/keywords"),
o = Literal("intelligence")
),
Triple(
s = Uri("https://trustgraph.ai/doc/intelligence-and-state"),
p = Uri("https://schema.org/keywords"),
o = Literal("state policy")
),
Triple(
s = Uri("https://trustgraph.ai/doc/intelligence-and-state"),
p = Uri("https://schema.org/keywords"),
o = Literal("international security")
),
Triple(
s = Uri("https://trustgraph.ai/doc/intelligence-and-state"),
p = Uri("https://schema.org/keywords"),
o = Literal("national security")
),
Triple(
s = Uri("https://trustgraph.ai/doc/intelligence-and-state"),
p = Uri("https://schema.org/keywords"),
o = Literal("geopolitics")
),
Triple(
s = Uri("https://trustgraph.ai/doc/intelligence-and-state"),
p = Uri("https://schema.org/publication"),
o = Uri("https://trustgraph.ai/pubev/b4352222-5da0-480d-a00f-f7342fe77862")
),
Triple(
s = Uri("https://trustgraph.ai/pubev/b4352222-5da0-480d-a00f-f7342fe77862"),
p = Uri("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"),
o = Uri("https://schema.org/PublicationEvent")
),
Triple(
s = Uri("https://trustgraph.ai/pubev/b4352222-5da0-480d-a00f-f7342fe77862"),
p = Uri("https://schema.org/description"),
o = Literal("Published by Cambridge Scholars Publishing on October 28, 2021.")
),
Triple(
s = Uri("https://trustgraph.ai/pubev/b4352222-5da0-480d-a00f-f7342fe77862"),
p = Uri("https://schema.org/publishedBy"),
o = Uri("https://trustgraph.ai/org/cambridge-scholars-publishing")
),
Triple(
s = Uri("https://trustgraph.ai/org/cambridge-scholars-publishing"),
p = Uri("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"),
o = Uri("https://schema.org/Organization")
),
Triple(
s = Uri("https://trustgraph.ai/org/cambridge-scholars-publishing"),
p = Uri("http://www.w3.org/2000/01/rdf-schema#label"),
o = Literal("Cambridge Scholars Publishing")
),
Triple(
s = Uri("https://trustgraph.ai/org/cambridge-scholars-publishing"),
p = Uri("https://schema.org/name"),
o = Literal("Cambridge Scholars Publishing")
),
Triple(
s = Uri("https://trustgraph.ai/pubev/b4352222-5da0-480d-a00f-f7342fe77862"),
p = Uri("https://schema.org/startDate"),
o = Literal("2021-10-28")
),
Triple(
s = Uri("https://trustgraph.ai/doc/intelligence-and-state"),
p = Uri("https://schema.org/url"),
o = Uri("https://www.cambridgescholars.com/resources/pdfs/978-1-5275-7604-9-sample.pdf")
)
]
},
{
"id": "https://trustgraph.ai/doc/beyond-vigilant-state",
"title": "Beyond the vigilant state: globalisation and intelligence",
"comments": "This academic paper by Richard J. Aldrich examines the relationship between globalization and intelligence agencies, discussing how intelligence services have adapted to global changes in the post-Cold War era.",
"url": "https://warwick.ac.uk/fac/soc/pais/people/aldrich/publications/beyond.pdf",
"kind": "application/pdf",
"date": datetime.datetime.now().date(),
"tags": ["intelligence", "globalization", "security-studies", "surveillance", "international-relations", "post-cold-war"],
"metadata": [
Triple(
s = Uri("https://trustgraph.ai/doc/beyond-vigilant-state"),
p = Uri("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"),
o = Uri("https://schema.org/ScholarlyArticle")
),
Triple(
s = Uri("https://trustgraph.ai/doc/beyond-vigilant-state"),
p = Uri("http://www.w3.org/2000/01/rdf-schema#label"),
o = Literal("Beyond the vigilant state: globalisation and intelligence"),
),
Triple(
s = Uri("https://trustgraph.ai/doc/beyond-vigilant-state"),
p = Uri("https://schema.org/name"),
o = Literal("Beyond the vigilant state: globalisation and intelligence"),
),
Triple(
s = Uri("https://trustgraph.ai/doc/beyond-vigilant-state"),
p = Uri("https://schema.org/description"),
o = Literal("This academic paper by Richard J. Aldrich examines the relationship between globalization and intelligence agencies, discussing how intelligence services have adapted to global changes in the post-Cold War era."),
),
Triple(
s = Uri("https://trustgraph.ai/doc/beyond-vigilant-state"),
p = Uri("https://schema.org/copyrightNotice"),
o = Literal("(c) British International Studies Association")
),
Triple(
s = Uri("https://trustgraph.ai/doc/beyond-vigilant-state"),
p = Uri("https://schema.org/copyrightHolder"),
o = Literal("British International Studies Association")
),
Triple(
s = Uri("https://trustgraph.ai/doc/beyond-vigilant-state"),
p = Uri("https://schema.org/author"),
o = Uri("https://trustgraph.ai/person/3a45f8c9-b7d1-42e5-8631-d9f82c4a0e22")
),
Triple(
s = Uri("https://trustgraph.ai/person/3a45f8c9-b7d1-42e5-8631-d9f82c4a0e22"),
p = Uri("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"),
o = Uri("https://schema.org/Person")
),
Triple(
s = Uri("https://trustgraph.ai/person/3a45f8c9-b7d1-42e5-8631-d9f82c4a0e22"),
p = Uri("http://www.w3.org/2000/01/rdf-schema#label"),
o = Literal("Richard J. Aldrich")
),
Triple(
s = Uri("https://trustgraph.ai/person/3a45f8c9-b7d1-42e5-8631-d9f82c4a0e22"),
p = Uri("https://schema.org/name"),
o = Literal("Richard J. Aldrich")
),
Triple(
s = Uri("https://trustgraph.ai/doc/beyond-vigilant-state"),
p = Uri("https://schema.org/keywords"),
o = Literal("intelligence")
),
Triple(
s = Uri("https://trustgraph.ai/doc/beyond-vigilant-state"),
p = Uri("https://schema.org/keywords"),
o = Literal("globalisation")
),
Triple(
s = Uri("https://trustgraph.ai/doc/beyond-vigilant-state"),
p = Uri("https://schema.org/keywords"),
o = Literal("security-studies")
),
Triple(
s = Uri("https://trustgraph.ai/doc/beyond-vigilant-state"),
p = Uri("https://schema.org/keywords"),
o = Literal("surveillance")
),
Triple(
s = Uri("https://trustgraph.ai/doc/beyond-vigilant-state"),
p = Uri("https://schema.org/keywords"),
o = Literal("international-relations")
),
Triple(
s = Uri("https://trustgraph.ai/doc/beyond-vigilant-state"),
p = Uri("https://schema.org/keywords"),
o = Literal("post-cold-war")
),
Triple(
s = Uri("https://trustgraph.ai/doc/beyond-vigilant-state"),
p = Uri("https://schema.org/publication"),
o = Uri("https://trustgraph.ai/pubev/75c83dfa-6b2e-4d89-bda1-c8e92f0e3410")
),
Triple(
s = Uri("https://trustgraph.ai/pubev/75c83dfa-6b2e-4d89-bda1-c8e92f0e3410"),
p = Uri("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"),
o = Uri("https://schema.org/PublicationEvent")
),
Triple(
s = Uri("https://trustgraph.ai/pubev/75c83dfa-6b2e-4d89-bda1-c8e92f0e3410"),
p = Uri("https://schema.org/description"),
o = Literal("Published in Review of International Studies"),
),
Triple(
s = Uri("https://trustgraph.ai/pubev/75c83dfa-6b2e-4d89-bda1-c8e92f0e3410"),
p = Uri("https://schema.org/publishedBy"),
o = Uri("https://trustgraph.ai/org/british-international-studies-association")
),
Triple(
s = Uri("https://trustgraph.ai/org/british-international-studies-association"),
p = Uri("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"),
o = Uri("https://schema.org/Organization")
),
Triple(
s = Uri("https://trustgraph.ai/org/british-international-studies-association"),
p = Uri("http://www.w3.org/2000/01/rdf-schema#label"),
o = Literal("British International Studies Association")
),
Triple(
s = Uri("https://trustgraph.ai/org/british-international-studies-association"),
p = Uri("https://schema.org/name"),
o = Literal("British International Studies Association")
),
Triple(
s = Uri("https://trustgraph.ai/doc/beyond-vigilant-state"),
p = Uri("https://schema.org/url"),
o = Uri("https://warwick.ac.uk/fac/soc/pais/people/aldrich/publications/beyond.pdf")
)
]
}
]
class Loader:
def __init__(
self, url, user
):
self.api = Api(url).library()
self.user = user
def load(self, documents):
for doc in documents:
self.load_doc(doc)
def load_doc(self, doc):
try:
print(doc["title"], ":")
print(" downloading...")
print(" done.")
resp = session.get(doc["url"])
content = resp.content
print(" adding...")
self.api.add_document(
id = doc["id"], metadata = doc["metadata"],
user = self.user, kind = doc["kind"], title = doc["title"],
comments = doc["comments"], tags = doc["tags"],
document = content
)
print(" successful.")
except Exception as e:
print("Failed: {str(e)}", flush=True)
raise e
def main():
parser = argparse.ArgumentParser(
prog='tg-add-library-document',
description=__doc__,
)
parser.add_argument(
'-u', '--url',
default=default_url,
help=f'API URL (default: {default_url})',
)
parser.add_argument(
'-U', '--user',
default=default_user,
help=f'User ID (default: {default_user})'
)
args = parser.parse_args()
try:
p = Loader(
url=args.url,
user=args.user,
)
p.load(documents)
except Exception as e:
print("Exception:", e, flush=True)
raise e
main()
# https://warwick.ac.uk/fac/soc/pais/people/aldrich/publications/beyond.pdf
# https://www.ialeia.org/docs/Psychology_of_Intelligence_Analysis.pdf

View file

@ -1,7 +1,10 @@
#!/usr/bin/env python3
"""
Loads a text document into TrustGraph processing.
Loads a text document into TrustGraph processing by directing to a text
loader queue.
Consider using tg-add-library-document to load
a document, followed by tg-start-library-processing to initiate processing.
"""
import pulsar

View file

@ -1,7 +1,8 @@
#!/usr/bin/env python3
"""
Dumps out the current configuration
Uploads a flow class definition. You can take the output of
tg-get-flow-class and load it back in using this utility.
"""
import argparse
@ -37,7 +38,7 @@ def main():
parser.add_argument(
'-c', '--config',
help=f'Initial configuration to load',
help=f'Initial configuration to load, should be raw JSON',
)
args = parser.parse_args()

View file

@ -1,7 +1,7 @@
#!/usr/bin/env python3
"""
Remove a PDF document from the library
Remove a document from the library
"""
import argparse

View file

@ -2,11 +2,11 @@
"""
This utility connects to a running TrustGraph through the API and creates
a knowledge core from the data streaming through the processing queues.
For completeness of data, tg-save-kg-core should be initiated before data
loading takes place. The default output format, msgpack should be used.
JSON output format is also available - msgpack produces a more compact
representation, which is also more performant to load.
a document embeddings core from the data streaming through the processing
queues. For completeness of data, tg-save-doc-embeds should be initiated
before data loading takes place. The default output format, msgpack
should be used. JSON output format is also available - msgpack produces
a more compact representation, which is also more performant to load.
"""
import aiohttp

View file

@ -1,7 +1,7 @@
#!/usr/bin/env python3
"""
Dumps out the current prompts
Sets a prompt template.
"""
import argparse
@ -63,7 +63,7 @@ def set_prompt(url, id, prompt, response, schema):
def main():
parser = argparse.ArgumentParser(
prog='tg-show-prompts',
prog='tg-set-prompt',
description=__doc__,
)

View file

@ -1,7 +1,7 @@
#!/usr/bin/env python3
"""
Dumps out the current prompts
Sets a model's token costs.
"""
import argparse
@ -67,7 +67,7 @@ def set_prompt(url, id, prompt, response, schema):
def main():
parser = argparse.ArgumentParser(
prog='tg-show-prompts',
prog='tg-set-token-costs',
description=__doc__,
)

View file

@ -1,6 +1,7 @@
#!/usr/bin/env python3
"""
Shows all defined flow classes.
"""
import argparse

View file

@ -1,7 +1,7 @@
#!/usr/bin/env python3
"""
Dump out TrustGraph processor states.
Dump out a flow's processor states
"""
import requests

View file

@ -1,6 +1,7 @@
#!/usr/bin/env python3
"""
Shows configured flows.
"""
import argparse

View file

@ -27,7 +27,7 @@ def show_graph(url, flow_id, user, collection):
def main():
parser = argparse.ArgumentParser(
prog='tg-graph-show',
prog='tg-show-graph',
description=__doc__,
)

View file

@ -1,6 +1,7 @@
#!/usr/bin/env python3
"""
Shows all loaded library documents
"""
import argparse
@ -29,14 +30,14 @@ def show_docs(url, user):
table.append(("time", doc.time))
table.append(("title", doc.title))
table.append(("kind", doc.kind))
table.append(("comments", doc.comments))
table.append(("note", doc.comments))
table.append(("tags", ", ".join(doc.tags)))
print(tabulate.tabulate(
table,
tablefmt="pretty",
stralign="left",
maxcolwidths=[None, 55],
maxcolwidths=[None, 67],
))
print()

View file

@ -31,7 +31,7 @@ def dump_status(url):
def main():
parser = argparse.ArgumentParser(
prog='tg-processor-state',
prog='tg-show-processor-state',
description=__doc__,
)

View file

@ -1,7 +1,7 @@
#!/usr/bin/env python3
"""
Dumps out the current prompts
Dumps out token cost configuration
"""
import argparse
@ -53,7 +53,7 @@ def show_config(url):
def main():
parser = argparse.ArgumentParser(
prog='tg-show-prompts',
prog='tg-show-token-costs',
description=__doc__,
)

View file

@ -1,7 +1,7 @@
#!/usr/bin/env python3
"""
Dumps out the current agent tools
Dumps out the current agent tool configuration
"""
import argparse
@ -60,7 +60,7 @@ def show_config(url):
def main():
parser = argparse.ArgumentParser(
prog='tg-show-prompts',
prog='tg-show-tools',
description=__doc__,
)

View file

@ -1,6 +1,7 @@
#!/usr/bin/env python3
"""
Starts a processing flow using a defined flow class
"""
import argparse

View file

@ -1,6 +1,7 @@
#!/usr/bin/env python3
"""
Submits a library document for processing
"""
import argparse

View file

@ -1,6 +1,7 @@
#!/usr/bin/env python3
"""
Stops a processing flow.
"""
import argparse

View file

@ -1,6 +1,8 @@
#!/usr/bin/env python3
"""
Removes a library document processing record. This is just a record of
procesing, it doesn't stop in-flight processing at the moment.
"""
import argparse