# trustgraph/query — Graph-RAG query script
# (exported 2024-07-10 17:04:24 +01:00)
#!/usr/bin/env python3
# Graph-RAG query script: embeds a natural-language question, retrieves
# nearby entities from a vector store, expands them into a knowledge
# subgraph, and asks a local LLM to answer using that subgraph as context.
from langchain_huggingface import HuggingFaceEmbeddings
from edge_map import VectorStore
from trustgraph import TrustGraph
from llm_ollama import Llm
# Well-known RDF/SKOS predicate URIs; LABEL is used below to swap raw
# URIs for human-readable names.  DEFINITION is unused in this script.
LABEL="http://www.w3.org/2000/01/rdf-schema#label"
DEFINITION="http://www.w3.org/2004/02/skos/core#definition"
# Sentence-embedding model for the vector similarity search.
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
# NOTE(review): VectorStore/TrustGraph connection details come from the
# project modules above — presumably default endpoints; confirm there.
client = VectorStore()
tg = TrustGraph()
# Tunables: entities returned by the vector search, triples fetched per
# graph query, and a hard cap on the total subgraph size.
entity_limit=50
query_limit=30
max_sg_size=3000
# Experiment scratchpad: several candidate questions.  Only the LAST
# assignment to `query` takes effect; the earlier ones are dead stores
# kept as a record of previous runs.
query = """Explain how CISA contributes to incident management in the
US government economy"""
# query = """Explain in a step-by-step form how CISA helps new legislation to emerge for national security"""
query = """An insider threat has been discovered inside CISA.
The employee has been discovered to be stealing data using admin credentials.
Provide 3 theories as to the cause of the failure in hiring this individual,
referencing oversight information in the knowledge graph provided.
"""
query = """An insider threat has been discovered inside CISA.
Their job title is Protective Security Advisor. What threats are posed
to to CISA by the compromise of somoene in this role?
Provide 3 hypothetical compromise scenarios which could be cause by
someone in such a role being compromised as an inside threat.
"""
# Active question.  Typo fixed: "provided a set" -> "provide a set",
# since this text is sent verbatim to the LLM.
query = """Provide three hypothetical causes for the space shuttle
disaster. For each hypothesis, provide a set of step-by-step events
which lead to the disaster.
"""
# Embed the question.  embed_documents() takes a batch of texts; we pass
# a single document and keep its one vector.
qembeds = embeddings.embed_documents([query])[0]
# Nearest-neighbour search for the most relevant entities.
res = client.search(
    qembeds,
    limit=entity_limit
)
# Deduplicate the entity URIs.  Set comprehension instead of
# set([...generator...]) (flake8-comprehensions C403).
# NOTE(review): assumes each hit is shaped like
# {"entity": {"entity": <uri>, ...}} — confirm against VectorStore.search.
entities = {
    item["entity"]["entity"]
    for item in res
}
# Expand each retrieved entity into its one-hop neighbourhood: collect
# every triple in which the entity appears as subject, predicate, or
# object.  A set deduplicates triples reached via multiple entities.
triples = set()
for entity in entities:
    for pred, obj in tg.get_s(entity, limit=query_limit):
        triples.add((entity, pred, obj))
    for subj, obj in tg.get_p(entity, limit=query_limit):
        triples.add((subj, entity, obj))
    for subj, pred in tg.get_o(entity, limit=query_limit):
        triples.add((subj, pred, entity))
# Cap the subgraph so the final prompt stays within the model's context.
subgraph = list(triples)[:max_sg_size]
# Memoization table for entity -> label lookups.
cache = {}
def maybe_label(e):
    """Return the rdfs:label for *e* if the graph has one, else *e* itself.

    Results are memoized in the module-level `cache` so each entity is
    looked up in the graph at most once.
    """
    try:
        return cache[e]
    except KeyError:
        pass
    labels = list(tg.get_sp(e, LABEL))
    # Fall back to the raw identifier when no label triple exists.
    cache[e] = labels[0][0] if labels else e
    return cache[e]
# Render each triple as "(subject)-[predicate]->(object)" text, using
# human-readable labels where available.  Label edges themselves are
# skipped — they only exist to name the other nodes.
sg2 = [
    f"({maybe_label(s)})-[{maybe_label(p)}]->({maybe_label(o)})"
    for s, p, o in subgraph
    if p != LABEL
]
# Assemble the final prompt: instructions + serialized subgraph + question.
# The join is hoisted out of the f-string: a backslash inside an f-string
# expression ({"\n".join(...)}) is a SyntaxError on Python < 3.12
# (only allowed since PEP 701).
graph_text = "\n".join(sg2)
prompt = f"""<instructions>Study the knowledge graph provided, and use
the information to answer the question. The question should be answered
in plain English only.
</instructions>
<knowledge-graph>
{graph_text}
</knowledge-graph>
<question>
{query}
</question>
"""
print(prompt)
print(len(prompt))  # rough size check before sending to the model
# Previously-tried models, kept for reference:
# llm = Llm("deepseek-v2")
# llm = Llm("gemma2")
llm = Llm("phi3:14b")
resp = llm(prompt)
print(resp)