mirror of
https://github.com/trustgraph-ai/trustgraph.git
synced 2026-04-25 00:16:23 +02:00
129 lines
2.8 KiB
Text
129 lines
2.8 KiB
Text
|
|
#!/usr/bin/env python3
|
||
|
|
|
||
|
|
from langchain_huggingface import HuggingFaceEmbeddings
|
||
|
|
from edge_map import VectorStore
|
||
|
|
from trustgraph import TrustGraph
|
||
|
|
from llm_ollama import Llm
|
||
|
|
|
||
|
|
# Predicate URIs used when reading triples from the graph store.
LABEL = "http://www.w3.org/2000/01/rdf-schema#label"
# NOTE(review): DEFINITION is not referenced anywhere in the visible part of
# this script -- confirm whether it is still needed.
DEFINITION = "http://www.w3.org/2004/02/skos/core#definition"

# Retrieval limits.
entity_limit = 50    # `limit` passed to the vector-store search
query_limit = 30     # `limit` passed to each tg.get_s / get_p / get_o call
max_sg_size = 3000   # maximum number of edges kept in the subgraph

# Embedding model, vector store and graph store handles used by the script.
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
client = VectorStore()
tg = TrustGraph()
|
||
|
|
|
||
|
|
# Question put to the LLM.  `query` is rebound by every assignment below, so
# only the LAST uncommented assignment is what the rest of the script embeds
# and inserts into the prompt; the earlier ones are kept as ready-made
# alternatives (move the one you want to the bottom, or comment out the rest).
#
# NOTE(review): the prompt texts contain typos ("to to", "somoene",
# "cause by", "provided a set").  They are runtime text sent to the embedding
# model and the LLM, so they are deliberately left untouched here.

# Alternative: incident management.
query = """Explain how CISA contributes to incident management in the
US government economy"""

# query = """Explain in a step-by-step form how CISA helps new legislation to emerge for national security"""

# Alternative: insider threat, hiring-failure theories.
query = """An insider threat has been discovered inside CISA.
The employee has been discovered to be stealing data using admin credentials.
Provide 3 theories as to the cause of the failure in hiring this individual,
referencing oversight information in the knowledge graph provided.
"""

# Alternative: insider threat, compromise scenarios for a specific role.
query = """An insider threat has been discovered inside CISA.
Their job title is Protective Security Advisor. What threats are posed
to to CISA by the compromise of somoene in this role?
Provide 3 hypothetical compromise scenarios which could be cause by
someone in such a role being compromised as an inside threat.
"""

# Active query: this is the final assignment, so it is the one actually used.
query = """Provide three hypothetical causes for the space shuttle
disaster. For each hypothesis, provided a set of step-by-step events
which lead to the disaster.
"""
|
||
|
|
|
||
|
|
# Embed the question and search the vector store with that embedding.
qembeds = embeddings.embed_documents([query])[0]
res = client.search(qembeds, limit=entity_limit)

# Distinct entity identifiers found in the search hits.
entities = {item["entity"]["entity"] for item in res}

# Collect every triple in which a matched entity appears as subject,
# predicate or object.  A set is used so duplicate edges collapse.
subgraph = set()

for e in entities:
    # e as subject: rows are (predicate, object)
    subgraph.update((e, p, o) for p, o in tg.get_s(e, limit=query_limit))
    # e as predicate: rows are (subject, object)
    subgraph.update((s, e, o) for s, o in tg.get_p(e, limit=query_limit))
    # e as object: rows are (subject, predicate)
    subgraph.update((s, p, e) for s, p in tg.get_o(e, limit=query_limit))

# Keep at most max_sg_size edges.  The set has no defined order, so which
# edges survive the cut is arbitrary.
subgraph = list(subgraph)[:max_sg_size]
|
||
|
|
|
||
|
|
# Memo of identifier -> display text, shared by every maybe_label() call.
cache = {}


def maybe_label(e):
    """Return the label recorded for `e`, or `e` itself if there is none.

    The label is looked up with ``tg.get_sp(e, LABEL)``; the first field of
    the first row returned is used.  Both outcomes (label found, or fallback
    to `e`) are stored in `cache`, so the graph is queried at most once per
    distinct `e`.
    """
    # Fast path: already resolved.
    try:
        return cache[e]
    except KeyError:
        pass

    rows = list(tg.get_sp(e, LABEL))

    # No label row -> fall back to the raw identifier.
    cache[e] = rows[0][0] if rows else e
    return cache[e]
|
||
|
|
|
||
|
|
# Render each edge as "(subject)-[predicate]->(object)", substituting labels
# where the graph has them.  Edges whose predicate is LABEL itself are
# skipped: they only carry the label text that is already substituted in.
sg2 = [
    f"({maybe_label(subj)})-[{maybe_label(pred)}]->({maybe_label(obj)})"
    for subj, pred, obj in subgraph
    if pred != LABEL
]
|
||
|
|
|
||
|
|
# Join the rendered edges outside the f-string: a backslash inside an
# f-string replacement field ({"\n".join(...)}) is a SyntaxError on every
# Python version before 3.12.
graph_text = "\n".join(sg2)

# Final prompt: instructions, the rendered knowledge graph, then the question.
prompt = f"""<instructions>Study the knowledge graph provided, and use
the information to answer the question. The question should be answered
in plain English only.
</instructions>
<knowledge-graph>
{graph_text}
</knowledge-graph>
<question>
{query}
</question>
"""

# Show the prompt and its length in characters before sending it.
print(prompt)
print(len(prompt))

# Model selection: exactly one Llm(...) line should be active.
# llm = Llm("deepseek-v2")
# llm = Llm("gemma2")
llm = Llm("phi3:14b")

resp = llm(prompt)

print(resp)
|
||
|
|
|