# trustgraph/query — Graph-RAG query script
# (exported 2024-07-10 17:04:24 +01:00)
#!/usr/bin/env python3
# Graph-RAG query script: embeds a natural-language question, retrieves
# nearby entities from a vector store, expands them into a knowledge
# subgraph, and asks a local LLM to answer using that subgraph as context.
from langchain_huggingface import HuggingFaceEmbeddings
from edge_map import VectorStore
from trustgraph import TrustGraph
from llm_ollama import Llm
# Well-known RDF/SKOS predicate URIs; LABEL is used below to swap raw
# URIs for human-readable names.  DEFINITION is unused in this script.
LABEL="http://www.w3.org/2000/01/rdf-schema#label"
DEFINITION="http://www.w3.org/2004/02/skos/core#definition"
# Sentence-embedding model for the vector similarity search.
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
# NOTE(review): VectorStore/TrustGraph connection details come from the
# project modules above — presumably default endpoints; confirm there.
client = VectorStore()
tg = TrustGraph()
# Tunables: entities returned by the vector search, triples fetched per
# graph query, and a hard cap on the total subgraph size.
entity_limit=50
query_limit=30
max_sg_size=3000
# Experiment scratchpad: several candidate questions.  Only the LAST
# assignment to `query` takes effect; the earlier ones are dead stores
# kept as a record of previous runs.
query = """Explain how CISA contributes to incident management in the
US government economy"""
# query = """Explain in a step-by-step form how CISA helps new legislation to emerge for national security"""
query = """An insider threat has been discovered inside CISA.
The employee has been discovered to be stealing data using admin credentials.
Provide 3 theories as to the cause of the failure in hiring this individual,
referencing oversight information in the knowledge graph provided.
"""
query = """An insider threat has been discovered inside CISA.
Their job title is Protective Security Advisor. What threats are posed
to to CISA by the compromise of somoene in this role?
Provide 3 hypothetical compromise scenarios which could be cause by
someone in such a role being compromised as an inside threat.
"""
# Active question.  Typo fixed: "provided a set" -> "provide a set",
# since this text is sent verbatim to the LLM.
query = """Provide three hypothetical causes for the space shuttle
disaster. For each hypothesis, provide a set of step-by-step events
which lead to the disaster.
"""
# Embed the question.  embed_documents() takes a batch of texts; we pass
# a single document and keep its one vector.
qembeds = embeddings.embed_documents([query])[0]
# Nearest-neighbour search for the most relevant entities.
res = client.search(
    qembeds,
    limit=entity_limit
)
# Deduplicate the entity URIs.  Set comprehension instead of
# set([...generator...]) (flake8-comprehensions C403).
# NOTE(review): assumes each hit is shaped like
# {"entity": {"entity": <uri>, ...}} — confirm against VectorStore.search.
entities = {
    item["entity"]["entity"]
    for item in res
}
# Expand each retrieved entity into its one-hop neighbourhood: collect
# every triple in which the entity appears as subject, predicate, or
# object.  A set deduplicates triples reached via multiple entities.
triples = set()
for entity in entities:
    for pred, obj in tg.get_s(entity, limit=query_limit):
        triples.add((entity, pred, obj))
    for subj, obj in tg.get_p(entity, limit=query_limit):
        triples.add((subj, entity, obj))
    for subj, pred in tg.get_o(entity, limit=query_limit):
        triples.add((subj, pred, entity))
# Cap the subgraph so the final prompt stays within the model's context.
subgraph = list(triples)[:max_sg_size]
# Memoization table for entity -> label lookups.
cache = {}
def maybe_label(e):
    """Return the rdfs:label for *e* if the graph has one, else *e* itself.

    Results are memoized in the module-level `cache` so each entity is
    looked up in the graph at most once.
    """
    try:
        return cache[e]
    except KeyError:
        pass
    labels = list(tg.get_sp(e, LABEL))
    # Fall back to the raw identifier when no label triple exists.
    cache[e] = labels[0][0] if labels else e
    return cache[e]
# Render each triple as "(subject)-[predicate]->(object)" text, using
# human-readable labels where available.  Label edges themselves are
# skipped — they only exist to name the other nodes.
sg2 = [
    f"({maybe_label(s)})-[{maybe_label(p)}]->({maybe_label(o)})"
    for s, p, o in subgraph
    if p != LABEL
]
# Assemble the final prompt: instructions + serialized subgraph + question.
# The join is hoisted out of the f-string: a backslash inside an f-string
# expression ({"\n".join(...)}) is a SyntaxError on Python < 3.12
# (only allowed since PEP 701).
graph_text = "\n".join(sg2)
prompt = f"""<instructions>Study the knowledge graph provided, and use
the information to answer the question. The question should be answered
in plain English only.
</instructions>
<knowledge-graph>
{graph_text}
</knowledge-graph>
<question>
{query}
</question>
"""
print(prompt)
print(len(prompt))  # rough size check before sending to the model
# Previously-tried models, kept for reference:
# llm = Llm("deepseek-v2")
# llm = Llm("gemma2")
llm = Llm("phi3:14b")
resp = llm(prompt)
print(resp)