#!/usr/bin/env python3
"""Graph-RAG demo script.

Embeds a natural-language question, pulls the nearest entities from a
vector store, expands them into a knowledge subgraph from TrustGraph,
renders the subgraph as (s)-[p]->(o) text, and asks an Ollama-hosted LLM
to answer the question using that subgraph as context.
"""

from langchain_huggingface import HuggingFaceEmbeddings
from edge_map import VectorStore
from trustgraph import TrustGraph
from llm_ollama import Llm

# Well-known predicate URIs used when resolving human-readable names.
LABEL = "http://www.w3.org/2000/01/rdf-schema#label"
DEFINITION = "http://www.w3.org/2004/02/skos/core#definition"

embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
client = VectorStore()
tg = TrustGraph()

# Tuning knobs: entities seeded from the vector search, triples fetched
# per entity per position (subject/predicate/object), and a hard cap on
# the rendered subgraph size to keep the prompt bounded.
entity_limit = 50
query_limit = 30
max_sg_size = 3000

# NOTE(review): earlier experiment prompts — each assignment overwrites
# the previous one, so only the LAST `query` below is actually used.
query = """Explain how CISA contributes to incident management in the US government economy"""

# query = """Explain in a step-by-step form how CISA helps new legislation to emerge for national security"""

query = """An insider threat has been discovered inside CISA. The employee has been discovered to be stealing data using admin credentials. Provide 3 theories as to the cause of the failure in hiring this individual, referencing oversight information in the knowledge graph provided. """

query = """An insider threat has been discovered inside CISA. Their job title is Protective Security Advisor. What threats are posed to to CISA by the compromise of somoene in this role? Provide 3 hypothetical compromise scenarios which could be cause by someone in such a role being compromised as an inside threat. """

query = """Provide three hypothetical causes for the space shuttle disaster. For each hypothesis, provided a set of step-by-step events which lead to the disaster. 
"""

# Embed the question; embed_documents returns one vector per input doc.
qembeds = embeddings.embed_documents([query])[0]

# Seed entities: nearest neighbours of the question embedding.
res = client.search(qembeds, limit=entity_limit)
entities = {item["entity"]["entity"] for item in res}

# Expand each seed entity into the triples where it occurs as subject,
# predicate, or object.  A set de-duplicates edges reachable from
# multiple seed entities.
subgraph = set()
for e in entities:
    for p, o in tg.get_s(e, limit=query_limit):
        subgraph.add((e, p, o))
    for s, o in tg.get_p(e, limit=query_limit):
        subgraph.add((s, e, o))
    for s, p in tg.get_o(e, limit=query_limit):
        subgraph.add((s, p, e))

# Cap the subgraph so the prompt stays within context limits.
# NOTE(review): set iteration order is arbitrary, so WHICH edges survive
# this truncation is nondeterministic — confirm that is acceptable.
subgraph = list(subgraph)[:max_sg_size]

# Memo of entity URI -> display label, to avoid repeated graph lookups.
cache = {}


def maybe_label(e):
    """Return the rdfs:label of *e* if the graph has one, else *e* itself.

    Results are memoised in the module-level `cache` dict.
    """
    if e in cache:
        return cache[e]
    res = list(tg.get_sp(e, LABEL))
    cache[e] = e if len(res) == 0 else res[0][0]
    return cache[e]


# Render each edge as "(s)-[p]->(o)", substituting labels where known and
# skipping the label edges themselves (they are presentation, not content).
sg2 = []
for edge in subgraph:
    if edge[1] == LABEL:
        continue
    s = maybe_label(edge[0])
    p = maybe_label(edge[1])
    o = maybe_label(edge[2])
    sg2.append(f"({s})-[{p}]->({o})")

# Hoisted out of the f-string below: a backslash inside an f-string
# expression ({"\n".join(sg2)}) is a SyntaxError before Python 3.12.
graph_text = "\n".join(sg2)

prompt = f"""Study the knowledge graph provided, and use the information to answer the question. The question should be answered in plain English only.

{graph_text}

{query}
"""

print(prompt)
print(len(prompt))

# Alternative local models tried previously:
# llm = Llm("deepseek-v2")
# llm = Llm("gemma2")
llm = Llm("phi3:14b")

resp = llm(prompt)
print(resp)