fix: GraphQAchain Invalid prompts

2026-07-02 22:01:05 +02:00 · 2024-08-13 19:56:54 -07:00 · 2024-08-13 19:56:54 -07:00 · 4b3148fc3e
commit 4b3148fc3e
parent 6d8e1fe994
47 changed files with 62 additions and 5849 deletions
--- a/backend/DataExample.py
+++ b/backend/DataExample.py
--- a/backend/prompts.py
+++ b/backend/prompts.py
@ -3,20 +3,44 @@ from langchain_core.prompts import ChatPromptTemplate
 from datetime import datetime, timezone


+
+
 DATE_TODAY = "Today's date is " + datetime.now(timezone.utc).astimezone().isoformat() + '\n'

+# Prompt that asks the LLM to rewrite the user's question as a clearer
+# natural-language prompt. {context} receives sample user documents and
+# {question} receives the raw user question.
+# NOTE: DATE_TODAY is evaluated once at import time, so the date embedded in
+# this template stays fixed for the lifetime of the process.
+GRAPH_QUERY_GEN_TEMPLATE = DATE_TODAY + """You are a top tier Prompt Engineering Expert.
+A User's Data is stored in a Knowledge Graph.
+Your main task is to read the User Question below and give an optimized Question prompt in Natural Language.
+The Question prompt will be used by an LLM to easily get data from the Knowledge Graph.
+
+Make sure to return only the prompt text, nothing else. Never change the meaning of the user's question.
+
+Here are examples of the User's Data Documents that are stored in the Knowledge Graph:
+{context}
+
+Note: Do not include any explanations or apologies in your responses.
+Do not include any text except the generated prompt text.
+
+Question: {question}
+Prompt For Cypher Query Construction:"""
+
+# Template object wired with the two placeholders used above.
+GRAPH_QUERY_GEN_PROMPT = PromptTemplate(
+    input_variables=["context", "question"], template=GRAPH_QUERY_GEN_TEMPLATE
+)
+
 CYPHER_QA_TEMPLATE = DATE_TODAY + """You are an assistant that helps to form nice and human understandable answers.
 The information part contains the provided information that you must use to construct an answer.
 The provided information is authoritative, you must never doubt it or try to use your internal knowledge to correct it.
 Make the answer sound as a response to the question. Do not mention that you based the result on the given information.
+Only give the answer if it satisfies the user requirements in Question. Else return exactly 'don't know' as answer.
+
 Here are the examples:

-Question: Website on which the most time was spend on?
-Context:[{'d.VisitedWebPageURL': 'https://stackoverflow.com/questions/59873698/the-default-export-is-not-a-react-component-in-page-nextjs', 'totalDuration': 8889167}]
-Helpful Answer: You visited https://stackoverflow.com/questions/59873698/the-default-export-is-not-a-react-component-in-page-nextjs for 8889167 milliseconds or 8889.167 seconds.
+Question: What type of general topics I explore the most?
+Context:[['Topic': 'Langchain', 'topicCount': 5], ['Topic': 'Graphrag', 'topicCount': 2], ['Topic': 'Ai', 'topicCount': 2], ['Topic': 'Fastapi', 'topicCount': 2], ['Topic': 'Nextjs', 'topicCount': 1]]
+Helpful Answer: You mostly explore about Langchain, Graphrag, Ai, Fastapi and Nextjs.

 Follow this example when generating answers.
-If the provided information is empty, then and only then, return exactly 'don't know' as answer.
+If the provided information is empty or incomplete, return exactly 'don't know' as answer.

 Information:
 {context}
@ -54,7 +78,6 @@ Note: Do not include any explanations or apologies in your responses.
 Do not respond to any questions that might ask anything else than for you to construct a Cypher statement.
 Do not include any text except the generated Cypher statement.

-
 The question is:
 {question}"""
 CYPHER_GENERATION_PROMPT = PromptTemplate(
--- a/backend/pydmodels.py
+++ b/backend/pydmodels.py
@ -38,4 +38,8 @@ class RetrivedDocList(BaseModel):
 class UserQueryResponse(BaseModel):
    response: str
    relateddocs: List[DocMeta]
+    
+
+class VectorSearchQuery(BaseModel):
+    """Structured output schema holding a single search-query string."""
+    # Query text; server.py passes it to vector_index.similarity_search.
+    searchquery: str
    
--- a/backend/server.py
+++ b/backend/server.py
@ -6,13 +6,14 @@ from langchain_core.documents import Document
 from langchain_openai import OpenAIEmbeddings
 from langchain_community.vectorstores import Neo4jVector
 from envs import ACCESS_TOKEN_EXPIRE_MINUTES, ALGORITHM, API_SECRET_KEY, SECRET_KEY
-from prompts import CYPHER_QA_PROMPT, DOC_DESCRIPTION_PROMPT, SIMILARITY_SEARCH_PROMPT , CYPHER_GENERATION_PROMPT, DOCUMENT_METADATA_EXTRACTION_PROMT
-from pydmodels import DescriptionResponse, UserQuery, DocMeta, RetrivedDocList, UserQueryResponse
+from prompts import CYPHER_QA_PROMPT, DOC_DESCRIPTION_PROMPT, GRAPH_QUERY_GEN_PROMPT, SIMILARITY_SEARCH_PROMPT , CYPHER_GENERATION_PROMPT, DOCUMENT_METADATA_EXTRACTION_PROMT
+from pydmodels import DescriptionResponse, UserQuery, DocMeta, RetrivedDocList, UserQueryResponse, VectorSearchQuery
 from langchain_experimental.text_splitter import SemanticChunker

 #Our Imps
 from LLMGraphTransformer import LLMGraphTransformer
 from langchain_openai import ChatOpenAI
+from DataExample import examples

 # Auth Libs
 from fastapi import FastAPI, Depends, HTTPException, Request, status
@ -39,8 +40,6 @@ def get_user_query_response(data: UserQuery, response_model=UserQueryResponse):
        raise HTTPException(status_code=401, detail="Unauthorized")
    
    
-    query = data.query
-    
    graph = Neo4jGraph(url=data.neourl, username=data.neouser, password=data.neopass)
    
    llm = ChatOpenAI(
@ -51,6 +50,13 @@ def get_user_query_response(data: UserQuery, response_model=UserQueryResponse):
        api_key=data.openaikey
    )
    
+    # Query Expansion
+    searchchain = GRAPH_QUERY_GEN_PROMPT | llm
+        
+    qry = searchchain.invoke({"question": data.query, "context": examples})
+    
+    query = qry.content
+    
    embeddings = OpenAIEmbeddings(
        model="text-embedding-ada-002",
        api_key=data.openaikey,
@ -96,19 +102,22 @@ def get_user_query_response(data: UserQuery, response_model=UserQueryResponse):
        )
                
    docstoreturn = [i for n, i in enumerate(docstoreturn) if i not in docstoreturn[n + 1:]]
-
+    
+    # responsegrp = chain.invoke({"query": query})
+ 

    try:
-        response = chain.invoke({"query": query})
-        if "don't know" in response["result"]:
+        responsegrp = chain.invoke({"query": query})
+           
+        if "don't know" in responsegrp["result"]:
            raise Exception("No response from graph")
        
-        structured_llm = llm.with_structured_output(RetrivedDocList)
+        structured_llm = llm.with_structured_output(VectorSearchQuery)
        doc_extract_chain = DOCUMENT_METADATA_EXTRACTION_PROMT | structured_llm
        
-        query = doc_extract_chain.invoke(response["intermediate_steps"][1]["context"])
+        newquery = doc_extract_chain.invoke(responsegrp["intermediate_steps"][1]["context"])
        
-        docs = vector_index.similarity_search(query.searchquery,k=5)
+        docs = vector_index.similarity_search(newquery.searchquery,k=5)
    
        docstoreturn = []
        
@ -127,12 +136,12 @@ def get_user_query_response(data: UserQuery, response_model=UserQueryResponse):
                    
        docstoreturn = [i for n, i in enumerate(docstoreturn) if i not in docstoreturn[n + 1:]]
        
-        return UserQueryResponse(relateddocs=docstoreturn,response=response["result"])
+        return UserQueryResponse(relateddocs=docstoreturn,response=responsegrp["result"])
    except:
        # Fallback to Similarity Search RAG
        searchchain = SIMILARITY_SEARCH_PROMPT | llm
        
-        response = searchchain.invoke({"question": query, "context": docs})
+        response = searchchain.invoke({"question": data.query, "context": docs})
        
        return UserQueryResponse(relateddocs=docstoreturn,response=response.content)