fix: GraphQAchain Invalid prompts

This commit is contained in:
DESKTOP-RTLN3BA\$punk 2024-08-13 19:56:54 -07:00
parent 6d8e1fe994
commit 4b3148fc3e
47 changed files with 62 additions and 5849 deletions

3
backend/DataExample.py Normal file

File diff suppressed because one or more lines are too long

View file

@ -3,20 +3,44 @@ from langchain_core.prompts import ChatPromptTemplate
from datetime import datetime, timezone
# Date banner placed in front of the prompt templates. It is evaluated once,
# when this module is imported, and rendered as an ISO-8601 timestamp in the
# host's local timezone (``astimezone()`` with no argument).
DATE_TODAY = "Today's date is {}\n".format(
    datetime.now(timezone.utc).astimezone().isoformat()
)

# Query-expansion prompt: asks the model to rewrite the user's question into a
# prompt better suited for retrieving data from the knowledge graph.
# Placeholders: ``{context}`` (example user documents) and ``{question}``.
# NOTE: the wording below is sent to the model verbatim, so it is reproduced
# exactly as authored; edit it deliberately, not as a drive-by cleanup.
GRAPH_QUERY_GEN_TEMPLATE = DATE_TODAY + "\n".join(
    (
        "You are a top tier Prompt Engineering Expert.",
        "A User's Data is stored in a Knowledge Graph.",
        "Your main task is to read the User Question below and give a optimized Question prompt in Natural Language.",
        "Question prompt will be used by a LLM to easlily get data from Knowledge Graph's.",
        "Make sure to only return the promt text thats it. Never change the meaning of users question.",
        "Here are the examples of the User's Data Documents that is stored in Knowledge Graph:",
        "{context}",
        "Note: Do not include any explanations or apologies in your responses.",
        "Do not include any text except the generated promt text.",
        "Question: {question}",
        "Prompt For Cypher Query Construction:",
    )
)
# Prompt object wrapping GRAPH_QUERY_GEN_TEMPLATE; it is filled with the two
# variables named in the template text: "context" and "question".
GRAPH_QUERY_GEN_PROMPT = PromptTemplate(
    template=GRAPH_QUERY_GEN_TEMPLATE,
    input_variables=["context", "question"],
)
CYPHER_QA_TEMPLATE = DATE_TODAY + """You are an assistant that helps to form nice and human understandable answers.
The information part contains the provided information that you must use to construct an answer.
The provided information is authoritative, you must never doubt it or try to use your internal knowledge to correct it.
Make the answer sound as a response to the question. Do not mention that you based the result on the given information.
Only give the answer if it satisfies the user requirements in Question. Else return exactly 'don't know' as answer.
Here are the examples:
Question: Website on which the most time was spend on?
Context:[{'d.VisitedWebPageURL': 'https://stackoverflow.com/questions/59873698/the-default-export-is-not-a-react-component-in-page-nextjs', 'totalDuration': 8889167}]
Helpful Answer: You visited https://stackoverflow.com/questions/59873698/the-default-export-is-not-a-react-component-in-page-nextjs for 8889167 milliseconds or 8889.167 seconds.
Question: What type of general topics I explore the most?
Context:[['Topic': 'Langchain', 'topicCount': 5], ['Topic': 'Graphrag', 'topicCount': 2], ['Topic': 'Ai', 'topicCount': 2], ['Topic': 'Fastapi', 'topicCount': 2], ['Topic': 'Nextjs', 'topicCount': 1]]
Helpful Answer: You mostly explore about Langchain, Graphrag, Ai, Fastapi and Nextjs.
Follow this example when generating answers.
If the provided information is empty, then and only then, return exactly 'don't know' as answer.
If the provided information is empty or incomplete, return exactly 'don't know' as answer.
Information:
{context}
@ -54,7 +78,6 @@ Note: Do not include any explanations or apologies in your responses.
Do not respond to any questions that might ask anything else than for you to construct a Cypher statement.
Do not include any text except the generated Cypher statement.
The question is:
{question}"""
CYPHER_GENERATION_PROMPT = PromptTemplate(

View file

@ -38,4 +38,8 @@ class RetrivedDocList(BaseModel):
# Response model for a user query: the answer text together with the list of
# document-metadata entries returned alongside it.
class UserQueryResponse(BaseModel):
    # Answer text sent back to the caller.
    response: str
    # Metadata (DocMeta) for the documents returned with the answer.
    relateddocs: List[DocMeta]
# Model carrying a single search string.
# NOTE(review): appears to be used as a structured-output schema for the LLM,
# with `searchquery` then fed to the vector similarity search — confirm
# against the caller.
class VectorSearchQuery(BaseModel):
    # The search text.
    searchquery: str

View file

@ -6,13 +6,14 @@ from langchain_core.documents import Document
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import Neo4jVector
from envs import ACCESS_TOKEN_EXPIRE_MINUTES, ALGORITHM, API_SECRET_KEY, SECRET_KEY
from prompts import CYPHER_QA_PROMPT, DOC_DESCRIPTION_PROMPT, SIMILARITY_SEARCH_PROMPT , CYPHER_GENERATION_PROMPT, DOCUMENT_METADATA_EXTRACTION_PROMT
from pydmodels import DescriptionResponse, UserQuery, DocMeta, RetrivedDocList, UserQueryResponse
from prompts import CYPHER_QA_PROMPT, DOC_DESCRIPTION_PROMPT, GRAPH_QUERY_GEN_PROMPT, SIMILARITY_SEARCH_PROMPT , CYPHER_GENERATION_PROMPT, DOCUMENT_METADATA_EXTRACTION_PROMT
from pydmodels import DescriptionResponse, UserQuery, DocMeta, RetrivedDocList, UserQueryResponse, VectorSearchQuery
from langchain_experimental.text_splitter import SemanticChunker
#Our Imps
from LLMGraphTransformer import LLMGraphTransformer
from langchain_openai import ChatOpenAI
from DataExample import examples
# Auth Libs
from fastapi import FastAPI, Depends, HTTPException, Request, status
@ -39,8 +40,6 @@ def get_user_query_response(data: UserQuery, response_model=UserQueryResponse):
raise HTTPException(status_code=401, detail="Unauthorized")
query = data.query
graph = Neo4jGraph(url=data.neourl, username=data.neouser, password=data.neopass)
llm = ChatOpenAI(
@ -51,6 +50,13 @@ def get_user_query_response(data: UserQuery, response_model=UserQueryResponse):
api_key=data.openaikey
)
# Query Expansion
searchchain = GRAPH_QUERY_GEN_PROMPT | llm
qry = searchchain.invoke({"question": data.query, "context": examples})
query = qry.content
embeddings = OpenAIEmbeddings(
model="text-embedding-ada-002",
api_key=data.openaikey,
@ -96,19 +102,22 @@ def get_user_query_response(data: UserQuery, response_model=UserQueryResponse):
)
docstoreturn = [i for n, i in enumerate(docstoreturn) if i not in docstoreturn[n + 1:]]
# responsegrp = chain.invoke({"query": query})
try:
response = chain.invoke({"query": query})
if "don't know" in response["result"]:
responsegrp = chain.invoke({"query": query})
if "don't know" in responsegrp["result"]:
raise Exception("No response from graph")
structured_llm = llm.with_structured_output(RetrivedDocList)
structured_llm = llm.with_structured_output(VectorSearchQuery)
doc_extract_chain = DOCUMENT_METADATA_EXTRACTION_PROMT | structured_llm
query = doc_extract_chain.invoke(response["intermediate_steps"][1]["context"])
newquery = doc_extract_chain.invoke(responsegrp["intermediate_steps"][1]["context"])
docs = vector_index.similarity_search(query.searchquery,k=5)
docs = vector_index.similarity_search(newquery.searchquery,k=5)
docstoreturn = []
@ -127,12 +136,12 @@ def get_user_query_response(data: UserQuery, response_model=UserQueryResponse):
docstoreturn = [i for n, i in enumerate(docstoreturn) if i not in docstoreturn[n + 1:]]
return UserQueryResponse(relateddocs=docstoreturn,response=response["result"])
return UserQueryResponse(relateddocs=docstoreturn,response=responsegrp["result"])
except:
# Fallback to Similarity Search RAG
searchchain = SIMILARITY_SEARCH_PROMPT | llm
response = searchchain.invoke({"question": query, "context": docs})
response = searchchain.invoke({"question": data.query, "context": docs})
return UserQueryResponse(relateddocs=docstoreturn,response=response.content)