From a7f42f0a575e1d53b29661a79508121c1e688178 Mon Sep 17 00:00:00 2001 From: "DESKTOP-RTLN3BA\\$punk" Date: Tue, 8 Oct 2024 01:33:16 -0700 Subject: [PATCH] feat: Multi File Support --- backend/HIndices.py | 12 ++++-------- backend/prompts.py | 8 +------- backend/pydmodels.py | 18 +++++++++++------- backend/server.py | 11 +++-------- 4 files changed, 19 insertions(+), 30 deletions(-) diff --git a/backend/HIndices.py b/backend/HIndices.py index b08f1c94f..e9f85f1b7 100644 --- a/backend/HIndices.py +++ b/backend/HIndices.py @@ -324,11 +324,9 @@ class HIndices: for i, doc in enumerate(contextdocs): content = f":DOCUMENT {str(i)}\n" content += f"=======================================METADATA==================================== \n" - content += f"Webpage Url : {doc.metadata['VisitedWebPageURL']} \n" - content += f"Webpage Title : {doc.metadata['VisitedWebPageTitle']} \n" - content += f"Accessed on (Date With Time In ISO String): {doc.metadata['VisitedWebPageDateWithTimeInISOString']} \n" + content += f"{doc.metadata} \n" content += f"===================================================================================== \n" - content += f"Webpage CONTENT CHUCK: \n\n {doc.page_content} \n\n" + content += f"DOCUMENT CONTENT: \n\n {doc.page_content} \n\n" content += f"===================================================================================== \n" context_to_answer += content @@ -362,11 +360,9 @@ class HIndices: for i, doc in enumerate(top_summaries_compressed_docs): content = f":DOCUMENT {str(i)}\n" content += f"=======================================METADATA==================================== \n" - content += f"Webpage Url : {doc.metadata['VisitedWebPageURL']} \n" - content += f"Webpage Title : {doc.metadata['VisitedWebPageTitle']} \n" - content += f"Accessed on (Date With Time In ISO String): {doc.metadata['VisitedWebPageDateWithTimeInISOString']} \n" + content += f"{doc.metadata} \n" content += f"===================================================================================== \n" - content += f"Webpage CONTENT CHUCK: \n\n {doc.page_content} \n\n" + content += f"DOCUMENT CONTENT: \n\n {doc.page_content} \n\n" content += f"===================================================================================== \n" context_to_answer += content diff --git a/backend/prompts.py b/backend/prompts.py index 393ed643c..c97577e47 100644 --- a/backend/prompts.py +++ b/backend/prompts.py @@ -50,13 +50,7 @@ Detailed Report:""" ANSWER_WITH_CITATIONS = DATE_TODAY + """You're a helpful AI assistant. Given a user question and some Webpage article snippets, \ answer the user question and provide citations. If none of the articles answer the question, just say you don't know. -Remember, you must return both an answer and citations. Citation information is in given Document Metadata. -A citation consists of a “Web Page Title.” Website Name, URL. Accessed Day Month Year. - -Citations Example: -Citations -1. “Citing Sources in Academic Writing.” Scribbr. www.scribbr.com/category/citing-sources/. Accessed 4 March 2021. -2. “What is SEO?” Backlinko. www.backlinko.com/seo. Accessed 10 March 2022. +Remember, you must return both an answer and citations. Citation information is given in Document Metadata. Here are the Webpage article snippets: {context} diff --git a/backend/pydmodels.py b/backend/pydmodels.py index b4285e389..7330d4278 100644 --- a/backend/pydmodels.py +++ b/backend/pydmodels.py @@ -14,14 +14,18 @@ class DocMeta(BaseModel): VisitedWebPageReffererURL: Optional[str] = Field(default=None, description="VisitedWebPageReffererURL of Document") VisitedWebPageVisitDurationInMilliseconds: Optional[int] = Field(default=None, description="VisitedWebPageVisitDurationInMilliseconds of Document"), +# class DocWithContent(BaseModel): +# BrowsingSessionId: Optional[str] = Field(default=None, description="BrowsingSessionId of Document") +# VisitedWebPageURL: Optional[str] = Field(default=None, description="VisitedWebPageURL of Document") +# VisitedWebPageTitle: Optional[str] = Field(default=None, description="VisitedWebPageTitle of Document") +# VisitedWebPageDateWithTimeInISOString: Optional[str] = Field(default=None, description="VisitedWebPageDateWithTimeInISOString of Document") +# VisitedWebPageReffererURL: Optional[str] = Field(default=None, description="VisitedWebPageReffererURL of Document") +# VisitedWebPageVisitDurationInMilliseconds: Optional[int] = Field(default=None, description="VisitedWebPageVisitDurationInMilliseconds of Document"), +# VisitedWebPageContent: Optional[str] = Field(default=None, description="Visited WebPage Content in markdown of Document") + class DocWithContent(BaseModel): - BrowsingSessionId: Optional[str] = Field(default=None, description="BrowsingSessionId of Document") - VisitedWebPageURL: Optional[str] = Field(default=None, description="VisitedWebPageURL of Document") - VisitedWebPageTitle: Optional[str] = Field(default=None, description="VisitedWebPageTitle of Document") - VisitedWebPageDateWithTimeInISOString: Optional[str] = Field(default=None, description="VisitedWebPageDateWithTimeInISOString of Document") - VisitedWebPageReffererURL: Optional[str] = Field(default=None, description="VisitedWebPageReffererURL of Document") - VisitedWebPageVisitDurationInMilliseconds: Optional[int] = Field(default=None, description="VisitedWebPageVisitDurationInMilliseconds of Document"), - VisitedWebPageContent: Optional[str] = Field(default=None, description="Visited WebPage Content in markdown of Document") + DocMetadata: Optional[str] = Field(default=None, description="Document Metadata") + Content: Optional[str] = Field(default=None, description="Document Page Content") class DocumentsToDelete(BaseModel): ids_to_delete: List[str] diff --git a/backend/server.py b/backend/server.py index ad5eb3c5b..2ab43e50b 100644 --- a/backend/server.py +++ b/backend/server.py @@ -216,14 +216,9 @@ def get_user_query_response(data: UserQuery, response_model=UserQueryResponse): returnDocs = [] for doc in combined_docs_unique_documents: entry = DocWithContent( - BrowsingSessionId=doc.metadata['BrowsingSessionId'], - VisitedWebPageURL=doc.metadata['VisitedWebPageURL'], - VisitedWebPageContent=doc.page_content, - VisitedWebPageTitle=doc.metadata['VisitedWebPageTitle'], - VisitedWebPageDateWithTimeInISOString=doc.metadata['VisitedWebPageDateWithTimeInISOString'], - VisitedWebPageReffererURL=doc.metadata['VisitedWebPageReffererURL'], - VisitedWebPageVisitDurationInMilliseconds=doc.metadata['VisitedWebPageVisitDurationInMilliseconds'], - ) + DocMetadata=stringify(doc.metadata), + Content=doc.page_content + ) returnDocs.append(entry)