feat: Multi File Support

This commit is contained in:
DESKTOP-RTLN3BA\$punk 2024-10-08 01:33:16 -07:00
parent 743a43f50b
commit a7f42f0a57
4 changed files with 19 additions and 30 deletions

View file

@ -324,11 +324,9 @@ class HIndices:
for i, doc in enumerate(contextdocs):
content = f":DOCUMENT {str(i)}\n"
content += f"=======================================METADATA==================================== \n"
content += f"Webpage Url : {doc.metadata['VisitedWebPageURL']} \n"
content += f"Webpage Title : {doc.metadata['VisitedWebPageTitle']} \n"
content += f"Accessed on (Date With Time In ISO String): {doc.metadata['VisitedWebPageDateWithTimeInISOString']} \n"
content += f"{doc.metadata} \n"
content += f"===================================================================================== \n"
content += f"Webpage CONTENT CHUCK: \n\n {doc.page_content} \n\n"
content += f"DOCUMENT CONTENT: \n\n {doc.page_content} \n\n"
content += f"===================================================================================== \n"
context_to_answer += content
@ -362,11 +360,9 @@ class HIndices:
for i, doc in enumerate(top_summaries_compressed_docs):
content = f":DOCUMENT {str(i)}\n"
content += f"=======================================METADATA==================================== \n"
content += f"Webpage Url : {doc.metadata['VisitedWebPageURL']} \n"
content += f"Webpage Title : {doc.metadata['VisitedWebPageTitle']} \n"
content += f"Accessed on (Date With Time In ISO String): {doc.metadata['VisitedWebPageDateWithTimeInISOString']} \n"
content += f"{doc.metadata} \n"
content += f"===================================================================================== \n"
content += f"Webpage CONTENT CHUCK: \n\n {doc.page_content} \n\n"
content += f"DOCUMENT CONTENT: \n\n {doc.page_content} \n\n"
content += f"===================================================================================== \n"
context_to_answer += content

View file

@ -50,13 +50,7 @@ Detailed Report:"""
ANSWER_WITH_CITATIONS = DATE_TODAY + """You're a helpful AI assistant. Given a user question and some Webpage article snippets, \
answer the user question and provide citations. If none of the articles answer the question, just say you don't know.
Remember, you must return both an answer and citations. Citation information is in given Document Metadata.
A citation consists of a Web Page Title. Website Name, URL. Accessed Day Month Year.
Citations Example:
Citations
1. Citing Sources in Academic Writing. Scribbr. www.scribbr.com/category/citing-sources/. Accessed 4 March 2021.
2. What is SEO? Backlinko. www.backlinko.com/seo. Accessed 10 March 2022.
Remember, you must return both an answer and citations. Citation information is given in Document Metadata.
Here are the Webpage article snippets:
{context}

View file

@ -14,14 +14,18 @@ class DocMeta(BaseModel):
VisitedWebPageReffererURL: Optional[str] = Field(default=None, description="VisitedWebPageReffererURL of Document")
VisitedWebPageVisitDurationInMilliseconds: Optional[int] = Field(default=None, description="VisitedWebPageVisitDurationInMilliseconds of Document"),
# class DocWithContent(BaseModel):
# BrowsingSessionId: Optional[str] = Field(default=None, description="BrowsingSessionId of Document")
# VisitedWebPageURL: Optional[str] = Field(default=None, description="VisitedWebPageURL of Document")
# VisitedWebPageTitle: Optional[str] = Field(default=None, description="VisitedWebPageTitle of Document")
# VisitedWebPageDateWithTimeInISOString: Optional[str] = Field(default=None, description="VisitedWebPageDateWithTimeInISOString of Document")
# VisitedWebPageReffererURL: Optional[str] = Field(default=None, description="VisitedWebPageReffererURL of Document")
# VisitedWebPageVisitDurationInMilliseconds: Optional[int] = Field(default=None, description="VisitedWebPageVisitDurationInMilliseconds of Document"),
# VisitedWebPageContent: Optional[str] = Field(default=None, description="Visited WebPage Content in markdown of Document")
class DocWithContent(BaseModel):
BrowsingSessionId: Optional[str] = Field(default=None, description="BrowsingSessionId of Document")
VisitedWebPageURL: Optional[str] = Field(default=None, description="VisitedWebPageURL of Document")
VisitedWebPageTitle: Optional[str] = Field(default=None, description="VisitedWebPageTitle of Document")
VisitedWebPageDateWithTimeInISOString: Optional[str] = Field(default=None, description="VisitedWebPageDateWithTimeInISOString of Document")
VisitedWebPageReffererURL: Optional[str] = Field(default=None, description="VisitedWebPageReffererURL of Document")
VisitedWebPageVisitDurationInMilliseconds: Optional[int] = Field(default=None, description="VisitedWebPageVisitDurationInMilliseconds of Document"),
VisitedWebPageContent: Optional[str] = Field(default=None, description="Visited WebPage Content in markdown of Document")
DocMetadata: Optional[str] = Field(default=None, description="Document Metadata")
Content: Optional[str] = Field(default=None, description="Document Page Content")
class DocumentsToDelete(BaseModel):
ids_to_delete: List[str]

View file

@ -216,14 +216,9 @@ def get_user_query_response(data: UserQuery, response_model=UserQueryResponse):
returnDocs = []
for doc in combined_docs_unique_documents:
entry = DocWithContent(
BrowsingSessionId=doc.metadata['BrowsingSessionId'],
VisitedWebPageURL=doc.metadata['VisitedWebPageURL'],
VisitedWebPageContent=doc.page_content,
VisitedWebPageTitle=doc.metadata['VisitedWebPageTitle'],
VisitedWebPageDateWithTimeInISOString=doc.metadata['VisitedWebPageDateWithTimeInISOString'],
VisitedWebPageReffererURL=doc.metadata['VisitedWebPageReffererURL'],
VisitedWebPageVisitDurationInMilliseconds=doc.metadata['VisitedWebPageVisitDurationInMilliseconds'],
)
DocMetadata=stringify(doc.metadata),
Content=doc.page_content
)
returnDocs.append(entry)