mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-05-23 19:05:16 +02:00
feat: Multi File Support
This commit is contained in:
parent
743a43f50b
commit
a7f42f0a57
4 changed files with 19 additions and 30 deletions
|
|
@ -324,11 +324,9 @@ class HIndices:
|
|||
for i, doc in enumerate(contextdocs):
|
||||
content = f":DOCUMENT {str(i)}\n"
|
||||
content += f"=======================================METADATA==================================== \n"
|
||||
content += f"Webpage Url : {doc.metadata['VisitedWebPageURL']} \n"
|
||||
content += f"Webpage Title : {doc.metadata['VisitedWebPageTitle']} \n"
|
||||
content += f"Accessed on (Date With Time In ISO String): {doc.metadata['VisitedWebPageDateWithTimeInISOString']} \n"
|
||||
content += f"{doc.metadata} \n"
|
||||
content += f"===================================================================================== \n"
|
||||
content += f"Webpage CONTENT CHUCK: \n\n {doc.page_content} \n\n"
|
||||
content += f"DOCUMENT CONTENT: \n\n {doc.page_content} \n\n"
|
||||
content += f"===================================================================================== \n"
|
||||
|
||||
context_to_answer += content
|
||||
|
|
@ -362,11 +360,9 @@ class HIndices:
|
|||
for i, doc in enumerate(top_summaries_compressed_docs):
|
||||
content = f":DOCUMENT {str(i)}\n"
|
||||
content += f"=======================================METADATA==================================== \n"
|
||||
content += f"Webpage Url : {doc.metadata['VisitedWebPageURL']} \n"
|
||||
content += f"Webpage Title : {doc.metadata['VisitedWebPageTitle']} \n"
|
||||
content += f"Accessed on (Date With Time In ISO String): {doc.metadata['VisitedWebPageDateWithTimeInISOString']} \n"
|
||||
content += f"{doc.metadata} \n"
|
||||
content += f"===================================================================================== \n"
|
||||
content += f"Webpage CONTENT CHUCK: \n\n {doc.page_content} \n\n"
|
||||
content += f"DOCUMENT CONTENT: \n\n {doc.page_content} \n\n"
|
||||
content += f"===================================================================================== \n"
|
||||
|
||||
context_to_answer += content
|
||||
|
|
|
|||
|
|
@ -50,13 +50,7 @@ Detailed Report:"""
|
|||
ANSWER_WITH_CITATIONS = DATE_TODAY + """You're a helpful AI assistant. Given a user question and some Webpage article snippets, \
|
||||
answer the user question and provide citations. If none of the articles answer the question, just say you don't know.
|
||||
|
||||
Remember, you must return both an answer and citations. Citation information is in given Document Metadata.
|
||||
A citation consists of a “Web Page Title.” Website Name, URL. Accessed Day Month Year.
|
||||
|
||||
Citations Example:
|
||||
Citations
|
||||
1. “Citing Sources in Academic Writing.” Scribbr. www.scribbr.com/category/citing-sources/. Accessed 4 March 2021.
|
||||
2. “What is SEO?” Backlinko. www.backlinko.com/seo. Accessed 10 March 2022.
|
||||
Remember, you must return both an answer and citations. Citation information is given in Document Metadata.
|
||||
|
||||
Here are the Webpage article snippets:
|
||||
{context}
|
||||
|
|
|
|||
|
|
@ -14,14 +14,18 @@ class DocMeta(BaseModel):
|
|||
VisitedWebPageReffererURL: Optional[str] = Field(default=None, description="VisitedWebPageReffererURL of Document")
|
||||
VisitedWebPageVisitDurationInMilliseconds: Optional[int] = Field(default=None, description="VisitedWebPageVisitDurationInMilliseconds of Document"),
|
||||
|
||||
# class DocWithContent(BaseModel):
|
||||
# BrowsingSessionId: Optional[str] = Field(default=None, description="BrowsingSessionId of Document")
|
||||
# VisitedWebPageURL: Optional[str] = Field(default=None, description="VisitedWebPageURL of Document")
|
||||
# VisitedWebPageTitle: Optional[str] = Field(default=None, description="VisitedWebPageTitle of Document")
|
||||
# VisitedWebPageDateWithTimeInISOString: Optional[str] = Field(default=None, description="VisitedWebPageDateWithTimeInISOString of Document")
|
||||
# VisitedWebPageReffererURL: Optional[str] = Field(default=None, description="VisitedWebPageReffererURL of Document")
|
||||
# VisitedWebPageVisitDurationInMilliseconds: Optional[int] = Field(default=None, description="VisitedWebPageVisitDurationInMilliseconds of Document"),
|
||||
# VisitedWebPageContent: Optional[str] = Field(default=None, description="Visited WebPage Content in markdown of Document")
|
||||
|
||||
class DocWithContent(BaseModel):
|
||||
BrowsingSessionId: Optional[str] = Field(default=None, description="BrowsingSessionId of Document")
|
||||
VisitedWebPageURL: Optional[str] = Field(default=None, description="VisitedWebPageURL of Document")
|
||||
VisitedWebPageTitle: Optional[str] = Field(default=None, description="VisitedWebPageTitle of Document")
|
||||
VisitedWebPageDateWithTimeInISOString: Optional[str] = Field(default=None, description="VisitedWebPageDateWithTimeInISOString of Document")
|
||||
VisitedWebPageReffererURL: Optional[str] = Field(default=None, description="VisitedWebPageReffererURL of Document")
|
||||
VisitedWebPageVisitDurationInMilliseconds: Optional[int] = Field(default=None, description="VisitedWebPageVisitDurationInMilliseconds of Document"),
|
||||
VisitedWebPageContent: Optional[str] = Field(default=None, description="Visited WebPage Content in markdown of Document")
|
||||
DocMetadata: Optional[str] = Field(default=None, description="Document Metadata")
|
||||
Content: Optional[str] = Field(default=None, description="Document Page Content")
|
||||
|
||||
class DocumentsToDelete(BaseModel):
|
||||
ids_to_delete: List[str]
|
||||
|
|
|
|||
|
|
@ -216,14 +216,9 @@ def get_user_query_response(data: UserQuery, response_model=UserQueryResponse):
|
|||
returnDocs = []
|
||||
for doc in combined_docs_unique_documents:
|
||||
entry = DocWithContent(
|
||||
BrowsingSessionId=doc.metadata['BrowsingSessionId'],
|
||||
VisitedWebPageURL=doc.metadata['VisitedWebPageURL'],
|
||||
VisitedWebPageContent=doc.page_content,
|
||||
VisitedWebPageTitle=doc.metadata['VisitedWebPageTitle'],
|
||||
VisitedWebPageDateWithTimeInISOString=doc.metadata['VisitedWebPageDateWithTimeInISOString'],
|
||||
VisitedWebPageReffererURL=doc.metadata['VisitedWebPageReffererURL'],
|
||||
VisitedWebPageVisitDurationInMilliseconds=doc.metadata['VisitedWebPageVisitDurationInMilliseconds'],
|
||||
)
|
||||
DocMetadata=stringify(doc.metadata),
|
||||
Content=doc.page_content
|
||||
)
|
||||
|
||||
returnDocs.append(entry)
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue