mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-06-08 20:25:19 +02:00
feat: add internal metadata keys and clean metadata in document formatting
This commit is contained in:
parent
64a83f7ccc
commit
0fd03709c6
1 changed file with 18 additions and 1 deletion
|
|
@@ -353,6 +353,20 @@ def _compute_tool_output_budget(max_input_tokens: int | None) -> int:
|
||||||
return max(_MIN_TOOL_OUTPUT_CHARS, min(budget, _MAX_TOOL_OUTPUT_CHARS))
|
return max(_MIN_TOOL_OUTPUT_CHARS, min(budget, _MAX_TOOL_OUTPUT_CHARS))
|
||||||
|
|
||||||
|
|
||||||
|
_INTERNAL_METADATA_KEYS: frozenset[str] = frozenset(
|
||||||
|
{
|
||||||
|
"message_id",
|
||||||
|
"thread_id",
|
||||||
|
"event_id",
|
||||||
|
"calendar_id",
|
||||||
|
"google_drive_file_id",
|
||||||
|
"page_id",
|
||||||
|
"issue_id",
|
||||||
|
"connector_id",
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def format_documents_for_context(
|
def format_documents_for_context(
|
||||||
documents: list[dict[str, Any]],
|
documents: list[dict[str, Any]],
|
||||||
*,
|
*,
|
||||||
|
|
@@ -481,7 +495,10 @@ def format_documents_for_context(
|
||||||
total_docs = len(grouped)
|
total_docs = len(grouped)
|
||||||
|
|
||||||
for doc_idx, g in enumerate(grouped.values()):
|
for doc_idx, g in enumerate(grouped.values()):
|
||||||
metadata_json = json.dumps(g["metadata"], ensure_ascii=False)
|
metadata_clean = {
|
||||||
|
k: v for k, v in g["metadata"].items() if k not in _INTERNAL_METADATA_KEYS
|
||||||
|
}
|
||||||
|
metadata_json = json.dumps(metadata_clean, ensure_ascii=False)
|
||||||
is_live_search = g["document_type"] in live_search_connectors
|
is_live_search = g["document_type"] in live_search_connectors
|
||||||
|
|
||||||
doc_lines: list[str] = [
|
doc_lines: list[str] = [
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue