From 31982cea9a7282749f2e06d6dd290dd083b2be0d Mon Sep 17 00:00:00 2001 From: "DESKTOP-RTLN3BA\\$punk" Date: Tue, 14 Oct 2025 14:19:48 -0700 Subject: [PATCH] chore: removed content truncation for better UI --- .../app/agents/researcher/nodes.py | 78 ++++--------------- .../app/connectors/slack_history.py | 40 ---------- .../app/services/connector_service.py | 49 ++++-------- .../app/services/streaming_service.py | 2 +- .../connector_indexers/confluence_indexer.py | 4 +- .../google_calendar_indexer.py | 4 +- .../connector_indexers/linear_indexer.py | 4 +- .../tasks/connector_indexers/luma_indexer.py | 4 +- surfsense_web/components/homepage/navbar.tsx | 2 +- 9 files changed, 42 insertions(+), 145 deletions(-) diff --git a/surfsense_backend/app/agents/researcher/nodes.py b/surfsense_backend/app/agents/researcher/nodes.py index 2b113785a..73faf09e6 100644 --- a/surfsense_backend/app/agents/researcher/nodes.py +++ b/surfsense_backend/app/agents/researcher/nodes.py @@ -71,9 +71,7 @@ def extract_sources_from_documents( source = { "id": doc.get("chunk_id", source_id_counter), "title": document_info.get("title", "Untitled Document"), - "description": doc.get("content", "")[:100] + "..." - if len(doc.get("content", "")) > 100 - else doc.get("content", ""), + "description": doc.get("content", "").strip(), "url": metadata.get("url", metadata.get("page_url", "")), } @@ -204,11 +202,7 @@ async def fetch_documents_by_ids( title += f" ({issue_state})" # Create description - description = ( - doc.content[:100] + "..." - if len(doc.content) > 100 - else doc.content - ) + description = doc.content if comment_count: description += f" | Comments: {comment_count}" @@ -229,11 +223,7 @@ async def fetch_documents_by_ids( if message_date: title += f" ({message_date})" - description = ( - doc.content[:100] + "..." 
- if len(doc.content) > 100 - else doc.content - ) + description = doc.content url = ( f"https://slack.com/app_redirect?channel={channel_id}" if channel_id @@ -246,11 +236,7 @@ async def fetch_documents_by_ids( page_id = metadata.get("page_id", "") title = f"Notion: {page_title}" - description = ( - doc.content[:100] + "..." - if len(doc.content) > 100 - else doc.content - ) + description = doc.content url = ( f"https://notion.so/{page_id.replace('-', '')}" if page_id @@ -261,11 +247,7 @@ async def fetch_documents_by_ids( title = f"GitHub: {doc.title}" description = metadata.get( "description", - ( - doc.content[:100] + "..." - if len(doc.content) > 100 - else doc.content - ), + (doc.content), ) url = metadata.get("url", "") @@ -281,11 +263,7 @@ async def fetch_documents_by_ids( description = metadata.get( "description", - ( - doc.content[:100] + "..." - if len(doc.content) > 100 - else doc.content - ), + (doc.content), ) url = ( f"https://www.youtube.com/watch?v={video_id}" @@ -304,11 +282,7 @@ async def fetch_documents_by_ids( if message_date: title += f" ({message_date})" - description = ( - doc.content[:100] + "..." - if len(doc.content) > 100 - else doc.content - ) + description = doc.content if guild_id and channel_id: url = f"https://discord.com/channels/{guild_id}/{channel_id}" @@ -329,11 +303,7 @@ async def fetch_documents_by_ids( if status: title += f" ({status})" - description = ( - doc.content[:100] + "..." - if len(doc.content) > 100 - else doc.content - ) + description = doc.content if priority: description += f" | Priority: {priority}" if issue_type: @@ -395,11 +365,7 @@ async def fetch_documents_by_ids( except Exception: pass - description = ( - doc.content[:100] + "..." 
- if len(doc.content) > 100 - else doc.content - ) + description = doc.content if location: description += f" | Location: {location}" if calendar_id and calendar_id != "primary": @@ -437,11 +403,8 @@ async def fetch_documents_by_ids( except Exception: pass - description = ( - doc.content[:100] + "..." - if len(doc.content) > 100 - else doc.content - ) + description = doc.content + if location_name: description += f" | Venue: {location_name}" elif meeting_url: @@ -466,11 +429,7 @@ async def fetch_documents_by_ids( ) title += f" (visited: {formatted_date})" - description = ( - doc.content[:100] + "..." - if len(doc.content) > 100 - else doc.content - ) + description = doc.content url = webpage_url elif doc_type == "CRAWLED_URL": @@ -479,22 +438,15 @@ async def fetch_documents_by_ids( "og:description", metadata.get( "ogDescription", - ( - doc.content[:100] + "..." - if len(doc.content) > 100 - else doc.content - ), + (doc.content), ), ) url = metadata.get("url", "") else: # FILE and other types title = doc.title - description = ( - doc.content[:100] + "..." 
- if len(doc.content) > 100 - else doc.content - ) + description = doc.content + url = metadata.get("url", "") # Create source entry diff --git a/surfsense_backend/app/connectors/slack_history.py b/surfsense_backend/app/connectors/slack_history.py index 982dc8a67..36160c30b 100644 --- a/surfsense_backend/app/connectors/slack_history.py +++ b/surfsense_backend/app/connectors/slack_history.py @@ -379,43 +379,3 @@ class SlackHistory: formatted["user_name"] = "Unknown" return formatted - - -# Example usage (uncomment to use): -""" -if __name__ == "__main__": - # Set your token here or via environment variable - token = os.environ.get("SLACK_API_TOKEN", "xoxb-your-token-here") - - slack = SlackHistory(token) - - # Get all channels - try: - channels = slack.get_all_channels() - print("Available channels:") - for name, channel_id in sorted(channels.items()): - print(f"- {name}: {channel_id}") - - # Example: Get history for a specific channel and date range - channel_id = channels.get("general") - if channel_id: - messages, error = slack.get_history_by_date_range( - channel_id=channel_id, - start_date="2023-01-01", - end_date="2023-01-31", - limit=500 - ) - - if error: - print(f"Error: {error}") - else: - print(f"\nRetrieved {len(messages)} messages from #general") - - # Print formatted messages - for msg in messages[:10]: # Show first 10 messages - formatted = slack.format_message(msg, include_user_info=True) - print(f"[{formatted['datetime']}] {formatted['user_name']}: {formatted['text']}") - - except Exception as e: - print(f"Error: {e}") -""" diff --git a/surfsense_backend/app/services/connector_service.py b/surfsense_backend/app/services/connector_service.py index ca323f510..369630226 100644 --- a/surfsense_backend/app/services/connector_service.py +++ b/surfsense_backend/app/services/connector_service.py @@ -115,7 +115,7 @@ class ConnectorService: "title": document.get("title", "Untitled Document"), "description": metadata.get( "og:description", - 
metadata.get("ogDescription", chunk.get("content", "")[:100]), + metadata.get("ogDescription", chunk.get("content", "")), ), "url": metadata.get("url", ""), } @@ -189,7 +189,7 @@ class ConnectorService: "title": document.get("title", "Untitled Document"), "description": metadata.get( "og:description", - metadata.get("ogDescription", chunk.get("content", "")[:100]), + metadata.get("ogDescription", chunk.get("content", "")), ), "url": metadata.get("url", ""), } @@ -330,7 +330,7 @@ class ConnectorService: source = { "id": self.source_id_counter, "title": result.get("title", "Tavily Result"), - "description": result.get("content", "")[:100], + "description": result.get("content", ""), "url": result.get("url", ""), } sources_list.append(source) @@ -520,7 +520,7 @@ class ConnectorService: for result in searx_results: description = result.get("content") or result.get("snippet") or "" if len(description) > 160: - description = f"{description[:157]}..." + description = f"{description}" source = { "id": self.source_id_counter, @@ -621,9 +621,7 @@ class ConnectorService: title += f" ({message_date})" # Create a more descriptive description for Slack messages - description = chunk.get("content", "")[:100] - if len(description) == 100: - description += "..." + description = chunk.get("content", "") # For URL, we can use a placeholder or construct a URL to the Slack channel if available url = "" @@ -717,7 +715,7 @@ class ConnectorService: title += f" (indexed: {indexed_at})" # Create a more descriptive description for Notion pages - description = chunk.get("content", "")[:100] + description = chunk.get("content", "") if len(description) == 100: description += "..." @@ -829,7 +827,7 @@ class ConnectorService: title += f" (visited: {visit_date})" # Create a more descriptive description for extension data - description = chunk.get("content", "")[:100] + description = chunk.get("content", "") if len(description) == 100: description += "..." 
@@ -936,9 +934,7 @@ class ConnectorService: title += f" - {channel_name}" # Create a more descriptive description for YouTube videos - description = metadata.get( - "description", chunk.get("content", "")[:100] - ) + description = metadata.get("description", chunk.get("content", "")) if len(description) == 100: description += "..." @@ -1024,7 +1020,7 @@ class ConnectorService: "title", "GitHub Document" ), # Use specific title if available "description": metadata.get( - "description", chunk.get("content", "")[:100] + "description", chunk.get("content", "") ), # Use description or content preview "url": metadata.get("url", ""), # Use URL if available in metadata } @@ -1110,7 +1106,7 @@ class ConnectorService: title += f" ({issue_state})" # Create a more descriptive description for Linear issues - description = chunk.get("content", "")[:100] + description = chunk.get("content", "") if len(description) == 100: description += "..." @@ -1222,7 +1218,7 @@ class ConnectorService: title += f" ({status})" # Create a more descriptive description for Jira issues - description = chunk.get("content", "")[:100] + description = chunk.get("content", "") if len(description) == 100: description += "..." @@ -1356,9 +1352,7 @@ class ConnectorService: title += f" ({start_time})" # Create a more descriptive description for calendar events - description = chunk.get("content", "")[:100] - if len(description) == 100: - description += "..." + description = chunk.get("content", "") # Add event info to description info_parts = [] @@ -1573,9 +1567,7 @@ class ConnectorService: title += f" (from {sender})" # Create a more descriptive description for Gmail messages - description = chunk.get("content", "")[:150] - if len(description) == 150: - description += "..." 
+ description = chunk.get("content", "") # Add message info to description info_parts = [] @@ -1689,9 +1681,7 @@ class ConnectorService: title += f" ({space_key})" # Create a more descriptive description for Confluence pages - description = chunk.get("content", "")[:100] - if len(description) == 100: - description += "..." + description = chunk.get("content", "") # For URL, we can use a placeholder or construct a URL to the Confluence page if available url = "" # TODO: Add base_url to metadata @@ -1908,7 +1898,7 @@ class ConnectorService: result.name if hasattr(result, "name") else "Linkup Result" ), "description": ( - result.content[:100] if hasattr(result, "content") else "" + result.content if hasattr(result, "content") else "" ), "url": result.url if hasattr(result, "url") else "", } @@ -2024,9 +2014,7 @@ class ConnectorService: title += f" ({message_date})" # Create a more descriptive description for Discord messages - description = chunk.get("content", "")[:100] - if len(description) == 100: - description += "..." + description = chunk.get("content", "") url = "" guild_id = metadata.get("guild_id", "") @@ -2143,10 +2131,7 @@ class ConnectorService: except Exception: title += f" ({start_time})" - # Create a more descriptive description for Luma events - description = chunk.get("content", "")[:150] - if len(description) == 150: - description += "..." 
+ description = chunk.get("content", "") # Add event info to description info_parts = [] diff --git a/surfsense_backend/app/services/streaming_service.py b/surfsense_backend/app/services/streaming_service.py index 40bd430cd..98c0d3ac5 100644 --- a/surfsense_backend/app/services/streaming_service.py +++ b/surfsense_backend/app/services/streaming_service.py @@ -66,7 +66,7 @@ class StreamingService: for source in group.get("sources", []): node = { "id": str(source.get("id", "")), - "text": source.get("description", ""), + "text": source.get("description", "").strip(), "url": source.get("url", ""), "metadata": { "title": source.get("title", ""), diff --git a/surfsense_backend/app/tasks/connector_indexers/confluence_indexer.py b/surfsense_backend/app/tasks/connector_indexers/confluence_indexer.py index 28cb3b1f4..f9d424b30 100644 --- a/surfsense_backend/app/tasks/connector_indexers/confluence_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/confluence_indexer.py @@ -260,8 +260,8 @@ async def index_confluence_pages( ) if page_content: # Take first 500 characters of content for summary - content_preview = page_content[:500] - if len(page_content) > 500: + content_preview = page_content[:1000] + if len(page_content) > 1000: content_preview += "..." 
summary_content += f"Content Preview: {content_preview}\n\n" summary_content += f"Comments: {comment_count}" diff --git a/surfsense_backend/app/tasks/connector_indexers/google_calendar_indexer.py b/surfsense_backend/app/tasks/connector_indexers/google_calendar_indexer.py index be5169612..22173a41a 100644 --- a/surfsense_backend/app/tasks/connector_indexers/google_calendar_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/google_calendar_indexer.py @@ -296,8 +296,8 @@ async def index_google_calendar_events( if location: summary_content += f"Location: {location}\n" if description: - desc_preview = description[:300] - if len(description) > 300: + desc_preview = description[:1000] + if len(description) > 1000: desc_preview += "..." summary_content += f"Description: {desc_preview}\n" summary_embedding = config.embedding_model_instance.embed( diff --git a/surfsense_backend/app/tasks/connector_indexers/linear_indexer.py b/surfsense_backend/app/tasks/connector_indexers/linear_indexer.py index aca1e2040..4730fa868 100644 --- a/surfsense_backend/app/tasks/connector_indexers/linear_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/linear_indexer.py @@ -254,8 +254,8 @@ async def index_linear_issues( else: # Fallback to simple summary if no LLM configured # Truncate description if it's too long for the summary - if description and len(description) > 500: - description = description[:497] + "..." + if description and len(description) > 1000: + description = description[:997] + "..." 
summary_content = f"Linear Issue {issue_identifier}: {issue_title}\n\nStatus: {state}\n\n" if description: summary_content += f"Description: {description}\n\n" diff --git a/surfsense_backend/app/tasks/connector_indexers/luma_indexer.py b/surfsense_backend/app/tasks/connector_indexers/luma_indexer.py index 3d8970654..226494008 100644 --- a/surfsense_backend/app/tasks/connector_indexers/luma_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/luma_indexer.py @@ -310,8 +310,8 @@ async def index_luma_events( if host_names: summary_content += f"Hosts: {host_names}\n" if description: - desc_preview = description[:300] - if len(description) > 300: + desc_preview = description[:1000] + if len(description) > 1000: desc_preview += "..." summary_content += f"Description: {desc_preview}\n" diff --git a/surfsense_web/components/homepage/navbar.tsx b/surfsense_web/components/homepage/navbar.tsx index 731db4469..8b2041c3b 100644 --- a/surfsense_web/components/homepage/navbar.tsx +++ b/surfsense_web/components/homepage/navbar.tsx @@ -86,7 +86,7 @@ const DesktopNav = ({ navItems, isScrolled }: any) => { className="hidden rounded-full px-3 py-2 hover:bg-gray-100 dark:hover:bg-neutral-800 transition-colors md:flex items-center gap-1.5" > - 8.3k + 9.5k