"""Provider-neutral message formatting helpers."""

# Reconstructed from patch f6eb955 ("refactor(gateway): share outbound text
# splitting"), new file surfsense_backend/app/gateway/base/formatting.py.
# The same patch also makes chunk_message() in
# surfsense_backend/app/gateway/telegram/formatting.py call
# split_text_message(text, max_chars=max_units) whenever max_units differs
# from MAX_TELEGRAM_MESSAGE_UNITS.

from __future__ import annotations

MAX_GATEWAY_TEXT_CHARS = 4096


def split_text_message(
    text: str,
    *,
    max_chars: int = MAX_GATEWAY_TEXT_CHARS,
) -> list[str]:
    """Split outbound text at readable boundaries without exceeding platform caps.

    The text is consumed front to back. Each chunk ends at the last space or
    newline found inside the first ``max_chars`` characters, provided that
    boundary lies past ``max(200, max_chars // 2)``; otherwise the chunk is cut
    hard at exactly ``max_chars`` characters. Whitespace around each cut is
    dropped, so chunks do not start or end with the separator that was used.

    Args:
        text: The message to split. Length is measured in ``str`` characters.
        max_chars: Maximum length of any returned chunk. Must be at least 1.

    Returns:
        The chunks in order. Never empty: empty ``text`` yields ``[""]``.

    Raises:
        ValueError: If ``max_chars`` is smaller than 1.
    """
    if max_chars < 1:
        # A non-positive cap can never be honoured and used to make the loop
        # below spin forever (split point 0 means no progress).
        raise ValueError(f"max_chars must be a positive integer, got {max_chars!r}")
    if not text:
        return [""]

    # Boundaries at or before this offset are ignored so that a stray early
    # space does not produce a tiny chunk. Note that for max_chars <= 200 no
    # boundary can qualify, so such caps always split hard.
    min_boundary = max(200, max_chars // 2)

    chunks: list[str] = []
    remaining = text
    while len(remaining) > max_chars:
        window = remaining[:max_chars]
        # Searching for "\n\n" or ". " separately is unnecessary: each of
        # those matches implies a later "\n" / " " match, which is the one
        # that would win the max() anyway.
        boundary = max(window.rfind("\n"), window.rfind(" "))
        if boundary > min_boundary:
            # Cut just after the whitespace character; rstrip() below removes
            # it, leaving e.g. the sentence-ending period on the chunk.
            split_at = boundary + 1
        else:
            # No usable boundary: cut at the cap itself (not one past it).
            split_at = max_chars

        chunk = remaining[:split_at].rstrip()
        # Fall back to the raw slice if stripping left nothing, but never
        # reach past split_at, or that text would be emitted twice.
        chunks.append(chunk or remaining[:split_at])
        remaining = remaining[split_at:].lstrip()

    if remaining:
        chunks.append(remaining)
    return chunks