refactor(gateway): share outbound text splitting

This commit is contained in:
Anish Sarkar 2026-05-29 10:17:28 +05:30
parent 3faaa25af6
commit f6eb955676
2 changed files with 50 additions and 8 deletions

View file

@ -0,0 +1,38 @@
"""Provider-neutral message formatting helpers."""
from __future__ import annotations
MAX_GATEWAY_TEXT_CHARS = 4096


def split_text_message(
    text: str,
    *,
    max_chars: int = MAX_GATEWAY_TEXT_CHARS,
) -> list[str]:
    """Split outbound text into chunks of at most ``max_chars`` characters.

    A chunk is cut after the last space or newline found in its first
    ``max_chars`` characters, provided that position lies beyond
    ``max(200, max_chars // 2)``; otherwise the text is hard-split at exactly
    ``max_chars``. Trailing whitespace is stripped from every chunk except the
    last, and leading whitespace is stripped from the text that follows a
    split, so joining the chunks does not necessarily reproduce ``text``.

    Args:
        text: The message to split.
        max_chars: Maximum length of each returned chunk; must be >= 1.

    Returns:
        The chunks in order. Empty ``text`` yields ``[""]``.

    Raises:
        ValueError: If ``max_chars`` is less than 1.
    """
    # A non-positive cap cannot be honoured and would stall or overshoot.
    if max_chars < 1:
        raise ValueError("max_chars must be a positive integer")
    if not text:
        return [""]

    # Boundaries at or before this offset are rejected so chunks stay
    # reasonably full instead of being cut very early.
    min_boundary = max(200, max_chars // 2)

    chunks: list[str] = []
    remaining = text
    while remaining:
        if len(remaining) <= max_chars:
            chunks.append(remaining)
            break
        candidate = remaining[:max_chars]
        # NOTE(review): taking the max means the latest match wins. Every
        # ". " ends in " " and every "\n\n" ends in "\n", so the winner is
        # always a single space or newline; paragraph/sentence breaks get no
        # priority. Confirm whether a priority order was intended.
        boundary = max(
            candidate.rfind("\n\n"),
            candidate.rfind("\n"),
            candidate.rfind(". "),
            candidate.rfind(" "),
        )
        if boundary <= min_boundary:
            # No usable boundary: hard-split at the cap itself (not cap + 1)
            # so the chunk never exceeds max_chars.
            split_at = max_chars
        elif candidate.startswith(". ", boundary):
            split_at = boundary + 2
        else:
            split_at = boundary + 1
        chunk = remaining[:split_at].rstrip()
        # If stripping left nothing, fall back to exactly the consumed slice;
        # a wider slice would repeat text that is still in `remaining`.
        chunks.append(chunk or remaining[:split_at])
        remaining = remaining[split_at:].lstrip()
    return chunks

View file

@ -4,6 +4,8 @@ from __future__ import annotations
import re
from app.gateway.base.formatting import split_text_message
MARKDOWN_V2_RESERVED = r"_*[]()~`>#+-=|{}.!"
MAX_TELEGRAM_MESSAGE_UNITS = 4096
@ -43,13 +45,15 @@ def chunk_message(
max_units: int = MAX_TELEGRAM_MESSAGE_UNITS,
) -> list[str]:
"""Split a Telegram message at paragraph/sentence boundaries."""
if not text:
return [""]
if max_units == MAX_TELEGRAM_MESSAGE_UNITS:
if not text:
return [""]
chunks: list[str] = []
remaining = text
while remaining:
chunk, remaining = _split_at_boundary(remaining, max_units)
chunks.append(chunk)
return chunks
chunks: list[str] = []
remaining = text
while remaining:
chunk, remaining = _split_at_boundary(remaining, max_units)
chunks.append(chunk)
return chunks
return split_text_message(text, max_chars=max_units)