mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-06-02 19:55:18 +02:00
refactor(gateway): share outbound text splitting
This commit is contained in:
parent
3faaa25af6
commit
f6eb955676
2 changed files with 50 additions and 8 deletions
38
surfsense_backend/app/gateway/base/formatting.py
Normal file
38
surfsense_backend/app/gateway/base/formatting.py
Normal file
|
|
@@ -0,0 +1,38 @@
|
|||
"""Provider-neutral message formatting helpers."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
MAX_GATEWAY_TEXT_CHARS = 4096


def split_text_message(
    text: str,
    *,
    max_chars: int = MAX_GATEWAY_TEXT_CHARS,
) -> list[str]:
    """Split outbound text at readable boundaries without exceeding platform caps.

    Every chunk except the last is cut just after the last newline or space
    found in the first ``max_chars`` characters, provided that boundary lies
    past index ``max(200, max_chars // 2)``. When no such boundary exists the
    text is cut hard at exactly ``max_chars`` characters. Whitespace on either
    side of a cut is stripped.

    Args:
        text: Message text to split.
        max_chars: Maximum length, in characters, of any returned chunk.

    Returns:
        The chunks in order; no chunk is longer than ``max_chars``. Empty
        ``text`` yields ``[""]``.

    Raises:
        ValueError: If ``max_chars`` is less than 1.
    """
    if max_chars < 1:
        # A non-positive cap cannot be honoured and previously could loop forever.
        raise ValueError("max_chars must be at least 1")
    if not text:
        return [""]

    # Boundaries at or before this index are ignored so a chunk is never cut
    # much shorter than the cap just to land on whitespace.
    min_boundary = max(200, max_chars // 2)

    chunks: list[str] = []
    remaining = text
    while len(remaining) > max_chars:
        candidate = remaining[:max_chars]
        # A "\n\n" or ". " match always ends in a "\n" or " " at a later
        # index, so the last newline/space is the latest boundary of any kind.
        boundary = max(candidate.rfind("\n"), candidate.rfind(" "))
        # Hard cut at exactly max_chars (not max_chars + 1) when no boundary fits.
        split_at = boundary + 1 if boundary > min_boundary else max_chars

        chunk = remaining[:split_at].rstrip()
        if not chunk:
            # Whitespace-only prefix: emit the full window and advance by the
            # same amount so the chunk and the remainder never overlap.
            split_at = max_chars
            chunk = candidate
        chunks.append(chunk)
        remaining = remaining[split_at:].lstrip()

    if remaining:
        chunks.append(remaining)
    return chunks
|
||||
|
|
@@ -4,6 +4,8 @@ from __future__ import annotations
|
|||
|
||||
import re
|
||||
|
||||
from app.gateway.base.formatting import split_text_message
|
||||
|
||||
MARKDOWN_V2_RESERVED = r"_*[]()~`>#+-=|{}.!"
|
||||
MAX_TELEGRAM_MESSAGE_UNITS = 4096
|
||||
|
||||
|
|
@@ -43,13 +45,15 @@ def chunk_message(
|
|||
max_units: int = MAX_TELEGRAM_MESSAGE_UNITS,
|
||||
) -> list[str]:
|
||||
"""Split a Telegram message at paragraph/sentence boundaries."""
|
||||
if not text:
|
||||
return [""]
|
||||
if max_units == MAX_TELEGRAM_MESSAGE_UNITS:
|
||||
if not text:
|
||||
return [""]
|
||||
|
||||
chunks: list[str] = []
|
||||
remaining = text
|
||||
while remaining:
|
||||
chunk, remaining = _split_at_boundary(remaining, max_units)
|
||||
chunks.append(chunk)
|
||||
return chunks
|
||||
chunks: list[str] = []
|
||||
remaining = text
|
||||
while remaining:
|
||||
chunk, remaining = _split_at_boundary(remaining, max_units)
|
||||
chunks.append(chunk)
|
||||
return chunks
|
||||
return split_text_message(text, max_chars=max_units)
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue