mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-06-02 19:55:18 +02:00
refactor(gateway): share outbound text splitting
This commit is contained in:
parent
3faaa25af6
commit
f6eb955676
2 changed files with 50 additions and 8 deletions
38
surfsense_backend/app/gateway/base/formatting.py
Normal file
38
surfsense_backend/app/gateway/base/formatting.py
Normal file
|
|
@ -0,0 +1,38 @@
|
||||||
|
"""Provider-neutral message formatting helpers."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
MAX_GATEWAY_TEXT_CHARS = 4096


def split_text_message(
    text: str,
    *,
    max_chars: int = MAX_GATEWAY_TEXT_CHARS,
) -> list[str]:
    """Split outbound text at readable boundaries without exceeding platform caps.

    The text is consumed front to back. Each chunk ends at the last newline
    or space inside the next ``max_chars`` characters, provided that boundary
    lies past ``max(200, max_chars // 2)``; otherwise the chunk is cut hard at
    exactly ``max_chars`` characters. Whitespace around a split point is
    dropped.

    Args:
        text: The message to split.
        max_chars: Maximum length of any returned chunk, measured with
            ``len()``. Must be at least 1.

    Returns:
        The chunks in order. Empty input yields ``[""]``, and text that
        already fits is returned unchanged as a single chunk.

    Raises:
        ValueError: If ``max_chars`` is smaller than 1.
    """
    if max_chars < 1:
        # A non-positive cap cannot be satisfied and could stall the loop.
        raise ValueError("max_chars must be a positive integer")
    if not text:
        return [""]

    # Boundaries at or before this offset are ignored so that an early
    # space or newline does not produce a tiny chunk.
    # NOTE(review): for max_chars <= 200 this means every split is a hard
    # cut -- confirm that is intended for small caps.
    min_boundary = max(200, max_chars // 2)

    chunks: list[str] = []
    remaining = text
    while remaining:
        if len(remaining) <= max_chars:
            chunks.append(remaining)
            break

        candidate = remaining[:max_chars]
        # Every "\n\n" contains a later "\n" and every ". " a later " ", so
        # the last newline or space is always the right-most boundary.
        boundary = max(candidate.rfind("\n"), candidate.rfind(" "))
        if boundary > min_boundary:
            # Consume the boundary character; rstrip() below removes it.
            split_at = boundary + 1
        else:
            # No usable boundary: cut at the cap itself, never past it.
            split_at = max_chars

        chunk = remaining[:split_at].rstrip()
        # A whitespace-only slice is kept verbatim, limited to the part that
        # is actually consumed so no text is emitted twice.
        chunks.append(chunk or remaining[:split_at])
        remaining = remaining[split_at:].lstrip()

    return chunks
|
||||||
|
|
@ -4,6 +4,8 @@ from __future__ import annotations
|
||||||
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
|
from app.gateway.base.formatting import split_text_message
|
||||||
|
|
||||||
MARKDOWN_V2_RESERVED = r"_*[]()~`>#+-=|{}.!"
|
MARKDOWN_V2_RESERVED = r"_*[]()~`>#+-=|{}.!"
|
||||||
MAX_TELEGRAM_MESSAGE_UNITS = 4096
|
MAX_TELEGRAM_MESSAGE_UNITS = 4096
|
||||||
|
|
||||||
|
|
@ -43,6 +45,7 @@ def chunk_message(
|
||||||
max_units: int = MAX_TELEGRAM_MESSAGE_UNITS,
|
max_units: int = MAX_TELEGRAM_MESSAGE_UNITS,
|
||||||
) -> list[str]:
|
) -> list[str]:
|
||||||
"""Split a Telegram message at paragraph/sentence boundaries."""
|
"""Split a Telegram message at paragraph/sentence boundaries."""
|
||||||
|
if max_units == MAX_TELEGRAM_MESSAGE_UNITS:
|
||||||
if not text:
|
if not text:
|
||||||
return [""]
|
return [""]
|
||||||
|
|
||||||
|
|
@ -52,4 +55,5 @@ def chunk_message(
|
||||||
chunk, remaining = _split_at_boundary(remaining, max_units)
|
chunk, remaining = _split_at_boundary(remaining, max_units)
|
||||||
chunks.append(chunk)
|
chunks.append(chunk)
|
||||||
return chunks
|
return chunks
|
||||||
|
return split_text_message(text, max_chars=max_units)
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue