mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-06-02 19:55:18 +02:00
feat(gateway): add Telegram adapter and formatting
This commit is contained in:
parent
c9b7d7b572
commit
59e6475348
5 changed files with 298 additions and 0 deletions
2
surfsense_backend/app/gateway/telegram/__init__.py
Normal file
2
surfsense_backend/app/gateway/telegram/__init__.py
Normal file
|
|
@ -0,0 +1,2 @@
|
||||||
|
"""Telegram gateway adapter."""
|
||||||
|
|
||||||
114
surfsense_backend/app/gateway/telegram/adapter.py
Normal file
114
surfsense_backend/app/gateway/telegram/adapter.py
Normal file
|
|
@ -0,0 +1,114 @@
|
||||||
|
"""Telegram platform adapter."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from collections.abc import AsyncIterator
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from app.gateway.base.adapter import (
|
||||||
|
BasePlatformAdapter,
|
||||||
|
ParsedInboundEvent,
|
||||||
|
PlatformSendResult,
|
||||||
|
)
|
||||||
|
from app.gateway.telegram.client import TelegramClient
|
||||||
|
|
||||||
|
|
||||||
|
class TelegramAdapter(BasePlatformAdapter):
|
||||||
|
platform = "telegram"
|
||||||
|
|
||||||
|
def __init__(self, token: str) -> None:
|
||||||
|
self.client = TelegramClient(token)
|
||||||
|
|
||||||
|
def parse_inbound(self, raw_payload: dict[str, Any]) -> ParsedInboundEvent:
|
||||||
|
event_kind = "other"
|
||||||
|
message = raw_payload.get("message")
|
||||||
|
if message is not None:
|
||||||
|
event_kind = "message"
|
||||||
|
else:
|
||||||
|
message = raw_payload.get("edited_message")
|
||||||
|
if message is not None:
|
||||||
|
event_kind = "edited_message"
|
||||||
|
|
||||||
|
if message is None:
|
||||||
|
return ParsedInboundEvent(
|
||||||
|
platform=self.platform,
|
||||||
|
event_kind=event_kind,
|
||||||
|
external_peer_id=None,
|
||||||
|
external_peer_kind="unknown",
|
||||||
|
external_message_id=None,
|
||||||
|
external_user_id=None,
|
||||||
|
text=None,
|
||||||
|
raw_payload=raw_payload,
|
||||||
|
)
|
||||||
|
|
||||||
|
chat = message.get("chat") or {}
|
||||||
|
sender = message.get("from") or {}
|
||||||
|
chat_type = str(chat.get("type") or "unknown")
|
||||||
|
peer_kind = {
|
||||||
|
"private": "direct",
|
||||||
|
"group": "group",
|
||||||
|
"supergroup": "group",
|
||||||
|
"channel": "channel",
|
||||||
|
}.get(chat_type, "unknown")
|
||||||
|
display_name = chat.get("title") or " ".join(
|
||||||
|
part
|
||||||
|
for part in (sender.get("first_name"), sender.get("last_name"))
|
||||||
|
if part
|
||||||
|
)
|
||||||
|
|
||||||
|
return ParsedInboundEvent(
|
||||||
|
platform=self.platform,
|
||||||
|
event_kind=event_kind,
|
||||||
|
external_peer_id=str(chat["id"]) if chat.get("id") is not None else None,
|
||||||
|
external_peer_kind=peer_kind,
|
||||||
|
external_message_id=(
|
||||||
|
str(message["message_id"]) if message.get("message_id") is not None else None
|
||||||
|
),
|
||||||
|
external_user_id=str(sender["id"]) if sender.get("id") is not None else None,
|
||||||
|
text=message.get("text") or message.get("caption"),
|
||||||
|
raw_payload=raw_payload,
|
||||||
|
display_name=display_name or None,
|
||||||
|
username=sender.get("username") or chat.get("username"),
|
||||||
|
metadata={"chat_type": chat_type, "update_id": raw_payload.get("update_id")},
|
||||||
|
)
|
||||||
|
|
||||||
|
async def send_message(
|
||||||
|
self,
|
||||||
|
*,
|
||||||
|
external_peer_id: str,
|
||||||
|
text: str,
|
||||||
|
parse_mode: str | None = None,
|
||||||
|
reply_to_message_id: str | None = None,
|
||||||
|
) -> PlatformSendResult:
|
||||||
|
return await self.client.send_message(
|
||||||
|
chat_id=external_peer_id,
|
||||||
|
text=text,
|
||||||
|
parse_mode=parse_mode,
|
||||||
|
reply_to_message_id=reply_to_message_id,
|
||||||
|
)
|
||||||
|
|
||||||
|
async def edit_message(
|
||||||
|
self,
|
||||||
|
*,
|
||||||
|
external_peer_id: str,
|
||||||
|
external_message_id: str,
|
||||||
|
text: str,
|
||||||
|
parse_mode: str | None = None,
|
||||||
|
) -> PlatformSendResult:
|
||||||
|
return await self.client.edit_message(
|
||||||
|
chat_id=external_peer_id,
|
||||||
|
message_id=external_message_id,
|
||||||
|
text=text,
|
||||||
|
parse_mode=parse_mode,
|
||||||
|
)
|
||||||
|
|
||||||
|
async def validate_credentials(self) -> dict[str, Any]:
|
||||||
|
return await self.client.validate()
|
||||||
|
|
||||||
|
async def leave_chat(self, *, external_peer_id: str) -> None:
|
||||||
|
await self.client.leave_chat(chat_id=external_peer_id)
|
||||||
|
|
||||||
|
async def fetch_updates(self, *, offset: int | None) -> AsyncIterator[dict[str, Any]]:
|
||||||
|
async for update in self.client.get_updates(offset=offset):
|
||||||
|
yield update
|
||||||
|
|
||||||
109
surfsense_backend/app/gateway/telegram/client.py
Normal file
109
surfsense_backend/app/gateway/telegram/client.py
Normal file
|
|
@ -0,0 +1,109 @@
|
||||||
|
"""Thin async Telegram Bot API client."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
from collections.abc import AsyncIterator
|
||||||
|
from datetime import timedelta
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from telegram import Bot
|
||||||
|
from telegram.error import BadRequest, RetryAfter
|
||||||
|
|
||||||
|
from app.gateway.base.adapter import PlatformSendResult
|
||||||
|
|
||||||
|
|
||||||
|
def retry_after_seconds(value: int | timedelta) -> float:
|
||||||
|
if isinstance(value, timedelta):
|
||||||
|
return value.total_seconds()
|
||||||
|
return float(value)
|
||||||
|
|
||||||
|
|
||||||
|
class TelegramClient:
|
||||||
|
def __init__(self, token: str) -> None:
|
||||||
|
self.token = token
|
||||||
|
self.bot = Bot(token=token)
|
||||||
|
|
||||||
|
async def send_message(
|
||||||
|
self,
|
||||||
|
*,
|
||||||
|
chat_id: str,
|
||||||
|
text: str,
|
||||||
|
parse_mode: str | None = None,
|
||||||
|
reply_to_message_id: str | None = None,
|
||||||
|
) -> PlatformSendResult:
|
||||||
|
kwargs: dict[str, Any] = {}
|
||||||
|
if parse_mode:
|
||||||
|
kwargs["parse_mode"] = parse_mode
|
||||||
|
if reply_to_message_id:
|
||||||
|
kwargs["reply_to_message_id"] = int(reply_to_message_id)
|
||||||
|
try:
|
||||||
|
msg = await self.bot.send_message(chat_id=chat_id, text=text, **kwargs)
|
||||||
|
except RetryAfter as exc:
|
||||||
|
await asyncio.sleep(retry_after_seconds(exc.retry_after))
|
||||||
|
msg = await self.bot.send_message(chat_id=chat_id, text=text, **kwargs)
|
||||||
|
return PlatformSendResult(
|
||||||
|
external_message_id=str(msg.message_id),
|
||||||
|
raw_response=msg.to_dict(),
|
||||||
|
)
|
||||||
|
|
||||||
|
async def edit_message(
|
||||||
|
self,
|
||||||
|
*,
|
||||||
|
chat_id: str,
|
||||||
|
message_id: str,
|
||||||
|
text: str,
|
||||||
|
parse_mode: str | None = None,
|
||||||
|
) -> PlatformSendResult:
|
||||||
|
kwargs: dict[str, Any] = {}
|
||||||
|
if parse_mode:
|
||||||
|
kwargs["parse_mode"] = parse_mode
|
||||||
|
try:
|
||||||
|
msg = await self.bot.edit_message_text(
|
||||||
|
chat_id=chat_id,
|
||||||
|
message_id=int(message_id),
|
||||||
|
text=text,
|
||||||
|
**kwargs,
|
||||||
|
)
|
||||||
|
except RetryAfter as exc:
|
||||||
|
await asyncio.sleep(retry_after_seconds(exc.retry_after))
|
||||||
|
msg = await self.bot.edit_message_text(
|
||||||
|
chat_id=chat_id,
|
||||||
|
message_id=int(message_id),
|
||||||
|
text=text,
|
||||||
|
**kwargs,
|
||||||
|
)
|
||||||
|
return PlatformSendResult(
|
||||||
|
external_message_id=str(msg.message_id),
|
||||||
|
raw_response=msg.to_dict(),
|
||||||
|
)
|
||||||
|
|
||||||
|
async def validate(self) -> dict[str, Any]:
|
||||||
|
me = await self.bot.get_me()
|
||||||
|
return me.to_dict()
|
||||||
|
|
||||||
|
async def leave_chat(self, *, chat_id: str) -> None:
|
||||||
|
await self.bot.leave_chat(chat_id=chat_id)
|
||||||
|
|
||||||
|
async def get_updates(self, *, offset: int | None) -> AsyncIterator[dict[str, Any]]:
|
||||||
|
next_offset = offset
|
||||||
|
while True:
|
||||||
|
updates = await self.bot.get_updates(
|
||||||
|
offset=next_offset,
|
||||||
|
timeout=30,
|
||||||
|
allowed_updates=["message", "edited_message"],
|
||||||
|
)
|
||||||
|
for update in updates:
|
||||||
|
next_offset = update.update_id + 1
|
||||||
|
yield update.to_dict()
|
||||||
|
|
||||||
|
|
||||||
|
async def retry_plaintext_on_bad_markdown(call, *args, **kwargs) -> PlatformSendResult:
|
||||||
|
try:
|
||||||
|
return await call(*args, **kwargs)
|
||||||
|
except BadRequest as exc:
|
||||||
|
if "can't parse entities" not in str(exc).lower():
|
||||||
|
raise
|
||||||
|
kwargs["parse_mode"] = None
|
||||||
|
return await call(*args, **kwargs)
|
||||||
|
|
||||||
55
surfsense_backend/app/gateway/telegram/formatting.py
Normal file
55
surfsense_backend/app/gateway/telegram/formatting.py
Normal file
|
|
@ -0,0 +1,55 @@
|
||||||
|
"""Telegram formatting helpers."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
MARKDOWN_V2_RESERVED = r"_*[]()~`>#+-=|{}.!"
|
||||||
|
MAX_TELEGRAM_MESSAGE_UNITS = 4096
|
||||||
|
|
||||||
|
_RESERVED_RE = re.compile(r"([_\*\[\]\(\)~`>#+\-=|{}\.!])")
|
||||||
|
|
||||||
|
|
||||||
|
def escape_markdown_v2(text: str) -> str:
|
||||||
|
"""Escape all Telegram MarkdownV2 reserved characters."""
|
||||||
|
return _RESERVED_RE.sub(r"\\\1", text)
|
||||||
|
|
||||||
|
|
||||||
|
def _utf16_len(text: str) -> int:
|
||||||
|
return len(text.encode("utf-16-le")) // 2
|
||||||
|
|
||||||
|
|
||||||
|
def _split_at_boundary(text: str, max_units: int) -> tuple[str, str]:
|
||||||
|
if _utf16_len(text) <= max_units:
|
||||||
|
return text, ""
|
||||||
|
|
||||||
|
# Build a hard upper bound by code point, then walk back to natural
|
||||||
|
# boundaries. Telegram's limit is UTF-16 code units, so verify candidates.
|
||||||
|
end = min(len(text), max_units)
|
||||||
|
while end > 0 and _utf16_len(text[:end]) > max_units:
|
||||||
|
end -= 1
|
||||||
|
|
||||||
|
candidate = text[:end]
|
||||||
|
boundary = max(candidate.rfind("\n\n"), candidate.rfind(". "), candidate.rfind("\n"))
|
||||||
|
if boundary > max(200, end // 2):
|
||||||
|
end = boundary + (2 if candidate[boundary : boundary + 2] in {"\n\n", ". "} else 1)
|
||||||
|
|
||||||
|
return text[:end], text[end:]
|
||||||
|
|
||||||
|
|
||||||
|
def chunk_message(
|
||||||
|
text: str,
|
||||||
|
*,
|
||||||
|
max_units: int = MAX_TELEGRAM_MESSAGE_UNITS,
|
||||||
|
) -> list[str]:
|
||||||
|
"""Split a Telegram message at paragraph/sentence boundaries."""
|
||||||
|
if not text:
|
||||||
|
return [""]
|
||||||
|
|
||||||
|
chunks: list[str] = []
|
||||||
|
remaining = text
|
||||||
|
while remaining:
|
||||||
|
chunk, remaining = _split_at_boundary(remaining, max_units)
|
||||||
|
chunks.append(chunk)
|
||||||
|
return chunks
|
||||||
|
|
||||||
18
surfsense_backend/tests/unit/gateway/test_formatting.py
Normal file
18
surfsense_backend/tests/unit/gateway/test_formatting.py
Normal file
|
|
@ -0,0 +1,18 @@
|
||||||
|
from app.gateway.telegram.formatting import chunk_message, escape_markdown_v2
|
||||||
|
|
||||||
|
|
||||||
|
def test_escape_markdown_v2_reserved_chars():
|
||||||
|
text = r"_*[]()~`>#+-=|{}.!"
|
||||||
|
|
||||||
|
assert escape_markdown_v2(text) == r"\_\*\[\]\(\)\~\`\>\#\+\-\=\|\{\}\.\!"
|
||||||
|
|
||||||
|
|
||||||
|
def test_chunk_message_preserves_content_and_limits_size():
|
||||||
|
text = "First paragraph.\n\n" + ("x" * 5000)
|
||||||
|
|
||||||
|
chunks = chunk_message(text, max_units=4096)
|
||||||
|
|
||||||
|
assert "".join(chunks) == text
|
||||||
|
assert len(chunks) > 1
|
||||||
|
assert all(len(chunk.encode("utf-16-le")) // 2 <= 4096 for chunk in chunks)
|
||||||
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue