mirror of
https://github.com/trustgraph-ai/trustgraph.git
synced 2026-04-30 02:46:23 +02:00
Native CLI i18n: The TrustGraph CLI has built-in translation support that dynamically loads language strings. You can test and use different languages by simply passing the --lang flag (e.g., --lang es for Spanish, --lang ru for Russian) or by configuring your environment's LANG variable. Automated Docs Translations: This PR introduces autonomously translated Markdown documentation into several target languages, including Spanish, Swahili, Portuguese, Turkish, Hindi, Hebrew, Arabic, Simplified Chinese, and Russian.
156 lines
3.9 KiB
Python
156 lines
3.9 KiB
Python
"""Minimal i18n support for TrustGraph.
|
|
|
|
This module intentionally stays lightweight:
|
|
- No runtime translation calls
|
|
- Translations are pre-generated and shipped as language packs
|
|
|
|
Consumers (CLI/API/Workbench) select a language code (e.g. "es") and
|
|
use `Translator.t(key, **kwargs)` to format localized strings.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
from dataclasses import dataclass
|
|
from functools import lru_cache
|
|
from typing import Any, Dict, Mapping, Optional
|
|
|
|
import importlib.resources as importlib_resources
|
|
|
|
|
|
SUPPORTED_LANGUAGES: Mapping[str, str] = {
|
|
"en": "English",
|
|
"es": "Spanish",
|
|
"sw": "Swahili",
|
|
"pt": "Portuguese",
|
|
"tr": "Turkish",
|
|
"hi": "Hindi",
|
|
"he": "Hebrew",
|
|
"ar": "Arabic",
|
|
"zh-cn": "Chinese (simplified)",
|
|
"ru": "Russian",
|
|
}
|
|
|
|
_LANGUAGE_ALIASES: Mapping[str, str] = {
|
|
"zh": "zh-cn",
|
|
"zh-hans": "zh-cn",
|
|
"zh-hans-cn": "zh-cn",
|
|
"zh-cn": "zh-cn",
|
|
"zh_cn": "zh-cn",
|
|
}
|
|
|
|
|
|
def normalize_language(value: Optional[str]) -> str:
|
|
"""Normalize language inputs to our supported codes.
|
|
|
|
Accepts:
|
|
- Simple codes: "es"
|
|
- Region tags: "es-ES", "en-US"
|
|
- Accept-Language style: "es-ES,es;q=0.9,en;q=0.8"
|
|
|
|
Falls back to "en" when unknown.
|
|
"""
|
|
|
|
if not value:
|
|
return "en"
|
|
|
|
# Accept-Language: take first entry
|
|
token = value.split(",", 1)[0].strip()
|
|
if not token:
|
|
return "en"
|
|
|
|
token = token.replace("_", "-").lower()
|
|
|
|
# Exact alias mapping
|
|
if token in _LANGUAGE_ALIASES:
|
|
token = _LANGUAGE_ALIASES[token]
|
|
|
|
# Collapse common regional tags
|
|
if token.startswith("en-"):
|
|
token = "en"
|
|
elif token.startswith("es-"):
|
|
token = "es"
|
|
elif token.startswith("pt-"):
|
|
token = "pt"
|
|
elif token.startswith("tr-"):
|
|
token = "tr"
|
|
elif token.startswith("hi-"):
|
|
token = "hi"
|
|
elif token.startswith("he-"):
|
|
token = "he"
|
|
elif token.startswith("ar-"):
|
|
token = "ar"
|
|
elif token.startswith("sw-"):
|
|
token = "sw"
|
|
elif token.startswith("ru-"):
|
|
token = "ru"
|
|
elif token.startswith("zh-"):
|
|
token = "zh-cn"
|
|
|
|
# Otherwise use primary subtag
|
|
primary = token.split("-", 1)[0]
|
|
if primary in SUPPORTED_LANGUAGES:
|
|
return primary
|
|
|
|
if token in SUPPORTED_LANGUAGES:
|
|
return token
|
|
|
|
return "en"
|
|
|
|
|
|
# NOTE: the dict returned here is the cached instance shared by every caller;
# treat it as read-only.
@lru_cache(maxsize=32)
def get_language_pack(language: str) -> Dict[str, str]:
    """Return the key -> template mapping shipped for `language`.

    The input is normalized first. A missing pack file yields an empty
    mapping, as does a pack whose top-level JSON value is not an object.
    Entries whose key or value is not a string are dropped.
    """
    code = normalize_language(language)
    code = code if code in SUPPORTED_LANGUAGES else "en"

    resource_name = f"{code}.json"
    try:
        with importlib_resources.open_text(
            "trustgraph.i18n.packs", resource_name, encoding="utf-8"
        ) as handle:
            payload = json.load(handle)
    except FileNotFoundError:
        payload = {}

    if not isinstance(payload, dict):
        return {}

    # Keep only string -> string entries
    return {
        name: text
        for name, text in payload.items()
        if isinstance(name, str) and isinstance(text, str)
    }
|
|
|
|
|
|
@dataclass(frozen=True)
class Translator:
    """Looks up localized strings for a single language code."""

    # Language code (or raw language string); normalized on every lookup.
    language: str

    def t(self, key: str, **kwargs: Any) -> str:
        """Return the localized string for `key`.

        Lookup order: the pack for this translator's language, then the
        English pack, then `key` itself. An empty entry counts as missing.
        When keyword arguments are given they are substituted with
        `str.format(**kwargs)`.
        """
        code = normalize_language(self.language)
        localized = get_language_pack(code)
        english = get_language_pack("en")

        text = localized.get(key)
        if not text:
            text = english.get(key)
        if not text:
            text = key

        if kwargs:
            try:
                return text.format(**kwargs)
            except Exception:
                # Best effort: if substitution fails, hand back the
                # unformatted template rather than raising.
                return text

        return text
|
|
|
|
|
|
def get_translator(language: Optional[str]) -> Translator:
    """Build a `Translator` bound to the normalized form of `language`."""
    code = normalize_language(language)
    return Translator(language=code)
|