mirror of
https://github.com/trustgraph-ai/trustgraph.git
synced 2026-04-25 00:16:23 +02:00
fix(gateway): accept raw utf-8 text in text-load (#729)
Co-authored-by: nanqinhu <139929317+nanqinhu@users.noreply.github.com>
This commit is contained in:
parent
5a9db2da50
commit
7af1d60db8
4 changed files with 91 additions and 9 deletions
54
tests/unit/test_gateway/test_text_document_translator.py
Normal file
54
tests/unit/test_gateway/test_text_document_translator.py
Normal file
|
|
@@ -0,0 +1,54 @@
|
|||
"""
|
||||
Unit tests for text document gateway translation compatibility.
|
||||
"""
|
||||
|
||||
import base64
|
||||
|
||||
from trustgraph.messaging.translators.document_loading import TextDocumentTranslator
|
||||
|
||||
|
||||
class TestTextDocumentTranslator:
    """Exercise ``TextDocumentTranslator.to_pulsar`` with different ``text`` encodings.

    Each test passes a request dict to ``to_pulsar`` and checks that the
    resulting message carries the UTF-8 bytes of the original payload.
    """

    def test_to_pulsar_decodes_base64_text(self):
        """A base64-encoded ``text`` field is decoded; metadata fields are copied."""
        expected = "Cancer survival: 2.74× higher hazard ratio".encode("utf-8")
        request = {
            "id": "doc-1",
            "user": "alice",
            "collection": "research",
            "charset": "utf-8",
            "text": base64.b64encode(expected).decode("ascii"),
        }

        result = TextDocumentTranslator().to_pulsar(request)

        metadata = result.metadata
        assert metadata.id == "doc-1"
        assert metadata.user == "alice"
        assert metadata.collection == "research"
        assert result.text == expected

    def test_to_pulsar_accepts_raw_utf8_text(self):
        """A non-ASCII ``text`` field that is not base64 is passed through as UTF-8 bytes."""
        raw = "Cancer survival: 2.74× higher hazard ratio"
        request = {"charset": "utf-8", "text": raw}

        result = TextDocumentTranslator().to_pulsar(request)

        assert result.text == raw.encode("utf-8")

    def test_to_pulsar_falls_back_to_raw_non_base64_ascii(self):
        """Plain ASCII ``text`` that is not valid base64 is kept as its UTF-8 bytes."""
        raw = "plain-text payload"
        request = {"charset": "utf-8", "text": raw}

        result = TextDocumentTranslator().to_pulsar(request)

        assert result.text == raw.encode("utf-8")
|
||||
Loading…
Add table
Add a link
Reference in a new issue