feat: add plaintext parser to ETL pipeline for reading text files

This commit is contained in:
Anish Sarkar 2026-04-05 17:26:42 +05:30
parent 35582c9389
commit 2824410be2

View file

@ -0,0 +1,8 @@
def read_plaintext(file_path: str) -> str:
    """Read a text file and return its entire contents as a string.

    The file is decoded as UTF-8. A leading byte-order mark, if present, is
    dropped so it does not leak into the returned text as U+FEFF. Byte
    sequences that are not valid UTF-8 are replaced with U+FFFD instead of
    raising a decode error.

    Args:
        file_path: Path of the file to read.

    Returns:
        The decoded contents of the file.

    Raises:
        ValueError: If the decoded text contains a null character, which is
            treated as a sign that the file is binary rather than text.
        OSError: If the file cannot be opened or read.
    """
    # "utf-8-sig" decodes exactly like "utf-8" but also consumes a leading
    # BOM; files without a BOM are read unchanged.
    with open(file_path, encoding="utf-8-sig", errors="replace") as f:
        content = f.read()

    # errors="replace" hides invalid bytes, so a binary file would otherwise
    # decode "successfully"; a null character is used as the rejection signal.
    if "\x00" in content:
        raise ValueError(
            f"File contains null bytes — likely a binary file opened as text: {file_path}"
        )
    return content