def read_plaintext(file_path: str) -> str:
    """Read a text file and return its entire content as a string.

    The file is decoded as UTF-8. A leading byte-order mark, if present, is
    stripped, and undecodable byte sequences are replaced with U+FFFD rather
    than raising.

    Args:
        file_path: Path of the file to read.

    Returns:
        The decoded file content.

    Raises:
        ValueError: If the decoded content contains a null character, which
            suggests a binary file was opened as text.
        OSError: If the file cannot be opened or read.
    """
    # "utf-8-sig" behaves like "utf-8" but drops a leading BOM, so files saved
    # by BOM-writing editors do not leak U+FEFF into the returned text.
    with open(file_path, encoding="utf-8-sig", errors="replace") as f:
        content = f.read()

    # errors="replace" never fails on binary input, so a null character is the
    # remaining signal that this is not really a text file.
    if "\x00" in content:
        raise ValueError(
            f"File contains null bytes — likely a binary file opened as text: {file_path}"
        )
    return content