mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-05-09 07:42:39 +02:00
feat: add plaintext parser to ETL pipeline for reading text files
This commit is contained in:
parent
35582c9389
commit
2824410be2
1 changed files with 8 additions and 0 deletions
8
surfsense_backend/app/etl_pipeline/parsers/plaintext.py
Normal file
8
surfsense_backend/app/etl_pipeline/parsers/plaintext.py
Normal file
|
|
@ -0,0 +1,8 @@
|
|||
def read_plaintext(file_path: str) -> str:
    """Read a text file and return its decoded contents.

    The file is decoded as UTF-8. A leading byte-order mark, if present, is
    dropped, and bytes that are not valid UTF-8 are replaced with U+FFFD
    rather than raising a decode error.

    Args:
        file_path: Path of the file to read.

    Returns:
        The full decoded text of the file.

    Raises:
        ValueError: If the decoded text contains a null character, which
            suggests a binary file was opened as text.
        OSError: If the file cannot be opened or read.
    """
    # "utf-8-sig" decodes BOM-less UTF-8 exactly like "utf-8" but strips a
    # leading BOM, so it does not leak into the text as a stray U+FEFF.
    with open(file_path, encoding="utf-8-sig", errors="replace") as f:
        content = f.read()

    # errors="replace" hides invalid bytes, so a binary file would otherwise
    # decode "successfully"; null characters are the remaining tell-tale.
    if "\x00" in content:
        raise ValueError(
            f"File contains null bytes — likely a binary file opened as text: {file_path}"
        )
    return content
|
||||
Loading…
Add table
Add a link
Reference in a new issue