Keep pdf_parser default in code, not config.yaml

This commit is contained in:
Ray 2026-05-11 16:08:50 +08:00
parent 9539fe7513
commit 3b2ddef822
2 changed files with 6 additions and 3 deletions

View file

@ -7,5 +7,4 @@ max_token_num_each_node: 20000
if_add_node_id: "yes"
if_add_node_summary: "yes"
if_add_doc_description: "no"
if_add_node_text: "no"
pdf_parser: "PyPDF2" # text extractor: "PyPDF2" (default, no extra install), "pypdfium2" (pip install pypdfium2), or "PyMuPDF"
if_add_node_text: "no"

View file

@ -685,10 +685,14 @@ def format_structure(structure, order=None):
class ConfigLoader:
# Code-side defaults for non-tuning settings (kept out of config.yaml).
# yaml entries override these if present.
_CODE_DEFAULTS = {"pdf_parser": "PyPDF2"}
def __init__(self, default_path: str = None):
if default_path is None:
default_path = Path(__file__).parent / "config.yaml"
self._default_dict = self._load_yaml(default_path)
self._default_dict = {**self._CODE_DEFAULTS, **self._load_yaml(default_path)}
@staticmethod
def _load_yaml(path):