mirror of
https://github.com/VectifyAI/PageIndex.git
synced 2026-06-12 19:55:17 +02:00
Keep pdf_parser default in code, not config.yaml
This commit is contained in:
parent
9539fe7513
commit
3b2ddef822
2 changed files with 6 additions and 3 deletions
|
|
@ -7,5 +7,4 @@ max_token_num_each_node: 20000
|
|||
if_add_node_id: "yes"
|
||||
if_add_node_summary: "yes"
|
||||
if_add_doc_description: "no"
|
||||
if_add_node_text: "no"
|
||||
pdf_parser: "PyPDF2" # text extractor: "PyPDF2" (default, no extra install), "pypdfium2" (pip install pypdfium2), or "PyMuPDF"
|
||||
if_add_node_text: "no"
|
||||
|
|
@ -685,10 +685,14 @@ def format_structure(structure, order=None):
|
|||
|
||||
|
||||
class ConfigLoader:
|
||||
# Code-side defaults for non-tuning settings (kept out of config.yaml).
|
||||
# Entries in config.yaml override these defaults when present.
|
||||
_CODE_DEFAULTS = {"pdf_parser": "PyPDF2"}
|
||||
|
||||
def __init__(self, default_path: str = None):
|
||||
if default_path is None:
|
||||
default_path = Path(__file__).parent / "config.yaml"
|
||||
self._default_dict = self._load_yaml(default_path)
|
||||
self._default_dict = {**self._CODE_DEFAULTS, **self._load_yaml(default_path)}
|
||||
|
||||
@staticmethod
|
||||
def _load_yaml(path):
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue