mirror of
https://github.com/VectifyAI/PageIndex.git
synced 2026-05-25 18:55:17 +02:00
fix config and logger
This commit is contained in:
parent
9daa4101d1
commit
e2cf8bb271
3 changed files with 53 additions and 18 deletions
|
|
@@ -2,13 +2,10 @@ import os
|
|||
import json
|
||||
import copy
|
||||
import math
|
||||
import sys
|
||||
import random
|
||||
sys.path.append('../..')
|
||||
import re
|
||||
from utils import *
|
||||
from .utils import *
|
||||
import os
|
||||
from types import SimpleNamespace as config
|
||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
import argparse
|
||||
|
||||
|
|
@@ -1015,6 +1012,8 @@ def tree_parser(page_list, opt, logger=None):
|
|||
|
||||
|
||||
def page_index_main(doc, opt=None):
|
||||
opt = merge_config(opt, get_default_opt())
|
||||
|
||||
logger = JsonLogger(doc)
|
||||
|
||||
is_valid_pdf = (
|
||||
|
|
@@ -1039,12 +1038,12 @@ def page_index_main(doc, opt=None):
|
|||
if opt.if_add_doc_description == 'yes':
|
||||
doc_description = generate_doc_description(structure, model=opt.model)
|
||||
return {
|
||||
'doc_name': os.path.basename(doc),
|
||||
'doc_name': get_pdf_name(doc),
|
||||
'doc_description': doc_description,
|
||||
'structure': structure,
|
||||
}
|
||||
return {
|
||||
'doc_name': os.path.basename(doc),
|
||||
'doc_name': get_pdf_name(doc),
|
||||
'structure': structure,
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue