diff --git a/docs/earthmover.pdf b/docs/earthmover.pdf
new file mode 100644
index 0000000..eb5d5e4
Binary files /dev/null and b/docs/earthmover.pdf differ
diff --git a/page_index.py b/page_index.py
index eb4a5f6..7dc44cb 100644
--- a/page_index.py
+++ b/page_index.py
@@ -27,7 +27,7 @@ def check_title_appearance(item, page_list, start_index=1, model=None):
     prompt = f"""
     Your job is to check if the given section appears or starts in the given page_text.
 
-    Note: ignore any space inconsistency in the page_text.
+    Note: do fuzzy matching, ignore any space inconsistency in the page_text.
 
     The given section title is {title}.
     The given page_text is {page_text}.
@@ -178,7 +178,7 @@ def extract_toc_content(content, model=None):
     prompt = f"""please continue the generation of table of contents , directly output the remaining part of the structure"""
     new_response, finish_reason = ChatGPT_API_with_finish_reason(model=model, prompt=prompt, chat_history=chat_history)
     response = response + new_response
-    if_complete = check_if_toc_transformation_is_complete(content, response)
+    if_complete = check_if_toc_transformation_is_complete(content, response, model)
     
     while not (if_complete == "yes" and finish_reason == "finished"):
         chat_history = [
@@ -188,7 +188,7 @@ def extract_toc_content(content, model=None):
         prompt = f"""please continue the generation of table of contents , directly output the remaining part of the structure"""
         new_response, finish_reason = ChatGPT_API_with_finish_reason(model=model, prompt=prompt, chat_history=chat_history)
         response = response + new_response
-        if_complete = check_if_toc_transformation_is_complete(content, response)
+        if_complete = check_if_toc_transformation_is_complete(content, response, model)
         
         # Optional: Add a maximum retry limit to prevent infinite loops
         if len(chat_history) > 5:  # Arbitrary limit of 10 attempts
@@ -207,6 +207,7 @@ def detect_page_index(toc_content, model=None):
 
     Reply format:
     {{
+        "thinking": <why do you think there are page numbers/indices given within the table of contents>
         "page_index_given_in_toc": "<yes or no>"
     }}
     Directly return the final JSON structure. Do not output anything else."""
@@ -318,7 +319,7 @@ def toc_transformer(toc_content, model=None):
             new_complete =  get_json_content(new_complete)
             last_complete = last_complete+new_complete
 
-        if_complete = check_if_toc_transformation_is_complete(toc_content, last_complete)
+        if_complete = check_if_toc_transformation_is_complete(toc_content, last_complete, model)
         
 
     last_complete = json.loads(last_complete)
@@ -615,7 +616,7 @@ def process_toc_with_page_numbers(toc_content, toc_page_list, page_list, model=N
     
     start_page_index = toc_page_list[-1] + 1
     main_content = ""
-    for page_index in range(start_page_index, start_page_index + 20):
+    for page_index in range(start_page_index, min(start_page_index + opt.toc_check_page_num, len(page_list))):
         main_content += f"<physical_index_{page_index+1}>\n{page_list[page_index][0]}\n<physical_index_{page_index+1}>\n\n"
 
     toc_with_physical_index = toc_index_extractor(toc_no_page_number, main_content, model)
@@ -784,10 +785,7 @@ def fix_incorrect_toc(toc_with_page_number, page_list, incorrect_results, start_
             page_contents.append(page_text)
         content_range = ''.join(page_contents)
         
-        physical_index = single_toc_item_index_fixer(incorrect_item['title'], content_range, model)
-        
-        # Convert to int for checking
-        physical_index_int = convert_physical_index_to_int(physical_index)
+        physical_index_int = single_toc_item_index_fixer(incorrect_item['title'], content_range, model)
         
         # Check if the result is correct
         check_item = incorrect_item.copy()
@@ -978,33 +976,23 @@ def tree_parser(page_list, opt, logger=None):
     check_toc_result = check_toc(page_list, opt)    
     logger.info(check_toc_result)
 
-    if check_toc_result['toc_content'] is None:
+    if check_toc_result['toc_content'] is not None and check_toc_result['page_index_given_in_toc'] == 'yes':
+        toc_with_page_number = meta_processor(
+            page_list, 
+            mode='process_toc_with_page_numbers', 
+            start_index=1, 
+            toc_content=check_toc_result['toc_content'], 
+            toc_page_list=check_toc_result['toc_page_list'], 
+            opt=opt,
+            logger=logger)
+    else:
         toc_with_page_number = meta_processor(
             page_list, 
             mode='process_no_toc', 
             start_index=1, 
             opt=opt,
             logger=logger)
-    else:
-        if check_toc_result['page_index_given_in_toc'] == 'yes':
-            toc_with_page_number = meta_processor(
-                page_list, 
-                mode='process_toc_with_page_numbers', 
-                start_index=1, 
-                toc_content=check_toc_result['toc_content'], 
-                toc_page_list=check_toc_result['toc_page_list'], 
-                opt=opt,
-                logger=logger)
-        else:
-            toc_with_page_number = meta_processor(
-                page_list, 
-                mode='process_toc_no_page_numbers',
-                start_index=1,
-                toc_content=check_toc_result['toc_content'],
-                toc_page_list=check_toc_result['toc_page_list'], 
-                opt=opt,
-                logger=logger)
-    
+
     toc_with_page_number = add_preface_if_needed(toc_with_page_number)
     toc_with_page_number = check_title_appearance_in_start_parallel(toc_with_page_number, page_list, model=opt.model, logger=logger)
     toc_tree = post_processing(toc_with_page_number, len(page_list))
@@ -1026,6 +1014,12 @@ def page_index_main(doc, opt=None):
 
     print('Parsing PDF...')
     page_list = get_page_tokens(doc)
+    # Dump each page's text to a per-document log file, wrapped in <physical_index_N> tags (N is 1-based).
+    with open(f'./logs/{os.path.basename(doc)}_page_list.txt', 'w', encoding='utf-8') as f:
+        for page_index, page_text in enumerate(page_list):
+            page_text = f"<physical_index_{page_index+1}>\n{page_text[0]}\n<physical_index_{page_index+1}>\n\n"
+            f.write(page_text)
+
     logger.info({'total_page_number': len(page_list)})
     logger.info({'total_token': sum([page[1] for page in page_list])})
     
diff --git a/results/earthmover_structure.json b/results/earthmover_structure.json
new file mode 100644
index 0000000..a570bc4
--- /dev/null
+++ b/results/earthmover_structure.json
@@ -0,0 +1,137 @@
+{
+  "doc_name": "earthmover.pdf",
+  "structure": [
+    {
+      "title": "Earth Mover\u2019s Distance based Similarity Search at Scale",
+      "start_index": 1,
+      "end_index": 1,
+      "node_id": "0000"
+    },
+    {
+      "title": "ABSTRACT",
+      "start_index": 1,
+      "end_index": 1,
+      "node_id": "0001"
+    },
+    {
+      "title": "INTRODUCTION",
+      "start_index": 1,
+      "end_index": 2,
+      "node_id": "0002"
+    },
+    {
+      "title": "PRELIMINARIES",
+      "start_index": 2,
+      "end_index": 2,
+      "nodes": [
+        {
+          "title": "Computing the EMD",
+          "start_index": 3,
+          "end_index": 3,
+          "node_id": "0004"
+        },
+        {
+          "title": "Filter-and-Refinement Framework",
+          "start_index": 3,
+          "end_index": 4,
+          "node_id": "0005"
+        }
+      ],
+      "node_id": "0003"
+    },
+    {
+      "title": "SCALING UP SSP",
+      "start_index": 4,
+      "end_index": 5,
+      "node_id": "0006"
+    },
+    {
+      "title": "BOOSTING THE REFINEMENT PHASE",
+      "start_index": 5,
+      "end_index": 5,
+      "nodes": [
+        {
+          "title": "Analysis of EMD Calculation",
+          "start_index": 5,
+          "end_index": 6,
+          "node_id": "0008"
+        },
+        {
+          "title": "Progressive Bounding",
+          "start_index": 6,
+          "end_index": 6,
+          "node_id": "0009"
+        },
+        {
+          "title": "Sensitivity to Refinement Order",
+          "start_index": 6,
+          "end_index": 7,
+          "node_id": "0010"
+        },
+        {
+          "title": "Dynamic Refinement Ordering",
+          "start_index": 7,
+          "end_index": 8,
+          "node_id": "0011"
+        },
+        {
+          "title": "Running Upper Bound",
+          "start_index": 8,
+          "end_index": 8,
+          "node_id": "0012"
+        }
+      ],
+      "node_id": "0007"
+    },
+    {
+      "title": "EXPERIMENTAL EVALUATION",
+      "start_index": 8,
+      "end_index": 9,
+      "nodes": [
+        {
+          "title": "Performance Improvement",
+          "start_index": 9,
+          "end_index": 10,
+          "node_id": "0014"
+        },
+        {
+          "title": "Scalability Experiments",
+          "start_index": 10,
+          "end_index": 11,
+          "node_id": "0015"
+        },
+        {
+          "title": "Parameter Tuning in DRO",
+          "start_index": 11,
+          "end_index": 12,
+          "node_id": "0016"
+        }
+      ],
+      "node_id": "0013"
+    },
+    {
+      "title": "RELATED WORK",
+      "start_index": 12,
+      "end_index": 12,
+      "node_id": "0017"
+    },
+    {
+      "title": "CONCLUSION",
+      "start_index": 12,
+      "end_index": 12,
+      "node_id": "0018"
+    },
+    {
+      "title": "ACKNOWLEDGMENT",
+      "start_index": 12,
+      "end_index": 12,
+      "node_id": "0019"
+    },
+    {
+      "title": "REFERENCES",
+      "start_index": 12,
+      "end_index": 12,
+      "node_id": "0020"
+    }
+  ]
+}
\ No newline at end of file
diff --git a/results/four-lectures_structure.json b/results/four-lectures_structure.json
index cf73815..1c1cbef 100644
--- a/results/four-lectures_structure.json
+++ b/results/four-lectures_structure.json
@@ -2,78 +2,80 @@
   "doc_name": "four-lectures.pdf",
   "structure": [
     {
-      "title": "Preface",
+      "title": "Four Lectures on Standard ML",
       "start_index": 1,
       "end_index": 1,
-      "node_id": "0000"
-    },
-    {
-      "title": "ML at a Glance",
-      "start_index": 2,
-      "end_index": 2,
       "nodes": [
         {
-          "title": "An ML session",
+          "title": "ML at a Glance",
           "start_index": 2,
-          "end_index": 3,
-          "node_id": "0002"
-        },
-        {
-          "title": "Types and Values",
-          "start_index": 3,
-          "end_index": 4,
-          "node_id": "0003"
-        },
-        {
-          "title": "Recursive Functions",
-          "start_index": 4,
-          "end_index": 4,
-          "node_id": "0004"
-        },
-        {
-          "title": "Raising Exceptions",
-          "start_index": 4,
-          "end_index": 5,
-          "node_id": "0005"
-        },
-        {
-          "title": "Structures",
-          "start_index": 5,
-          "end_index": 6,
-          "node_id": "0006"
-        },
-        {
-          "title": "Signatures",
-          "start_index": 6,
-          "end_index": 7,
-          "node_id": "0007"
-        },
-        {
-          "title": "Coercive Signature Matching",
-          "start_index": 7,
-          "end_index": 8,
-          "node_id": "0008"
-        },
-        {
-          "title": "Functor Declaration",
-          "start_index": 8,
-          "end_index": 9,
-          "node_id": "0009"
-        },
-        {
-          "title": "Functor Application",
-          "start_index": 9,
-          "end_index": 9,
-          "node_id": "0010"
-        },
-        {
-          "title": "Summary",
-          "start_index": 9,
-          "end_index": 9,
-          "node_id": "0011"
+          "end_index": 2,
+          "nodes": [
+            {
+              "title": "An ML session",
+              "start_index": 2,
+              "end_index": 3,
+              "node_id": "0002"
+            },
+            {
+              "title": "Types and Values",
+              "start_index": 3,
+              "end_index": 4,
+              "node_id": "0003"
+            },
+            {
+              "title": "Recursive Functions",
+              "start_index": 4,
+              "end_index": 4,
+              "node_id": "0004"
+            },
+            {
+              "title": "Raising Exceptions",
+              "start_index": 4,
+              "end_index": 5,
+              "node_id": "0005"
+            },
+            {
+              "title": "Structures",
+              "start_index": 5,
+              "end_index": 6,
+              "node_id": "0006"
+            },
+            {
+              "title": "Signatures",
+              "start_index": 6,
+              "end_index": 7,
+              "node_id": "0007"
+            },
+            {
+              "title": "Coercive Signature Matching",
+              "start_index": 7,
+              "end_index": 8,
+              "node_id": "0008"
+            },
+            {
+              "title": "Functor Declaration",
+              "start_index": 8,
+              "end_index": 9,
+              "node_id": "0009"
+            },
+            {
+              "title": "Functor Application",
+              "start_index": 9,
+              "end_index": 9,
+              "node_id": "0010"
+            },
+            {
+              "title": "Summary",
+              "start_index": 9,
+              "end_index": 9,
+              "node_id": "0011"
+            }
+          ],
+          "node_id": "0001"
         }
       ],
-      "node_id": "0001"
+      "node_id": "0000"
     },
     {
       "title": "Programming with ML Modules",
@@ -264,70 +266,14 @@
     {
       "title": "Appendix A: The Bare Interpreter",
       "start_index": 44,
-      "end_index": 44,
-      "nodes": [
-        {
-          "title": "Syntax",
-          "start_index": 44,
-          "end_index": 44,
-          "node_id": "0043"
-        },
-        {
-          "title": "Parsing",
-          "start_index": 44,
-          "end_index": 45,
-          "node_id": "0044"
-        },
-        {
-          "title": "Environments",
-          "start_index": 45,
-          "end_index": 46,
-          "node_id": "0045"
-        },
-        {
-          "title": "Evaluation",
-          "start_index": 46,
-          "end_index": 46,
-          "node_id": "0046"
-        },
-        {
-          "title": "Type Checking",
-          "start_index": 46,
-          "end_index": 46,
-          "node_id": "0047"
-        },
-        {
-          "title": "The Interpreter",
-          "start_index": 46,
-          "end_index": 47,
-          "node_id": "0048"
-        },
-        {
-          "title": "The Evaluator",
-          "start_index": 47,
-          "end_index": 49,
-          "node_id": "0049"
-        },
-        {
-          "title": "The Typechecker",
-          "start_index": 49,
-          "end_index": 50,
-          "node_id": "0050"
-        },
-        {
-          "title": "The Basics",
-          "start_index": 50,
-          "end_index": 52,
-          "node_id": "0051"
-        }
-      ],
+      "end_index": 52,
       "node_id": "0042"
     },
     {
       "title": "Appendix B: Files",
       "start_index": 53,
       "end_index": 53,
-      "node_id": "0052"
+      "node_id": "0043"
     }
   ]
 }
\ No newline at end of file
diff --git a/utils.py b/utils.py
index 6306aee..f8a39a0 100644
--- a/utils.py
+++ b/utils.py
@@ -492,24 +492,25 @@ def check_token_limit(structure, limit=110000):
             print("Start Index:", node['start_index'])
             print("End Index:", node['end_index'])
             print("Title:", node['title'])
-            # print(node['text'])
             print("\n")
 
 
 def convert_physical_index_to_int(data):
     if isinstance(data, list):
         for i in range(len(data)):
-            if isinstance(data[i]['physical_index'], str):
-                if data[i]['physical_index'].startswith('<physical_index_'):
-                    data[i]['physical_index'] = int(data[i]['physical_index'].split('_')[-1].rstrip('>').strip())
-                elif data[i]['physical_index'].startswith('physical_index_'):
-                    data[i]['physical_index'] = int(data[i]['physical_index'].split('_')[-1].strip())
+            # Check if item is a dictionary and has 'physical_index' key
+            if isinstance(data[i], dict) and 'physical_index' in data[i]:
+                if isinstance(data[i]['physical_index'], str):
+                    if data[i]['physical_index'].startswith('<physical_index_'):
+                        data[i]['physical_index'] = int(data[i]['physical_index'].split('_')[-1].rstrip('>').strip())
+                    elif data[i]['physical_index'].startswith('physical_index_'):
+                        data[i]['physical_index'] = int(data[i]['physical_index'].split('_')[-1].strip())
     elif isinstance(data, str):
         if data.startswith('<physical_index_'):
             data = int(data.split('_')[-1].rstrip('>').strip())
         elif data.startswith('physical_index_'):
             data = int(data.split('_')[-1].strip())
-        ###check data is int
+        # Return the value only if the string was successfully converted to an int
         if isinstance(data, int):
             return data
         else: