chore: linting

This commit is contained in:
DESKTOP-RTLN3BA\$punk 2026-05-15 17:33:44 -07:00
parent 219a5977b7
commit c187b04e82
25 changed files with 102 additions and 108 deletions

View file

@ -181,9 +181,7 @@ class EtlPipelineService:
# Common case: the configured ETL service can't OCR
# this image format (or no service is configured at
# all). Don't spam warnings -- just no OCR for it.
logging.debug(
"Skipping per-image OCR for %s: %s", image_name, exc
)
logging.debug("Skipping per-image OCR for %s: %s", image_name, exc)
return ""
return ocr_result.markdown_content

View file

@ -96,9 +96,7 @@ async def parse_with_vision_llm(file_path: str, filename: str, llm) -> str:
return await _invoke_vision(llm, _PROMPT, data_url, filename)
async def parse_image_for_description(
file_path: str, filename: str, llm
) -> str:
async def parse_image_for_description(file_path: str, filename: str, llm) -> str:
"""Visual-description-only call for per-image-in-PDF use.
Used by ``picture_describer`` when an image is embedded inside a

View file

@ -99,12 +99,12 @@ class PictureDescription:
when no OCR was requested or OCR found no text.
"""
page_number: int # 1-indexed
ordinal_in_page: int # 0-indexed within the page
name: str # name pypdf assigned (e.g. "Im0")
sha256: str # hash of the raw image bytes
description: str # visual description (markdown)
ocr_text: str | None = None # OCR text from the ETL service, if any
page_number: int # 1-indexed
ordinal_in_page: int # 0-indexed within the page
name: str # name pypdf assigned (e.g. "Im0")
sha256: str # hash of the raw image bytes
description: str # visual description (markdown)
ocr_text: str | None = None # OCR text from the ETL service, if any
@dataclass
@ -478,7 +478,7 @@ def _replace_one_match(
name = captured_name or desc.name
block = _format_image_block(name, desc.description, desc.ocr_text)
new_markdown = markdown[: match.start()] + block + markdown[match.end():]
new_markdown = markdown[: match.start()] + block + markdown[match.end() :]
return new_markdown, desc_idx + 1
@ -521,7 +521,7 @@ def _splice_after_figures(
match = matches[i]
desc = descriptions[desc_idx + i]
block = _format_image_block(desc.name, desc.description, desc.ocr_text)
out = out[: match.end()] + block + out[match.end():]
out = out[: match.end()] + block + out[match.end() :]
return out, desc_idx + n_to_splice
@ -583,9 +583,7 @@ def inject_descriptions_inline(
)
if desc_idx > before_idx:
continue
out, desc_idx = _replace_one_match(
out, _CAPTION_ONLY, descriptions, desc_idx
)
out, desc_idx = _replace_one_match(out, _CAPTION_ONLY, descriptions, desc_idx)
if desc_idx > before_idx:
continue
# No more positions to splice into.
@ -612,9 +610,7 @@ def render_appended_section(
parts: list[str] = ["", heading, ""]
for desc in descriptions:
parts.append(
_format_image_block(desc.name, desc.description, desc.ocr_text)
)
parts.append(_format_image_block(desc.name, desc.description, desc.ocr_text))
parts.append("")
if skip_notes is not None: