chore: ran linting

This commit is contained in:
Anish Sarkar 2026-03-30 01:50:41 +05:30
parent 74826b3714
commit 04691d572b
61 changed files with 1962 additions and 1516 deletions

View file

@ -39,7 +39,9 @@ class OneDriveClient:
cfg = connector.config or {}
is_encrypted = cfg.get("_token_encrypted", False)
token_encryption = TokenEncryption(config.SECRET_KEY) if config.SECRET_KEY else None
token_encryption = (
TokenEncryption(config.SECRET_KEY) if config.SECRET_KEY else None
)
access_token = cfg.get("access_token", "")
refresh_token = cfg.get("refresh_token")
@ -206,18 +208,20 @@ class OneDriveClient:
async def download_file_to_disk(self, item_id: str, dest_path: str) -> str | None:
"""Stream file content to disk. Returns error message on failure."""
token = await self._get_valid_token()
async with httpx.AsyncClient(follow_redirects=True) as client:
async with client.stream(
async with (
httpx.AsyncClient(follow_redirects=True) as client,
client.stream(
"GET",
f"{GRAPH_API_BASE}/me/drive/items/{item_id}/content",
headers={"Authorization": f"Bearer {token}"},
timeout=120.0,
) as resp:
if resp.status_code != 200:
return f"Download failed: {resp.status_code}"
with open(dest_path, "wb") as f:
async for chunk in resp.aiter_bytes(chunk_size=5 * 1024 * 1024):
f.write(chunk)
) as resp,
):
if resp.status_code != 200:
return f"Download failed: {resp.status_code}"
with open(dest_path, "wb") as f:
async for chunk in resp.aiter_bytes(chunk_size=5 * 1024 * 1024):
f.write(chunk)
return None
async def create_file(

View file

@ -5,6 +5,7 @@ extension-based, not provider-specific.
"""
import asyncio
import contextlib
import logging
import os
import tempfile
@ -60,7 +61,9 @@ async def download_and_extract_content(
temp_file_path = None
try:
extension = Path(file_name).suffix or get_extension_from_mime(mime_type) or ".bin"
extension = (
Path(file_name).suffix or get_extension_from_mime(mime_type) or ".bin"
)
with tempfile.NamedTemporaryFile(delete=False, suffix=extension) as tmp:
temp_file_path = tmp.name
@ -76,10 +79,8 @@ async def download_and_extract_content(
return None, metadata, str(e)
finally:
if temp_file_path and os.path.exists(temp_file_path):
try:
with contextlib.suppress(Exception):
os.unlink(temp_file_path)
except Exception:
pass
async def _parse_file_to_markdown(file_path: str, filename: str) -> str:
@ -94,9 +95,10 @@ async def _parse_file_to_markdown(file_path: str, filename: str) -> str:
return f.read()
if lower.endswith((".mp3", ".mp4", ".mpeg", ".mpga", ".m4a", ".wav", ".webm")):
from app.config import config as app_config
from litellm import atranscription
from app.config import config as app_config
stt_service_type = (
"local"
if app_config.STT_SERVICE and app_config.STT_SERVICE.startswith("local/")
@ -106,9 +108,13 @@ async def _parse_file_to_markdown(file_path: str, filename: str) -> str:
from app.services.stt_service import stt_service
t0 = time.monotonic()
logger.info(f"[local-stt] START file={filename} thread={threading.current_thread().name}")
logger.info(
f"[local-stt] START file={filename} thread={threading.current_thread().name}"
)
result = await asyncio.to_thread(stt_service.transcribe_file, file_path)
logger.info(f"[local-stt] END file={filename} elapsed={time.monotonic() - t0:.2f}s")
logger.info(
f"[local-stt] END file={filename} elapsed={time.monotonic() - t0:.2f}s"
)
text = result.get("text", "")
else:
with open(file_path, "rb") as audio_file:
@ -150,7 +156,9 @@ async def _parse_file_to_markdown(file_path: str, filename: str) -> str:
parse_with_llamacloud_retry,
)
result = await parse_with_llamacloud_retry(file_path=file_path, estimated_pages=50)
result = await parse_with_llamacloud_retry(
file_path=file_path, estimated_pages=50
)
markdown_documents = await result.aget_markdown_documents(split_by_page=False)
if not markdown_documents:
raise RuntimeError(f"LlamaCloud returned no documents for {filename}")
@ -161,9 +169,13 @@ async def _parse_file_to_markdown(file_path: str, filename: str) -> str:
converter = DocumentConverter()
t0 = time.monotonic()
logger.info(f"[docling] START file={filename} thread={threading.current_thread().name}")
logger.info(
f"[docling] START file={filename} thread={threading.current_thread().name}"
)
result = await asyncio.to_thread(converter.convert, file_path)
logger.info(f"[docling] END file={filename} elapsed={time.monotonic() - t0:.2f}s")
logger.info(
f"[docling] END file={filename} elapsed={time.monotonic() - t0:.2f}s"
)
return result.document.export_to_markdown()
raise RuntimeError(f"Unknown ETL_SERVICE: {app_config.ETL_SERVICE}")

View file

@ -27,7 +27,10 @@ async def list_folder_contents(
if item["isFolder"]:
item.setdefault("mimeType", "application/vnd.ms-folder")
else:
item.setdefault("mimeType", item.get("file", {}).get("mimeType", "application/octet-stream"))
item.setdefault(
"mimeType",
item.get("file", {}).get("mimeType", "application/octet-stream"),
)
items.sort(key=lambda x: (not x["isFolder"], x.get("name", "").lower()))
@ -63,7 +66,9 @@ async def get_files_in_folder(
client, item["id"], include_subfolders=True
)
if sub_error:
logger.warning(f"Error recursing into folder {item.get('name')}: {sub_error}")
logger.warning(
f"Error recursing into folder {item.get('name')}: {sub_error}"
)
continue
files.extend(sub_files)
elif not should_skip_file(item):