mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-05-08 15:22:39 +02:00
chore: ran linting
This commit is contained in:
parent
74826b3714
commit
04691d572b
61 changed files with 1962 additions and 1516 deletions
|
|
@@ -39,7 +39,9 @@ class OneDriveClient:
|
|||
|
||||
cfg = connector.config or {}
|
||||
is_encrypted = cfg.get("_token_encrypted", False)
|
||||
token_encryption = TokenEncryption(config.SECRET_KEY) if config.SECRET_KEY else None
|
||||
token_encryption = (
|
||||
TokenEncryption(config.SECRET_KEY) if config.SECRET_KEY else None
|
||||
)
|
||||
|
||||
access_token = cfg.get("access_token", "")
|
||||
refresh_token = cfg.get("refresh_token")
|
||||
|
|
@@ -206,18 +208,20 @@ class OneDriveClient:
|
|||
async def download_file_to_disk(self, item_id: str, dest_path: str) -> str | None:
|
||||
"""Stream file content to disk. Returns error message on failure."""
|
||||
token = await self._get_valid_token()
|
||||
async with httpx.AsyncClient(follow_redirects=True) as client:
|
||||
async with client.stream(
|
||||
async with (
|
||||
httpx.AsyncClient(follow_redirects=True) as client,
|
||||
client.stream(
|
||||
"GET",
|
||||
f"{GRAPH_API_BASE}/me/drive/items/{item_id}/content",
|
||||
headers={"Authorization": f"Bearer {token}"},
|
||||
timeout=120.0,
|
||||
) as resp:
|
||||
if resp.status_code != 200:
|
||||
return f"Download failed: {resp.status_code}"
|
||||
with open(dest_path, "wb") as f:
|
||||
async for chunk in resp.aiter_bytes(chunk_size=5 * 1024 * 1024):
|
||||
f.write(chunk)
|
||||
) as resp,
|
||||
):
|
||||
if resp.status_code != 200:
|
||||
return f"Download failed: {resp.status_code}"
|
||||
with open(dest_path, "wb") as f:
|
||||
async for chunk in resp.aiter_bytes(chunk_size=5 * 1024 * 1024):
|
||||
f.write(chunk)
|
||||
return None
|
||||
|
||||
async def create_file(
|
||||
|
|
|
|||
|
|
@@ -5,6 +5,7 @@ extension-based, not provider-specific.
|
|||
"""
|
||||
|
||||
import asyncio
|
||||
import contextlib
|
||||
import logging
|
||||
import os
|
||||
import tempfile
|
||||
|
|
@@ -60,7 +61,9 @@ async def download_and_extract_content(
|
|||
|
||||
temp_file_path = None
|
||||
try:
|
||||
extension = Path(file_name).suffix or get_extension_from_mime(mime_type) or ".bin"
|
||||
extension = (
|
||||
Path(file_name).suffix or get_extension_from_mime(mime_type) or ".bin"
|
||||
)
|
||||
with tempfile.NamedTemporaryFile(delete=False, suffix=extension) as tmp:
|
||||
temp_file_path = tmp.name
|
||||
|
||||
|
|
@@ -76,10 +79,8 @@ async def download_and_extract_content(
|
|||
return None, metadata, str(e)
|
||||
finally:
|
||||
if temp_file_path and os.path.exists(temp_file_path):
|
||||
try:
|
||||
with contextlib.suppress(Exception):
|
||||
os.unlink(temp_file_path)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
async def _parse_file_to_markdown(file_path: str, filename: str) -> str:
|
||||
|
|
@@ -94,9 +95,10 @@ async def _parse_file_to_markdown(file_path: str, filename: str) -> str:
|
|||
return f.read()
|
||||
|
||||
if lower.endswith((".mp3", ".mp4", ".mpeg", ".mpga", ".m4a", ".wav", ".webm")):
|
||||
from app.config import config as app_config
|
||||
from litellm import atranscription
|
||||
|
||||
from app.config import config as app_config
|
||||
|
||||
stt_service_type = (
|
||||
"local"
|
||||
if app_config.STT_SERVICE and app_config.STT_SERVICE.startswith("local/")
|
||||
|
|
@@ -106,9 +108,13 @@ async def _parse_file_to_markdown(file_path: str, filename: str) -> str:
|
|||
from app.services.stt_service import stt_service
|
||||
|
||||
t0 = time.monotonic()
|
||||
logger.info(f"[local-stt] START file={filename} thread={threading.current_thread().name}")
|
||||
logger.info(
|
||||
f"[local-stt] START file={filename} thread={threading.current_thread().name}"
|
||||
)
|
||||
result = await asyncio.to_thread(stt_service.transcribe_file, file_path)
|
||||
logger.info(f"[local-stt] END file={filename} elapsed={time.monotonic() - t0:.2f}s")
|
||||
logger.info(
|
||||
f"[local-stt] END file={filename} elapsed={time.monotonic() - t0:.2f}s"
|
||||
)
|
||||
text = result.get("text", "")
|
||||
else:
|
||||
with open(file_path, "rb") as audio_file:
|
||||
|
|
@@ -150,7 +156,9 @@ async def _parse_file_to_markdown(file_path: str, filename: str) -> str:
|
|||
parse_with_llamacloud_retry,
|
||||
)
|
||||
|
||||
result = await parse_with_llamacloud_retry(file_path=file_path, estimated_pages=50)
|
||||
result = await parse_with_llamacloud_retry(
|
||||
file_path=file_path, estimated_pages=50
|
||||
)
|
||||
markdown_documents = await result.aget_markdown_documents(split_by_page=False)
|
||||
if not markdown_documents:
|
||||
raise RuntimeError(f"LlamaCloud returned no documents for {filename}")
|
||||
|
|
@@ -161,9 +169,13 @@ async def _parse_file_to_markdown(file_path: str, filename: str) -> str:
|
|||
|
||||
converter = DocumentConverter()
|
||||
t0 = time.monotonic()
|
||||
logger.info(f"[docling] START file={filename} thread={threading.current_thread().name}")
|
||||
logger.info(
|
||||
f"[docling] START file={filename} thread={threading.current_thread().name}"
|
||||
)
|
||||
result = await asyncio.to_thread(converter.convert, file_path)
|
||||
logger.info(f"[docling] END file={filename} elapsed={time.monotonic() - t0:.2f}s")
|
||||
logger.info(
|
||||
f"[docling] END file={filename} elapsed={time.monotonic() - t0:.2f}s"
|
||||
)
|
||||
return result.document.export_to_markdown()
|
||||
|
||||
raise RuntimeError(f"Unknown ETL_SERVICE: {app_config.ETL_SERVICE}")
|
||||
|
|
|
|||
|
|
@@ -27,7 +27,10 @@ async def list_folder_contents(
|
|||
if item["isFolder"]:
|
||||
item.setdefault("mimeType", "application/vnd.ms-folder")
|
||||
else:
|
||||
item.setdefault("mimeType", item.get("file", {}).get("mimeType", "application/octet-stream"))
|
||||
item.setdefault(
|
||||
"mimeType",
|
||||
item.get("file", {}).get("mimeType", "application/octet-stream"),
|
||||
)
|
||||
|
||||
items.sort(key=lambda x: (not x["isFolder"], x.get("name", "").lower()))
|
||||
|
||||
|
|
@@ -63,7 +66,9 @@ async def get_files_in_folder(
|
|||
client, item["id"], include_subfolders=True
|
||||
)
|
||||
if sub_error:
|
||||
logger.warning(f"Error recursing into folder {item.get('name')}: {sub_error}")
|
||||
logger.warning(
|
||||
f"Error recursing into folder {item.get('name')}: {sub_error}"
|
||||
)
|
||||
continue
|
||||
files.extend(sub_files)
|
||||
elif not should_skip_file(item):
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue