mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-04-26 17:26:23 +02:00
- Introduced a new utility for parsing .xlsx files into markdown format, enhancing the ability to process Excel documents natively.
- Updated the Google Drive content extractor to utilize the new Excel parsing functionality, allowing for better handling of spreadsheet files.
- Enhanced file type detection and export logic to support various document formats, improving overall content extraction accuracy.
- Added unit tests to ensure the correctness of the new Excel parsing feature and its integration with existing content extraction workflows.
200 lines
4.8 KiB
TOML
200 lines
4.8 KiB
TOML
# Package metadata and runtime dependencies.
[project]
name = "surf-new-backend"
version = "0.0.13"
description = "SurfSense Backend"
requires-python = ">=3.12"
dependencies = [
    "alembic>=1.13.0",
    "asyncpg>=0.30.0",
    "authlib>=1.6.9",
    "PyJWT>=2.12.0",
    "tornado>=6.5.5",
    "datasets>=2.21.0",
    "pyarrow>=15.0.0,<19.0.0",
    "discord-py>=2.5.2",
    "docling>=2.15.0",
    "fastapi>=0.115.8",
    "github3.py==4.0.1",
    "google-api-python-client>=2.156.0",
    "google-auth-oauthlib>=1.2.1",
    "kokoro>=0.9.4",
    "linkup-sdk>=0.2.4",
    "llama-cloud-services>=0.6.25",
    "Markdown>=3.7",
    "markdownify>=0.14.1",
    "notion-client>=2.3.0",
    "numpy>=1.24.0",
    "pgvector>=0.3.6",
    "playwright>=1.50.0",
    "pypdf>=5.1.0",
    "python-ffmpeg>=2.0.12",
    "rerankers[flashrank]>=0.7.1",
    "sentence-transformers>=3.4.1",
    "slack-sdk>=3.34.0",
    "soundfile>=0.13.1",
    "spacy>=3.8.7",
    # spaCy English model, referenced directly by its release wheel URL.
    "en-core-web-sm@https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl",
    "static-ffmpeg>=2.13",
    "tavily-python>=0.3.2",
    "uvicorn[standard]>=0.34.0",
    "validators>=0.34.0",
    "youtube-transcript-api>=1.0.3",
    "elasticsearch>=9.1.1",
    "faster-whisper>=1.1.0",
    "celery[redis]>=5.5.3",
    "flower>=2.0.1",
    "redis>=5.2.1",
    "firecrawl-py>=4.9.0",
    "boto3>=1.35.0",
    "langchain-community>=0.3.31",
    "litellm>=1.80.10",
    "langchain-litellm>=0.3.5",
    "fake-useragent>=2.2.0",
    "trafilatura>=2.0.0",
    "fastapi-users[oauth,sqlalchemy]>=15.0.3",
    "chonkie[all]>=1.5.0",
    "langgraph-checkpoint-postgres>=3.0.2",
    "psycopg[binary,pool]>=3.3.2",
    "mcp>=1.25.0",
    "starlette>=0.40.0,<0.51.0",
    # This range effectively pins the 3.1.1 release.
    "sse-starlette>=3.1.1,<3.1.2",
    "gitingest>=0.3.1",
    "composio>=0.10.9",
    "langchain>=1.2.6",
    "langgraph>=1.0.5",
    "unstructured[all-docs]>=0.18.31",
    "unstructured-client>=0.42.3",
    "langchain-unstructured>=1.0.1",
    "slowapi>=0.1.9",
    "pypandoc_binary>=1.16.2",
    "typst>=0.14.0",
    "deepagents>=0.4.3",
    "daytona>=0.146.0",
    "langchain-daytona>=0.0.2",
    # NOTE(review): `pypandoc_binary` is also listed above; confirm both
    # distributions are intentionally required.
    "pypandoc>=1.16.2",
    "notion-markdown>=0.7.0",
    "openpyxl>=3.1.5",
]

# Development-only dependency group (linting and test tooling).
[dependency-groups]
dev = [
    "ruff>=0.12.5",
    "pytest>=9.0.2",
    "pytest-asyncio>=1.3.0",
    "pytest-mock>=3.14",
    "httpx>=0.28.1",
]

[tool.ruff]
# Exclude a variety of commonly ignored directories.
exclude = [
    ".bzr",
    ".direnv",
    ".eggs",
    ".git",
    ".git-rewrite",
    ".hg",
    ".ipynb_checkpoints",
    ".mypy_cache",
    ".nox",
    ".pants.d",
    ".pyenv",
    ".pytest_cache",
    ".pytype",
    ".ruff_cache",
    ".svn",
    ".tox",
    ".venv",
    ".vscode",
    "__pypackages__",
    "_build",
    "buck-out",
    "build",
    "dist",
    "node_modules",
    "site-packages",
    "venv",
]

line-length = 88
indent-width = 4

# Python 3.12 (matches `requires-python` in [project]).
target-version = "py312"

[tool.ruff.lint]
# Rule families enabled for linting.
select = [
    "E4",   # pycodestyle errors
    "E7",   # pycodestyle errors
    "E9",   # pycodestyle errors
    "F",    # Pyflakes
    "I",    # isort
    "N",    # pep8-naming
    "UP",   # pyupgrade
    "B",    # flake8-bugbear
    "C4",   # flake8-comprehensions
    "T20",  # flake8-print
    "SIM",  # flake8-simplify
    "RUF",  # Ruff-specific rules
]

# Individual rules switched off even though their family is selected above.
ignore = [
    "E501",    # Line too long (handled by formatter)
    "B008",    # Do not perform function calls in argument defaults
    "T201",    # Print found (allow print statements)
    "RUF012",  # Mutable class attributes should be annotated with `typing.ClassVar`
]

# NOTE(review): "I" is already listed in `select`, so this entry is redundant.
extend-select = ["I"]

# Allow fixes for all enabled rules (when `--fix` is provided).
fixable = ["ALL"]
unfixable = []

# Allow unused variables when underscore-prefixed.
dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$"

[tool.ruff.format]
# Use double quotes for strings.
quote-style = "double"

# Indent with spaces, rather than tabs.
indent-style = "space"

# Respect magic trailing commas.
skip-magic-trailing-comma = false

# Automatically detect the appropriate line ending.
line-ending = "auto"

[tool.ruff.lint.isort]
# Group imports by type; `app` and `tests` are treated as first-party.
known-first-party = ["app", "tests"]
force-single-line = false
combine-as-imports = true

[tool.pytest.ini_options]
# pytest-asyncio settings: auto mode, with fixture and test loop scopes
# both set to "session".
asyncio_mode = "auto"
asyncio_default_fixture_loop_scope = "session"
asyncio_default_test_loop_scope = "session"

# Test discovery.
testpaths = ["tests"]
python_files = ["test_*.py"]
python_classes = ["Test*"]
python_functions = ["test_*"]

# `-x` stops at the first failure; `--strict-markers` rejects any marker
# that is not declared in `markers` below.
addopts = "-v --tb=short -x --strict-markers -ra --durations=5"
markers = [
    "unit: pure logic tests, no DB or external services",
    "integration: tests that require a real PostgreSQL database",
]
filterwarnings = [
    "ignore::UserWarning:chonkie",
]

[tool.setuptools.packages.find]
# Discover packages from the project root, limited to the `app*` and
# `alembic*` patterns.
where = ["."]
include = ["app*", "alembic*"]

[build-system]
# Build with the setuptools backend.
requires = ["setuptools>=61.0", "wheel"]
build-backend = "setuptools.build_meta"