From b82c24ad07a452fcd6fb101ed64926102b2bf362 Mon Sep 17 00:00:00 2001 From: Sabin Shrestha Date: Mon, 21 Jul 2025 22:46:12 +0545 Subject: [PATCH 1/4] chore: add pre-commit --- .github/workflows/pre-commit.yml | 33 ++++++++ .pre-commit-config.yaml | 130 +++++++++++++++++++++++++++++++ .secrets.baseline | 115 +++++++++++++++++++++++++++ 3 files changed, 278 insertions(+) create mode 100644 .github/workflows/pre-commit.yml create mode 100644 .pre-commit-config.yaml create mode 100644 .secrets.baseline diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml new file mode 100644 index 000000000..91ae6fdf4 --- /dev/null +++ b/.github/workflows/pre-commit.yml @@ -0,0 +1,33 @@ +name: pre-commit + +on: + push: + pull_request: + branches: [main] + +jobs: + pre-commit: + runs-on: ubuntu-latest + steps: + - name: Checkout code + uses: actions/checkout@v4 + with: + fetch-depth: 0 # Required for detecting diffs + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.12' + + - name: Install pre-commit + run: | + pip install pre-commit + + - name: Run pre-commit on changed files + run: | + CHANGED_FILES=$(git diff --name-only origin/main...HEAD | tr '\n' ' ') + if [ -n "$CHANGED_FILES" ]; then + pre-commit run --files $CHANGED_FILES + else + echo "No changed files to check." 
+ fi diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 000000000..4fd428d2f --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,130 @@ +# Pre-commit configuration for SurfSense +# See https://pre-commit.com for more information + +repos: + # General file quality hooks + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v5.0.0 + hooks: + - id: trailing-whitespace + exclude: '\.md$' + - id: check-yaml + args: [--multi, --unsafe] + - id: check-json + exclude: '(tsconfig\.json|\.vscode/.*\.json)$' + - id: check-toml + - id: check-merge-conflict + - id: check-added-large-files + args: [--maxkb=10240] # 10MB limit + - id: debug-statements + - id: check-case-conflict + + # Security - detect secrets across all file types + - repo: https://github.com/Yelp/detect-secrets + rev: v1.5.0 + hooks: + - id: detect-secrets + args: ['--baseline', '.secrets.baseline'] + exclude: | + (?x)^( + .*\.env\.example| + .*\.env\.template| + .*/tests/.*| + .*test.*\.py| + .github/workflows/.*\.yml| + .github/workflows/.*\.yaml| + .*pnpm-lock\.yaml| + .*alembic\.ini| + .*\.mdx$ + )$ + + # Python Backend Hooks (surfsense_backend) + - repo: https://github.com/psf/black + rev: 25.1.0 + hooks: + - id: black + files: ^surfsense_backend/ + language_version: python3 + + - repo: https://github.com/pycqa/isort + rev: 6.0.1 + hooks: + - id: isort + files: ^surfsense_backend/ + args: ["--profile", "black", "--line-length", "88"] + + - repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.12.4 + hooks: + - id: ruff + files: ^surfsense_backend/ + args: [--fix, --exit-non-zero-on-fix] + - id: ruff-format + files: ^surfsense_backend/ + + - repo: https://github.com/pre-commit/mirrors-mypy + rev: v1.17.0 + hooks: + - id: mypy + files: ^surfsense_backend/ + additional_dependencies: [] + args: [--ignore-missing-imports, --disallow-untyped-defs] + + - repo: https://github.com/PyCQA/bandit + rev: 1.8.6 + hooks: + - id: bandit + files: ^surfsense_backend/ 
+ args: ['-f', 'json'] # no '-r .': pre-commit passes the matched files itself + exclude: ^surfsense_backend/(tests/|alembic/) + + # Frontend/Extension Hooks (TypeScript/JavaScript) + - repo: https://github.com/pre-commit/mirrors-prettier + rev: v4.0.0-alpha.8 + hooks: + - id: prettier + files: ^(surfsense_web|surfsense_browser_extension)/ + types_or: [javascript, jsx, ts, tsx, json, yaml, markdown] + exclude: '(package-lock\.json|\.next/|build/|dist/)' + + - repo: https://github.com/pre-commit/mirrors-eslint + rev: v9.31.0 + hooks: + - id: eslint + files: ^surfsense_web/ + types: [file] + types_or: [javascript, jsx, ts, tsx] + additional_dependencies: + - 'eslint@^9' + - 'eslint-config-next@15.2.0' + - '@eslint/eslintrc@^3' + args: [--fix] + exclude: '(\.next/|build/|dist/)' + + # TypeScript compilation check (tsc --noEmit; `next build` has no --dry-run flag) + - repo: local + hooks: + - id: typescript-check-web + name: TypeScript Check (Web) + entry: bash -c 'cd surfsense_web && npx tsc --noEmit' + language: system + files: ^surfsense_web/.*\.(ts|tsx)$ + pass_filenames: false + + - id: typescript-check-extension + name: TypeScript Check (Browser Extension) + entry: bash -c 'cd surfsense_browser_extension && npx tsc --noEmit' + language: system + files: ^surfsense_browser_extension/.*\.(ts|tsx)$ + pass_filenames: false + + # Commit message linting + - repo: https://github.com/commitizen-tools/commitizen + rev: v4.8.3 + hooks: + - id: commitizen + stages: [commit-msg] + +# Global configuration +default_stages: [pre-commit] +fail_fast: false \ No newline at end of file diff --git a/.secrets.baseline b/.secrets.baseline new file mode 100644 index 000000000..254d9ded0 --- /dev/null +++ b/.secrets.baseline @@ -0,0 +1,115 @@ +{ + "version": "1.5.0", + "plugins_used": [ + { + "name": "ArtifactoryDetector" + }, + { + "name": "AWSKeyDetector" + }, + { + "name": "AzureStorageKeyDetector" + }, + { + "name": "Base64HighEntropyString", + "limit": 4.5 + }, + { + "name": "BasicAuthDetector" + }, + { + 
"name": "CloudantDetector" + }, + { + "name": "DiscordBotTokenDetector" + }, + { + "name": "GitHubTokenDetector" + }, + { + "name": "HexHighEntropyString", + "limit": 3.0 + }, + { + "name": "IbmCloudIamDetector" + }, + { + "name": "IbmCosHmacDetector" + }, + { + "name": "JwtTokenDetector" + }, + { + "name": "KeywordDetector", + "keyword_exclude": "" + }, + { + "name": "MailchimpDetector" + }, + { + "name": "NpmDetector" + }, + { + "name": "PrivateKeyDetector" + }, + { + "name": "SendGridDetector" + }, + { + "name": "SlackDetector" + }, + { + "name": "SoftlayerDetector" + }, + { + "name": "SquareOAuthDetector" + }, + { + "name": "StripeDetector" + }, + { + "name": "TwilioKeyDetector" + } + ], + "filters_used": [ + { + "path": "detect_secrets.filters.allowlist.is_line_allowlisted" + }, + { + "path": "detect_secrets.filters.common.is_baseline_file" + }, + { + "path": "detect_secrets.filters.common.is_ignored_due_to_verification_policies", + "min_level": 2 + }, + { + "path": "detect_secrets.filters.heuristic.is_indirect_reference" + }, + { + "path": "detect_secrets.filters.heuristic.is_likely_id_string" + }, + { + "path": "detect_secrets.filters.heuristic.is_lock_file" + }, + { + "path": "detect_secrets.filters.heuristic.is_not_alphanumeric_string" + }, + { + "path": "detect_secrets.filters.heuristic.is_potential_uuid" + }, + { + "path": "detect_secrets.filters.heuristic.is_prefixed_with_dollar_sign" + }, + { + "path": "detect_secrets.filters.heuristic.is_sequential_string" + }, + { + "path": "detect_secrets.filters.heuristic.is_swagger_file" + }, + { + "path": "detect_secrets.filters.heuristic.is_templated_secret" + } + ], + "results": {}, + "generated_at": "2025-01-20T12:00:00Z" +} \ No newline at end of file From 5d4012b14fa52cd89f0498788d22b0c8d4d8ac17 Mon Sep 17 00:00:00 2001 From: Sabin Shrestha Date: Mon, 21 Jul 2025 22:56:17 +0545 Subject: [PATCH 2/4] docs: add pre-commit hooks section to CONTRIBUTING.md and create PRE_COMMIT.md guide --- CONTRIBUTING.md | 7 ++ 
 PRE_COMMIT.md | 237 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 244 insertions(+) create mode 100644 PRE_COMMIT.md diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 0078e90c2..cc5dde3cc 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -76,6 +76,13 @@ SurfSense consists of three main components: ## 🧪 Development Guidelines +### Code Quality & Pre-commit Hooks +We use pre-commit hooks to maintain code quality, security, and consistency across the codebase. Before you start developing: + +1. **Install and set up pre-commit hooks** - See our detailed [Pre-commit Guide](./PRE_COMMIT.md) +2. **Understand the automated checks** that will run on your code +3. **Learn about bypassing hooks** when necessary (use sparingly!) + ### Code Style - **Backend**: Follow Python PEP 8 style guidelines - **Frontend**: Use TypeScript and follow the existing code patterns diff --git a/PRE_COMMIT.md b/PRE_COMMIT.md new file mode 100644 index 000000000..96d123de3 --- /dev/null +++ b/PRE_COMMIT.md @@ -0,0 +1,237 @@ +# Pre-commit Hooks for SurfSense Contributors + +Welcome to SurfSense! As an open-source project, we use pre-commit hooks to maintain code quality, security, and consistency across our multi-component codebase. This guide will help you set up and work with our pre-commit configuration. + +## 🚀 What is Pre-commit? + +Pre-commit is a framework for managing multi-language pre-commit hooks. It runs automatically before each commit to catch issues early, ensuring high code quality and consistency across the project. 
+ +## 📁 Project Structure + +SurfSense consists of three main components: +- **`surfsense_backend/`** - Python backend API +- **`surfsense_web/`** - Next.js web application +- **`surfsense_browser_extension/`** - TypeScript browser extension + +## 🛠 Installation + +### Prerequisites +- Python 3.8 or higher +- Node.js 18+ and pnpm (for frontend components) +- Git + +### Install Pre-commit + +```bash +# Install pre-commit globally +pip install pre-commit + +# Or using your preferred package manager +# pipx install pre-commit # Recommended for isolation +``` + +### Setup Pre-commit Hooks + +1. **Clone the repository**: + ```bash + git clone https://github.com/masabinhok/SurfSense.git + cd SurfSense + ``` + +2. **Install the pre-commit hooks**: + ```bash + pre-commit install + ``` + +3. **Install commit message hooks** (optional, for conventional commits): + ```bash + pre-commit install --hook-type commit-msg + ``` + +## 🔧 Configuration Files Added + +When you install pre-commit, the following files are part of the setup: + +- **`.pre-commit-config.yaml`** - Main pre-commit configuration +- **`.secrets.baseline`** - Baseline file for secret detection (prevents false positives) +- **`.github/workflows/pre-commit.yml`** - CI workflow that runs pre-commit on PRs + +## 🎯 What Gets Checked + +### All Files +- ✅ Trailing whitespace removal +- ✅ YAML, JSON, and TOML validation +- ✅ Large file detection (>10MB) +- ✅ Merge conflict markers +- 🔒 **Secret detection** using detect-secrets + +### Python Backend (`surfsense_backend/`) +- 🐍 **Black** - Code formatting +- 📦 **isort** - Import sorting +- ⚡ **Ruff** - Fast linting and formatting +- 🔍 **MyPy** - Static type checking +- 🛡️ **Bandit** - Security vulnerability scanning + +### Frontend (`surfsense_web/` & `surfsense_browser_extension/`) +- 💅 **Prettier** - Code formatting +- 🔍 **ESLint** - Linting (Next.js config) +- 📝 **TypeScript** - Compilation checks + +### Commit Messages 
+- 📝 **Commitizen** - Conventional commit format validation + +## 🚀 Usage + +### Normal Workflow +Pre-commit will run automatically when you commit: + +```bash +git add . +git commit -m "feat: add new feature" +# Pre-commit hooks will run automatically +``` + +### Manual Execution + +Run on staged files only: +```bash +pre-commit run +``` + +Run on specific files: +```bash +pre-commit run --files path/to/file.py path/to/file.ts +``` + +Run all hooks on all files: +```bash +pre-commit run --all-files +``` + +⚠️ **Warning**: Running `--all-files` may generate numerous errors as this codebase has existing linting and type issues that are being gradually resolved. + +### Advanced Commands + +Update all hooks to latest versions: +```bash +pre-commit autoupdate +``` + +Run only specific hooks: +```bash +pre-commit run black # Run only black +pre-commit run --all-files prettier # Run prettier on all files +``` + +Clean pre-commit cache: +```bash +pre-commit clean +``` + +## 🆘 Bypassing Pre-commit (When Necessary) + +Sometimes you might need to bypass pre-commit hooks (use sparingly!): + +### Skip all hooks for one commit: +```bash +git commit -m "fix: urgent hotfix" --no-verify +``` + +### Skip specific hooks: +```bash +SKIP=mypy,black git commit -m "feat: work in progress" +``` + +Available hook IDs to skip: +- `trailing-whitespace`, `check-yaml`, `check-json` +- `detect-secrets` +- `black`, `isort`, `ruff`, `ruff-format`, `mypy`, `bandit` +- `prettier`, `eslint` +- `typescript-check-web`, `typescript-check-extension` +- `commitizen` + +## 🐛 Common Issues & Solutions + +### Secret Detection False Positives + +If detect-secrets flags legitimate content as secrets: + +1. **Review the detection** - Ensure it's not actually a secret +2. 
**Update baseline**: + ```bash + detect-secrets scan --baseline .secrets.baseline + git add .secrets.baseline + ``` + +### TypeScript/Node.js Issues + +Ensure dependencies are installed: +```bash +(cd surfsense_web && pnpm install) +(cd surfsense_browser_extension && pnpm install) +``` + +### Python Environment Issues + +For Python hooks, ensure you're in the correct environment: +```bash +cd surfsense_backend +# If using uv +uv sync +# Or traditional pip +pip install -r requirements.txt +``` + +### Hook Installation Issues + +If hooks aren't running: +```bash +pre-commit uninstall +pre-commit install --install-hooks +``` + +## 📊 Performance Tips + +- **Incremental runs**: Pre-commit only runs on changed files by default +- **Parallel execution**: Many hooks run in parallel for speed +- **Caching**: Pre-commit caches environments to speed up subsequent runs + +## 🔄 CI Integration + +Pre-commit also runs in our GitHub Actions CI pipeline on every PR to `main`. The CI: +- Runs only on changed files for efficiency +- Provides the same feedback as local pre-commit +- Prevents merging code that doesn't pass quality checks + +## 📋 Best Practices + +1. **Install pre-commit early** in your development setup +2. **Fix issues incrementally** rather than bypassing hooks +3. **Update your branch regularly** to avoid conflicts with formatting changes +4. **Run `--all-files` periodically** on feature branches (in small chunks) +5. **Keep the `.secrets.baseline` updated** when legitimate secrets-like strings are added + +## 💡 Contributing to Pre-commit Config + +To modify the pre-commit configuration: + +1. Edit `.pre-commit-config.yaml` +2. Test your changes: + ```bash + pre-commit run --all-files # Test with caution! + ``` +3. Update the baseline if needed: + ```bash + detect-secrets scan --baseline .secrets.baseline + ``` +4. 
Submit a PR with your changes + +## 🆘 Getting Help + +- **Pre-commit docs**: https://pre-commit.com/ +- **Project issues**: Open an issue on GitHub +- **Hook-specific help**: Check individual tool documentation (Black, Ruff, ESLint, etc.) + +--- + +Thank you for contributing to SurfSense! 🏄‍♀️ Quality code makes everyone's surfing experience smoother. \ No newline at end of file From cf9bc0166433557df2a04bb05f9439580cac0b29 Mon Sep 17 00:00:00 2001 From: Sabin Shrestha Date: Mon, 21 Jul 2025 23:21:27 +0545 Subject: [PATCH 3/4] refactor: enhance pre-commit workflow with improved diff detection and caching --- .github/workflows/pre-commit.yml | 34 ++++++++++++++++++++++++++++---- 1 file changed, 30 insertions(+), 4 deletions(-) diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml index 91ae6fdf4..348ed8516 100644 --- a/.github/workflows/pre-commit.yml +++ b/.github/workflows/pre-commit.yml @@ -14,20 +14,46 @@ jobs: with: fetch-depth: 0 # Required for detecting diffs + - name: Fetch main branch + run: | + # Ensure we have the main branch reference for comparison + git fetch origin main:main 2>/dev/null || git fetch origin main 2>/dev/null || true + - name: Set up Python uses: actions/setup-python@v5 with: python-version: '3.12' + - name: Cache pre-commit environments + uses: actions/cache@v4 + with: + path: ~/.cache/pre-commit + key: pre-commit-${{ hashFiles('.pre-commit-config.yaml') }} + restore-keys: | + pre-commit- + - name: Install pre-commit run: | pip install pre-commit + - name: Install hook environments (cache) + run: | + pre-commit install-hooks + - name: Run pre-commit on changed files run: | - CHANGED_FILES=$(git diff --name-only origin/main...HEAD | tr '\n' ' ') - if [ -n "$CHANGED_FILES" ]; then - pre-commit run --files $CHANGED_FILES + # Use pre-commit's native diff detection with fallback strategies + if git show-ref --verify --quiet refs/heads/main; then + # Main branch exists locally, use pre-commit's native 
diff mode + echo "Running pre-commit with native diff detection against main branch" + pre-commit run --from-ref main --to-ref HEAD + elif git show-ref --verify --quiet refs/remotes/origin/main; then + # Origin/main exists, use it as reference + echo "Running pre-commit with native diff detection against origin/main" + pre-commit run --from-ref origin/main --to-ref HEAD else - echo "No changed files to check." + # Fallback: run on all files (for first commits or when main is unavailable) + echo "Main branch reference not found, running pre-commit on all files" + echo "⚠️ This may take longer and show more issues than normal" + pre-commit run --all-files fi From 285d57e0006e59c461828b4f3167975e18a3feca Mon Sep 17 00:00:00 2001 From: Sabin Shrestha Date: Mon, 21 Jul 2025 23:23:45 +0545 Subject: [PATCH 4/4] fix: correct formatting for mypy hook in pre-commit configuration --- .pre-commit-config.yaml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 4fd428d2f..0548e6667 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -65,10 +65,10 @@ repos: - repo: https://github.com/pre-commit/mirrors-mypy rev: v1.17.0 hooks: - - id: mypy - files: ^surfsense_backend/ - additional_dependencies: [] - args: [--ignore-missing-imports, --disallow-untyped-defs] + - id: mypy + files: ^surfsense_backend/ + additional_dependencies: [] + args: [--ignore-missing-imports, --disallow-untyped-defs] - repo: https://github.com/PyCQA/bandit rev: 1.8.6