diff --git a/docker-compose.yml b/docker-compose.yml index bbf33a9b1..873de6a5f 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -74,6 +74,25 @@ services: - redis - backend + celery_beat: + build: ./surfsense_backend + # image: ghcr.io/modsetter/surfsense_backend:latest + command: celery -A app.celery_app beat --loglevel=info + volumes: + - ./surfsense_backend:/app + - shared_temp:/tmp + env_file: + - ./surfsense_backend/.env + environment: + - DATABASE_URL=postgresql+asyncpg://${POSTGRES_USER:-postgres}:${POSTGRES_PASSWORD:-postgres}@db:5432/${POSTGRES_DB:-surfsense} + - CELERY_BROKER_URL=redis://redis:${REDIS_PORT:-6379}/0 + - CELERY_RESULT_BACKEND=redis://redis:${REDIS_PORT:-6379}/0 + - PYTHONPATH=/app + depends_on: + - db + - redis + - celery_worker + # flower: # build: ./surfsense_backend # # image: ghcr.io/modsetter/surfsense_backend:latest diff --git a/surfsense_backend/.env.example b/surfsense_backend/.env.example index 289b45e2f..56d18c698 100644 --- a/surfsense_backend/.env.example +++ b/surfsense_backend/.env.example @@ -3,7 +3,23 @@ DATABASE_URL=postgresql+asyncpg://postgres:postgres@localhost:5432/surfsense #Celery Config CELERY_BROKER_URL=redis://localhost:6379/0 CELERY_RESULT_BACKEND=redis://localhost:6379/0 -# Dynamic Periodic task creation uv run celery -A celery_worker.celery_app beat --loglevel=info +# Periodic task interval +# # Run every minute (default) +# SCHEDULE_CHECKER_INTERVAL=1m + +# # Run every 5 minutes +# SCHEDULE_CHECKER_INTERVAL=5m + +# # Run every 10 minutes +# SCHEDULE_CHECKER_INTERVAL=10m + +# # Run every hour +# SCHEDULE_CHECKER_INTERVAL=1h + +# # Run every 2 hours +# SCHEDULE_CHECKER_INTERVAL=2h + +SCHEDULE_CHECKER_INTERVAL=5m SECRET_KEY=SECRET NEXT_FRONTEND_URL=http://localhost:3000 diff --git a/surfsense_backend/app/celery_app.py b/surfsense_backend/app/celery_app.py index 6cbb5c901..a2a613777 100644 --- a/surfsense_backend/app/celery_app.py +++ b/surfsense_backend/app/celery_app.py @@ -13,6 +13,46 @@ load_dotenv() 
CELERY_BROKER_URL = os.getenv("CELERY_BROKER_URL", "redis://localhost:6379/0") CELERY_RESULT_BACKEND = os.getenv("CELERY_RESULT_BACKEND", "redis://localhost:6379/0") +# Get schedule checker interval from environment +# Format: "<number><unit>" where unit is 'm' (minutes) or 'h' (hours) +# Examples: "1m" (every minute), "5m" (every 5 minutes), "1h" (every hour) +SCHEDULE_CHECKER_INTERVAL = os.getenv("SCHEDULE_CHECKER_INTERVAL", "2m") + + +def parse_schedule_interval(interval: str) -> dict: + """Parse interval string into crontab parameters. + + Args: + interval: String like "1m", "5m", "1h", etc. + + Returns: + Dict with crontab parameters (minute, hour) + """ + interval = interval.strip().lower() + + # Extract number and unit + if interval.endswith("m") or interval.endswith("min"): + # Minutes + num = int(interval.rstrip("min")) + if num == 1: + return {"minute": "*", "hour": "*"} + else: + return {"minute": f"*/{num}", "hour": "*"} + elif interval.endswith("h") or interval.endswith("hour"): + # Hours + num = int(interval.rstrip("hour")) + if num == 1: + return {"minute": "0", "hour": "*"} + else: + return {"minute": "0", "hour": f"*/{num}"} + else: + # Default to every minute if parsing fails + return {"minute": "*", "hour": "*"} + + +# Parse the schedule interval +schedule_params = parse_schedule_interval(SCHEDULE_CHECKER_INTERVAL) + # Create Celery app celery_app = Celery( "surfsense", @@ -55,12 +95,12 @@ celery_app.conf.update( # Configure Celery Beat schedule # This uses a meta-scheduler pattern: instead of creating individual Beat schedules -# for each connector, we have ONE schedule that checks the database every minute +# for each connector, we have ONE schedule that checks the database at the configured interval # for connectors that need indexing. This provides dynamic scheduling without restarts. 
celery_app.conf.beat_schedule = { "check-periodic-connector-schedules": { "task": "check_periodic_schedules", - "schedule": crontab(minute="*"), # Run every minute + "schedule": crontab(**schedule_params), "options": { "expires": 30, # Task expires after 30 seconds if not picked up }, diff --git a/surfsense_web/components/homepage/navbar.tsx b/surfsense_web/components/homepage/navbar.tsx index 1341431f5..d3a2e4c4f 100644 --- a/surfsense_web/components/homepage/navbar.tsx +++ b/surfsense_web/components/homepage/navbar.tsx @@ -129,11 +129,18 @@ const MobileNav = ({ navItems, isScrolled }: any) => { SurfSense - {open ? ( - setOpen(!open)} /> - ) : ( - setOpen(!open)} /> - )} + @@ -155,10 +162,10 @@ const MobileNav = ({ navItems, isScrolled }: any) => { ))}
@@ -166,7 +173,7 @@ const MobileNav = ({ navItems, isScrolled }: any) => { href="https://github.com/MODSetter/SurfSense" target="_blank" rel="noopener noreferrer" - className="flex items-center gap-1.5 rounded-lg px-3 py-2 hover:bg-gray-100 dark:hover:bg-neutral-800 transition-colors" + className="flex items-center gap-1.5 rounded-lg px-3 py-2 hover:bg-gray-100 dark:hover:bg-neutral-800 transition-colors touch-manipulation" > {loadingGithubStars ? ( @@ -179,12 +186,12 @@ const MobileNav = ({ navItems, isScrolled }: any) => {
- + )}
diff --git a/surfsense_web/content/docs/docker-installation.mdx b/surfsense_web/content/docs/docker-installation.mdx index 245b95d61..0394bd2c6 100644 --- a/surfsense_web/content/docs/docker-installation.mdx +++ b/surfsense_web/content/docs/docker-installation.mdx @@ -98,6 +98,8 @@ Before you begin, ensure you have: | LLAMA_CLOUD_API_KEY | API key for LlamaCloud service for document parsing (required if ETL_SERVICE=LLAMACLOUD) | | CELERY_BROKER_URL | Redis connection URL for Celery broker (e.g., `redis://localhost:6379/0`) | | CELERY_RESULT_BACKEND | Redis connection URL for Celery result backend (e.g., `redis://localhost:6379/0`) | +| SCHEDULE_CHECKER_INTERVAL | (Optional) How often to check for scheduled connector tasks. Format: `<number><unit>` where unit is `m` (minutes) or `h` (hours). Examples: `1m`, `5m`, `1h`, `2h` (default: `2m`) | +| REGISTRATION_ENABLED | (Optional) Enable or disable new user registration (e.g., `TRUE` or `FALSE`, default: `TRUE`) | **Optional Backend LangSmith Observability:** @@ -181,6 +183,22 @@ For more details, see the [Uvicorn documentation](https://www.uvicorn.org/#comma - API Documentation: [http://localhost:8000/docs](http://localhost:8000/docs) - pgAdmin: [http://localhost:5050](http://localhost:5050) +## Docker Services Overview + +The Docker setup includes several services that work together: + +- **Backend**: FastAPI application server +- **Frontend**: Next.js web application +- **PostgreSQL (db)**: Database with pgvector extension +- **Redis**: Message broker for Celery +- **Celery Worker**: Handles background tasks (document processing, indexing, etc.) 
+- **Celery Beat**: Scheduler for periodic tasks (enables scheduled connector indexing) + - The schedule interval can be configured using the `SCHEDULE_CHECKER_INTERVAL` environment variable in your backend `.env` file + - Default: checks every 2 minutes for connectors that need indexing +- **pgAdmin**: Database management interface + +All services start automatically with `docker compose up`. The Celery Beat service ensures that periodic indexing functionality works out of the box. + ## Using pgAdmin pgAdmin is included in the Docker setup to help manage your PostgreSQL database. To connect: diff --git a/surfsense_web/content/docs/manual-installation.mdx b/surfsense_web/content/docs/manual-installation.mdx index 822b7f55e..a81e531ce 100644 --- a/surfsense_web/content/docs/manual-installation.mdx +++ b/surfsense_web/content/docs/manual-installation.mdx @@ -86,6 +86,8 @@ Edit the `.env` file and set the following variables: | LLAMA_CLOUD_API_KEY | API key for LlamaCloud service for document parsing (required if ETL_SERVICE=LLAMACLOUD) | | CELERY_BROKER_URL | Redis connection URL for Celery broker (e.g., `redis://localhost:6379/0`) | | CELERY_RESULT_BACKEND | Redis connection URL for Celery result backend (e.g., `redis://localhost:6379/0`) | +| SCHEDULE_CHECKER_INTERVAL | (Optional) How often to check for scheduled connector tasks. Format: `<number><unit>` where unit is `m` (minutes) or `h` (hours). Examples: `1m`, `5m`, `1h`, `2h` (default: `2m`) | +| REGISTRATION_ENABLED | (Optional) Enable or disable new user registration (e.g., `TRUE` or `FALSE`, default: `TRUE`) | **(Optional) Backend LangSmith Observability:** @@ -249,7 +251,23 @@ uv run celery -A celery_worker.celery_app flower --port=5555 Access Flower at [http://localhost:5555](http://localhost:5555) to monitor your Celery tasks. -### 5. Run the Backend +### 5. 
Start Celery Beat (Scheduler) + +In another new terminal window, start Celery Beat to enable periodic tasks (like scheduled connector indexing): + +**Linux/macOS/Windows:** + +```bash +# Make sure you're in the surfsense_backend directory +cd surfsense_backend + +# Start Celery Beat +uv run celery -A celery_worker.celery_app beat --loglevel=info +``` + +**Important**: Celery Beat is required for the periodic indexing functionality to work. Without it, scheduled connector tasks won't run automatically. The schedule interval can be configured using the `SCHEDULE_CHECKER_INTERVAL` environment variable. + +### 6. Run the Backend Start the backend server: