mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-05-23 19:05:16 +02:00
feat: added celery beat to docker
This commit is contained in:
parent
8706df6716
commit
aed8163aee
6 changed files with 134 additions and 16 deletions
|
|
@ -74,6 +74,25 @@ services:
|
|||
- redis
|
||||
- backend
|
||||
|
||||
celery_beat:
|
||||
build: ./surfsense_backend
|
||||
# image: ghcr.io/modsetter/surfsense_backend:latest
|
||||
command: celery -A app.celery_app beat --loglevel=info
|
||||
volumes:
|
||||
- ./surfsense_backend:/app
|
||||
- shared_temp:/tmp
|
||||
env_file:
|
||||
- ./surfsense_backend/.env
|
||||
environment:
|
||||
- DATABASE_URL=postgresql+asyncpg://${POSTGRES_USER:-postgres}:${POSTGRES_PASSWORD:-postgres}@db:5432/${POSTGRES_DB:-surfsense}
|
||||
- CELERY_BROKER_URL=redis://redis:${REDIS_PORT:-6379}/0
|
||||
- CELERY_RESULT_BACKEND=redis://redis:${REDIS_PORT:-6379}/0
|
||||
- PYTHONPATH=/app
|
||||
depends_on:
|
||||
- db
|
||||
- redis
|
||||
- celery_worker
|
||||
|
||||
# flower:
|
||||
# build: ./surfsense_backend
|
||||
# # image: ghcr.io/modsetter/surfsense_backend:latest
|
||||
|
|
|
|||
|
|
@ -3,7 +3,23 @@ DATABASE_URL=postgresql+asyncpg://postgres:postgres@localhost:5432/surfsense
|
|||
#Celery Config
|
||||
CELERY_BROKER_URL=redis://localhost:6379/0
|
||||
CELERY_RESULT_BACKEND=redis://localhost:6379/0
|
||||
# Dynamic periodic task scheduling (requires Celery Beat; start it with: uv run celery -A celery_worker.celery_app beat --loglevel=info)
|
||||
# Periodic task interval
|
||||
# # Run every minute (default)
|
||||
# SCHEDULE_CHECKER_INTERVAL=1m
|
||||
|
||||
# # Run every 5 minutes
|
||||
# SCHEDULE_CHECKER_INTERVAL=5m
|
||||
|
||||
# # Run every 10 minutes
|
||||
# SCHEDULE_CHECKER_INTERVAL=10m
|
||||
|
||||
# # Run every hour
|
||||
# SCHEDULE_CHECKER_INTERVAL=1h
|
||||
|
||||
# # Run every 2 hours
|
||||
# SCHEDULE_CHECKER_INTERVAL=2h
|
||||
|
||||
SCHEDULE_CHECKER_INTERVAL=5m
|
||||
|
||||
SECRET_KEY=SECRET
|
||||
NEXT_FRONTEND_URL=http://localhost:3000
|
||||
|
|
|
|||
|
|
@ -13,6 +13,46 @@ load_dotenv()
|
|||
CELERY_BROKER_URL = os.getenv("CELERY_BROKER_URL", "redis://localhost:6379/0")
|
||||
CELERY_RESULT_BACKEND = os.getenv("CELERY_RESULT_BACKEND", "redis://localhost:6379/0")
|
||||
|
||||
# Get schedule checker interval from environment
|
||||
# Format: "<number><unit>" where unit is 'm' (minutes) or 'h' (hours)
|
||||
# Examples: "1m" (every minute), "5m" (every 5 minutes), "1h" (every hour)
|
||||
SCHEDULE_CHECKER_INTERVAL = os.getenv("SCHEDULE_CHECKER_INTERVAL", "2m")
|
||||
|
||||
|
||||
def parse_schedule_interval(interval: str) -> dict:
    """Parse an interval string into crontab keyword arguments.

    The accepted format is ``<number><unit>``, case-insensitive, with optional
    surrounding whitespace. Minute units are ``m``, ``min``, ``mins``,
    ``minute`` and ``minutes``; hour units are ``h``, ``hr``, ``hrs``,
    ``hour`` and ``hours``.

    Cron fields can only express steps that fit inside the field's range, so:

    * minutes must be 1-59, or a whole number of hours (``60m``, ``120m``),
      which is converted to the equivalent hour schedule;
    * hours must be 1-23, or exactly 24 (once a day at 00:00).

    Anything else (unparseable text, zero, negative or out-of-range values)
    falls back to running every minute and logs a warning, rather than
    raising while the module is being imported.

    Note that a cron step such as ``*/7`` restarts at the top of each hour
    (or day), so intervals that do not divide the field evenly are only
    approximately periodic.

    Args:
        interval: String like "1m", "5m", "1h", etc.

    Returns:
        Dict with the ``minute`` and ``hour`` crontab parameters.
    """
    # Imported locally so the function does not depend on module-level imports.
    import logging
    import re

    every_minute = {"minute": "*", "hour": "*"}

    # Only plain ASCII digits followed by a known unit are accepted; signs,
    # decimals and unknown suffixes are treated as invalid.
    match = re.fullmatch(
        r"([0-9]+)\s*(m|mins?|minutes?|h|hrs?|hours?)",
        str(interval).strip().lower(),
    )
    if match:
        num = int(match.group(1))
        unit = match.group(2)[0]  # "m" or "h"

        # Whole hours written in minutes can be expressed as an hour step.
        if unit == "m" and num >= 60 and num % 60 == 0:
            num, unit = num // 60, "h"

        if unit == "m" and 1 <= num <= 59:
            return {"minute": "*" if num == 1 else f"*/{num}", "hour": "*"}
        if unit == "h" and 1 <= num <= 23:
            return {"minute": "0", "hour": "*" if num == 1 else f"*/{num}"}
        if unit == "h" and num == 24:
            return {"minute": "0", "hour": "0"}

    # Default to every minute if the value cannot be parsed or represented.
    logging.getLogger(__name__).warning(
        "Invalid SCHEDULE_CHECKER_INTERVAL %r; falling back to every minute",
        interval,
    )
    return every_minute
|
||||
|
||||
|
||||
# Parse the schedule interval
|
||||
schedule_params = parse_schedule_interval(SCHEDULE_CHECKER_INTERVAL)
|
||||
|
||||
# Create Celery app
|
||||
celery_app = Celery(
|
||||
"surfsense",
|
||||
|
|
@ -55,12 +95,12 @@ celery_app.conf.update(
|
|||
|
||||
# Configure Celery Beat schedule
|
||||
# This uses a meta-scheduler pattern: instead of creating individual Beat schedules
|
||||
# for each connector, we have ONE schedule that checks the database every minute
|
||||
# for each connector, we have ONE schedule that checks the database at the configured interval
|
||||
# for connectors that need indexing. This provides dynamic scheduling without restarts.
|
||||
celery_app.conf.beat_schedule = {
|
||||
"check-periodic-connector-schedules": {
|
||||
"task": "check_periodic_schedules",
|
||||
"schedule": crontab(minute="*"), # Run every minute
|
||||
"schedule": crontab(**schedule_params),
|
||||
"options": {
|
||||
"expires": 30, # Task expires after 30 seconds if not picked up
|
||||
},
|
||||
|
|
|
|||
|
|
@ -129,11 +129,18 @@ const MobileNav = ({ navItems, isScrolled }: any) => {
|
|||
<Logo className="h-8 w-8 rounded-md" />
|
||||
<span className="dark:text-white/90 text-gray-800 text-lg font-bold">SurfSense</span>
|
||||
</div>
|
||||
{open ? (
|
||||
<IconX className="text-black dark:text-white" onClick={() => setOpen(!open)} />
|
||||
) : (
|
||||
<IconMenu2 className="text-black dark:text-white" onClick={() => setOpen(!open)} />
|
||||
)}
|
||||
<button
|
||||
type="button"
|
||||
onClick={() => setOpen(!open)}
|
||||
className="relative z-50 flex items-center justify-center p-2 -mr-2 rounded-lg hover:bg-gray-100 dark:hover:bg-neutral-800 transition-colors touch-manipulation"
|
||||
aria-label={open ? "Close menu" : "Open menu"}
|
||||
>
|
||||
{open ? (
|
||||
<IconX className="h-6 w-6 text-black dark:text-white" />
|
||||
) : (
|
||||
<IconMenu2 className="h-6 w-6 text-black dark:text-white" />
|
||||
)}
|
||||
</button>
|
||||
</div>
|
||||
|
||||
<AnimatePresence>
|
||||
|
|
@ -155,10 +162,10 @@ const MobileNav = ({ navItems, isScrolled }: any) => {
|
|||
))}
|
||||
<div className="flex w-full items-center gap-2 pt-2">
|
||||
<Link
|
||||
href="https://discord.gg/your-server"
|
||||
href="https://discord.gg/ejRNvftDp9"
|
||||
target="_blank"
|
||||
rel="noopener noreferrer"
|
||||
className="flex items-center justify-center rounded-lg p-2 hover:bg-gray-100 dark:hover:bg-neutral-800 transition-colors"
|
||||
className="flex items-center justify-center rounded-lg p-2 hover:bg-gray-100 dark:hover:bg-neutral-800 transition-colors touch-manipulation"
|
||||
>
|
||||
<IconBrandDiscord className="h-5 w-5 text-neutral-600 dark:text-neutral-300" />
|
||||
</Link>
|
||||
|
|
@ -166,7 +173,7 @@ const MobileNav = ({ navItems, isScrolled }: any) => {
|
|||
href="https://github.com/MODSetter/SurfSense"
|
||||
target="_blank"
|
||||
rel="noopener noreferrer"
|
||||
className="flex items-center gap-1.5 rounded-lg px-3 py-2 hover:bg-gray-100 dark:hover:bg-neutral-800 transition-colors"
|
||||
className="flex items-center gap-1.5 rounded-lg px-3 py-2 hover:bg-gray-100 dark:hover:bg-neutral-800 transition-colors touch-manipulation"
|
||||
>
|
||||
<IconBrandGithub className="h-5 w-5 text-neutral-600 dark:text-neutral-300" />
|
||||
{loadingGithubStars ? (
|
||||
|
|
@ -179,12 +186,12 @@ const MobileNav = ({ navItems, isScrolled }: any) => {
|
|||
</Link>
|
||||
<ThemeTogglerComponent />
|
||||
</div>
|
||||
<button
|
||||
type="button"
|
||||
className="w-full rounded-lg bg-black px-8 py-2 font-medium text-white shadow-[0px_-2px_0px_0px_rgba(255,255,255,0.4)_inset] dark:bg-white dark:text-black"
|
||||
<Link
|
||||
href="/contact"
|
||||
className="w-full rounded-lg bg-black px-8 py-2 font-medium text-white shadow-[0px_-2px_0px_0px_rgba(255,255,255,0.4)_inset] dark:bg-white dark:text-black text-center touch-manipulation"
|
||||
>
|
||||
Book a call
|
||||
</button>
|
||||
</Link>
|
||||
</motion.div>
|
||||
)}
|
||||
</AnimatePresence>
|
||||
|
|
|
|||
|
|
@ -98,6 +98,8 @@ Before you begin, ensure you have:
|
|||
| LLAMA_CLOUD_API_KEY | API key for LlamaCloud service for document parsing (required if ETL_SERVICE=LLAMACLOUD) |
|
||||
| CELERY_BROKER_URL | Redis connection URL for Celery broker (e.g., `redis://localhost:6379/0`) |
|
||||
| CELERY_RESULT_BACKEND | Redis connection URL for Celery result backend (e.g., `redis://localhost:6379/0`) |
|
||||
| SCHEDULE_CHECKER_INTERVAL | (Optional) How often to check for scheduled connector tasks. Format: `<number><unit>` where unit is `m` (minutes) or `h` (hours). Examples: `1m`, `5m`, `1h`, `2h` (default when unset: `2m`) |
|
||||
| REGISTRATION_ENABLED | (Optional) Enable or disable new user registration (e.g., `TRUE` or `FALSE`, default: `TRUE`) |
|
||||
|
||||
|
||||
**Optional Backend LangSmith Observability:**
|
||||
|
|
@ -181,6 +183,22 @@ For more details, see the [Uvicorn documentation](https://www.uvicorn.org/#comma
|
|||
- API Documentation: [http://localhost:8000/docs](http://localhost:8000/docs)
|
||||
- pgAdmin: [http://localhost:5050](http://localhost:5050)
|
||||
|
||||
## Docker Services Overview
|
||||
|
||||
The Docker setup includes several services that work together:
|
||||
|
||||
- **Backend**: FastAPI application server
|
||||
- **Frontend**: Next.js web application
|
||||
- **PostgreSQL (db)**: Database with pgvector extension
|
||||
- **Redis**: Message broker for Celery
|
||||
- **Celery Worker**: Handles background tasks (document processing, indexing, etc.)
|
||||
- **Celery Beat**: Scheduler for periodic tasks (enables scheduled connector indexing)
|
||||
- The schedule interval can be configured using the `SCHEDULE_CHECKER_INTERVAL` environment variable in your backend `.env` file
|
||||
- Default (when the variable is unset): checks every 2 minutes for connectors that need indexing
|
||||
- **pgAdmin**: Database management interface
|
||||
|
||||
All services start automatically with `docker compose up`. The Celery Beat service ensures that periodic indexing functionality works out of the box.
|
||||
|
||||
## Using pgAdmin
|
||||
|
||||
pgAdmin is included in the Docker setup to help manage your PostgreSQL database. To connect:
|
||||
|
|
|
|||
|
|
@ -86,6 +86,8 @@ Edit the `.env` file and set the following variables:
|
|||
| LLAMA_CLOUD_API_KEY | API key for LlamaCloud service for document parsing (required if ETL_SERVICE=LLAMACLOUD) |
|
||||
| CELERY_BROKER_URL | Redis connection URL for Celery broker (e.g., `redis://localhost:6379/0`) |
|
||||
| CELERY_RESULT_BACKEND | Redis connection URL for Celery result backend (e.g., `redis://localhost:6379/0`) |
|
||||
| SCHEDULE_CHECKER_INTERVAL | (Optional) How often to check for scheduled connector tasks. Format: `<number><unit>` where unit is `m` (minutes) or `h` (hours). Examples: `1m`, `5m`, `1h`, `2h` (default when unset: `2m`) |
|
||||
| REGISTRATION_ENABLED | (Optional) Enable or disable new user registration (e.g., `TRUE` or `FALSE`, default: `TRUE`) |
|
||||
|
||||
|
||||
**(Optional) Backend LangSmith Observability:**
|
||||
|
|
@ -249,7 +251,23 @@ uv run celery -A celery_worker.celery_app flower --port=5555
|
|||
|
||||
Access Flower at [http://localhost:5555](http://localhost:5555) to monitor your Celery tasks.
|
||||
|
||||
### 5. Run the Backend
|
||||
### 5. Start Celery Beat (Scheduler)
|
||||
|
||||
In another new terminal window, start Celery Beat to enable periodic tasks (like scheduled connector indexing):
|
||||
|
||||
**Linux/macOS/Windows:**
|
||||
|
||||
```bash
|
||||
# Make sure you're in the surfsense_backend directory
|
||||
cd surfsense_backend
|
||||
|
||||
# Start Celery Beat
|
||||
uv run celery -A celery_worker.celery_app beat --loglevel=info
|
||||
```
|
||||
|
||||
**Important**: Celery Beat is required for the periodic indexing functionality to work. Without it, scheduled connector tasks won't run automatically. The schedule interval can be configured using the `SCHEDULE_CHECKER_INTERVAL` environment variable.
|
||||
|
||||
### 6. Run the Backend
|
||||
|
||||
Start the backend server:
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue