# surfsense_evals — environment template.
#
# Copy this file to `.env` (in the surfsense_evals/ project root or your
# CWD) and fill in the values. `python-dotenv` loads it automatically
# the first time `core.config` is imported, so every CLI subcommand
# (`setup`, `ingest`, `run`, `report`, `teardown`, `models list`, …)
# will pick the values up.
#
#   cp .env.example .env
#   # then edit .env with your values
#
# `.env` is gitignored — never commit real secrets.
#
# Format notes: one KEY=value assignment per line, and comments on their
# own lines starting with `#`. Lines that begin with `# KEY=` are optional
# settings — remove the leading `# ` to enable them.

# ---------------------------------------------------------------------------
# 1. Backend target — REQUIRED (default works for a local dev backend)
# ---------------------------------------------------------------------------
SURFSENSE_API_BASE=http://localhost:8000

# ---------------------------------------------------------------------------
# 2. OpenRouter — REQUIRED for any `run` invocation
# ---------------------------------------------------------------------------
# The `native_pdf` arm calls OpenRouter directly; the `surfsense` arm
# routes through SurfSense which uses the same key under the hood.
OPENROUTER_API_KEY=sk-or-...

# Override only if you proxy OpenRouter through a private gateway:
# OPENROUTER_BASE_URL=https://openrouter.ai/api/v1

# Multimodal benchmarks (medxpertqa, mmlongbench) require a vision-capable
# slug. Recommended (verify in your catalog with `models list --grep ...`):
#   anthropic/claude-sonnet-4.5   (default recommendation)
#   anthropic/claude-opus-4.7     (strongest)
#   openai/gpt-5                  (top-tier vision)
#   google/gemini-2.5-pro         (1M-token context, best for long PDFs)
# DO NOT use openai/gpt-5.4-mini for image-bearing benchmarks — it's
# text-only on PDF content and the runner emits a warning if pinned.

# ---------------------------------------------------------------------------
# 3. Auth — pick EXACTLY ONE of the two modes below
# ---------------------------------------------------------------------------

# --- Mode A: LOCAL (backend started with AUTH_TYPE=LOCAL)
# The harness POSTs these to /auth/jwt/login automatically.
# SURFSENSE_USER_EMAIL=you@example.com
# SURFSENSE_USER_PASSWORD=...

# --- Mode B: GOOGLE OAuth (or any pre-issued JWT)
# Open the SurfSense web UI in your browser, log in via Google, then in
# DevTools → Application → Local Storage copy:
#   surfsense_bearer_token  → SURFSENSE_JWT
#   surfsense_refresh_token → SURFSENSE_REFRESH_TOKEN (optional, enables
#                             auto-refresh on 401)
# SURFSENSE_JWT=eyJhbGciOi...
# SURFSENSE_REFRESH_TOKEN=eyJhbGciOi...

# ---------------------------------------------------------------------------
# 4. Filesystem paths — OPTIONAL (defaults below)
# ---------------------------------------------------------------------------
# NOTE(review): the "Default:" paths in this section look truncated (a
# leading prefix such as the project root may be missing) — confirm
# against core.config before relying on them.

# Where datasets, rendered PDFs, ingestion id maps, run outputs, and
# state.json live. Default: /data/
# EVAL_DATA_DIR=./data

# Where generated reports (summary.md / summary.json) get written.
# Default: /reports/
# EVAL_REPORTS_DIR=./reports

# ---------------------------------------------------------------------------
# 5. Parser SDKs — REQUIRED for the multimodal_doc / parser_compare suite
# ---------------------------------------------------------------------------
# parser_compare calls Azure Document Intelligence and LlamaParse SDKs
# directly from the eval harness so each (basic / premium) extraction
# is a clean A/B test independent of the SurfSense backend's ETL routing.
#
# Azure Document Intelligence — used for the `azure_basic_lc` (prebuilt-read)
# and `azure_premium_lc` (prebuilt-layout) arms. Get an endpoint + key from
# https://portal.azure.com (Document Intelligence resource, F0 / S0 tier).
# AZURE_DI_ENDPOINT=https://<your-resource-name>.cognitiveservices.azure.com/
# AZURE_DI_KEY=
#
# LlamaCloud (LlamaParse) — used for `llamacloud_basic_lc` (parse_page_with_llm)
# and `llamacloud_premium_lc` (parse_page_with_agent). Get a key from
# https://cloud.llamaindex.ai/api-key.
# LLAMA_CLOUD_API_KEY=llx-...