From a84c1cc113bc7e3a6f8cd469b663ec6ae7dcae10 Mon Sep 17 00:00:00 2001 From: willchen96 Date: Sun, 10 May 2026 22:36:29 +0800 Subject: [PATCH] docs: improve setup guidance and env examples --- README.md | 86 ++++++++++++++++++++++++++++++----- backend/.env.example | 3 +- backend/src/lib/access.ts | 29 ++++++------ backend/src/routes/tabular.ts | 26 +++-------- frontend/.env.local.example | 1 + 5 files changed, 100 insertions(+), 45 deletions(-) diff --git a/README.md b/README.md index aa673cb..0a5351d 100644 --- a/README.md +++ b/README.md @@ -7,32 +7,58 @@ Open-source release containing the Mike frontend and backend. - `frontend/` - Next.js application - `backend/` - Express API, Supabase access, document processing, and database schema - `backend/schema.sql` - Supabase schema for fresh databases +- `backend/migrations/` - incremental database updates for existing deployments + +## Prerequisites + +- Node.js 20 or newer +- npm +- git +- A Supabase project +- A Cloudflare R2 bucket, MinIO bucket, or another S3-compatible bucket +- At least one supported model provider API key: Anthropic, Google Gemini, or OpenAI +- LibreOffice installed locally if you need DOC/DOCX to PDF conversion ## Setup -Install dependencies: +### 1. Clone the repo + +```bash +git clone https://github.com/willchen96/mike.git +cd mike +``` + +### 2. Install dependencies ```bash npm install --prefix backend npm install --prefix frontend ``` -Create local env files from the examples: +### 3. Create env files ```bash cp backend/.env.example backend/.env cp frontend/.env.local.example frontend/.env.local ``` -Run `backend/schema.sql` in the Supabase SQL editor for a fresh database. +Then fill in the values. See [Environment Variables](#environment-variables) below. -Start the backend: +### 4. Run the database schema + +For a new Supabase database, open the Supabase SQL editor and run the contents of `backend/schema.sql`. 
+ +For an existing database, do not run the full schema file over production data. Apply the incremental files in `backend/migrations/` instead. + +### 5. Start the backend ```bash npm run dev --prefix backend ``` -Start the frontend: +Backend runs on `http://localhost:3001`. + +### 6. Start the frontend ```bash npm run dev --prefix frontend @@ -40,14 +66,52 @@ npm run dev --prefix frontend Open `http://localhost:3000`. -## Required Services +### 7. Sign up and add a model API key -- Supabase Auth and Postgres -- S3-compatible object storage, such as Cloudflare R2 -- At least one supported model provider key, depending on which models you enable -- LibreOffice for DOC/DOCX to PDF conversion +Sign up in the app. If you did not set provider keys in `backend/.env`, open **Account > Models & API Keys** and add an Anthropic, Gemini, or OpenAI API key. -## Checks +## Environment Variables + +Supabase values come from the project dashboard. Use the project URL for `SUPABASE_URL` / `NEXT_PUBLIC_SUPABASE_URL`, the service role key for `SUPABASE_SECRET_KEY`, and the anon/public key for `NEXT_PUBLIC_SUPABASE_PUBLISHABLE_DEFAULT_KEY`. If your Supabase project shows multiple key formats, use the legacy JWT-style anon and service role keys expected by the Supabase client libraries. 
+ +### Backend (`backend/.env`) + +| Variable | Notes | +| --- | --- | +| `PORT` | Defaults to `3001` | +| `FRONTEND_URL` | `http://localhost:3000` for local development | +| `SUPABASE_URL` | Supabase project URL | +| `SUPABASE_SECRET_KEY` | Supabase service role key | +| `R2_ENDPOINT_URL` | Cloudflare R2, MinIO, or another S3-compatible endpoint | +| `R2_ACCESS_KEY_ID` | Object storage access key | +| `R2_SECRET_ACCESS_KEY` | Object storage secret key | +| `R2_BUCKET_NAME` | Object storage bucket name | +| `GEMINI_API_KEY` | Optional Google Gemini key | +| `ANTHROPIC_API_KEY` | Optional Anthropic key | +| `OPENAI_API_KEY` | Optional OpenAI key | +| `RESEND_API_KEY` | Optional, for email features | +| `USER_API_KEYS_ENCRYPTION_SECRET` | Secret used to encrypt per-user API keys | + +### Frontend (`frontend/.env.local`) + +| Variable | Notes | +| --- | --- | +| `NEXT_PUBLIC_SUPABASE_URL` | Supabase project URL | +| `NEXT_PUBLIC_SUPABASE_PUBLISHABLE_DEFAULT_KEY` | Supabase anon/public key | +| `SUPABASE_SECRET_KEY` | Supabase service role key; keep it server-side and never give it a `NEXT_PUBLIC_` prefix | +| `NEXT_PUBLIC_API_BASE_URL` | `http://localhost:3001` for local development | + +Provider keys are only needed for the models and email features you plan to use. Model provider keys can be configured in `backend/.env` for the whole instance, or per user in **Account > Models & API Keys**. If a provider key is present in `backend/.env`, that provider is available by default and the matching browser API key field is read-only. + +## Troubleshooting + +**Sign-up confirmation email never arrives.** Confirmation emails are sent by Supabase Auth, not by Mike. For local development, the simplest fix is to disable email confirmation in **Supabase > Authentication > Providers > Email**. For production, configure custom SMTP in Supabase; the built-in mailer is heavily rate-limited and may be restricted on newer projects. 
+ +**The model picker shows a missing-key warning.** Add a key for that provider in **Account > Models & API Keys**, or configure the provider key in `backend/.env` and restart the backend. + +**DOC or DOCX conversion fails.** Install LibreOffice locally, then restart the backend so that the LibreOffice executables are on the backend process's `PATH`. + +## Useful Checks ```bash npm run build --prefix backend diff --git a/backend/.env.example b/backend/.env.example index 1db370a..cb7b8de 100644 --- a/backend/.env.example +++ b/backend/.env.example @@ -10,5 +10,6 @@ R2_BUCKET_NAME=mike GEMINI_API_KEY=your-gemini-key ANTHROPIC_API_KEY=your-anthropic-key -OPENROUTER_API_KEY=your-openrouter-key +OPENAI_API_KEY=your-openai-key RESEND_API_KEY=your-resend-key +USER_API_KEYS_ENCRYPTION_SECRET=your-long-random-secret diff --git a/backend/src/lib/access.ts b/backend/src/lib/access.ts index 0caab62..5964578 100644 --- a/backend/src/lib/access.ts +++ b/backend/src/lib/access.ts @@ -120,14 +120,11 @@ export async function ensureReviewAccess( } /** - * Filter a list of document IDs down to those the caller is actually - * authorised to read — owners pass, plus any document whose `project_id` - * the caller has access to (own project or `shared_with` member). + * Filter user-supplied document IDs down to documents the caller can read. * - * The tabular-review routes accept user-supplied `document_ids` from - * request bodies; without this filter an attacker who has any review of - * their own can plant arbitrary doc UUIDs and have the server fetch + run - * an LLM extraction over their bytes (CWE-639). + * Tabular review routes accept document IDs from request bodies. Without this + * check, a caller with access to any review could attach arbitrary document + * UUIDs and later cause /generate or /regenerate-cell to extract those bytes. 
*/ export async function filterAccessibleDocumentIds( documentIds: string[], @@ -146,18 +143,22 @@ export async function filterAccessibleDocumentIds( project_id: string | null; }[]; if (rows.length === 0) return []; + const accessibleProjectIds = new Set( await listAccessibleProjectIds(userId, userEmail, db), ); - const out: string[] = []; - for (const d of rows) { - if (d.user_id === userId) { - out.push(d.id); - } else if (d.project_id && accessibleProjectIds.has(d.project_id)) { - out.push(d.id); + const allowed: string[] = []; + for (const doc of rows) { + if (doc.user_id === userId) { + allowed.push(doc.id); + } else if ( + doc.project_id && + accessibleProjectIds.has(doc.project_id) + ) { + allowed.push(doc.id); } } - return out; + return allowed; } /** diff --git a/backend/src/routes/tabular.ts b/backend/src/routes/tabular.ts index 79578b0..446031a 100644 --- a/backend/src/routes/tabular.ts +++ b/backend/src/routes/tabular.ts @@ -193,10 +193,6 @@ tabularRouter.post("/", requireAuth, async (req, res) => { if (!access.ok) return void res.status(404).json({ detail: "Project not found" }); } - // Drop any document_ids the caller can't access. Without this filter a - // user can stuff foreign UUIDs into document_ids, then call /generate - // or /regenerate-cell to read those documents' bytes back through the - // LLM (CWE-639). const allowedDocumentIds = Array.isArray(document_ids) ? await filterAccessibleDocumentIds( document_ids, @@ -515,9 +511,6 @@ tabularRouter.patch("/:reviewId", requireAuth, async (req, res) => { const existingDocIds = (existingCells ?? []).map( (cell) => cell.document_id, ); - // Drop any newly-added doc_ids the caller can't read; preserve - // already-attached docs so a non-owner collaborator's PATCH - // doesn't accidentally orphan cells they can't directly access. 
const existingDocIdSet = new Set(existingDocIds); const newDocCandidates = requestedDocIds.filter( (id) => !existingDocIdSet.has(id), @@ -687,9 +680,6 @@ tabularRouter.post( if (!column) return void res.status(400).json({ detail: "Column not found" }); - // Defense-in-depth — refuse to extract bytes for a document the - // caller can't read, even if a stale tabular_cells row points at it - // from before the access filter was added (CWE-639). const docAllowed = await filterAccessibleDocumentIds( [document_id], userId, @@ -804,21 +794,19 @@ tabularRouter.post("/:reviewId/generate", requireAuth, async (req, res) => { cellMap.set(`${cell.document_id}:${cell.column_index}`, cell); const docIds = [...new Set((cells ?? []).map((c) => c.document_id))]; - // Same defense-in-depth as /regenerate-cell — filter to docs the caller - // can actually read, so legacy cells planted before the access check - // can't be coerced into running an LLM extraction (CWE-639). const allowedDocIds = new Set( await filterAccessibleDocumentIds(docIds, userId, userEmail, db), ); let docs: Record[] = []; if (docIds.length > 0) { const filteredIds = docIds.filter((id) => allowedDocIds.has(id)); - const { data } = filteredIds.length > 0 - ? await db - .from("documents") - .select("id, filename, file_type, page_count") - .in("id", filteredIds) - : { data: [] as Record[] }; + const { data } = + filteredIds.length > 0 + ? await db + .from("documents") + .select("id, filename, file_type, page_count") + .in("id", filteredIds) + : { data: [] as Record[] }; docs = data ?? 
[]; } else if (review.project_id) { const { data } = await db diff --git a/frontend/.env.local.example b/frontend/.env.local.example index c0ceb71..4e00a72 100644 --- a/frontend/.env.local.example +++ b/frontend/.env.local.example @@ -1,3 +1,4 @@ NEXT_PUBLIC_SUPABASE_URL=https://your-project.supabase.co NEXT_PUBLIC_SUPABASE_PUBLISHABLE_DEFAULT_KEY=your-supabase-anon-key +SUPABASE_SECRET_KEY=your-supabase-service-role-key NEXT_PUBLIC_API_BASE_URL=http://localhost:3001