From caea83aecf2fa7e432254d7a14573cdeb62a1f70 Mon Sep 17 00:00:00 2001 From: arkml <6592213+arkml@users.noreply.github.com> Date: Fri, 29 May 2026 22:23:21 +0530 Subject: [PATCH] clamp sync to 7 days even after long hiatus (#590) --- .../packages/core/src/knowledge/sync_gmail.ts | 24 +++++++++++++++---- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/apps/x/packages/core/src/knowledge/sync_gmail.ts b/apps/x/packages/core/src/knowledge/sync_gmail.ts index 5c32c7bf..77055f37 100644 --- a/apps/x/packages/core/src/knowledge/sync_gmail.ts +++ b/apps/x/packages/core/src/knowledge/sync_gmail.ts @@ -1060,16 +1060,20 @@ async function fullSync(auth: OAuth2Client, syncDir: string, attachmentsDir: str // If the state file holds a last_sync timestamp (e.g. left over from a // prior Composio sync, or from a previous successful native sync that // we're falling back to after a history.list 404), use that as the - // floor instead of the default lookback. Carries forward Composio's - // last_sync on first migration so we don't refetch the last 7 days. + // floor — but never reach back further than lookbackDays. This caps the + // window at "1 week at most": if last_sync is within the lookback window + // we resume from it (a smaller window), otherwise we clamp to lookbackDays + // ago. Mail older than the cap that arrived during a long offline gap is + // intentionally skipped rather than backfilled. 
const state = loadState(stateFile); + const lookbackFloor = new Date(); + lookbackFloor.setDate(lookbackFloor.getDate() - lookbackDays); let pastDate: Date; - if (state.last_sync) { + if (state.last_sync && new Date(state.last_sync) > lookbackFloor) { pastDate = new Date(state.last_sync); console.log(`Performing full sync from last_sync=${state.last_sync}...`); } else { - pastDate = new Date(); - pastDate.setDate(pastDate.getDate() - lookbackDays); + pastDate = lookbackFloor; console.log(`Performing full sync of last ${lookbackDays} days...`); } @@ -1335,12 +1339,22 @@ async function performSync() { // this runs once, the cache directory is populated and we fall back to // partial-sync on subsequent calls. const cacheMissing = !fs.existsSync(CACHE_DIR) || fs.readdirSync(CACHE_DIR).length === 0; + // partialSync replays *every* messageAdded since the stored historyId, + // regardless of date — so after a long offline gap a still-valid + // historyId would pull the entire gap (e.g. 3 weeks). To honor the + // "1 week at most" cap, bypass partialSync when last_sync is older than the + // lookback window and run a (date-clamped) fullSync instead. + const gapMs = state.last_sync ? 
Date.now() - new Date(state.last_sync).getTime() : 0; + const gapTooLarge = gapMs > LOOKBACK_DAYS * 24 * 60 * 60 * 1000; if (!state.historyId) { console.log("No history ID found, starting full sync..."); await fullSync(auth, SYNC_DIR, ATTACHMENTS_DIR, STATE_FILE, LOOKBACK_DAYS); } else if (cacheMissing) { console.log("History ID present but inbox cache empty — running full sync to backfill snapshots..."); await fullSync(auth, SYNC_DIR, ATTACHMENTS_DIR, STATE_FILE, LOOKBACK_DAYS); + } else if (gapTooLarge) { + console.log(`Last sync older than ${LOOKBACK_DAYS} days — running full sync clamped to the lookback window instead of partial sync...`); + await fullSync(auth, SYNC_DIR, ATTACHMENTS_DIR, STATE_FILE, LOOKBACK_DAYS); } else { console.log("History ID found, starting partial sync..."); await partialSync(auth, state.historyId, SYNC_DIR, ATTACHMENTS_DIR, STATE_FILE, LOOKBACK_DAYS);