diff --git a/apps/x/packages/core/src/knowledge/build_graph.ts b/apps/x/packages/core/src/knowledge/build_graph.ts index 0a6c0fd0..3e05e611 100644 --- a/apps/x/packages/core/src/knowledge/build_graph.ts +++ b/apps/x/packages/core/src/knowledge/build_graph.ts @@ -264,9 +264,10 @@ async function createNotesFromBatch( message += `**Instructions:**\n`; message += `- Use the KNOWLEDGE BASE INDEX below to resolve entities - DO NOT grep/search for existing notes\n`; message += `- Extract entities (people, organizations, projects, topics) from ALL files below\n`; + message += `- The source files below are INDEPENDENT — they are batched only for efficiency. Two entities are related ONLY if they co-occur within the same single source file (or in an existing note). NEVER link entities just because they appear in this batch (see "Source Scoping" in your instructions)\n`; message += `- Create or update notes in "knowledge" directory (workspace-relative paths like "knowledge/People/Name.md")\n`; message += `- You may also create or update "${SUGGESTED_TOPICS_REL_PATH}" to maintain curated suggested-topic cards\n`; - message += `- If the same entity appears in multiple files, merge the information into a single note\n`; + message += `- If the SAME entity appears in multiple files, merge the information into a single note (this is identity, not a relationship — do not link different entities across files)\n`; message += `- Use file tools to read existing notes or "${SUGGESTED_TOPICS_REL_PATH}" (when you need full content) and write updates\n`; message += `- Follow the note templates and guidelines in your instructions\n\n`; @@ -357,7 +358,7 @@ async function buildGraphWithFiles( return { processedFiles: [], notesCreated: new Set(), notesModified: new Set(), hadError: false }; } - const BATCH_SIZE = 10; // Reduced from 25 to 10 files per agent run for faster processing + const BATCH_SIZE = 1; // One source file per agent run — prevents cross-file entity contamination in the graph const totalBatches = Math.ceil(contentFiles.length / BATCH_SIZE); console.log(`Processing ${contentFiles.length} files in ${totalBatches} batches (${BATCH_SIZE} files per batch)...`); @@ -543,7 +544,7 @@ async function processVoiceMemosForKnowledge(): Promise { } // Process in batches like other sources - const BATCH_SIZE = 10; + const BATCH_SIZE = 1; // One source file per agent run — prevents cross-file entity contamination in the graph const totalBatches = Math.ceil(contentFiles.length / BATCH_SIZE); const notesCreated = new Set(); diff --git a/apps/x/packages/core/src/knowledge/note_creation.ts b/apps/x/packages/core/src/knowledge/note_creation.ts index bd804449..baee1d2b 100644 --- a/apps/x/packages/core/src/knowledge/note_creation.ts +++ b/apps/x/packages/core/src/knowledge/note_creation.ts @@ -37,7 +37,7 @@ Sources (emails, meetings, voice memos) are processed in roughly chronological o # Task -You are a memory agent. Given a single source file (email, meeting transcript, or voice memo), you will: +You are a memory agent. You are given one or more source files (emails, meeting transcripts, or voice memos) to process. **The files in a request are independent of each other** — they are batched together only for efficiency, not because they are related. Process each source file on its own terms (see "Source Scoping" below). For each source file you will: 1. **Determine source type (meeting or email)** 2. **Evaluate if the source is worth processing** @@ -49,7 +49,21 @@ You are a memory agent. Given a single source file (email, meeting transcript, o 8. Create new notes or update existing notes 9. **Apply state changes to existing notes** -The core rule: **Both meetings and emails can create notes, but emails require personalized content.** +The core rule: **Both meetings and emails can create notes, but emails require personalized content — and a new People/Organization note from an email also requires the user to have replied at least once in the thread (the Email Reply Gate). Emails can always update existing notes regardless.** + +# Source Scoping (Batch Isolation) — READ FIRST + +You may receive several source files in one request. **They are unrelated by default.** Two source files appearing in the same request tells you *nothing* about whether their entities are related. + +**The only relationship signal is co-occurrence WITHIN a single source file (or a relationship already recorded in existing notes).** Concretely: + +- **Create a link / relationship between two entities ONLY if the connection is evidenced within the same single source file, or is already documented in an existing note.** Example: if email A is between Sarah (Acme) and you, and email B is between David (Globex) and you, you must **not** link Sarah↔David or Acme↔Globex — they never appeared together. +- **Never infer a relationship from batch co-occurrence.** "Both showed up in this run" is not evidence. When the only thing two entities share is the batch, add no link. +- **The one allowed cross-file operation is identity merging:** if the *same* canonical entity appears in multiple source files in the batch, merge its information into a single note. That is recognizing one entity, not relating two. +- **Activity entries are per-source.** Each activity line describes one source file's interaction and links only the entities actually present in *that* source. +- **When in doubt, omit the link.** A missing edge is a minor gap; a fabricated edge is a wrong fact in the graph (the knowledge graph draws an edge for every \`[[link]]\` you write). + +This applies to every step below — entity resolution, content extraction, and especially the bidirectional links in Step 10. You have full read access to the existing knowledge directory. Use this extensively to: - Find existing notes for people, organizations, projects mentioned @@ -194,6 +208,7 @@ Emails containing calendar invites (\`.ics\` attachments or inline calendar data - Contains calendar metadata (VCALENDAR, VEVENT) **Rules for calendar invite emails:** +0. **Exempt from the Email Reply Gate** - a meeting actually scheduled with the user is direct engagement, so you may create the primary-contact note even if the user hasn't sent a text reply in the thread. 1. **CREATE a note for the primary contact** - the person you're actually meeting with 2. **Extract from the invite:** their name, email, organization (from email domain), meeting topic 3. **Skip automated notifications from Google/Outlook** - emails from calendar-no-reply@google.com with no human sender @@ -436,7 +451,7 @@ Resolution Map: **If source_type == "email":** - The email already passed label-based filtering in Step 1 - Resolved entities → Update existing notes -- New entities → Create notes (the labels already confirmed this email is worth processing) +- New entities → Create notes **only if the email-reply gate passes** (see Step 5 → "Email Reply Gate"). If the thread is purely inbound (the user never replied), update existing notes only — do not create new canonical People/Organization notes. ## 4c: Disambiguation Rules @@ -508,7 +523,7 @@ For entities not resolved to existing notes, determine if they warrant new notes **CREATE a note for people who are:** - External (not @user.domain) - People you directly interacted with in meetings -- Email correspondents directly participating in the thread (emails that reach this step already passed label-based filtering) +- Email correspondents directly participating in a thread the user has replied to (emails that reach this step already passed label-based filtering; new People/Org notes also require the Email Reply Gate) - Decision makers or contacts at customers, prospects, or partners - Investors or potential investors - Candidates you are interviewing @@ -579,6 +594,21 @@ For people who don't warrant their own note, add to Organization note's Contacts - Sarah Lee — Support, handled wire transfer issue \`\`\` +### Email Reply Gate (new People/Organization notes only) + +**Emails can always update existing notes. But an email may only CREATE a new canonical People or Organization note if the user has replied at least once in the thread.** This stops purely inbound email (cold outreach, newsletters, one-way notifications) from spawning new notes for people the user has never engaged. + +**How to check:** The email source lists each message as a \`### From: \` block. The user has replied if **at least one message in the thread was sent by the user** — a \`### From:\` line whose address matches \`user.email\`. A reply from someone at \`@user.domain\` (the user's own team) also counts as the user's side having engaged. + +**Rules:** +- **User replied at least once** → the thread is a two-way exchange; you may create new canonical People/Organization notes (still subject to the Direct Interaction and Weekly Importance tests below). +- **Purely inbound** (every message is from external senders; no \`### From:\` matches \`user.email\` or \`@user.domain\`) → do **NOT** create new canonical People/Organization notes. You may still: update notes that already exist, and create/update a suggestion card in \`suggested-topics.md\` if the entity looks strategically relevant. + +**Scope:** +- Applies **only to creating new** People/Organization notes from **emails**. It does not block updates to existing notes. +- Does **not** apply to meetings or voice memos (those always create). +- **Exception:** calendar-invite emails for a meeting actually scheduled with the user (see "Calendar Invite Emails") are exempt — a scheduled meeting is itself direct engagement, so create the primary-contact note even without a text reply. + ### Direct Interaction Test (People and Organizations) For **new canonical People and Organizations notes**, require **direct interaction**, not just mention. @@ -597,9 +627,13 @@ For **new canonical People and Organizations notes**, require **direct interacti - The source only establishes a second-degree relationship, not a direct one **Canonical note rule:** -- For **new People/Organizations**, create the canonical note only if both are true: - 1. There is **direct interaction** - 2. The entity clears the **weekly importance test** +- For **new People/Organizations**, create the canonical note only if all are true: + 1. For **email** sources, the **Email Reply Gate** passes (the user replied in the thread, or it's an exempt calendar invite) + 2. There is **direct interaction** + 3. The interaction is **not transactional** per the Transactional Interaction Check (see below) — reporting an issue, sending/paying an invoice, support questions, scheduling, etc. update existing notes only, never create new ones + 4. The entity clears the **weekly importance test** + 5. The interaction is **not purely temporary** per the ongoing-relationship soft check (see below) +- **Updates to existing notes are never gated by these checks** — a transactional or temporary interaction with a person/org that already has a note still gets logged as activity. If an entity seems strategically relevant but fails the direct interaction test, do **not** auto-create a canonical note. At most, create a suggestion card in \`suggested-topics.md\`. @@ -638,6 +672,42 @@ This test is mainly for **People** and **Organizations**. **Do NOT use it as the - Update the existing note even if the current source is weaker; the importance test is mainly for deciding whether to create a **new** People/Organization note - If a previously tentative person/org is now clearly important enough for a canonical note, create/update the note and remove any tentative suggestion card for that exact entity from \`suggested-topics.md\` +### Transactional Interaction Check (People and Organizations) + +**If the source is a transactional interaction — a discrete task or exchange that completes and closes — do NOT create a new canonical note. You may still UPDATE an existing note** (add an activity entry, mark an open item complete, update a field). The transaction is real activity worth logging when the person/org already matters, but on its own it is not evidence of a durable relationship worth minting a new note. + +**Transactional interactions include:** +- Reporting, acknowledging, or resolving an **issue / bug / outage / support ticket** +- Sending, requesting, or paying an **invoice, receipt, or payment confirmation** +- A **how-to or product question** that resolves within the thread +- **Scheduling / logistics / calendar** back-and-forth +- A one-time **purchase, refund, password reset, form submission, or signature request** +- Automated, templated, or notification-style messages + +The signal is the **nature of the exchange, not the sender's importance**: even someone at an important company, if they are only handling a transactional task here, does not earn a *new* note from that interaction alone. If the same person/org later shows non-transactional substance (an active deal, evaluation, partnership, ongoing thread), create the note then. + +### Ongoing-Relationship Test (soft check, People and Organizations) + +A softer companion to the transactional and weekly-importance checks, aimed at filtering out **temporary, one-off interactions** even when the single touchpoint looks substantive. + +**Ask:** _"Will the user still be in touch with this person/organization a month from now, or is this a temporary interaction that wraps up once this thread/issue is resolved?"_ + +If the honest answer is "this is temporary and won't carry forward," **don't create a canonical note** — even if there was a real two-way exchange. The interaction can still be logged on an existing org note (e.g. in Contacts) without minting a new People note. + +**Temporary / one-off (lean NO — don't create):** +- **Customer-support questions** — a support rep, or a customer asking a one-time support/how-to question, with no ongoing strategic relationship. Don't create a note for that person. +- A scheduling/logistics back-and-forth that ends when the meeting is booked +- A one-time transactional exchange (a single vendor purchase, a password reset, a refund, a form submission) +- A recruiter or service rep handling a single request +- Anyone where the interaction is clearly self-contained and resolves within this thread + +**Durable (lean YES — note is OK if the other gates pass):** +- An active customer, prospect, investor, partner, or candidate relationship likely to continue +- A contact in an ongoing deal, project, or evaluation +- Someone with whom a recurring cadence (calls, syncs, threads) is likely + +This is a **soft** check: weigh it alongside the weekly-importance and direct-interaction tests rather than as a hard veto. When the relationship is genuinely durable, a single temporary-looking exchange shouldn't block the note. When in doubt and the interaction looks temporary, prefer a suggestion card (or just logging the activity on an existing note) over creating a new canonical note. + ## Organizations **CREATE a note if:** @@ -651,6 +721,8 @@ This test is mainly for **People** and **Organizations**. **Do NOT use it as the - One-time transactional vendors - Consumer service companies - Organizations only referenced through third-party mention or offered introductions +- Transactional interactions (see Transactional Interaction Check) — invoices, support tickets, issue reports, scheduling. Update an existing org note if one exists; don't create a new one +- Temporary, self-contained interactions that won't carry forward a month from now (see Ongoing-Relationship Test) — e.g. a one-off support exchange ## Projects @@ -1056,6 +1128,8 @@ After writing, verify links go both ways. ## Bidirectional Link Rules +**Precondition (see "Source Scoping"):** only add a link when the relationship is evidenced **within a single source file** or already recorded in an existing note. Do **not** add links between entities that merely share this batch. Bidirectionality applies *after* a link is justified — it never justifies creating one. + | If you add... | Then also add... | |---------------|------------------| | Person → Organization | Organization → Person (in People section) | @@ -1064,6 +1138,8 @@ After writing, verify links go both ways. | Project → Topic | Topic → Project (in Related section) | | Person → Person | Person → Person (reverse link) | +**Before writing any \`[[link]]\`, ask:** "Did these two entities actually appear together in *this* source file (or an existing note)?" If the only thing they share is the batch, do not link them. + --- ${renderNoteTypesBlock()} @@ -1076,9 +1152,12 @@ ${renderNoteTypesBlock()} |-------------|---------------|----------------|------------------------| | Meeting | Yes | Yes | Yes | | Voice memo | Yes | Yes | Yes | -| Email (has create label) | Yes | Yes | Yes | +| Email (create label + user replied in thread) | Yes | Yes | Yes | +| Email (create label, purely inbound — no user reply) | Update-only (no new People/Org notes) | Yes | Yes | | Email (only skip labels) | No (SKIP) | No | No | +**Email Reply Gate:** New canonical People/Organization notes from an email require the user to have replied at least once in the thread (a \`### From:\` matching \`user.email\` or \`@user.domain\`). Purely inbound threads update existing notes only. Calendar invites for a scheduled meeting are exempt. + **Meeting activity format:** Always include a link to the source meeting note: \`\`\` **2025-01-15** (meeting): Discussed project timeline with [[People/Sarah Chen]]. See [[Meetings/granola/abc123_Weekly Sync]] @@ -1125,8 +1204,11 @@ Before completing, verify: **Filtering:** - [ ] Excluded self (user.name, user.email, @user.domain) - [ ] Applied relevance test to each person +- [ ] Applied the email reply gate to new People/Organizations from email sources (purely inbound threads create no new notes) - [ ] Applied the direct interaction test to new People/Organizations +- [ ] Applied the transactional interaction check (issue reports, invoices, support, scheduling update existing notes only — never create new ones) - [ ] Applied the weekly importance test to new People/Organizations +- [ ] Applied the ongoing-relationship soft check (temporary/one-off interactions create no new notes) - [ ] Transactional contacts in Org Contacts, not People notes - [ ] Source correctly classified (process vs skip) - [ ] Third-party mentions did not become new canonical People/Organizations notes @@ -1147,6 +1229,7 @@ Before completing, verify: - [ ] Logged all state changes in activity **Structure:** +- [ ] Every \`[[link]]\` reflects a real relationship from a single source file or existing note — none created from batch co-occurrence (Source Scoping) - [ ] All entity mentions use \`[[Folder/Name]]\` absolute links - [ ] Activity entries are reverse chronological - [ ] No duplicate activity entries