diff --git a/apps/x/apps/renderer/src/components/bases-view.tsx b/apps/x/apps/renderer/src/components/bases-view.tsx index 80b45fb8..b3f035cc 100644 --- a/apps/x/apps/renderer/src/components/bases-view.tsx +++ b/apps/x/apps/renderer/src/components/bases-view.tsx @@ -153,7 +153,6 @@ function getSortValue(note: NoteEntry, column: string): string | number { if (column === 'mtimeMs') return note.mtimeMs const v = note.fields[column] if (!v) return '' - // Sort date-like columns numerically if (column === 'last_update' || column === 'first_met') { const s = Array.isArray(v) ? v[0] ?? '' : v const ms = Date.parse(s) diff --git a/apps/x/packages/core/src/knowledge/build_graph.ts b/apps/x/packages/core/src/knowledge/build_graph.ts index 9cc4d061..f408a844 100644 --- a/apps/x/packages/core/src/knowledge/build_graph.ts +++ b/apps/x/packages/core/src/knowledge/build_graph.ts @@ -37,8 +37,8 @@ const SOURCE_FOLDERS = [ const VOICE_MEMOS_KNOWLEDGE_DIR = path.join(NOTES_OUTPUT_DIR, 'Voice Memos'); /** - * Parse YAML frontmatter from a markdown file and check if it has any noise/skip labels. - * Returns true if the email should be skipped (has noise filter tags). + * Check if email frontmatter contains any noise/skip filter tags. + * Returns true if the email should be skipped. */ function hasNoiseLabels(content: string): boolean { if (!content.startsWith('---')) return false; @@ -48,36 +48,30 @@ function hasNoiseLabels(content: string): boolean { const frontmatter = content.slice(3, endIdx); - // Get all noise tags from the tag system const noiseTags = new Set( getTagDefinitions() .filter(t => t.type === 'noise') .map(t => t.tag) ); - // Extract filter array values from frontmatter - // Matches lines like " - cold-outreach" under the filter: key + // Match list items under filter: key const filterMatch = frontmatter.match(/filter:\s*\n((?:\s+-\s+.+\n?)*)/); if (filterMatch) { const filterLines = filterMatch[1].match(/^\s+-\s+(.+)$/gm); if (filterLines) { for (const line of filterLines) { const tag = line.replace(/^\s+-\s+/, '').trim().replace(/['"]/g, ''); - if (noiseTags.has(tag)) { - return true; - } + if (noiseTags.has(tag)) return true; } } } - // Also check for inline filter array like "filter: ['cold-outreach']" or "filter: [cold-outreach]" + // Match inline array like filter: ['cold-outreach'] or filter: [cold-outreach] const inlineMatch = frontmatter.match(/filter:\s*\[([^\]]*)\]/); if (inlineMatch && inlineMatch[1].trim()) { const tags = inlineMatch[1].split(',').map(t => t.trim().replace(/['"]/g, '')); for (const tag of tags) { - if (noiseTags.has(tag)) { - return true; - } + if (noiseTags.has(tag)) return true; } } @@ -423,7 +417,6 @@ export async function buildGraph(sourceDir: string): Promise { if (!content.startsWith('---')) return false; if (hasNoiseLabels(content)) { console.log(`[buildGraph] Skipping noise email: ${path.basename(filePath)}`); - // Mark as processed so we don't re-check it markFileAsProcessed(filePath, state); return false; } diff --git a/apps/x/packages/core/src/knowledge/labeling_agent.ts b/apps/x/packages/core/src/knowledge/labeling_agent.ts index e9568676..d28649b1 100644 --- a/apps/x/packages/core/src/knowledge/labeling_agent.ts +++ b/apps/x/packages/core/src/knowledge/labeling_agent.ts @@ -57,7 +57,7 @@ ${renderTagSystemForEmails()} # Instructions 1. For each email file provided in the message, read its content carefully. -2. Classify the email using the taxonomy above. Think like a **busy YC startup founder** triaging their inbox. You receive a mix of real business conversations and unsolicited inbound (cold pitches, marketing, newsletters). Your job is to tell them apart accurately — catch the noise without mislabeling real relationships. +2. Classify the email using the taxonomy above. Think like a **YC startup founder** triaging their inbox — your time is your scarcest resource: - **Relationship**: Who is this from? An investor, customer, team member, vendor, candidate, etc.? - **Topic**: What is this about? Legal, finance, hiring, fundraising, security, infrastructure, etc.? - **Email Type**: Is this a warm intro or a followup on an existing conversation? @@ -97,23 +97,23 @@ Before finalizing labels, ask: **"Would a busy YC founder want a note about this - Is a spam digest or Google Groups moderation report - Is routine operational correspondence where the transaction is complete and no follow-up remains -# Cold Outreach vs Real Relationships +# Cold Outreach Detection (Critical for Precision) -**First, check for engagement signals. If ANY are present, it is NOT cold outreach — classify normally:** -- The inbox owner replied in the thread -- There is a calendar invite or scheduled meeting between the sender and inbox owner -- The email references shared documents, Slack conversations, prior calls, or other collaboration -- The sender was introduced via a warm intro -- The sender is from a company the inbox owner is actively doing business with (customer, vendor under contract, investor) -- The thread has messages from both sides +Many emails disguise themselves as real relationships. Before assigning \`vendor\`, \`candidate\`, \`partner\`, or \`followup\`, apply these tests: -**Only if NONE of the above are present, check if it's cold outreach:** -- The sender is pitching a service or product TO the inbox owner (agencies, dev shops, freelancers, SaaS tools, etc.) → \`cold-outreach\` -- A one-sided thread where the sender follows up on their own unanswered messages → \`cold-outreach\` -- A stranger cold-emailing about jobs, internships, or offering free work → \`cold-outreach\`, not \`candidate\` -- Someone referencing your YC batch or company name to seem personal, but with no prior engagement → \`cold-outreach\` +**It's \`cold-outreach\` (noise), NOT a real relationship, if:** +- The sender is pitching their own product or service — design agencies, compliance firms, content/copy writers, dev shops, freelancers, trademark services, company closure/winding-down services, hiring platforms, etc. — even if they reference your company by name, your YC batch, or offer something "free" or "exclusive for YC founders." +- The thread consists entirely of the same sender following up on their own unanswered messages. A real followup requires prior two-way engagement. +- A student, job-seeker, freelancer, or founder cold-emails asking for your time, feedback, or offering free work/trials. These are NOT \`candidate\` — they are \`cold-outreach\`. +- Someone invites you to an event you didn't sign up for, especially if the email has marketing formatting (tracking links, unsubscribe footers, HTML banners). This is \`promotion\`, not \`event\`. -**Remember:** A \`prospect\` is someone who wants to BUY from you. Someone pitching their services to you is \`cold-outreach\`, not \`prospect\` or \`partner\`. +**It IS a real relationship (not noise) if:** +- You (the inbox owner) are a participant in the thread (you sent a reply, or someone on your team did). +- The sender is from a company you are already paying, or they are providing a service under contract (e.g., your law firm, your accountant, your cloud provider support). +- The sender was introduced to you by someone you know (warm intro present in the thread). +- The sender references a specific ongoing engagement with concrete details — e.g., they are your assigned compliance assessor for an audit you initiated, or they are following up after a call you participated in. This is NOT the same as a generic "I noticed your company uses X" pitch. + +**Key heuristic:** If every message in the thread is FROM the same external person and the inbox owner never replied, it's almost certainly cold outreach — regardless of how personalized it sounds. Label it \`cold-outreach\`. # Routine Operations & Finance (Often Missed as Noise) @@ -156,13 +156,9 @@ These are noise even from a vendor you recognize or a platform you use: If the sender is \`noreply-spamdigest\` (Google Groups spam moderation reports), label it \`filter: ['spam']\`. Google already flagged these as spam. Do not evaluate the held messages inside — the digest itself is noise. -# Filter and Relationship arrays — correct placement is critical +# Filter array must only contain tags from the Noise category -- The \`filter\` array must only contain tags from the **Noise** category. -- The \`relationship\` array must only contain tags from the **Relationship** category. -- **\`cold-outreach\` is a NOISE tag — it goes in \`filter\`, NEVER in \`relationship\`.** If an email is cold outreach, set \`filter: ['cold-outreach']\`. The relationship array should be empty \`[]\` for cold outreach emails. -- Do not put topic or relationship tags into the filter array. If an email is an event promotion, use \`promotion\` in filter — not \`event\`. -- If an email is cold outreach, do NOT also tag it as \`prospect\`, \`candidate\`, \`partner\`, or \`vendor\` in relationship. Cold outreach overrides — the relationship array should be \`[]\`. +Do not put topic or relationship tags into the filter array. If an email is an event promotion, use \`promotion\` in filter — not \`event\`. # Frontmatter Format diff --git a/apps/x/packages/core/src/knowledge/note_creation.ts b/apps/x/packages/core/src/knowledge/note_creation.ts index 61a27c54..1d8aa32d 100644 --- a/apps/x/packages/core/src/knowledge/note_creation.ts +++ b/apps/x/packages/core/src/knowledge/note_creation.ts @@ -221,14 +221,12 @@ Emails containing calendar invites (\`.ics\` attachments or inline calendar data --- -# Step 1: Source Filtering +# Step 1: Source Filtering (Label-Based) ## For Meetings and Voice Memos -Always process — no filtering needed. Skip to Step 2. +Always process — no filtering needed. -## For Emails — TWO mandatory gates, BOTH must pass - -### Gate 1: Label Check +## For Emails — Read YAML Frontmatter Emails have YAML frontmatter with labels prepended by the labeling agent: @@ -251,51 +249,15 @@ labeled_at: "2026-02-28T12:00:00Z" ${renderNoteEffectRules()} -**Gate 1 verdict — if ANY filter/noise label is present, you MUST output:** +## Filter Decision Output + +If skipping: \`\`\` -STOP — GATE 1 FAILED -Labels: {list the filter labels} -Action: Do not create any notes. Do not proceed to Gate 2. End here. +SKIP +Reason: Labels indicate skip-only categories: {list the labels} \`\`\` -**After outputting this, STOP. Do not write any files. Do not create any notes. Your task is complete.** -If no filter labels are present, proceed to Gate 2. - ---- - -### Gate 2: Engagement Test - -**This gate applies to ALL emails that passed Gate 1. It is a HARD STOP — not a suggestion.** - -The inbox owner is a busy startup founder who receives a high volume of unsolicited inbound. Most emails from strangers are noise. The labeling agent sometimes misclassifies cold outreach. - -**Read the email content and answer this single question: "Has the inbox owner ever engaged with this sender?"** - -Evidence of engagement (at least one MUST be true to proceed): -- The inbox owner sent a reply in the thread -- There is a prior meeting or introduction with this person -- The person is from a company the owner is already doing business with (paying customer, active vendor under contract, investor) - -**If NONE of the above are true, you MUST output:** -\`\`\` -STOP — GATE 2 FAILED -Reason: No evidence of two-way engagement — {brief explanation} -Action: Do not create any notes. End here. -\`\`\` -**After outputting this, STOP. Do not write any files. Do not create any notes. Your task is complete.** - -This means NO notes for: -- Cold outreach senders (even if labeled as prospect/candidate/partner) -- Newsletter/digest/promo senders -- Webinar speakers mentioned in marketing emails -- People from community digests (Bookface, YC digest) -- Anyone who followed up multiple times with no reply from the owner -- Students/freelancers/developers cold-emailing about jobs or offering work -- Event platform senders (Luma, Beehiiv, etc.) - -**When in doubt, STOP. A missed note can be created later. A junk note pollutes the knowledge base permanently.** - -If engagement evidence exists, proceed to Step 2. +If processing, continue to Step 2. --- @@ -544,21 +506,18 @@ For entities not resolved to existing notes, determine if they warrant new notes ### Who Gets a Note -**The golden rule: only create notes for people the inbox owner has a real relationship with.** A "real relationship" means two-way engagement — the owner has met, replied to, or is actively doing business with this person. Passing through the label filter is necessary but NOT sufficient. - **CREATE a note for people who are:** -- Attendees in meetings the owner participated in -- People the owner has replied to or engaged with in email -- People introduced via warm intros from known contacts -- Contacts at companies the owner is actively doing business with (customers, investors, vendors under contract, partners) -- Candidates the owner is actively interviewing (responded to, scheduled with) +- External (not @user.domain) +- Attendees in meetings +- Email correspondents (emails that reach this step already passed label-based filtering) +- Decision makers or contacts at customers, prospects, or partners +- Investors or potential investors +- Candidates you are interviewing +- Advisors or mentors +- Key collaborators +- Introducers who connect you to valuable contacts **DO NOT create notes for:** -- Anyone from a one-time inbound email the owner never responded to -- People mentioned in newsletters, digests, or community roundups -- Speakers listed in webinar or event promotion emails -- Senders from marketing/event platforms (Luma, Beehiiv, etc.) -- Cold outreach senders, even after multiple follow-ups with no reply - Large group meeting attendees you didn't interact with - Internal colleagues (@user.domain) - Assistants handling only logistics @@ -600,19 +559,16 @@ If role is not explicitly stated, infer from context: |-------------------|----------------------|------------------| | Customer (active deal) | Yes — key contacts | Yes | | Customer (support ticket) | No | Maybe update existing | -| Prospect (owner has engaged) | Yes — decision makers | Yes | -| Prospect (no engagement) | No — this is cold outreach | No | +| Prospect | Yes — decision makers | Yes | | Investor | Yes | Yes | -| Strategic partner (mutual engagement) | Yes — key contacts | Yes | +| Strategic partner | Yes — key contacts | Yes | | Vendor (strategic) | Yes — main contact only | Yes | | Vendor (transactional) | No | Optional | | Bank/Financial services | No | Yes (one note) | -| Candidate (owner is interviewing) | Yes | No | -| Candidate (unsolicited) | No — this is cold outreach | No | +| Candidate | Yes | No | | Service provider (one-time) | No | No | -| Cold outreach / unsolicited inbound | No | No | -| Newsletter / digest / promo sender | No | No | -| Event/webinar invite sender | No | No | +| Personalized outreach | Yes | Yes | +| Generic cold outreach | No | No | ### Handling Non-Note-Worthy People @@ -625,23 +581,15 @@ For people who don't warrant their own note, add to Organization note's Contacts ## Organizations -**Only create org notes for organizations the user directly does business with.** The test is: "Does the user have an active, ongoing relationship with this organization?" - **CREATE a note if:** -- They're a customer, investor, or partner the owner is actively engaged with -- The owner is actively working with someone there (meetings, email exchanges, contracts) -- They are a vendor the owner has a contract with or is actively evaluating +- Someone from that org attended a meeting +- They're a customer, prospect, investor, or partner +- Someone from that org sent relevant personalized correspondence **DO NOT create for:** -- Organizations mentioned only as background context (a contact's previous employer, university, portfolio company, etc.) -- A candidate's current or former employer — the person note is enough -- Organizations from cold outreach emails -- Organizations mentioned in newsletters, digests, or promotional emails - Tool/service providers mentioned in passing - One-time transactional vendors -- Consumer service companies (banks, airlines, etc.) -- Event/marketing platforms (Luma, Beehiiv, etc.) -- Organizations referenced only because a contact works there, unless the user is doing business with that org directly +- Consumer service companies ## Projects @@ -653,13 +601,8 @@ For people who don't warrant their own note, add to Organization note's Contacts ## Topics **CREATE a note if:** -- A substantive business topic discussed across multiple conversations with real contacts (e.g., "SOC 2 Compliance", "Series A Fundraise") -- Has concrete facts, decisions, or action items attached to it - -**DO NOT create topic notes for:** -- Internal system concepts (email filtering, labeling, noise categories, tags) -- Abstract or meta categories (e.g., "cold outreach", "newsletters", "noise") -- Topics only mentioned in skipped/noise emails +- Recurring theme discussed +- Will come up again across conversations --- diff --git a/apps/x/packages/core/src/knowledge/tag_system.ts b/apps/x/packages/core/src/knowledge/tag_system.ts index 317c7d70..d95ea816 100644 --- a/apps/x/packages/core/src/knowledge/tag_system.ts +++ b/apps/x/packages/core/src/knowledge/tag_system.ts @@ -32,10 +32,10 @@ const DEFAULT_TAG_DEFINITIONS: TagDefinition[] = [ // ── Relationship — who is this from/about (all create) ──────────────── { tag: 'investor', type: 'relationship', applicability: 'both', noteEffect: 'create', description: 'Investors, VCs, or angels', example: 'Following up on our meeting — we\'d like to move forward with the Series A term sheet.' }, { tag: 'customer', type: 'relationship', applicability: 'both', noteEffect: 'create', description: 'Paying customers', example: 'We\'re seeing great results with Rowboat. Can we discuss expanding to more teams?' }, - { tag: 'prospect', type: 'relationship', applicability: 'both', noteEffect: 'create', description: 'Potential customers — people who want to BUY from you. NOT someone pitching their services to you — that is cold-outreach.', example: 'Thanks for the demo yesterday. We\'re interested in starting a pilot.' }, + { tag: 'prospect', type: 'relationship', applicability: 'both', noteEffect: 'create', description: 'Potential customers', example: 'Thanks for the demo yesterday. We\'re interested in starting a pilot.' }, { tag: 'partner', type: 'relationship', applicability: 'both', noteEffect: 'create', description: 'Business partners, corp dev, or strategic contacts', example: 'Let\'s discuss how we can promote the integration to both our user bases.' }, { tag: 'vendor', type: 'relationship', applicability: 'both', noteEffect: 'create', description: 'Service providers you already pay or have a contract with (legal, accounting, infra). NOT someone pitching their services to you — that is cold-outreach.', example: 'Here are the updated employment agreements you requested.' }, - { tag: 'candidate', type: 'noise', applicability: 'email', noteEffect: 'skip', description: 'Job applicants, recruiters, and anyone reaching out about roles — both solicited and unsolicited', example: 'Thanks for reaching out. I\'d love to learn more about the engineering role.' }, + { tag: 'candidate', type: 'relationship', applicability: 'both', noteEffect: 'create', description: 'Job applicants responding to a specific open role, or recruiters pitching candidates for your roles. NOT unsolicited students or strangers asking for your time — that is cold-outreach.', example: 'Thanks for reaching out. I\'d love to learn more about the engineering role.' }, { tag: 'team', type: 'relationship', applicability: 'both', noteEffect: 'create', description: 'Internal team members and co-founders', example: 'Here\'s the updated roadmap for Q2. Let\'s discuss in our sync.' }, { tag: 'advisor', type: 'relationship', applicability: 'both', noteEffect: 'create', description: 'Advisors, mentors, or board members', example: 'I\'ve reviewed the deck. Here are my thoughts on the GTM strategy.' }, { tag: 'personal', type: 'relationship', applicability: 'both', noteEffect: 'create', description: 'Family or friends', example: 'Are you coming to Thanksgiving this year? Let me know your travel dates.' },