more tweaks

This commit is contained in:
Arjun 2026-04-01 17:46:04 +05:30
parent 4f8b738657
commit 5fab2b95f4
4 changed files with 177 additions and 52 deletions

View file

@ -15,6 +15,7 @@ import {
import { buildKnowledgeIndex, formatIndexForPrompt } from './knowledge_index.js';
import { limitEventItems } from './limit_event_items.js';
import { commitAll } from './version_history.js';
import { getTagDefinitions } from './tag_system.js';
/**
* Build obsidian-style knowledge graph by running topic extraction
@ -35,6 +36,54 @@ const SOURCE_FOLDERS = [
// Voice memos are now created directly in knowledge/Voice Memos/<date>/
const VOICE_MEMOS_KNOWLEDGE_DIR = path.join(NOTES_OUTPUT_DIR, 'Voice Memos');
/**
 * Decide whether a labeled email should be skipped because its YAML
 * frontmatter lists at least one noise tag under the `filter` key.
 *
 * Noise tags are the entries returned by `getTagDefinitions()` whose `type`
 * is `'noise'`. Two frontmatter spellings are recognised:
 *   - block list:  `filter:` followed by `- tag` lines (indented or not)
 *   - inline list: `filter: ['tag-a', "tag-b"]` or `filter: [tag-a]`
 *
 * @param content Full text of the markdown file, including any frontmatter.
 * @returns `true` if any `filter` entry is a noise tag; `false` otherwise,
 *          including when the file has no terminated frontmatter block.
 */
function hasNoiseLabels(content: string): boolean {
  if (!content.startsWith('---')) return false;

  // The closing delimiter is a `---` on its own line. Searching for a bare
  // '---' substring would stop early on values such as `subject: "A --- B"`
  // and hide a `filter:` key that comes later. The substring search is kept
  // only as a fallback for files without an own-line delimiter.
  const closingLine = /\r?\n---[ \t]*(?:\r?\n|$)/.exec(content);
  const endIdx = closingLine ? closingLine.index : content.indexOf('---', 3);
  if (endIdx === -1) return false;
  const frontmatter = content.slice(3, endIdx);

  // Get all noise tags from the tag system
  const noiseTags = new Set(
    getTagDefinitions()
      .filter(t => t.type === 'noise')
      .map(t => t.tag)
  );

  // Strip quotes before trimming so that `' spam '` still resolves to `spam`.
  const isNoise = (raw: string): boolean =>
    noiseTags.has(raw.replace(/['"]/g, '').trim());

  // Block list form. The key is anchored to the start of a line (leading
  // indentation allowed) so that keys like `prefilter:` or a "filter:" inside
  // another value are not mistaken for the filter key. Items may be indented
  // or flush with the key; both are valid YAML.
  const blockMatch = /^[ \t]*filter:\s*\n((?:\s*-\s+.+\n?)*)/m.exec(frontmatter);
  if (blockMatch) {
    const itemPattern = /^\s*-\s+(.+)$/gm;
    let item: RegExpExecArray | null;
    while ((item = itemPattern.exec(blockMatch[1])) !== null) {
      if (isNoise(item[1])) return true;
    }
  }

  // Inline form, e.g. "filter: ['cold-outreach']" or "filter: [cold-outreach]"
  const inlineMatch = /^[ \t]*filter:\s*\[([^\]]*)\]/m.exec(frontmatter);
  if (inlineMatch && inlineMatch[1].trim()) {
    return inlineMatch[1].split(',').some(entry => isNoise(entry));
  }

  return false;
}
function extractPathFromToolInput(input: string): string | null {
try {
const parsed = JSON.parse(input) as { path?: string };
@ -366,16 +415,24 @@ export async function buildGraph(sourceDir: string): Promise<void> {
// Get files that need processing (new or changed)
let filesToProcess = getFilesToProcess(sourceDir, state);
// For gmail_sync, only process emails that have been labeled (have YAML frontmatter)
// For gmail_sync, only process emails that have been labeled AND don't have noise filter tags
if (sourceDir.endsWith('gmail_sync')) {
filesToProcess = filesToProcess.filter(filePath => {
try {
const content = fs.readFileSync(filePath, 'utf-8');
return content.startsWith('---');
if (!content.startsWith('---')) return false;
if (hasNoiseLabels(content)) {
console.log(`[buildGraph] Skipping noise email: ${path.basename(filePath)}`);
// Mark as processed so we don't re-check it
markFileAsProcessed(filePath, state);
return false;
}
return true;
} catch {
return false;
}
});
saveState(state);
}
if (filesToProcess.length === 0) {
@ -568,16 +625,23 @@ export async function processAllSources(): Promise<void> {
try {
let filesToProcess = getFilesToProcess(sourceDir, state);
// For gmail_sync, only process emails that have been labeled (have YAML frontmatter)
// For gmail_sync, only process emails that have been labeled AND don't have noise filter tags
if (folder === 'gmail_sync') {
filesToProcess = filesToProcess.filter(filePath => {
try {
const content = fs.readFileSync(filePath, 'utf-8');
return content.startsWith('---');
if (!content.startsWith('---')) return false;
if (hasNoiseLabels(content)) {
console.log(`[GraphBuilder] Skipping noise email: ${path.basename(filePath)}`);
markFileAsProcessed(filePath, state);
return false;
}
return true;
} catch {
return false;
}
});
saveState(state);
}
if (filesToProcess.length > 0) {

View file

@ -57,7 +57,7 @@ ${renderTagSystemForEmails()}
# Instructions
1. For each email file provided in the message, read its content carefully.
2. Classify the email using the taxonomy above. Think like a **YC startup founder** triaging their inbox your time is your scarcest resource:
2. Classify the email using the taxonomy above. Think like a **busy YC startup founder** triaging their inbox. You receive a mix of real business conversations and unsolicited inbound (cold pitches, marketing, newsletters). Your job is to tell them apart accurately — catch the noise without mislabeling real relationships.
- **Relationship**: Who is this from? An investor, customer, team member, vendor, candidate, etc.?
- **Topic**: What is this about? Legal, finance, hiring, fundraising, security, infrastructure, etc.?
- **Email Type**: Is this a warm intro or a followup on an existing conversation?
@ -97,23 +97,23 @@ Before finalizing labels, ask: **"Would a busy YC founder want a note about this
- Is a spam digest or Google Groups moderation report
- Is routine operational correspondence where the transaction is complete and no follow-up remains
# Cold Outreach Detection (Critical for Precision)
# Cold Outreach vs Real Relationships
Many emails disguise themselves as real relationships. Before assigning \`vendor\`, \`candidate\`, \`partner\`, or \`followup\`, apply these tests:
**First, check for engagement signals. If ANY are present, it is NOT cold outreach — classify normally:**
- The inbox owner replied in the thread
- There is a calendar invite or scheduled meeting between the sender and inbox owner
- The email references shared documents, Slack conversations, prior calls, or other collaboration
- The sender was introduced via a warm intro
- The sender is from a company the inbox owner is actively doing business with (customer, vendor under contract, investor)
- The thread has messages from both sides
**It's \`cold-outreach\` (noise), NOT a real relationship, if:**
- The sender is pitching their own product or service design agencies, compliance firms, content/copy writers, dev shops, freelancers, trademark services, company closure/winding-down services, hiring platforms, etc. even if they reference your company by name, your YC batch, or offer something "free" or "exclusive for YC founders."
- The thread consists entirely of the same sender following up on their own unanswered messages. A real followup requires prior two-way engagement.
- A student, job-seeker, freelancer, or founder cold-emails asking for your time, feedback, or offering free work/trials. These are NOT \`candidate\` — they are \`cold-outreach\`.
- Someone invites you to an event you didn't sign up for, especially if the email has marketing formatting (tracking links, unsubscribe footers, HTML banners). This is \`promotion\`, not \`event\`.
**Only if NONE of the above are present, check if it's cold outreach:**
- The sender is pitching a service or product TO the inbox owner (agencies, dev shops, freelancers, SaaS tools, etc.) → \`cold-outreach\`
- A one-sided thread where the sender follows up on their own unanswered messages → \`cold-outreach\`
- A stranger cold-emailing about jobs, internships, or offering free work → \`cold-outreach\`, not \`candidate\`
- Someone referencing your YC batch or company name to seem personal, but with no prior engagement → \`cold-outreach\`
**It IS a real relationship (not noise) if:**
- You (the inbox owner) are a participant in the thread (you sent a reply, or someone on your team did).
- The sender is from a company you are already paying, or they are providing a service under contract (e.g., your law firm, your accountant, your cloud provider support).
- The sender was introduced to you by someone you know (warm intro present in the thread).
- The sender references a specific ongoing engagement with concrete details e.g., they are your assigned compliance assessor for an audit you initiated, or they are following up after a call you participated in. This is NOT the same as a generic "I noticed your company uses X" pitch.
**Key heuristic:** If every message in the thread is FROM the same external person and the inbox owner never replied, it's almost certainly cold outreach regardless of how personalized it sounds. Label it \`cold-outreach\`.
**Remember:** A \`prospect\` is someone who wants to BUY from you. Someone pitching their services to you is \`cold-outreach\`, not \`prospect\` or \`partner\`.
# Routine Operations & Finance (Often Missed as Noise)
@ -156,9 +156,13 @@ These are noise even from a vendor you recognize or a platform you use:
If the sender is \`noreply-spamdigest\` (Google Groups spam moderation reports), label it \`filter: ['spam']\`. Google already flagged these as spam. Do not evaluate the held messages inside — the digest itself is noise.
# Filter array must only contain tags from the Noise category
# Filter and Relationship arrays — correct placement is critical
Do not put topic or relationship tags into the filter array. If an email is an event promotion, use \`promotion\` in filter — not \`event\`.
- The \`filter\` array must only contain tags from the **Noise** category.
- The \`relationship\` array must only contain tags from the **Relationship** category.
- **\`cold-outreach\` is a NOISE tag — it goes in \`filter\`, NEVER in \`relationship\`.** If an email is cold outreach, set \`filter: ['cold-outreach']\`. The relationship array should be empty \`[]\` for cold outreach emails.
- Do not put topic or relationship tags into the filter array. If an email is an event promotion, use \`promotion\` in filter — not \`event\`.
- If an email is cold outreach, do NOT also tag it as \`prospect\`, \`candidate\`, \`partner\`, or \`vendor\` in relationship. Cold outreach overrides — the relationship array should be \`[]\`.
# Frontmatter Format

View file

@ -221,12 +221,14 @@ Emails containing calendar invites (\`.ics\` attachments or inline calendar data
---
# Step 1: Source Filtering (Label-Based)
# Step 1: Source Filtering
## For Meetings and Voice Memos
Always process no filtering needed.
Always process — no filtering needed. Skip to Step 2.
## For Emails Read YAML Frontmatter
## For Emails — TWO mandatory gates, BOTH must pass
### Gate 1: Label Check
Emails have YAML frontmatter with labels prepended by the labeling agent:
@ -249,15 +251,51 @@ labeled_at: "2026-02-28T12:00:00Z"
${renderNoteEffectRules()}
## Filter Decision Output
If skipping:
**Gate 1 verdict — if ANY filter/noise label is present, you MUST output:**
\`\`\`
SKIP
Reason: Labels indicate skip-only categories: {list the labels}
STOP — GATE 1 FAILED
Labels: {list the filter labels}
Action: Do not create any notes. Do not proceed to Gate 2. End here.
\`\`\`
**After outputting this, STOP. Do not write any files. Do not create any notes. Your task is complete.**
If processing, continue to Step 2.
If no filter labels are present, proceed to Gate 2.
---
### Gate 2: Engagement Test
**This gate applies to ALL emails that passed Gate 1. It is a HARD STOP — not a suggestion.**
The inbox owner is a busy startup founder who receives a high volume of unsolicited inbound. Most emails from strangers are noise. The labeling agent sometimes misclassifies cold outreach.
**Read the email content and answer this single question: "Has the inbox owner ever engaged with this sender?"**
Evidence of engagement (at least one MUST be true to proceed):
- The inbox owner sent a reply in the thread
- There is a prior meeting or introduction with this person
- The person is from a company the owner is already doing business with (paying customer, active vendor under contract, investor)
**If NONE of the above are true, you MUST output:**
\`\`\`
STOP — GATE 2 FAILED
Reason: No evidence of two-way engagement — {brief explanation}
Action: Do not create any notes. End here.
\`\`\`
**After outputting this, STOP. Do not write any files. Do not create any notes. Your task is complete.**
This means NO notes for:
- Cold outreach senders (even if labeled as prospect/candidate/partner)
- Newsletter/digest/promo senders
- Webinar speakers mentioned in marketing emails
- People from community digests (Bookface, YC digest)
- Anyone who followed up multiple times with no reply from the owner
- Students/freelancers/developers cold-emailing about jobs or offering work
- Event platform senders (Luma, Beehiiv, etc.)
**When in doubt, STOP. A missed note can be created later. A junk note pollutes the knowledge base permanently.**
If engagement evidence exists, proceed to Step 2.
---
@ -506,18 +544,21 @@ For entities not resolved to existing notes, determine if they warrant new notes
### Who Gets a Note
**The golden rule: only create notes for people the inbox owner has a real relationship with.** A "real relationship" means two-way engagement — the owner has met, replied to, or is actively doing business with this person. Passing through the label filter is necessary but NOT sufficient.
**CREATE a note for people who are:**
- External (not @user.domain)
- Attendees in meetings
- Email correspondents (emails that reach this step already passed label-based filtering)
- Decision makers or contacts at customers, prospects, or partners
- Investors or potential investors
- Candidates you are interviewing
- Advisors or mentors
- Key collaborators
- Introducers who connect you to valuable contacts
- Attendees in meetings the owner participated in
- People the owner has replied to or engaged with in email
- People introduced via warm intros from known contacts
- Contacts at companies the owner is actively doing business with (customers, investors, vendors under contract, partners)
- Candidates the owner is actively interviewing (responded to, scheduled with)
**DO NOT create notes for:**
- Anyone from a one-time inbound email the owner never responded to
- People mentioned in newsletters, digests, or community roundups
- Speakers listed in webinar or event promotion emails
- Senders from marketing/event platforms (Luma, Beehiiv, etc.)
- Cold outreach senders, even after multiple follow-ups with no reply
- Large group meeting attendees you didn't interact with
- Internal colleagues (@user.domain)
- Assistants handling only logistics
@ -559,16 +600,19 @@ If role is not explicitly stated, infer from context:
|-------------------|----------------------|------------------|
| Customer (active deal) | Yes — key contacts | Yes |
| Customer (support ticket) | No | Maybe update existing |
| Prospect | Yes decision makers | Yes |
| Prospect (owner has engaged) | Yes — decision makers | Yes |
| Prospect (no engagement) | No — this is cold outreach | No |
| Investor | Yes | Yes |
| Strategic partner | Yes key contacts | Yes |
| Strategic partner (mutual engagement) | Yes — key contacts | Yes |
| Vendor (strategic) | Yes — main contact only | Yes |
| Vendor (transactional) | No | Optional |
| Bank/Financial services | No | Yes (one note) |
| Candidate | Yes | No |
| Candidate (owner is interviewing) | Yes | No |
| Candidate (unsolicited) | No — this is cold outreach | No |
| Service provider (one-time) | No | No |
| Personalized outreach | Yes | Yes |
| Generic cold outreach | No | No |
| Cold outreach / unsolicited inbound | No | No |
| Newsletter / digest / promo sender | No | No |
| Event/webinar invite sender | No | No |
### Handling Non-Note-Worthy People
@ -581,15 +625,23 @@ For people who don't warrant their own note, add to Organization note's Contacts
## Organizations
**Only create org notes for organizations the user directly does business with.** The test is: "Does the user have an active, ongoing relationship with this organization?"
**CREATE a note if:**
- Someone from that org attended a meeting
- They're a customer, prospect, investor, or partner
- Someone from that org sent relevant personalized correspondence
- They're a customer, investor, or partner the owner is actively engaged with
- The owner is actively working with someone there (meetings, email exchanges, contracts)
- They are a vendor the owner has a contract with or is actively evaluating
**DO NOT create for:**
- Organizations mentioned only as background context (a contact's previous employer, university, portfolio company, etc.)
- A candidate's current or former employer — the person note is enough
- Organizations from cold outreach emails
- Organizations mentioned in newsletters, digests, or promotional emails
- Tool/service providers mentioned in passing
- One-time transactional vendors
- Consumer service companies
- Consumer service companies (banks, airlines, etc.)
- Event/marketing platforms (Luma, Beehiiv, etc.)
- Organizations referenced only because a contact works there, unless the user is doing business with that org directly
## Projects
@ -601,8 +653,13 @@ For people who don't warrant their own note, add to Organization note's Contacts
## Topics
**CREATE a note if:**
- Recurring theme discussed
- Will come up again across conversations
- A substantive business topic discussed across multiple conversations with real contacts (e.g., "SOC 2 Compliance", "Series A Fundraise")
- Has concrete facts, decisions, or action items attached to it
**DO NOT create topic notes for:**
- Internal system concepts (email filtering, labeling, noise categories, tags)
- Abstract or meta categories (e.g., "cold outreach", "newsletters", "noise")
- Topics only mentioned in skipped/noise emails
---

View file

@ -32,10 +32,10 @@ const DEFAULT_TAG_DEFINITIONS: TagDefinition[] = [
// ── Relationship — who is this from/about (all create) ────────────────
{ tag: 'investor', type: 'relationship', applicability: 'both', noteEffect: 'create', description: 'Investors, VCs, or angels', example: 'Following up on our meeting — we\'d like to move forward with the Series A term sheet.' },
{ tag: 'customer', type: 'relationship', applicability: 'both', noteEffect: 'create', description: 'Paying customers', example: 'We\'re seeing great results with Rowboat. Can we discuss expanding to more teams?' },
{ tag: 'prospect', type: 'relationship', applicability: 'both', noteEffect: 'create', description: 'Potential customers', example: 'Thanks for the demo yesterday. We\'re interested in starting a pilot.' },
{ tag: 'prospect', type: 'relationship', applicability: 'both', noteEffect: 'create', description: 'Potential customers — people who want to BUY from you. NOT someone pitching their services to you — that is cold-outreach.', example: 'Thanks for the demo yesterday. We\'re interested in starting a pilot.' },
{ tag: 'partner', type: 'relationship', applicability: 'both', noteEffect: 'create', description: 'Business partners, corp dev, or strategic contacts', example: 'Let\'s discuss how we can promote the integration to both our user bases.' },
{ tag: 'vendor', type: 'relationship', applicability: 'both', noteEffect: 'create', description: 'Service providers you already pay or have a contract with (legal, accounting, infra). NOT someone pitching their services to you — that is cold-outreach.', example: 'Here are the updated employment agreements you requested.' },
{ tag: 'candidate', type: 'relationship', applicability: 'both', noteEffect: 'create', description: 'Job applicants responding to a specific open role, or recruiters pitching candidates for your roles. NOT unsolicited students or strangers asking for your time — that is cold-outreach.', example: 'Thanks for reaching out. I\'d love to learn more about the engineering role.' },
{ tag: 'candidate', type: 'relationship', applicability: 'both', noteEffect: 'create', description: 'Job applicants responding to a specific open role, or recruiters pitching candidates for your roles. NOT unsolicited students, strangers, or anyone cold-emailing about jobs, internships, or offering free work — that is cold-outreach.', example: 'Thanks for reaching out. I\'d love to learn more about the engineering role.' },
{ tag: 'team', type: 'relationship', applicability: 'both', noteEffect: 'create', description: 'Internal team members and co-founders', example: 'Here\'s the updated roadmap for Q2. Let\'s discuss in our sync.' },
{ tag: 'advisor', type: 'relationship', applicability: 'both', noteEffect: 'create', description: 'Advisors, mentors, or board members', example: 'I\'ve reviewed the deck. Here are my thoughts on the GTM strategy.' },
{ tag: 'personal', type: 'relationship', applicability: 'both', noteEffect: 'create', description: 'Family or friends', example: 'Are you coming to Thanksgiving this year? Let me know your travel dates.' },