diff --git a/apps/x/apps/renderer/src/components/bases-view.tsx b/apps/x/apps/renderer/src/components/bases-view.tsx
index 5a6b92e1..a68eb360 100644
--- a/apps/x/apps/renderer/src/components/bases-view.tsx
+++ b/apps/x/apps/renderer/src/components/bases-view.tsx
@@ -153,6 +153,11 @@ function getSortValue(note: NoteEntry, column: string): string | number {
if (column === 'mtimeMs') return note.mtimeMs
const v = note.fields[column]
if (!v) return ''
+ if (column === 'last_update' || column === 'first_met') {
+ const s = Array.isArray(v) ? v[0] ?? '' : v
+ const ms = Date.parse(s)
+ return isNaN(ms) ? 0 : ms
+ }
return Array.isArray(v) ? v[0] ?? '' : v
}
@@ -776,6 +781,17 @@ function CellRenderer({
return {formatDate(note.mtimeMs)}
}
+ // Date-like frontmatter columns — render like Last Modified
+ if (column === 'last_update' || column === 'first_met') {
+ const value = note.fields[column]
+ if (!value || Array.isArray(value)) return null
+ const ms = Date.parse(value)
+ if (!isNaN(ms)) {
+ return {formatDate(ms)}
+ }
+ return {value}
+ }
+
// Frontmatter column
const value = note.fields[column]
if (!value) return null
diff --git a/apps/x/apps/renderer/src/components/settings-dialog.tsx b/apps/x/apps/renderer/src/components/settings-dialog.tsx
index 1e38f720..53a69335 100644
--- a/apps/x/apps/renderer/src/components/settings-dialog.tsx
+++ b/apps/x/apps/renderer/src/components/settings-dialog.tsx
@@ -843,7 +843,7 @@ const NOTE_TAG_TYPE_ORDER = [
]
const EMAIL_TAG_TYPE_ORDER = [
- "relationship", "topic", "email-type", "filter", "action", "status",
+ "relationship", "topic", "email-type", "noise", "action", "status",
]
const TAG_TYPE_LABELS: Record = {
@@ -851,73 +851,12 @@ const TAG_TYPE_LABELS: Record = {
"relationship-sub": "Relationship Sub-Tags",
"topic": "Topic",
"email-type": "Email Type",
- "filter": "Filter",
+ "noise": "Noise",
"action": "Action",
"status": "Status",
"source": "Source",
}
-const DEFAULT_TAGS: TagDef[] = [
- { tag: "investor", type: "relationship", applicability: "both", noteEffect: "create", description: "Investors, VCs, or angels", example: "Following up on our meeting — we'd like to move forward with the Series A term sheet." },
- { tag: "customer", type: "relationship", applicability: "both", noteEffect: "create", description: "Paying customers", example: "We're seeing great results with Rowboat. Can we discuss expanding to more teams?" },
- { tag: "prospect", type: "relationship", applicability: "both", noteEffect: "create", description: "Potential customers", example: "Thanks for the demo yesterday. We're interested in starting a pilot." },
- { tag: "partner", type: "relationship", applicability: "both", noteEffect: "create", description: "Business partners", example: "Let's discuss how we can promote the integration to both our user bases." },
- { tag: "vendor", type: "relationship", applicability: "both", noteEffect: "create", description: "Service providers you work with", example: "Here are the updated employment agreements you requested." },
- { tag: "product", type: "relationship", applicability: "both", noteEffect: "skip", description: "Products or services you use (automated)", example: "Your AWS bill for January 2025 is now available." },
- { tag: "candidate", type: "relationship", applicability: "both", noteEffect: "create", description: "Job applicants", example: "Thanks for reaching out. I'd love to learn more about the engineering role." },
- { tag: "team", type: "relationship", applicability: "both", noteEffect: "create", description: "Internal team members", example: "Here's the updated roadmap for Q2. Let's discuss in our sync." },
- { tag: "advisor", type: "relationship", applicability: "both", noteEffect: "create", description: "Advisors, mentors, or board members", example: "I've reviewed the deck. Here are my thoughts on the GTM strategy." },
- { tag: "personal", type: "relationship", applicability: "both", noteEffect: "create", description: "Family or friends", example: "Are you coming to Thanksgiving this year? Let me know your travel dates." },
- { tag: "press", type: "relationship", applicability: "both", noteEffect: "create", description: "Journalists or media", example: "I'm writing a piece on AI agents. Would you be available for an interview?" },
- { tag: "community", type: "relationship", applicability: "both", noteEffect: "create", description: "Users, peers, or open source contributors", example: "Love what you're building with Rowboat. Here's a bug I found..." },
- { tag: "government", type: "relationship", applicability: "both", noteEffect: "create", description: "Government agencies", example: "Your Delaware franchise tax is due by March 1, 2025." },
- { tag: "primary", type: "relationship-sub", applicability: "notes", noteEffect: "none", description: "Main contact or decision maker", example: "Sarah Chen — VP Engineering, your main point of contact at Acme." },
- { tag: "secondary", type: "relationship-sub", applicability: "notes", noteEffect: "none", description: "Supporting contact, involved but not the lead", example: "David Kim — Engineer CC'd on customer emails." },
- { tag: "executive-assistant", type: "relationship-sub", applicability: "notes", noteEffect: "none", description: "EA or admin handling scheduling and logistics", example: "Lisa — Sarah's EA who schedules all her meetings." },
- { tag: "cc", type: "relationship-sub", applicability: "notes", noteEffect: "none", description: "Person who's CC'd but not actively engaged", example: "Manager looped in for visibility on deal." },
- { tag: "referred-by", type: "relationship-sub", applicability: "notes", noteEffect: "none", description: "Person who made an introduction or referral", example: "David Park — Investor who intro'd you to Sarah." },
- { tag: "former", type: "relationship-sub", applicability: "notes", noteEffect: "none", description: "Previously held this relationship, no longer active", example: "John — Former customer who churned last year." },
- { tag: "champion", type: "relationship-sub", applicability: "notes", noteEffect: "none", description: "Internal advocate pushing for you", example: "Engineer who loves your product and is selling internally." },
- { tag: "blocker", type: "relationship-sub", applicability: "notes", noteEffect: "none", description: "Person opposing or blocking progress", example: "CFO resistant to spending on new tools." },
- { tag: "sales", type: "topic", applicability: "both", noteEffect: "create", description: "Sales conversations, deals, and revenue", example: "Here's the pricing proposal we discussed. Let me know if you have questions." },
- { tag: "support", type: "topic", applicability: "both", noteEffect: "create", description: "Help requests, issues, and customer support", example: "We're seeing an error when trying to export. Can you help?" },
- { tag: "legal", type: "topic", applicability: "both", noteEffect: "create", description: "Contracts, terms, compliance, and legal matters", example: "Legal has reviewed the MSA. Attached are our requested changes." },
- { tag: "finance", type: "topic", applicability: "both", noteEffect: "create", description: "Money, invoices, payments, banking, and taxes", example: "Your invoice #1234 for $5,000 is attached. Payment due in 30 days." },
- { tag: "hiring", type: "topic", applicability: "both", noteEffect: "create", description: "Recruiting, interviews, and employment", example: "We'd like to move forward with a final round interview. Are you available Thursday?" },
- { tag: "fundraising", type: "topic", applicability: "both", noteEffect: "create", description: "Raising money and investor relations", example: "Thanks for sending the deck. We'd like to schedule a partner meeting." },
- { tag: "travel", type: "topic", applicability: "both", noteEffect: "skip", description: "Flights, hotels, trips, and travel logistics", example: "Your flight to Tokyo on March 15 is confirmed. Confirmation #ABC123." },
- { tag: "event", type: "topic", applicability: "both", noteEffect: "create", description: "Conferences, meetups, and gatherings", example: "You're invited to speak at TechCrunch Disrupt. Can you confirm your availability?" },
- { tag: "shopping", type: "topic", applicability: "both", noteEffect: "skip", description: "Purchases, orders, and returns", example: "Your order #12345 has shipped. Track it here." },
- { tag: "health", type: "topic", applicability: "both", noteEffect: "skip", description: "Medical, wellness, and health-related matters", example: "Your appointment with Dr. Smith is confirmed for Monday at 2pm." },
- { tag: "learning", type: "topic", applicability: "both", noteEffect: "skip", description: "Courses, education, and skill-building", example: "Welcome to the Advanced Python course. Here's your access link." },
- { tag: "research", type: "topic", applicability: "both", noteEffect: "create", description: "Research requests and information gathering", example: "Here's the market analysis you requested on the AI agent space." },
- { tag: "intro", type: "email-type", applicability: "both", noteEffect: "create", description: "Warm introduction from someone you know", example: "I'd like to introduce you to Sarah Chen, VP Engineering at Acme." },
- { tag: "followup", type: "email-type", applicability: "both", noteEffect: "create", description: "Following up on a previous conversation", example: "Following up on our call last week. Have you had a chance to review the proposal?" },
- { tag: "scheduling", type: "email-type", applicability: "email", noteEffect: "skip", description: "Meeting and calendar scheduling", example: "Are you available for a call next Tuesday at 2pm?" },
- { tag: "cold-outreach", type: "email-type", applicability: "email", noteEffect: "skip", description: "Unsolicited contact from someone you don't know", example: "Hi, I noticed your company is growing fast. I'd love to show you how we can help with..." },
- { tag: "newsletter", type: "email-type", applicability: "email", noteEffect: "skip", description: "Newsletters, marketing emails, and subscriptions", example: "This week in AI: The latest developments in agent frameworks..." },
- { tag: "notification", type: "email-type", applicability: "email", noteEffect: "skip", description: "Automated alerts, receipts, and system notifications", example: "Your password was changed successfully. If this wasn't you, contact support." },
- { tag: "spam", type: "filter", applicability: "email", noteEffect: "skip", description: "Junk and unwanted email", example: "Congratulations! You've won $1,000,000..." },
- { tag: "promotion", type: "filter", applicability: "email", noteEffect: "skip", description: "Marketing offers and sales pitches", example: "50% off all items this weekend only!" },
- { tag: "social", type: "filter", applicability: "email", noteEffect: "skip", description: "Social media notifications", example: "John Smith commented on your post." },
- { tag: "forums", type: "filter", applicability: "email", noteEffect: "skip", description: "Mailing lists and group discussions", example: "Re: [dev-list] Question about API design" },
- { tag: "action-required", type: "action", applicability: "both", noteEffect: "create", description: "Needs a response or action from you", example: "Can you send me the pricing by Friday?" },
- { tag: "fyi", type: "action", applicability: "email", noteEffect: "skip", description: "Informational only, no action needed", example: "Just wanted to let you know the deal closed. Thanks for your help!" },
- { tag: "urgent", type: "action", applicability: "both", noteEffect: "create", description: "Time-sensitive, needs immediate attention", example: "We need your signature on the contract by EOD today or we lose the deal." },
- { tag: "waiting", type: "action", applicability: "both", noteEffect: "create", description: "Waiting on a response from them" },
- { tag: "unread", type: "status", applicability: "email", noteEffect: "none", description: "Not yet processed" },
- { tag: "to-reply", type: "status", applicability: "email", noteEffect: "none", description: "Need to respond" },
- { tag: "done", type: "status", applicability: "email", noteEffect: "none", description: "Handled, can be archived" },
- { tag: "active", type: "status", applicability: "notes", noteEffect: "none", description: "Currently relevant, recent activity" },
- { tag: "archived", type: "status", applicability: "notes", noteEffect: "none", description: "No longer active, kept for reference" },
- { tag: "stale", type: "status", applicability: "notes", noteEffect: "none", description: "No activity in 60+ days, needs attention or archive" },
- { tag: "email", type: "source", applicability: "notes", noteEffect: "none", description: "Created or updated from email" },
- { tag: "meeting", type: "source", applicability: "notes", noteEffect: "none", description: "Created or updated from meeting transcript" },
- { tag: "browser", type: "source", applicability: "notes", noteEffect: "none", description: "Content captured from web browsing" },
- { tag: "web-search", type: "source", applicability: "notes", noteEffect: "none", description: "Information from web search" },
- { tag: "manual", type: "source", applicability: "notes", noteEffect: "none", description: "Manually entered by user" },
- { tag: "import", type: "source", applicability: "notes", noteEffect: "none", description: "Imported from another system" },
-]
function TagGroupTable({
group,
@@ -1048,8 +987,8 @@ function NoteTaggingSettings({ dialogOpen }: { dialogOpen: boolean }) {
setTags(parsed)
setOriginalTags(parsed)
} catch {
- setTags([...DEFAULT_TAGS])
- setOriginalTags([...DEFAULT_TAGS])
+ setTags([])
+ setOriginalTags([])
} finally {
setLoading(false)
}
@@ -1110,7 +1049,7 @@ function NoteTaggingSettings({ dialogOpen }: { dialogOpen: boolean }) {
const isEmailSection = activeSection === "email"
const applicability = isEmailSection ? "email" as const : "notes" as const
// For email-only types, always use "email"; for notes-only types, always use "notes"; otherwise use "both"
- const emailOnlyTypes = ["email-type", "filter"]
+ const emailOnlyTypes = ["email-type", "noise"]
const notesOnlyTypes = ["relationship-sub", "source"]
let finalApplicability: "email" | "notes" | "both" = "both"
if (emailOnlyTypes.includes(type)) finalApplicability = "email"
@@ -1148,11 +1087,6 @@ function NoteTaggingSettings({ dialogOpen }: { dialogOpen: boolean }) {
}
}, [tags])
- const handleReset = useCallback(() => {
- if (!confirm("Reset all tags to defaults? This will discard your changes.")) return
- setTags([...DEFAULT_TAGS])
- }, [])
-
const toggleGroup = useCallback((type: string) => {
setCollapsedGroups(prev => {
const next = new Set(prev)
@@ -1224,9 +1158,6 @@ function NoteTaggingSettings({ dialogOpen }: { dialogOpen: boolean }) {
)}
-
diff --git a/apps/x/packages/core/src/config/config.ts b/apps/x/packages/core/src/config/config.ts
index f2bc6c8a..abd10ec5 100644
--- a/apps/x/packages/core/src/config/config.ts
+++ b/apps/x/packages/core/src/config/config.ts
@@ -4,7 +4,8 @@ import { homedir } from "os";
import { fileURLToPath } from "url";
// Resolve app root relative to compiled file location (dist/...)
-export const WorkDir = path.join(homedir(), ".rowboat");
+// Allow override via ROWBOAT_WORKDIR env var for standalone pipeline usage; the override is resolved to an absolute path at load time so a relative value does not follow later cwd changes
+export const WorkDir = process.env.ROWBOAT_WORKDIR ? path.resolve(process.env.ROWBOAT_WORKDIR) : path.join(homedir(), ".rowboat");
// Get the directory of this file (for locating bundled assets)
const __filename = fileURLToPath(import.meta.url);
diff --git a/apps/x/packages/core/src/knowledge/build_graph.ts b/apps/x/packages/core/src/knowledge/build_graph.ts
index ab76b5e4..f408a844 100644
--- a/apps/x/packages/core/src/knowledge/build_graph.ts
+++ b/apps/x/packages/core/src/knowledge/build_graph.ts
@@ -15,6 +15,7 @@ import {
import { buildKnowledgeIndex, formatIndexForPrompt } from './knowledge_index.js';
import { limitEventItems } from './limit_event_items.js';
import { commitAll } from './version_history.js';
+import { getTagDefinitions } from './tag_system.js';
/**
* Build obsidian-style knowledge graph by running topic extraction
@@ -35,6 +36,48 @@ const SOURCE_FOLDERS = [
// Voice memos are now created directly in knowledge/Voice Memos//
const VOICE_MEMOS_KNOWLEDGE_DIR = path.join(NOTES_OUTPUT_DIR, 'Voice Memos');
+/**
+ * Check whether an email's YAML frontmatter lists any noise tag under its `filter:` key.
+ * Noise tags are the tags getTagDefinitions() reports with type 'noise'.
+ * Two spellings are recognised: a block list of `- tag` lines and an inline `[a, b]` array.
+ *
+ * @param content Full text of the email file, starting with its frontmatter if it has one.
+ * @returns true if the email should be skipped; false when there is no frontmatter,
+ *          no closing fence, or no noise tag in the list.
+ */
+function hasNoiseLabels(content: string): boolean {
+    if (!content.startsWith('---')) return false;
+
+    // The closing fence must start a line; searching for a bare '---' would cut the
+    // frontmatter short at any '---' that appears inside a value.
+    const endIdx = content.indexOf('\n---', 3);
+    if (endIdx === -1) return false;
+
+    const frontmatter = content.slice(3, endIdx);
+
+    const noiseTags = new Set(
+        getTagDefinitions()
+            .filter(t => t.type === 'noise')
+            .map(t => t.tag)
+    );
+    const isNoise = (raw: string): boolean => noiseTags.has(raw.trim().replace(/['"]/g, ''));
+
+    // Block list under `filter:`. The key is anchored to the start of a line so a longer
+    // key ending in "filter", or the word inside a value, cannot match. List items may be
+    // unindented, which YAML allows for a sequence nested in a mapping.
+    const blockMatch = frontmatter.match(/^[ \t]*filter:[ \t]*\r?\n((?:\s*-\s+.+\n?)*)/m);
+    const blockItems = blockMatch ? blockMatch[1].match(/^\s*-\s+(.+)$/gm) : null;
+    if (blockItems && blockItems.some(line => isNoise(line.replace(/^\s*-\s+/, '')))) return true;
+
+    // Inline array like filter: ['cold-outreach'] or filter: [cold-outreach]
+    const inlineMatch = frontmatter.match(/^[ \t]*filter:\s*\[([^\]]*)\]/m);
+    if (inlineMatch && inlineMatch[1].trim()) {
+        return inlineMatch[1].split(',').some(isNoise);
+    }
+
+    return false;
+}
+
function extractPathFromToolInput(input: string): string | null {
try {
const parsed = JSON.parse(input) as { path?: string };
@@ -366,16 +409,23 @@ export async function buildGraph(sourceDir: string): Promise {
// Get files that need processing (new or changed)
let filesToProcess = getFilesToProcess(sourceDir, state);
- // For gmail_sync, only process emails that have been labeled (have YAML frontmatter)
+ // For gmail_sync, only process emails that have been labeled AND don't have noise filter tags
if (sourceDir.endsWith('gmail_sync')) {
filesToProcess = filesToProcess.filter(filePath => {
try {
const content = fs.readFileSync(filePath, 'utf-8');
- return content.startsWith('---');
+ if (!content.startsWith('---')) return false;
+ if (hasNoiseLabels(content)) {
+ console.log(`[buildGraph] Skipping noise email: ${path.basename(filePath)}`);
+ markFileAsProcessed(filePath, state);
+ return false;
+ }
+ return true;
} catch {
return false;
}
});
+ saveState(state);
}
if (filesToProcess.length === 0) {
@@ -535,7 +585,7 @@ async function processVoiceMemosForKnowledge(): Promise {
/**
* Process all configured source directories
*/
-async function processAllSources(): Promise {
+export async function processAllSources(): Promise {
console.log('[GraphBuilder] Checking for new content in all sources...');
@@ -568,16 +618,23 @@ async function processAllSources(): Promise {
try {
let filesToProcess = getFilesToProcess(sourceDir, state);
- // For gmail_sync, only process emails that have been labeled (have YAML frontmatter)
+ // For gmail_sync, only process emails that have been labeled AND don't have noise filter tags
if (folder === 'gmail_sync') {
filesToProcess = filesToProcess.filter(filePath => {
try {
const content = fs.readFileSync(filePath, 'utf-8');
- return content.startsWith('---');
+ if (!content.startsWith('---')) return false;
+ if (hasNoiseLabels(content)) {
+ console.log(`[GraphBuilder] Skipping noise email: ${path.basename(filePath)}`);
+ markFileAsProcessed(filePath, state);
+ return false;
+ }
+ return true;
} catch {
return false;
}
});
+ saveState(state);
}
if (filesToProcess.length > 0) {
diff --git a/apps/x/packages/core/src/knowledge/label_emails.ts b/apps/x/packages/core/src/knowledge/label_emails.ts
index a72aa8f5..68bca5a1 100644
--- a/apps/x/packages/core/src/knowledge/label_emails.ts
+++ b/apps/x/packages/core/src/knowledge/label_emails.ts
@@ -14,6 +14,7 @@ import {
const SYNC_INTERVAL_MS = 15 * 1000; // 15 seconds
const BATCH_SIZE = 15;
+const DEFAULT_CONCURRENCY = 3;
const LABELING_AGENT = 'labeling_agent';
const GMAIL_SYNC_DIR = path.join(WorkDir, 'gmail_sync');
const MAX_CONTENT_LENGTH = 8000;
@@ -129,7 +130,7 @@ async function labelEmailBatch(
/**
* Process all unlabeled emails in batches
*/
-async function processUnlabeledEmails(): Promise {
+export async function processUnlabeledEmails(concurrency: number = DEFAULT_CONCURRENCY): Promise {
console.log('[EmailLabeling] Checking for unlabeled emails...');
const state = loadLabelingState();
@@ -140,7 +141,7 @@ async function processUnlabeledEmails(): Promise {
return;
}
- console.log(`[EmailLabeling] Found ${unlabeled.length} unlabeled emails`);
+ console.log(`[EmailLabeling] Found ${unlabeled.length} unlabeled emails (concurrency: ${concurrency})`);
const run = await serviceLogger.startRun({
service: 'email_labeling',
@@ -161,69 +162,81 @@ async function processUnlabeledEmails(): Promise {
truncated: limitedFiles.truncated,
});
- const totalBatches = Math.ceil(unlabeled.length / BATCH_SIZE);
- let totalEdited = 0;
- let hadError = false;
-
+ // Build all batches upfront
+ const batches: { batchNumber: number; files: { path: string; content: string }[] }[] = [];
for (let i = 0; i < unlabeled.length; i += BATCH_SIZE) {
const batchPaths = unlabeled.slice(i, i + BATCH_SIZE);
const batchNumber = Math.floor(i / BATCH_SIZE) + 1;
-
- try {
- // Read file contents for the batch
- const files: { path: string; content: string }[] = [];
- for (const filePath of batchPaths) {
- try {
- const content = fs.readFileSync(filePath, 'utf-8');
- files.push({ path: filePath, content });
- } catch (error) {
- console.error(`[EmailLabeling] Error reading ${filePath}:`, error);
- }
+ const files: { path: string; content: string }[] = [];
+ for (const filePath of batchPaths) {
+ try {
+ const content = fs.readFileSync(filePath, 'utf-8');
+ files.push({ path: filePath, content });
+ } catch (error) {
+ console.error(`[EmailLabeling] Error reading ${filePath}:`, error);
}
-
- if (files.length === 0) {
- continue;
- }
-
- console.log(`[EmailLabeling] Processing batch ${batchNumber}/${totalBatches} (${files.length} files)`);
- await serviceLogger.log({
- type: 'progress',
- service: run.service,
- runId: run.runId,
- level: 'info',
- message: `Processing batch ${batchNumber}/${totalBatches} (${files.length} files)`,
- step: 'batch',
- current: batchNumber,
- total: totalBatches,
- details: { filesInBatch: files.length },
- });
-
- const result = await labelEmailBatch(files);
- totalEdited += result.filesEdited.size;
-
- // Only mark files that were actually edited by the agent
- for (const file of files) {
- const relativePath = path.relative(WorkDir, file.path);
- if (result.filesEdited.has(relativePath)) {
- markFileAsLabeled(file.path, state);
- }
- }
-
- saveLabelingState(state);
- console.log(`[EmailLabeling] Batch ${batchNumber}/${totalBatches} complete, ${result.filesEdited.size} files edited`);
- } catch (error) {
- hadError = true;
- console.error(`[EmailLabeling] Error processing batch ${batchNumber}:`, error);
- await serviceLogger.log({
- type: 'error',
- service: run.service,
- runId: run.runId,
- level: 'error',
- message: `Error processing batch ${batchNumber}`,
- error: error instanceof Error ? error.message : String(error),
- context: { batchNumber },
- });
}
+ if (files.length > 0) {
+ batches.push({ batchNumber, files });
+ }
+ }
+
+ const totalBatches = batches.length;
+ let totalEdited = 0;
+ let hadError = false;
+
+ // Process batches with concurrency limit
+ for (let i = 0; i < batches.length; i += concurrency) {
+ const chunk = batches.slice(i, i + concurrency);
+
+ const promises = chunk.map(async ({ batchNumber, files }) => {
+ try {
+ console.log(`[EmailLabeling] Processing batch ${batchNumber}/${totalBatches} (${files.length} files)`);
+ await serviceLogger.log({
+ type: 'progress',
+ service: run.service,
+ runId: run.runId,
+ level: 'info',
+ message: `Processing batch ${batchNumber}/${totalBatches} (${files.length} files)`,
+ step: 'batch',
+ current: batchNumber,
+ total: totalBatches,
+ details: { filesInBatch: files.length },
+ });
+
+ const result = await labelEmailBatch(files);
+
+ // Only mark files that were actually edited by the agent
+ for (const file of files) {
+ const relativePath = path.relative(WorkDir, file.path);
+ if (result.filesEdited.has(relativePath)) {
+ markFileAsLabeled(file.path, state);
+ }
+ }
+
+ console.log(`[EmailLabeling] Batch ${batchNumber}/${totalBatches} complete, ${result.filesEdited.size} files edited`);
+ return result.filesEdited.size;
+ } catch (error) {
+ hadError = true;
+ console.error(`[EmailLabeling] Error processing batch ${batchNumber}:`, error);
+ await serviceLogger.log({
+ type: 'error',
+ service: run.service,
+ runId: run.runId,
+ level: 'error',
+ message: `Error processing batch ${batchNumber}`,
+ error: error instanceof Error ? error.message : String(error),
+ context: { batchNumber },
+ });
+ return 0;
+ }
+ });
+
+ const results = await Promise.all(promises);
+ totalEdited += results.reduce((sum, n) => sum + n, 0);
+
+ // Save state after each concurrent chunk completes
+ saveLabelingState(state);
}
state.lastRunTime = new Date().toISOString();
diff --git a/apps/x/packages/core/src/knowledge/labeling_agent.ts b/apps/x/packages/core/src/knowledge/labeling_agent.ts
index f6ff9597..d28649b1 100644
--- a/apps/x/packages/core/src/knowledge/labeling_agent.ts
+++ b/apps/x/packages/core/src/knowledge/labeling_agent.ts
@@ -18,15 +18,147 @@ tools:
You are an email labeling agent. Given a batch of email files, you will classify each email and prepend YAML frontmatter with structured labels.
+# Email File Format
+
+Each email is a markdown file with this structure:
+
+\`\`\`
+# {Subject line}
+
+**Thread ID:** {hex_id}
+**Message Count:** {n}
+
+---
+
+### From: {Display Name} <{email@address}>
+**Date:** {RFC 2822 date}
+
+{Plain-text body of the message}
+
+---
+
+### From: {Another Sender} <{email@address}>
+**Date:** {RFC 2822 date}
+
+{Next message in thread}
+
+---
+\`\`\`
+
+- The \`# Subject\` heading is always the first line.
+- Multi-message threads have multiple \`### From:\` blocks in chronological order, separated by \`---\`.
+- Single-message threads have \`Message Count: 1\` and one \`### From:\` block.
+- The body is plain text extracted from the email (HTML converted to markdown-ish text).
+
+Use the **Subject**, **From** addresses, **Message Count**, and **body content** to classify the email.
+
${renderTagSystemForEmails()}
# Instructions
1. For each email file provided in the message, read its content carefully.
-2. Classify the email using the taxonomy above. Be accurate and conservative — only apply labels that clearly fit.
-3. Use \`workspace-edit\` to prepend YAML frontmatter to the file. The oldString should be the first line of the file (the \`# Subject\` heading), and the newString should be the frontmatter followed by that same first line.
-4. Always include \`processed: true\` and \`labeled_at\` with the current ISO timestamp.
-5. If the email already has frontmatter (starts with \`---\`), skip it.
+2. Classify the email using the taxonomy above. Think like a **YC startup founder** triaging their inbox — your time is your scarcest resource:
+ - **Relationship**: Who is this from? An investor, customer, team member, vendor, candidate, etc.?
+ - **Topic**: What is this about? Legal, finance, hiring, fundraising, security, infrastructure, etc.?
+ - **Email Type**: Is this a warm intro or a followup on an existing conversation?
+ - **Filter (Noise)**: Is this email noise? **Apply ALL applicable filter tags.** If even one noise tag is present the email is skipped — noise overrides everything. Common noise:
+ - Cold outreach / unsolicited service pitches / "YC exclusive" deals / freelancers offering free work
+ - Newsletters, industry reports, webinar invitations, product tips from vendors
+ - Promotions, marketing, event invitations you did not register for, startup program upsells
+ - Automated notifications (email verifications, recording uploads, platform policy changes, expired OTPs)
+ - Transactional confirmations (salary disbursements, tax payments, GST filings, TDS workings, invoice-sharing threads)
+ - Spam and spam moderation digests
+ - **Action**: Does this need a response (\`action-required\`), is it time-sensitive (\`urgent\`), or are you waiting on them (\`waiting\`)? Use \`""\` if none apply. **Do NOT use \`fyi\` as an action value** — it is not a valid action tag.
+3. **Apply noise tags aggressively.** Noise tags can and should coexist with relationship and topic tags. A salary confirmation from your finance team should have BOTH \`relationship: ['team']\` AND \`filter: ['receipt']\`. The noise tag determines whether a note is created — it overrides relationship and topic signals.
+4. Be accurate — only apply labels that clearly fit. But when an email IS noise, always add the noise tag even when other tags are present.
+5. Use \`workspace-edit\` to prepend YAML frontmatter to the file. The oldString should be the first line of the file (the \`# Subject\` heading), and the newString should be the frontmatter followed by that same first line.
+6. Always include \`processed: true\` and \`labeled_at\` with the current ISO timestamp.
+7. If the email already has frontmatter (starts with \`---\`), skip it.
+
+# The Founder Signal Test
+
+Before finalizing labels, ask: **"Would a busy YC founder want a note about this in their knowledge system?"**
+
+**YES — create a note** if the email:
+- Requires a decision or response from the founder
+- Updates an active business relationship (customer deal, investor conversation, partner integration)
+- Contains information that will be referenced later (pricing, terms, deadlines, compliance requirements)
+- Has action items for the team (e.g. standup notes, meeting notes with to-dos)
+- Presents a genuine opportunity worth evaluating (accelerator, partnership, relevant hire)
+- Flags a risk that needs attention (security vulnerability, legal issue, compliance blocker)
+- Is from a vendor you are actively engaged with on an ongoing process (e.g. your compliance assessor following up after a call you participated in)
+
+**NO — skip it** if the email:
+- Confirms a transaction that already happened with no open decision (payment received, tax filed, salary disbursed, invoice shared)
+- Is a system-generated notification with no decision needed (email verification, recording uploaded, policy update, expired OTP)
+- Is unsolicited outreach from someone you have never engaged with — regardless of how personalized it sounds
+- Is a newsletter, industry report, webinar invitation, or product tips email
+- Is marketing or promotional content, including from vendors you use
+- Is a spam digest or Google Groups moderation report
+- Is routine operational correspondence where the transaction is complete and no follow-up remains
+
+# Cold Outreach Detection (Critical for Precision)
+
+Many emails disguise themselves as real relationships. Before assigning \`vendor\`, \`candidate\`, \`partner\`, or \`followup\`, apply these tests:
+
+**It's \`cold-outreach\` (noise), NOT a real relationship, if:**
+- The sender is pitching their own product or service — design agencies, compliance firms, content/copy writers, dev shops, freelancers, trademark services, company closure/winding-down services, hiring platforms, etc. — even if they reference your company by name, your YC batch, or offer something "free" or "exclusive for YC founders."
+- The thread consists entirely of the same sender following up on their own unanswered messages. A real followup requires prior two-way engagement.
+- A student, job-seeker, freelancer, or founder cold-emails asking for your time, feedback, or offering free work/trials. These are NOT \`candidate\` — they are \`cold-outreach\`.
+- Someone invites you to an event you didn't sign up for, especially if the email has marketing formatting (tracking links, unsubscribe footers, HTML banners). This is \`promotion\`, not \`event\`.
+
+**It IS a real relationship (not noise) if:**
+- You (the inbox owner) are a participant in the thread (you sent a reply, or someone on your team did).
+- The sender is from a company you are already paying, or they are providing a service under contract (e.g., your law firm, your accountant, your cloud provider support).
+- The sender was introduced to you by someone you know (warm intro present in the thread).
+- The sender references a specific ongoing engagement with concrete details — e.g., they are your assigned compliance assessor for an audit you initiated, or they are following up after a call you participated in. This is NOT the same as a generic "I noticed your company uses X" pitch.
+
+**Key heuristic:** If every message in the thread is FROM the same external person and the inbox owner never replied, it's almost certainly cold outreach — regardless of how personalized it sounds. Label it \`cold-outreach\`.
+
+# Routine Operations & Finance (Often Missed as Noise)
+
+These emails involve real relationships (team, vendor) and real topics (finance) but are **noise** because the transaction is complete and no decision remains. They MUST get a filter tag even though they also have relationship/topic tags:
+
+- **Salary/payroll confirmations**: "Total salary disbursement is INR X, transfer initiated" → \`filter: ['receipt']\`
+- **Tax payment acknowledgements**: Income tax challan confirmations, TDS workings sent for processing → \`filter: ['receipt']\`
+- **GST/compliance filing confirmations**: GSTR1 ARN generated, GST OTPs (expired or used) → \`filter: ['receipt']\`
+- **Recurring invoice sharing**: Monthly cloud/SaaS invoices shared between team and finance dept → \`filter: ['receipt']\`
+- **Payment transfer confirmations**: "Transfer initiated", "Payment confirmed" → \`filter: ['receipt']\`
+
+# Automated Notifications (Often Missed as Noise)
+
+System-generated messages that require no decision:
+
+- **Email verifications**: "Confirm your email address on Slack" → \`filter: ['notification']\`
+- **Meeting recordings**: "Your meeting recording is ready in Google Drive" → \`filter: ['notification']\`
+- **Platform policy updates**: "Billing permissions are changing starting next month" → \`filter: ['notification']\`
+- **Expired OTPs**: One-time passwords for completed actions → \`filter: ['notification']\`
+
+# Meeting vs Scheduling (Critical Distinction)
+
+- **topic: meeting** (CREATE) — A calendar invite or scheduling email for a real meeting with a **named person** you have a relationship with: an investor, customer, partner, candidate, advisor, team member. Examples: "Invitation: Zoom: Rowboat Labs <> Dalton Caldwell", "YC between Peer Richelsen and Arjun", "Rowboat <> Smash Capital". The key signal is a specific person or company in the subject/body.
+- **filter: scheduling** (SKIP) — Automated reminders and scheduling tool notifications with **no named person or meaningful context**: "Reminder: your meeting is about to start", "Our meeting in an hour", generic ChiliPiper/Calendly confirmations. These are system-generated noise.
+
+**Rule of thumb:** If the email names who you're meeting with, it's \`topic: meeting\`. If it's just a system ping about a time slot, it's \`filter: scheduling\`.
+
+# Newsletter & Promotion Detection (Often Missed as Noise)
+
+These are noise even from a vendor you recognize or a platform you use:
+
+- **Industry reports**: "Report: $1.2T in combined enterprise AI value" → \`filter: ['newsletter']\`
+- **Webinar/workshop invitations**: "Register for our knowledge sessions", "5 Slots Left. Pitch Tomorrow." → \`filter: ['promotion']\`
+- **Product tips and tutorials**: "Discover more with your free account" → \`filter: ['newsletter']\`
+- **Startup program marketing**: "Reminder - Register for AI Architecture sessions" → \`filter: ['promotion']\`
+
+**Exception:** If a tool your team actively uses is expiring and you need to make an upgrade/cancellation decision, that is NOT noise — it requires action.
+
+# Spam Digests Are Always Spam
+
+If the sender is \`noreply-spamdigest\` (Google Groups spam moderation reports), label it \`filter: ['spam']\`. Google already flagged these as spam. Do not evaluate the held messages inside — the digest itself is noise.
+
+# Filter array must only contain tags from the Noise category
+
+Do not put topic or relationship tags into the filter array. If an email is an event promotion, use \`promotion\` in filter — not \`event\`.
# Frontmatter Format
@@ -34,14 +166,14 @@ ${renderTagSystemForEmails()}
---
labels:
relationship:
- - Investor
+ - investor
topics:
- - Fundraising
- - Finance
- type: Intro
+ - fundraising
+ - finance
+ type: intro
filter:
- - Promotion
- action: FYI
+ []
+ action: action-required
processed: true
labeled_at: "2026-02-28T12:00:00Z"
---
@@ -50,10 +182,14 @@ labeled_at: "2026-02-28T12:00:00Z"
# Rules
- Every label category must be present in the frontmatter, even if empty (use \`[]\` for empty arrays).
-- \`type\` and \`action\` are single values (strings), not arrays.
+- \`type\` and \`action\` are single values (strings), not arrays. Use empty string \`""\` if not applicable.
- \`relationship\`, \`topics\`, and \`filter\` are arrays.
+- The \`action\` field only accepts: \`action-required\`, \`urgent\`, \`waiting\`, or \`""\`. Never use \`fyi\` as an action value.
- Use the exact label values from the taxonomy — do not invent new ones.
- The \`labeled_at\` timestamp should be the current time in ISO 8601 format.
- Process all files in the batch. Do not skip any unless they already have frontmatter.
+- **Noise labels are skip signals.** If an email is clearly a newsletter, cold outreach, promotion, digest, receipt, notification, or other noise — label it in the \`filter\` array. These emails will NOT create notes.
+- **Noise tags coexist with other tags.** An email from your team about salary (\`relationship: ['team']\`, \`topics: ['finance']\`) that is just a payroll confirmation should ALSO have \`filter: ['receipt']\`. The noise tag overrides — it ensures the email is skipped even when relationship/topic tags are present.
+- **When in doubt, ask:** "Does this email change any decision, require any follow-up, or update a relationship I need to track?" If no, it's noise — add the appropriate filter tag.
`;
}
diff --git a/apps/x/packages/core/src/knowledge/note_creation.ts b/apps/x/packages/core/src/knowledge/note_creation.ts
index 478ced81..1d8aa32d 100644
--- a/apps/x/packages/core/src/knowledge/note_creation.ts
+++ b/apps/x/packages/core/src/knowledge/note_creation.ts
@@ -868,6 +868,7 @@ If you discovered new name variants during resolution, add them to Aliases field
- Note state changes with \`[Field → value]\` in activity
- Escape quotes properly in shell commands
- Write only one file per response (no multi-file write batches)
+- **Always set \`Last update\`** in the Info section (the About section for Topic notes) to the YYYY-MM-DD date of the source email or meeting. When updating an existing note, update this field to the new source event's date.
---
diff --git a/apps/x/packages/core/src/knowledge/note_system.ts b/apps/x/packages/core/src/knowledge/note_system.ts
index 39cf2695..d167e97c 100644
--- a/apps/x/packages/core/src/knowledge/note_system.ts
+++ b/apps/x/packages/core/src/knowledge/note_system.ts
@@ -23,7 +23,7 @@ const DEFAULT_NOTE_TYPE_DEFINITIONS: NoteTypeDefinition[] = [
**Email:** {email or leave blank}
**Aliases:** {comma-separated: first name, nicknames, email}
**First met:** {YYYY-MM-DD}
-**Last seen:** {YYYY-MM-DD}
+**Last update:** {YYYY-MM-DD}
## Summary
{2-3 sentences: Who they are, why you know them, what you're working on together.}
@@ -56,7 +56,7 @@ const DEFAULT_NOTE_TYPE_DEFINITIONS: NoteTypeDefinition[] = [
**Domain:** {primary email domain}
**Aliases:** {comma-separated: short names, abbreviations}
**First met:** {YYYY-MM-DD}
-**Last seen:** {YYYY-MM-DD}
+**Last update:** {YYYY-MM-DD}
## Summary
{2-3 sentences: What this org is, what your relationship is.}
@@ -90,7 +90,7 @@ const DEFAULT_NOTE_TYPE_DEFINITIONS: NoteTypeDefinition[] = [
**Type:** {deal|product|initiative|hiring|other}
**Status:** {active|planning|on hold|completed|cancelled}
**Started:** {YYYY-MM-DD or leave blank}
-**Last activity:** {YYYY-MM-DD}
+**Last update:** {YYYY-MM-DD}
## Summary
{2-3 sentences: What this project is, goal, current state.}
@@ -131,7 +131,7 @@ const DEFAULT_NOTE_TYPE_DEFINITIONS: NoteTypeDefinition[] = [
**Keywords:** {comma-separated}
**Aliases:** {other ways this topic is referenced}
**First mentioned:** {YYYY-MM-DD}
-**Last mentioned:** {YYYY-MM-DD}
+**Last update:** {YYYY-MM-DD}
## Related
- [[People/{Person}]] — {relationship}
diff --git a/apps/x/packages/core/src/knowledge/note_tagging_agent.ts b/apps/x/packages/core/src/knowledge/note_tagging_agent.ts
index 8238e40a..0dc581f1 100644
--- a/apps/x/packages/core/src/knowledge/note_tagging_agent.ts
+++ b/apps/x/packages/core/src/knowledge/note_tagging_agent.ts
@@ -47,7 +47,7 @@ role: VP Engineering
organization: Acme Corp
email: sarah@acme.com
first_met: "2024-06-15"
-last_seen: "2025-01-20"
+last_update: "2025-01-20"
---
\`\`\`
@@ -80,7 +80,7 @@ Use these exact keys for each tag category:
Extract all \`**Key:** value\` fields from the \`## Info\` (or \`## About\`) section into YAML frontmatter keys:
-1. **Convert keys to snake_case**: e.g. \`**First met:**\` → \`first_met\`, \`**Last activity:**\` → \`last_activity\`, \`**Last seen:**\` → \`last_seen\`.
+1. **Convert keys to snake_case**: e.g. \`**First met:**\` → \`first_met\`, \`**Last update:**\` → \`last_update\`.
2. **Strip wiki-link syntax**: \`[[Organizations/Acme Corp]]\` → \`Acme Corp\`. Extract just the display name (last path segment).
3. **Skip blank/placeholder values**: If a field says "leave blank", is empty, or contains only template placeholders like \`{role}\`, omit it from the frontmatter.
4. **Quote dates**: Wrap date values in quotes, e.g. \`first_met: "2024-06-15"\`.
@@ -93,10 +93,10 @@ Extract all \`**Key:** value\` fields from the \`## Info\` (or \`## About\`) sec
**Per note type, extract these fields:**
-- **People**: role, organization, email, aliases, first_met, last_seen
-- **Organizations**: type, industry, relationship, domain, aliases, first_met, last_seen
-- **Projects**: type, status, started, last_activity
-- **Topics** (from \`## About\`): keywords, aliases, first_mentioned, last_mentioned
+- **People**: role, organization, email, aliases, first_met, last_update
+- **Organizations**: type, industry, relationship, domain, aliases, first_met, last_update
+- **Projects**: type, status, started, last_update
+- **Topics** (from \`## About\`): keywords, aliases, first_mentioned, last_update
- **Meetings**: Extract from the note content and file path:
- \`date\`: meeting date (from the file path \`Meetings/{source}/YYYY/MM/DD/\` or from \`created_at\`/\`Date:\` in content)
- \`source\`: \`granola\` or \`fireflies\` (from the file path)
diff --git a/apps/x/packages/core/src/knowledge/run_pipeline.ts b/apps/x/packages/core/src/knowledge/run_pipeline.ts
new file mode 100644
index 00000000..3bde77c2
--- /dev/null
+++ b/apps/x/packages/core/src/knowledge/run_pipeline.ts
@@ -0,0 +1,164 @@
+#!/usr/bin/env node
+/**
+ * Standalone pipeline runner for email labeling, graph building, and note tagging.
+ *
+ * Usage:
+ * npx tsx packages/core/src/knowledge/run_pipeline.ts --workdir /path/to/workdir
+ * npx tsx packages/core/src/knowledge/run_pipeline.ts --workdir /path/to/workdir --steps label,graph,tag
+ * npx tsx packages/core/src/knowledge/run_pipeline.ts --workdir /path/to/workdir --steps label
+ * npx tsx packages/core/src/knowledge/run_pipeline.ts --workdir /path/to/workdir --steps graph,tag
+ *
+ * The workdir should contain a gmail_sync/ folder with email markdown files.
+ * Output notes are written to workdir/knowledge/.
+ *
+ * Steps:
+ * label - Classify emails with YAML frontmatter labels
+ * graph - Extract entities and create/update knowledge notes
+ * tag - Add YAML frontmatter tags to knowledge notes
+ *
+ * If --steps is omitted, all three steps run. Selected steps always execute in the
+ * fixed order label → graph → tag, regardless of the order given to --steps.
+ */
+
+import fs from 'fs';
+import path from 'path';
+
+// --- Parse CLI args before any core imports (WorkDir reads env at import time) ---
+
+// Step names accepted by --steps. Also used, in this order, as the default
+// step list when --steps is omitted.
+const VALID_STEPS = ['label', 'graph', 'tag'] as const;
+// Union of the literal step names above.
+type Step = typeof VALID_STEPS[number];
+
+/**
+ * Parses and validates the command-line arguments of the pipeline runner.
+ *
+ * Prints an error and exits with code 1 on any invalid input (unknown flag,
+ * flag without a value, non-integer concurrency, missing or non-directory
+ * workdir, unknown step name). Prints usage and exits with code 0 on --help/-h.
+ *
+ * @returns workdir     Absolute path of an existing directory.
+ *          steps       Requested steps, de-duplicated and in VALID_STEPS order
+ *                      (all steps when --steps is omitted).
+ *          concurrency Positive integer, default 3.
+ */
+function parseArgs(): { workdir: string; steps: Step[]; concurrency: number } {
+  const args = process.argv.slice(2);
+  let workdir: string | undefined;
+  let stepsRaw: string | undefined;
+  let concurrency = 3;
+
+  // Report a usage error and terminate.
+  function fail(message: string): never {
+    console.error(message);
+    process.exit(1);
+  }
+
+  // A flag's value must exist and must not itself look like another flag;
+  // otherwise `--workdir --steps label` would treat "--steps" as the workdir.
+  function requireValue(flag: string, value: string | undefined): string {
+    if (value === undefined || value.startsWith('--')) {
+      return fail(`Error: ${flag} requires a value`);
+    }
+    return value;
+  }
+
+  for (let i = 0; i < args.length; i++) {
+    const arg = args[i];
+    switch (arg) {
+      case '--workdir':
+        workdir = requireValue(arg, args[++i]);
+        break;
+      case '--steps':
+        stepsRaw = requireValue(arg, args[++i]);
+        break;
+      case '--concurrency': {
+        const raw = requireValue(arg, args[++i]);
+        // parseInt alone would accept "3abc" or "2.5"; insist on plain digits.
+        const parsed = /^\d+$/.test(raw) ? parseInt(raw, 10) : NaN;
+        if (isNaN(parsed) || parsed < 1) {
+          return fail('Error: --concurrency must be a positive integer');
+        }
+        concurrency = parsed;
+        break;
+      }
+      case '--help':
+      case '-h':
+        console.log(`
+Usage: run_pipeline --workdir <path> [--steps label,graph,tag] [--concurrency N]
+
+Options:
+  --workdir <path>     Working directory containing gmail_sync/ folder (required)
+  --steps <list>       Comma-separated steps to run: label, graph, tag (default: all)
+  --concurrency <n>    Number of parallel batches for labeling (default: 3)
+  --help, -h           Show this help message
+
+Examples:
+  run_pipeline --workdir ./my-emails
+  run_pipeline --workdir ./my-emails --steps label --concurrency 5
+  run_pipeline --workdir ./my-emails --steps label,graph
+  run_pipeline --workdir ./my-emails --steps graph,tag
+`);
+        process.exit(0);
+        break;
+      default:
+        // A misspelled flag (e.g. "--step label") must not silently fall back
+        // to running every step.
+        return fail(`Error: unknown argument: ${arg}`);
+    }
+  }
+
+  if (workdir === undefined) {
+    return fail('Error: --workdir is required');
+  }
+
+  // Resolve to absolute path
+  const resolvedWorkdir = path.resolve(workdir);
+
+  if (!fs.existsSync(resolvedWorkdir)) {
+    return fail(`Error: workdir does not exist: ${resolvedWorkdir}`);
+  }
+  if (!fs.statSync(resolvedWorkdir).isDirectory()) {
+    return fail(`Error: workdir is not a directory: ${resolvedWorkdir}`);
+  }
+
+  // Parse steps
+  let steps: Step[];
+  if (stepsRaw !== undefined) {
+    // Drop empty tokens so a trailing comma ("label,") is not reported as an
+    // invalid step with an empty name.
+    const requested = stepsRaw
+      .split(',')
+      .map(s => s.trim().toLowerCase())
+      .filter(s => s.length > 0);
+    const known: readonly string[] = VALID_STEPS;
+    const invalid = requested.filter(s => !known.includes(s));
+    if (invalid.length > 0) {
+      return fail(`Error: invalid steps: ${invalid.join(', ')}. Valid steps: ${VALID_STEPS.join(', ')}`);
+    }
+    if (requested.length === 0) {
+      return fail(`Error: --steps is empty. Valid steps: ${VALID_STEPS.join(', ')}`);
+    }
+    // Filtering VALID_STEPS (rather than asserting `requested as Step[]`)
+    // de-duplicates and yields the steps in canonical order without a cast.
+    steps = VALID_STEPS.filter(s => requested.includes(s));
+  } else {
+    steps = [...VALID_STEPS];
+  }
+
+  return { workdir: resolvedWorkdir, steps, concurrency };
+}
+
+// Arguments are parsed at module load; parseArgs() exits the process on invalid input.
+const { workdir, steps, concurrency } = parseArgs();
+
+// Set env BEFORE importing core modules (WorkDir is read at module load time)
+process.env.ROWBOAT_WORKDIR = workdir;
+
+// --- Core modules are imported lazily inside main() via await import(), after the env var is set ---
+
+/** Recursively counts the files whose name ends in `.md` under `dir`. */
+function countMarkdownFiles(dir: string): number {
+  let count = 0;
+  for (const entry of fs.readdirSync(dir)) {
+    const full = path.join(dir, entry);
+    if (fs.statSync(full).isDirectory()) count += countMarkdownFiles(full);
+    else if (entry.endsWith('.md')) count++;
+  }
+  return count;
+}
+
+/**
+ * Runs the selected pipeline steps and prints a short summary.
+ *
+ * Steps execute in the fixed order label → graph → tag; each step's module is
+ * loaded with a dynamic import so that it is evaluated only after
+ * ROWBOAT_WORKDIR has been set. A missing gmail_sync/ folder only produces a
+ * warning. Errors thrown by a step propagate to the caller; a failure while
+ * counting output notes is downgraded to a warning because the steps have
+ * already completed at that point.
+ */
+async function main(): Promise<void> {
+  console.log(`[Pipeline] Working directory: ${workdir}`);
+  console.log(`[Pipeline] Steps to run: ${steps.join(', ')}`);
+  console.log(`[Pipeline] Concurrency: ${concurrency}`);
+  console.log();
+
+  // Verify gmail_sync exists if label or graph step is requested
+  const gmailSyncDir = path.join(workdir, 'gmail_sync');
+  const needsGmailSync = steps.includes('label') || steps.includes('graph');
+  if (needsGmailSync && !fs.existsSync(gmailSyncDir)) {
+    console.warn(`[Pipeline] Warning: gmail_sync/ folder not found in ${workdir}`);
+  }
+
+  const startTime = Date.now();
+
+  if (steps.includes('label')) {
+    console.log('[Pipeline] === Step 1: Email Labeling ===');
+    const { processUnlabeledEmails } = await import('./label_emails.js');
+    await processUnlabeledEmails(concurrency);
+    console.log();
+  }
+
+  if (steps.includes('graph')) {
+    console.log('[Pipeline] === Step 2: Graph Building ===');
+    const { processAllSources } = await import('./build_graph.js');
+    await processAllSources();
+    console.log();
+  }
+
+  if (steps.includes('tag')) {
+    console.log('[Pipeline] === Step 3: Note Tagging ===');
+    const { processUntaggedNotes } = await import('./tag_notes.js');
+    await processUntaggedNotes();
+    console.log();
+  }
+
+  const elapsed = ((Date.now() - startTime) / 1000).toFixed(1);
+  console.log(`[Pipeline] Done in ${elapsed}s`);
+
+  // Output summary
+  const knowledgeDir = path.join(workdir, 'knowledge');
+  if (fs.existsSync(knowledgeDir)) {
+    // The summary is informational only: a dangling symlink or unreadable
+    // entry must not turn a completed run into a fatal error / exit code 1.
+    try {
+      console.log(`[Pipeline] Output: ${countMarkdownFiles(knowledgeDir)} notes in ${knowledgeDir}`);
+    } catch (err: unknown) {
+      console.warn(`[Pipeline] Warning: could not count notes in ${knowledgeDir}:`, err);
+    }
+  }
+}
+
+// Script entry point: exit 0 when main() resolves, log and exit 1 when it rejects.
+// NOTE(review): the explicit exit on success presumably prevents lingering
+// handles opened by the core modules from keeping the process alive — confirm.
+main().then(() => {
+  process.exit(0);
+}).catch((err) => {
+  console.error('[Pipeline] Fatal error:', err);
+  process.exit(1);
+});
diff --git a/apps/x/packages/core/src/knowledge/tag_notes.ts b/apps/x/packages/core/src/knowledge/tag_notes.ts
index 89c616e6..39d46a3b 100644
--- a/apps/x/packages/core/src/knowledge/tag_notes.ts
+++ b/apps/x/packages/core/src/knowledge/tag_notes.ts
@@ -143,7 +143,7 @@ async function tagNoteBatch(
/**
* Process all untagged notes in batches
*/
-async function processUntaggedNotes(): Promise {
+export async function processUntaggedNotes(): Promise {
console.log('[NoteTagging] Checking for untagged notes...');
const state = loadNoteTaggingState();
diff --git a/apps/x/packages/core/src/knowledge/tag_system.ts b/apps/x/packages/core/src/knowledge/tag_system.ts
index b8642338..7b46ef4d 100644
--- a/apps/x/packages/core/src/knowledge/tag_system.ts
+++ b/apps/x/packages/core/src/knowledge/tag_system.ts
@@ -9,7 +9,7 @@ export type TagType =
| 'relationship-sub'
| 'topic'
| 'email-type'
- | 'filter'
+ | 'noise'
| 'action'
| 'status'
| 'source';
@@ -29,22 +29,21 @@ export interface TagDefinition {
// ── Default definitions (used to seed ~/.rowboat/config/tags.json) ──────────
const DEFAULT_TAG_DEFINITIONS: TagDefinition[] = [
- // ── Relationship (both) ──────────────────────────────────────────────
+ // ── Relationship — who is this from/about (all create, except 'candidate', which is a noise/skip tag) ──
{ tag: 'investor', type: 'relationship', applicability: 'both', noteEffect: 'create', description: 'Investors, VCs, or angels', example: 'Following up on our meeting — we\'d like to move forward with the Series A term sheet.' },
{ tag: 'customer', type: 'relationship', applicability: 'both', noteEffect: 'create', description: 'Paying customers', example: 'We\'re seeing great results with Rowboat. Can we discuss expanding to more teams?' },
{ tag: 'prospect', type: 'relationship', applicability: 'both', noteEffect: 'create', description: 'Potential customers', example: 'Thanks for the demo yesterday. We\'re interested in starting a pilot.' },
- { tag: 'partner', type: 'relationship', applicability: 'both', noteEffect: 'create', description: 'Business partners', example: 'Let\'s discuss how we can promote the integration to both our user bases.' },
- { tag: 'vendor', type: 'relationship', applicability: 'both', noteEffect: 'create', description: 'Service providers you work with', example: 'Here are the updated employment agreements you requested.' },
- { tag: 'product', type: 'relationship', applicability: 'both', noteEffect: 'skip', description: 'Products or services you use (automated)', example: 'Your AWS bill for January 2025 is now available.' },
- { tag: 'candidate', type: 'relationship', applicability: 'both', noteEffect: 'create', description: 'Job applicants', example: 'Thanks for reaching out. I\'d love to learn more about the engineering role.' },
- { tag: 'team', type: 'relationship', applicability: 'both', noteEffect: 'create', description: 'Internal team members', example: 'Here\'s the updated roadmap for Q2. Let\'s discuss in our sync.' },
+ { tag: 'partner', type: 'relationship', applicability: 'both', noteEffect: 'create', description: 'Business partners, corp dev, or strategic contacts', example: 'Let\'s discuss how we can promote the integration to both our user bases.' },
+ { tag: 'vendor', type: 'relationship', applicability: 'both', noteEffect: 'create', description: 'Service providers you already pay or have a contract with (legal, accounting, infra). NOT someone pitching their services to you — that is cold-outreach.', example: 'Here are the updated employment agreements you requested.' },
+ { tag: 'candidate', type: 'noise', applicability: 'email', noteEffect: 'skip', description: 'Job applicants, recruiters, and anyone reaching out about roles — both solicited and unsolicited', example: 'Thanks for reaching out. I\'d love to learn more about the engineering role.' },
+ { tag: 'team', type: 'relationship', applicability: 'both', noteEffect: 'create', description: 'Internal team members and co-founders', example: 'Here\'s the updated roadmap for Q2. Let\'s discuss in our sync.' },
{ tag: 'advisor', type: 'relationship', applicability: 'both', noteEffect: 'create', description: 'Advisors, mentors, or board members', example: 'I\'ve reviewed the deck. Here are my thoughts on the GTM strategy.' },
{ tag: 'personal', type: 'relationship', applicability: 'both', noteEffect: 'create', description: 'Family or friends', example: 'Are you coming to Thanksgiving this year? Let me know your travel dates.' },
{ tag: 'press', type: 'relationship', applicability: 'both', noteEffect: 'create', description: 'Journalists or media', example: 'I\'m writing a piece on AI agents. Would you be available for an interview?' },
- { tag: 'community', type: 'relationship', applicability: 'both', noteEffect: 'create', description: 'Users, peers, or open source contributors', example: 'Love what you\'re building with Rowboat. Here\'s a bug I found...' },
+ { tag: 'community', type: 'relationship', applicability: 'both', noteEffect: 'create', description: 'Peers, YC batchmates, or open source contributors with direct interaction', example: 'Love what you\'re building with Rowboat. Here\'s a bug I found...' },
{ tag: 'government', type: 'relationship', applicability: 'both', noteEffect: 'create', description: 'Government agencies', example: 'Your Delaware franchise tax is due by March 1, 2025.' },
- // ── Relationship Sub-Tags (notes only) ───────────────────────────────
+ // ── Relationship Sub-Tags — role metadata (notes only, all none) ──────
{ tag: 'primary', type: 'relationship-sub', applicability: 'notes', noteEffect: 'none', description: 'Main contact or decision maker', example: 'Sarah Chen — VP Engineering, your main point of contact at Acme.' },
{ tag: 'secondary', type: 'relationship-sub', applicability: 'notes', noteEffect: 'none', description: 'Supporting contact, involved but not the lead', example: 'David Kim — Engineer CC\'d on customer emails.' },
{ tag: 'executive-assistant', type: 'relationship-sub', applicability: 'notes', noteEffect: 'none', description: 'EA or admin handling scheduling and logistics', example: 'Lisa — Sarah\'s EA who schedules all her meetings.' },
@@ -54,57 +53,62 @@ const DEFAULT_TAG_DEFINITIONS: TagDefinition[] = [
{ tag: 'champion', type: 'relationship-sub', applicability: 'notes', noteEffect: 'none', description: 'Internal advocate pushing for you', example: 'Engineer who loves your product and is selling internally.' },
{ tag: 'blocker', type: 'relationship-sub', applicability: 'notes', noteEffect: 'none', description: 'Person opposing or blocking progress', example: 'CFO resistant to spending on new tools.' },
- // ── Topic (both) ─────────────────────────────────────────────────────
+ // ── Topic — what the email is about (all create) ──────────────────────
{ tag: 'sales', type: 'topic', applicability: 'both', noteEffect: 'create', description: 'Sales conversations, deals, and revenue', example: 'Here\'s the pricing proposal we discussed. Let me know if you have questions.' },
{ tag: 'support', type: 'topic', applicability: 'both', noteEffect: 'create', description: 'Help requests, issues, and customer support', example: 'We\'re seeing an error when trying to export. Can you help?' },
{ tag: 'legal', type: 'topic', applicability: 'both', noteEffect: 'create', description: 'Contracts, terms, compliance, and legal matters', example: 'Legal has reviewed the MSA. Attached are our requested changes.' },
- { tag: 'finance', type: 'topic', applicability: 'both', noteEffect: 'create', description: 'Money, invoices, payments, banking, and taxes', example: 'Your invoice #1234 for $5,000 is attached. Payment due in 30 days.' },
+ { tag: 'finance', type: 'topic', applicability: 'both', noteEffect: 'create', description: 'Actionable money matters: invoices, payments, banking, and taxes', example: 'Your invoice #1234 for $5,000 is attached. Payment due in 30 days.' },
{ tag: 'hiring', type: 'topic', applicability: 'both', noteEffect: 'create', description: 'Recruiting, interviews, and employment', example: 'We\'d like to move forward with a final round interview. Are you available Thursday?' },
- { tag: 'fundraising', type: 'topic', applicability: 'both', noteEffect: 'create', description: 'Raising money and investor relations', example: 'Thanks for sending the deck. We\'d like to schedule a partner meeting.' },
- { tag: 'travel', type: 'topic', applicability: 'both', noteEffect: 'skip', description: 'Flights, hotels, trips, and travel logistics', example: 'Your flight to Tokyo on March 15 is confirmed. Confirmation #ABC123.' },
- { tag: 'event', type: 'topic', applicability: 'both', noteEffect: 'create', description: 'Conferences, meetups, and gatherings', example: 'You\'re invited to speak at TechCrunch Disrupt. Can you confirm your availability?' },
- { tag: 'shopping', type: 'topic', applicability: 'both', noteEffect: 'skip', description: 'Purchases, orders, and returns', example: 'Your order #12345 has shipped. Track it here.' },
- { tag: 'health', type: 'topic', applicability: 'both', noteEffect: 'skip', description: 'Medical, wellness, and health-related matters', example: 'Your appointment with Dr. Smith is confirmed for Monday at 2pm.' },
- { tag: 'learning', type: 'topic', applicability: 'both', noteEffect: 'skip', description: 'Courses, education, and skill-building', example: 'Welcome to the Advanced Python course. Here\'s your access link.' },
+ { tag: 'fundraising', type: 'topic', applicability: 'both', noteEffect: 'create', description: 'Raising money, SAFEs, term sheets, and investor relations', example: 'Thanks for sending the deck. We\'d like to schedule a partner meeting.' },
+ { tag: 'security', type: 'topic', applicability: 'both', noteEffect: 'create', description: 'Vulnerability disclosures, login alerts, brand impersonation, or compliance requests', example: 'We found a JWT bypass in your auth endpoint. Details attached.' },
+ { tag: 'infrastructure', type: 'topic', applicability: 'both', noteEffect: 'create', description: 'Deploy failures, build errors, webhook issues, API migrations, and production alerts', example: 'Vercel deploy failed for rowboat-app. Build log attached.' },
+ { tag: 'meeting', type: 'topic', applicability: 'both', noteEffect: 'create', description: 'Calendar invites and scheduling for real meetings with named people — investors, customers, partners, candidates, team members. The key signal is a specific person you have a relationship with.', example: 'Invitation: Zoom: Rowboat Labs <> Dalton Caldwell @ Sat 7 Mar 2026' },
+ { tag: 'event', type: 'topic', applicability: 'both', noteEffect: 'create', description: 'Conferences, meetups, and gatherings you are attending or invited to', example: 'You\'re invited to speak at TechCrunch Disrupt. Can you confirm your availability?' },
{ tag: 'research', type: 'topic', applicability: 'both', noteEffect: 'create', description: 'Research requests and information gathering', example: 'Here\'s the market analysis you requested on the AI agent space.' },
- // ── Email Type ───────────────────────────────────────────────────────
+ // ── Email Type — high-signal email formats (all create) ───────────────
{ tag: 'intro', type: 'email-type', applicability: 'both', noteEffect: 'create', description: 'Warm introduction from someone you know', example: 'I\'d like to introduce you to Sarah Chen, VP Engineering at Acme.' },
- { tag: 'followup', type: 'email-type', applicability: 'both', noteEffect: 'create', description: 'Following up on a previous conversation', example: 'Following up on our call last week. Have you had a chance to review the proposal?' },
- { tag: 'scheduling', type: 'email-type', applicability: 'email', noteEffect: 'skip', description: 'Meeting and calendar scheduling', example: 'Are you available for a call next Tuesday at 2pm?' },
- { tag: 'cold-outreach', type: 'email-type', applicability: 'email', noteEffect: 'skip', description: 'Unsolicited contact from someone you don\'t know', example: 'Hi, I noticed your company is growing fast. I\'d love to show you how we can help with...' },
- { tag: 'newsletter', type: 'email-type', applicability: 'email', noteEffect: 'skip', description: 'Newsletters, marketing emails, and subscriptions', example: 'This week in AI: The latest developments in agent frameworks...' },
- { tag: 'notification', type: 'email-type', applicability: 'email', noteEffect: 'skip', description: 'Automated alerts, receipts, and system notifications', example: 'Your password was changed successfully. If this wasn\'t you, contact support.' },
+ { tag: 'followup', type: 'email-type', applicability: 'both', noteEffect: 'create', description: 'Following up on a previous two-way conversation (both parties have engaged). A cold sender bumping their own unanswered email is NOT a followup — it is cold-outreach.', example: 'Following up on our call last week. Have you had a chance to review the proposal?' },
- // ── Filter (email only) ──────────────────────────────────────────────
- { tag: 'spam', type: 'filter', applicability: 'email', noteEffect: 'skip', description: 'Junk and unwanted email', example: 'Congratulations! You\'ve won $1,000,000...' },
- { tag: 'promotion', type: 'filter', applicability: 'email', noteEffect: 'skip', description: 'Marketing offers and sales pitches', example: '50% off all items this weekend only!' },
- { tag: 'social', type: 'filter', applicability: 'email', noteEffect: 'skip', description: 'Social media notifications', example: 'John Smith commented on your post.' },
- { tag: 'forums', type: 'filter', applicability: 'email', noteEffect: 'skip', description: 'Mailing lists and group discussions', example: 'Re: [dev-list] Question about API design' },
+ // ── Noise — all skip signals in one place ─────────────────────────────
+ // NOTE: Noise tags override relationship/topic tags. An email can have
+ // relationship: team AND filter: receipt — the noise tag wins and skips note creation.
+ { tag: 'spam', type: 'noise', applicability: 'email', noteEffect: 'skip', description: 'Junk and unwanted email, including Google Groups spam moderation digests (from noreply-spamdigest)', example: 'Congratulations! You\'ve won $1,000,000...' },
+ { tag: 'promotion', type: 'noise', applicability: 'email', noteEffect: 'skip', description: 'Marketing offers, sales pitches, product launches, event invitations you did not register for, startup program upsells, vendor upgrade campaigns, and webinar/workshop invitations from companies', example: 'Register Now! Experts talk live: AI, Marketplace, Architecture & GTM Sessions Coming Up' },
+ { tag: 'cold-outreach', type: 'noise', applicability: 'email', noteEffect: 'skip', description: 'Unsolicited contact from someone you have no prior engagement with — includes design agencies, compliance firms, content/copy writers, dev shops, freelancers offering free work, trademark services, company closure services, hiring platforms, and anyone pitching a service with "exclusive YC deal" or referencing your YC batch. Even if they mention your company by name or offer something free.', example: 'Ramnique, $2000 worth YC Design deal every month — we work with 230+ YC founders' },
+ { tag: 'newsletter', type: 'noise', applicability: 'email', noteEffect: 'skip', description: 'Newsletters, industry reports, subscription emails, product tips/tutorials from vendors, and research digests — even from platforms you actively use', example: 'Report: $1.2T in combined enterprise AI value — but what\'s actually built to last?' },
+ { tag: 'notification', type: 'noise', applicability: 'email', noteEffect: 'skip', description: 'Automated system messages requiring no decision: email verifications, meeting recording uploads, platform policy/permission changes, billing console updates, password resets, and expired OTPs', example: 'Meeting records: your recording has been uploaded to Google Drive.' },
+ { tag: 'digest', type: 'noise', applicability: 'email', noteEffect: 'skip', description: 'Community digests, forum roundups, and aggregated updates', example: 'YC Bookface Weekly: 12 new posts this week...' },
+ { tag: 'product-update', type: 'noise', applicability: 'email', noteEffect: 'skip', description: 'Product changelogs, feature announcements, and vendor marketing disguised as tips', example: 'Discover more with your Upstash free account — popular use cases inside' },
+ { tag: 'receipt', type: 'noise', applicability: 'email', noteEffect: 'skip', description: 'Completed transaction confirmations with no decision remaining: payment receipts, salary/payroll disbursements, tax payment acknowledgements (challans), GST/VAT filing confirmations (GSTR1 ARNs), TDS workings, recurring invoice-sharing threads, and transfer-initiated confirmations', example: 'Challan payment under section 200 for TAN BLXXXXXX4B has been successfully paid.' },
+ { tag: 'social', type: 'noise', applicability: 'email', noteEffect: 'skip', description: 'Social media notifications', example: 'John Smith commented on your post.' },
+ { tag: 'forums', type: 'noise', applicability: 'email', noteEffect: 'skip', description: 'Mailing lists, group discussions, and Google Groups moderation digests that are not spam digests', example: 'Re: [dev-list] Question about API design' },
+ { tag: 'scheduling', type: 'noise', applicability: 'email', noteEffect: 'skip', description: 'Automated meeting reminders, scheduling tool confirmations, and calendar system notifications with no named person or context. NOT real meeting invites with specific people — those are topic: meeting.', example: 'Reminder: your meeting is about to start. Join with Google Meet.' },
+ { tag: 'travel', type: 'noise', applicability: 'email', noteEffect: 'skip', description: 'Flights, hotels, trips, and travel logistics', example: 'Your flight to Tokyo on March 15 is confirmed. Confirmation #ABC123.' },
+ { tag: 'shopping', type: 'noise', applicability: 'email', noteEffect: 'skip', description: 'Purchases, orders, and returns', example: 'Your order #12345 has shipped. Track it here.' },
+ { tag: 'health', type: 'noise', applicability: 'email', noteEffect: 'skip', description: 'Medical, wellness, and health-related matters', example: 'Your appointment with Dr. Smith is confirmed for Monday at 2pm.' },
+ { tag: 'learning', type: 'noise', applicability: 'email', noteEffect: 'skip', description: 'Courses, webinars, workshops, knowledge sessions, and education marketing — even from platforms you are enrolled in', example: 'Welcome to the Advanced Python course. Here\'s your access link.' },
- // ── Action ───────────────────────────────────────────────────────────
+ // ── Action — urgency signals (all create) ─────────────────────────────
{ tag: 'action-required', type: 'action', applicability: 'both', noteEffect: 'create', description: 'Needs a response or action from you', example: 'Can you send me the pricing by Friday?' },
- { tag: 'fyi', type: 'action', applicability: 'email', noteEffect: 'skip', description: 'Informational only, no action needed', example: 'Just wanted to let you know the deal closed. Thanks for your help!' },
{ tag: 'urgent', type: 'action', applicability: 'both', noteEffect: 'create', description: 'Time-sensitive, needs immediate attention', example: 'We need your signature on the contract by EOD today or we lose the deal.' },
{ tag: 'waiting', type: 'action', applicability: 'both', noteEffect: 'create', description: 'Waiting on a response from them' },
- // ── Status (email) ───────────────────────────────────────────────────
+ // ── Status — workflow state (all none) ────────────────────────────────
{ tag: 'unread', type: 'status', applicability: 'email', noteEffect: 'none', description: 'Not yet processed' },
{ tag: 'to-reply', type: 'status', applicability: 'email', noteEffect: 'none', description: 'Need to respond' },
{ tag: 'done', type: 'status', applicability: 'email', noteEffect: 'none', description: 'Handled, can be archived' },
+ { tag: 'active', type: 'status', applicability: 'notes', noteEffect: 'none', description: 'Currently relevant, recent activity' },
+ { tag: 'archived', type: 'status', applicability: 'notes', noteEffect: 'none', description: 'No longer active, kept for reference' },
+ { tag: 'stale', type: 'status', applicability: 'notes', noteEffect: 'none', description: 'No activity in 60+ days, needs attention or archive' },
- // ── Source (notes only) ──────────────────────────────────────────────
+ // ── Source — origin metadata (notes only, all none) ───────────────────
{ tag: 'email', type: 'source', applicability: 'notes', noteEffect: 'none', description: 'Created or updated from email' },
{ tag: 'meeting', type: 'source', applicability: 'notes', noteEffect: 'none', description: 'Created or updated from meeting transcript' },
{ tag: 'browser', type: 'source', applicability: 'notes', noteEffect: 'none', description: 'Content captured from web browsing' },
{ tag: 'web-search', type: 'source', applicability: 'notes', noteEffect: 'none', description: 'Information from web search' },
{ tag: 'manual', type: 'source', applicability: 'notes', noteEffect: 'none', description: 'Manually entered by user' },
{ tag: 'import', type: 'source', applicability: 'notes', noteEffect: 'none', description: 'Imported from another system' },
-
- // ── Status (notes) ──────────────────────────────────────────────────
- { tag: 'active', type: 'status', applicability: 'notes', noteEffect: 'none', description: 'Currently relevant, recent activity' },
- { tag: 'archived', type: 'status', applicability: 'notes', noteEffect: 'none', description: 'No longer active, kept for reference' },
- { tag: 'stale', type: 'status', applicability: 'notes', noteEffect: 'none', description: 'No activity in 60+ days, needs attention or archive' },
];
// ── Disk-backed config with mtime caching ──────────────────────────────────
@@ -146,7 +150,7 @@ export function getTagDefinitions(): TagDefinition[] {
const TYPE_ORDER: TagType[] = [
'relationship', 'relationship-sub', 'topic', 'email-type',
- 'filter', 'action', 'status', 'source',
+ 'noise', 'action', 'status', 'source',
];
const TYPE_LABELS: Record = {
@@ -154,7 +158,7 @@ const TYPE_LABELS: Record = {
'relationship-sub': 'Relationship Sub-Tags',
'topic': 'Topic',
'email-type': 'Email Type',
- 'filter': 'Filter',
+ 'noise': 'Noise',
'action': 'Action',
'status': 'Status',
'source': 'Source',