diff --git a/apps/x/packages/core/src/knowledge/build_graph.ts b/apps/x/packages/core/src/knowledge/build_graph.ts index 5ca57e3b..104dc750 100644 --- a/apps/x/packages/core/src/knowledge/build_graph.ts +++ b/apps/x/packages/core/src/knowledge/build_graph.ts @@ -12,6 +12,7 @@ import { resetState, type GraphState, } from './graph_state.js'; +import { buildKnowledgeIndex, formatIndexForPrompt } from './knowledge_index.js'; /** * Build obsidian-style knowledge graph by running topic extraction @@ -29,7 +30,6 @@ const SOURCE_FOLDERS = [ 'granola_notes' // Corrected from 'granola_meetings' ]; const MAX_CONCURRENT_BATCHES = 1; // Process only 1 batch at a time to avoid overwhelming the agent -const BATCH_DELAY_MS = 5000; // 5 second delay between batches to avoid overwhelming the system /** * Read content for specific files @@ -66,7 +66,7 @@ async function waitForRunCompletion(runId: string): Promise { /** * Run note creation agent on a batch of files to extract entities and create/update notes */ -async function createNotesFromBatch(files: { path: string; content: string }[], batchNumber: number): Promise { +async function createNotesFromBatch(files: { path: string; content: string }[], batchNumber: number, knowledgeIndex: string): Promise { // Ensure notes output directory exists if (!fs.existsSync(NOTES_OUTPUT_DIR)) { fs.mkdirSync(NOTES_OUTPUT_DIR, { recursive: true }); @@ -77,17 +77,23 @@ async function createNotesFromBatch(files: { path: string; content: string }[], agentId: NOTE_CREATION_AGENT, }); - // Build message with all files in the batch + // Build message with index and all files in the batch let message = `Process the following ${files.length} source files and create/update obsidian notes.\n\n`; message += `**Instructions:**\n`; + message += `- Use the KNOWLEDGE BASE INDEX below to resolve entities - DO NOT grep/search for existing notes\n`; message += `- Extract entities (people, organizations, projects, topics) from ALL files below\n`; message += `- 
Create or update notes in "knowledge" directory (workspace-relative paths like "knowledge/People/Name.md")\n`; message += `- If the same entity appears in multiple files, merge the information into a single note\n`; - message += `- Use workspace tools to read existing notes and write updates\n`; + message += `- Use workspace tools to read existing notes (when you need full content) and write updates\n`; message += `- Follow the note templates and guidelines in your instructions\n\n`; + + // Add the knowledge base index message += `---\n\n`; + message += knowledgeIndex; + message += `\n---\n\n`; // Add each file's content + message += `# Source Files to Process\n\n`; files.forEach((file, idx) => { message += `## Source File ${idx + 1}: ${path.basename(file.path)}\n\n`; message += file.content; @@ -144,15 +150,15 @@ export async function buildGraph(sourceDir: string): Promise { const batchNumber = Math.floor(i / BATCH_SIZE) + 1; try { - console.log(`Processing batch ${batchNumber}/${totalBatches} (${batch.length} files)...`); - await createNotesFromBatch(batch, batchNumber); - console.log(`Batch ${batchNumber}/${totalBatches} complete`); + // Build fresh index before each batch to include notes from previous batches + console.log(`Building knowledge index for batch ${batchNumber}...`); + const index = buildKnowledgeIndex(); + const indexForPrompt = formatIndexForPrompt(index); + console.log(`Index built: ${index.people.length} people, ${index.organizations.length} orgs, ${index.projects.length} projects, ${index.topics.length} topics, ${index.other.length} other`); - // Add delay between batches to avoid overwhelming the system - if (i + BATCH_SIZE < contentFiles.length) { - console.log(`Waiting ${BATCH_DELAY_MS/1000} seconds before next batch...`); - await new Promise(resolve => setTimeout(resolve, BATCH_DELAY_MS)); - } + console.log(`Processing batch ${batchNumber}/${totalBatches} (${batch.length} files)...`); + await createNotesFromBatch(batch, batchNumber, 
indexForPrompt); + console.log(`Batch ${batchNumber}/${totalBatches} complete`); // Mark files in this batch as processed for (const file of batch) { diff --git a/apps/x/packages/core/src/knowledge/knowledge_index.ts b/apps/x/packages/core/src/knowledge/knowledge_index.ts new file mode 100644 index 00000000..2df46ca3 --- /dev/null +++ b/apps/x/packages/core/src/knowledge/knowledge_index.ts @@ -0,0 +1,355 @@ +import fs from 'fs'; +import path from 'path'; +import { WorkDir } from '../config/config.js'; + +const KNOWLEDGE_DIR = path.join(WorkDir, 'knowledge'); + +/** + * Index entry for a person note + */ +interface PersonEntry { + file: string; + name: string; + email?: string; + aliases: string[]; + organization?: string; + role?: string; +} + +/** + * Index entry for an organization note + */ +interface OrganizationEntry { + file: string; + name: string; + domain?: string; + aliases: string[]; +} + +/** + * Index entry for a project note + */ +interface ProjectEntry { + file: string; + name: string; + status?: string; + aliases: string[]; +} + +/** + * Index entry for a topic note + */ +interface TopicEntry { + file: string; + name: string; + keywords: string[]; + aliases: string[]; +} + +/** + * Index entry for notes in non-standard folders (generic) + */ +interface OtherEntry { + file: string; + name: string; + folder: string; + aliases: string[]; +} + +/** + * The complete knowledge index + */ +export interface KnowledgeIndex { + people: PersonEntry[]; + organizations: OrganizationEntry[]; + projects: ProjectEntry[]; + topics: TopicEntry[]; + other: OtherEntry[]; + buildTime: string; +} + +/** + * Extract a field value from markdown content + * Looks for **Field:** value or **Field:** [[Link]] on a single line; returns undefined when the field is absent or empty + */ +function extractField(content: string, fieldName: string): string | undefined { + // Match **Field:** value on the same line only: [ \t]* (not \s*) keeps an empty field from capturing the next line, and \r?\n accepts CRLF files + const pattern = new RegExp(`\\*\\*${fieldName}:\\*\\*[ \\t]*(.+?)(?:\\r?\\n|$)`, 'i'); + const match = 
content.match(pattern); + if (match) { + let value = match[1].trim(); + // Extract text from [[link]] if present + const linkMatch = value.match(/\[\[(?:[^\]|]+\|)?([^\]]+)\]\]/); + if (linkMatch) { + value = linkMatch[1]; + } + return value || undefined; + } + return undefined; +} + +/** + * Extract comma-separated values from a field + */ +function extractList(content: string, fieldName: string): string[] { + const value = extractField(content, fieldName); + if (!value) return []; + return value.split(',').map(s => s.trim()).filter(s => s.length > 0); +} + +/** + * Extract the title (first H1) from markdown content + */ +function extractTitle(content: string): string { + const match = content.match(/^#\s+(.+?)$/m); + return match ? match[1].trim() : ''; +} + +/** + * Parse a person note and extract index data + */ +function parsePersonNote(filePath: string, content: string): PersonEntry { + const name = extractTitle(content); + const relativePath = path.relative(KNOWLEDGE_DIR, filePath); + + return { + file: relativePath, + name, + email: extractField(content, 'Email'), + aliases: extractList(content, 'Aliases'), + organization: extractField(content, 'Organization'), + role: extractField(content, 'Role'), + }; +} + +/** + * Parse an organization note and extract index data + */ +function parseOrganizationNote(filePath: string, content: string): OrganizationEntry { + const name = extractTitle(content); + const relativePath = path.relative(KNOWLEDGE_DIR, filePath); + + return { + file: relativePath, + name, + domain: extractField(content, 'Domain'), + aliases: extractList(content, 'Aliases'), + }; +} + +/** + * Parse a project note and extract index data + */ +function parseProjectNote(filePath: string, content: string): ProjectEntry { + const name = extractTitle(content); + const relativePath = path.relative(KNOWLEDGE_DIR, filePath); + + return { + file: relativePath, + name, + status: extractField(content, 'Status'), + aliases: extractList(content, 'Aliases'), + 
}; +} + +/** + * Parse a topic note and extract index data + */ +function parseTopicNote(filePath: string, content: string): TopicEntry { + const name = extractTitle(content); + const relativePath = path.relative(KNOWLEDGE_DIR, filePath); + + return { + file: relativePath, + name, + keywords: extractList(content, 'Keywords'), + aliases: extractList(content, 'Aliases'), + }; +} + +/** + * Parse a generic note (for non-standard folders) + */ +function parseOtherNote(filePath: string, content: string): OtherEntry { + const name = extractTitle(content); + const relativePath = path.relative(KNOWLEDGE_DIR, filePath); + // Top-level folder name; a note directly under the knowledge dir has no folder, so report 'root' rather than its own filename + const folder = relativePath.includes(path.sep) ? relativePath.split(path.sep)[0] : 'root'; + + return { + file: relativePath, + name, + folder, + aliases: extractList(content, 'Aliases'), + }; +} + +/** + * Recursively scan a directory for markdown files + */ +function scanDirectoryRecursive(dir: string): string[] { + if (!fs.existsSync(dir)) { + return []; + } + + const files: string[] = []; + const entries = fs.readdirSync(dir); + + for (const entry of entries) { + const fullPath = path.join(dir, entry); + const stat = fs.statSync(fullPath); + + if (stat.isDirectory()) { + // Recursively scan subdirectories + files.push(...scanDirectoryRecursive(fullPath)); + } else if (stat.isFile() && entry.endsWith('.md')) { + files.push(fullPath); + } + } + + return files; +} + +/** + * Determine which folder a file belongs to based on its path + */ +function getFolderType(filePath: string): string { + const relativePath = path.relative(KNOWLEDGE_DIR, filePath); + const parts = relativePath.split(path.sep); + + // If file is directly in knowledge folder (no subfolder) + if (parts.length === 1) { + return 'root'; + } + + // Return the first folder name + return parts[0]; +} + +/** + * Build a complete index of the knowledge base + * Scans all notes recursively and extracts searchable fields using folder-based parsing + */ +export function 
buildKnowledgeIndex(): KnowledgeIndex { + const index: KnowledgeIndex = { + people: [], + organizations: [], + projects: [], + topics: [], + other: [], + buildTime: new Date().toISOString(), + }; + + // Scan entire knowledge directory recursively + const allFiles = scanDirectoryRecursive(KNOWLEDGE_DIR); + + for (const filePath of allFiles) { + try { + const content = fs.readFileSync(filePath, 'utf-8'); + const folderType = getFolderType(filePath); + + // Use folder-based parsing + switch (folderType) { + case 'People': + index.people.push(parsePersonNote(filePath, content)); + break; + case 'Organizations': + index.organizations.push(parseOrganizationNote(filePath, content)); + break; + case 'Projects': + index.projects.push(parseProjectNote(filePath, content)); + break; + case 'Topics': + index.topics.push(parseTopicNote(filePath, content)); + break; + default: + // Generic parsing for non-standard folders + index.other.push(parseOtherNote(filePath, content)); + break; + } + } catch (error) { + console.error(`Error parsing note ${filePath}:`, error); + } + } + + return index; +} + +/** + * Format the index as a string for inclusion in agent prompts + */ +export function formatIndexForPrompt(index: KnowledgeIndex): string { + let output = '# Existing Knowledge Base Index\n\n'; + output += `Built at: ${index.buildTime}\n\n`; + + // People + output += '## People\n\n'; + if (index.people.length === 0) { + output += '_No people notes yet_\n\n'; + } else { + output += '| File | Name | Email | Organization | Aliases |\n'; + output += '|------|------|-------|--------------|--------|\n'; + for (const person of index.people) { + const aliases = person.aliases.length > 0 ? 
person.aliases.join(', ') : '-'; + output += `| ${person.file} | ${person.name} | ${person.email || '-'} | ${person.organization || '-'} | ${aliases} |\n`; + } + output += '\n'; + } + + // Organizations + output += '## Organizations\n\n'; + if (index.organizations.length === 0) { + output += '_No organization notes yet_\n\n'; + } else { + output += '| File | Name | Domain | Aliases |\n'; + output += '|------|------|--------|--------|\n'; + for (const org of index.organizations) { + const aliases = org.aliases.length > 0 ? org.aliases.join(', ') : '-'; + output += `| ${org.file} | ${org.name} | ${org.domain || '-'} | ${aliases} |\n`; + } + output += '\n'; + } + + // Projects + output += '## Projects\n\n'; + if (index.projects.length === 0) { + output += '_No project notes yet_\n\n'; + } else { + output += '| File | Name | Status | Aliases |\n'; + output += '|------|------|--------|--------|\n'; + for (const project of index.projects) { + const aliases = project.aliases.length > 0 ? project.aliases.join(', ') : '-'; + output += `| ${project.file} | ${project.name} | ${project.status || '-'} | ${aliases} |\n`; + } + output += '\n'; + } + + // Topics + output += '## Topics\n\n'; + if (index.topics.length === 0) { + output += '_No topic notes yet_\n\n'; + } else { + output += '| File | Name | Keywords | Aliases |\n'; + output += '|------|------|----------|--------|\n'; + for (const topic of index.topics) { + const keywords = topic.keywords.length > 0 ? topic.keywords.join(', ') : '-'; + const aliases = topic.aliases.length > 0 ? topic.aliases.join(', ') : '-'; + output += `| ${topic.file} | ${topic.name} | ${keywords} | ${aliases} |\n`; + } + output += '\n'; + } + + // Other (non-standard folders) + if (index.other.length > 0) { + output += '## Other Notes\n\n'; + output += '| File | Name | Folder | Aliases |\n'; + output += '|------|------|--------|--------|\n'; + for (const note of index.other) { + const aliases = note.aliases.length > 0 ? 
note.aliases.join(', ') : '-'; + output += `| ${note.file} | ${note.name} | ${note.folder} | ${aliases} |\n`; + } + output += '\n'; + } + + return output; +} diff --git a/apps/x/packages/core/src/knowledge/note_creation_high.md b/apps/x/packages/core/src/knowledge/note_creation_high.md index 16770871..3cf10de6 100644 --- a/apps/x/packages/core/src/knowledge/note_creation_high.md +++ b/apps/x/packages/core/src/knowledge/note_creation_high.md @@ -49,16 +49,31 @@ You have full read access to the existing knowledge directory. Use this extensiv - name: e.g., "Arj" - email: e.g., "arj@rowboat.com" - domain: e.g., "rowboat.com" +4. **knowledge_index**: A pre-built index of all existing notes (provided in the message) + +# Knowledge Base Index + +**IMPORTANT:** You will receive a pre-built index of all existing notes at the start of each request. This index contains: +- All people notes with their names, emails, aliases, and organizations +- All organization notes with their names, domains, and aliases +- All project notes with their names and statuses +- All topic notes with their names and keywords + +**USE THE INDEX for entity resolution instead of grep/search commands.** This is much faster. + +When you need to: +- Check if a person exists → Look up by name/email/alias in the index +- Find an organization → Look up by name/domain in the index +- Resolve "David" to a full name → Check index for people with that name/alias + organization context + +**Only use `cat` to read full note content** when you need details not in the index (e.g., existing activity logs, open items). 
# Tools Available You have access to `executeCommand` to run shell commands: ``` executeCommand("ls {path}") # List directory contents -executeCommand("cat {path}") # Read file contents -executeCommand("grep -r '{pattern}' {path}") # Search across files -executeCommand("grep -r -l '{pattern}' {path}") # List files containing pattern -executeCommand("grep -r -i '{pattern}' {path}") # Case-insensitive search +executeCommand("cat {path}") # Read file contents executeCommand("head -50 {path}") # Read first 50 lines executeCommand("write {path} {content}") # Create or overwrite file ``` @@ -66,9 +81,10 @@ executeCommand("write {path} {content}") # Create or overwrite file **Important:** Use shell escaping for paths with spaces: ``` executeCommand("cat 'knowledge_folder/People/Sarah Chen.md'") -executeCommand("grep -r 'David' 'knowledge_folder/People/'") ``` +**NOTE:** Do NOT use grep to search for entities. Use the provided knowledge_index instead. + # Output Either: @@ -259,68 +275,44 @@ Variants found: --- -# Step 3: Search for Existing Notes +# Step 3: Look Up Existing Notes in Index -For each variant identified, search the notes folder thoroughly. +**Use the provided knowledge_index to find existing notes. 
Do NOT use grep commands.** -## 3a: Search by People -```bash -# Search by full name -executeCommand("grep -r -i -l 'Sarah Chen' '{knowledge_folder}/'") +## 3a: Look Up People -# Search by first name in People folder -executeCommand("grep -r -i -l 'Sarah' '{knowledge_folder}/People/'") +For each person variant (name, email, alias), check the index: -# Search by email -executeCommand("grep -r -i -l 'sarah@acme.com' '{knowledge_folder}/'") - -# Search by email domain (finds all people from same company) -executeCommand("grep -r -i -l '@acme.com' '{knowledge_folder}/'") - -# Search Aliases fields -executeCommand("grep -r -i 'Aliases.*Sarah' '{knowledge_folder}/People/'") +``` +From index, find matches for: +- "Sarah Chen" → Check People table for matching name +- "Sarah" → Check People table for matching name or alias +- "sarah@acme.com" → Check People table for matching email +- "@acme.com" → Check People table for matching organization or check Organizations for domain ``` -## 3b: Search by Organizations -```bash -# List all organization notes -executeCommand("ls '{knowledge_folder}/Organizations/'") +## 3b: Look Up Organizations -# Search for organization name -executeCommand("grep -r -i -l 'Acme' '{knowledge_folder}/Organizations/'") - -# Search by domain -executeCommand("grep -r -i 'Domain.*acme.com' '{knowledge_folder}/Organizations/'") - -# Search Aliases -executeCommand("grep -r -i 'Aliases.*Acme' '{knowledge_folder}/Organizations/'") +``` +From index, find matches for: +- "Acme Corp" → Check Organizations table for matching name +- "Acme" → Check Organizations table for matching name or alias +- "acme.com" → Check Organizations table for matching domain ``` -## 3c: Search by Projects and Topics -```bash -# List all projects -executeCommand("ls '{knowledge_folder}/Projects/'") +## 3c: Look Up Projects and Topics -# Search for project references -executeCommand("grep -r -i 'pilot' '{knowledge_folder}/Projects/'") -executeCommand("grep -r -i 'integration' 
'{knowledge_folder}/Projects/'") - -# Search for projects involving the organization -executeCommand("grep -r -i 'Acme' '{knowledge_folder}/Projects/'") - -# List and search topics -executeCommand("ls '{knowledge_folder}/Topics/'") -executeCommand("grep -r -i 'SOC 2' '{knowledge_folder}/Topics/'") +``` +From index, find matches for: +- "the pilot" → Check Projects table for related names +- "SOC 2" → Check Topics table for matching keywords ``` -## 3d: Read Candidate Notes +## 3d: Read Full Notes When Needed -For every note file found in searches, read it to understand context: +Only read the full note content when you need details not in the index (e.g., activity logs, open items): ```bash executeCommand("cat '{knowledge_folder}/People/Sarah Chen.md'") -executeCommand("cat '{knowledge_folder}/People/David Kim.md'") -executeCommand("cat '{knowledge_folder}/Organizations/Acme Corp.md'") -executeCommand("cat '{knowledge_folder}/Projects/Acme Integration.md'") ``` **Why read these notes:** diff --git a/apps/x/packages/core/src/knowledge/note_creation_low.md b/apps/x/packages/core/src/knowledge/note_creation_low.md index 7869bc0b..bb5abfcc 100644 --- a/apps/x/packages/core/src/knowledge/note_creation_low.md +++ b/apps/x/packages/core/src/knowledge/note_creation_low.md @@ -49,6 +49,24 @@ You have full read access to the existing knowledge directory. Use this extensiv - name: e.g., "Arj" - email: e.g., "arj@rowboat.com" - domain: e.g., "rowboat.com" +4. **knowledge_index**: A pre-built index of all existing notes (provided in the message) + +# Knowledge Base Index + +**IMPORTANT:** You will receive a pre-built index of all existing notes at the start of each request. 
This index contains: +- All people notes with their names, emails, aliases, and organizations +- All organization notes with their names, domains, and aliases +- All project notes with their names and statuses +- All topic notes with their names and keywords + +**USE THE INDEX for entity resolution instead of grep/search commands.** This is much faster. + +When you need to: +- Check if a person exists → Look up by name/email/alias in the index +- Find an organization → Look up by name/domain in the index +- Resolve "David" to a full name → Check index for people with that name/alias + organization context + +**Only use `cat` to read full note content** when you need details not in the index (e.g., existing activity logs, open items). # Tools Available @@ -56,9 +74,6 @@ You have access to `executeCommand` to run shell commands: ``` executeCommand("ls {path}") # List directory contents executeCommand("cat {path}") # Read file contents -executeCommand("grep -r '{pattern}' {path}") # Search across files -executeCommand("grep -r -l '{pattern}' {path}") # List files containing pattern -executeCommand("grep -r -i '{pattern}' {path}") # Case-insensitive search executeCommand("head -50 {path}") # Read first 50 lines executeCommand("write {path} {content}") # Create or overwrite file ``` @@ -66,9 +81,10 @@ executeCommand("write {path} {content}") # Create or overwrite file **Important:** Use shell escaping for paths with spaces: ``` executeCommand("cat 'knowledge_folder/People/Sarah Chen.md'") -executeCommand("grep -r 'David' 'knowledge_folder/People/'") ``` +**NOTE:** Do NOT use grep to search for entities. Use the provided knowledge_index instead. + # Output Either: @@ -223,40 +239,87 @@ Create a list of all variants found. --- -# Step 3: Search for Existing Notes +# Step 3: Look Up Existing Notes in Index -For each variant identified, search the notes folder thoroughly. +**Use the provided knowledge_index to find existing notes. 
Do NOT use grep commands.** -## 3a: Search by People -```bash -executeCommand("grep -r -i -l 'Sarah Chen' '{knowledge_folder}/'") -executeCommand("grep -r -i -l 'Sarah' '{knowledge_folder}/People/'") -executeCommand("grep -r -i -l 'sarah@acme.com' '{knowledge_folder}/'") -executeCommand("grep -r -i -l '@acme.com' '{knowledge_folder}/'") -executeCommand("grep -r -i 'Aliases.*Sarah' '{knowledge_folder}/People/'") +## 3a: Look Up People + +For each person variant (name, email, alias), check the index: + +``` +From index, find matches for: +- "Sarah Chen" → Check People table for matching name +- "Sarah" → Check People table for matching name or alias +- "sarah@acme.com" → Check People table for matching email +- "@acme.com" → Check People table for matching organization or check Organizations for domain ``` -## 3b: Search by Organizations -```bash -executeCommand("ls '{knowledge_folder}/Organizations/'") -executeCommand("grep -r -i -l 'Acme' '{knowledge_folder}/Organizations/'") -executeCommand("grep -r -i 'Domain.*acme.com' '{knowledge_folder}/Organizations/'") +## 3b: Look Up Organizations + +``` +From index, find matches for: +- "Acme Corp" → Check Organizations table for matching name +- "Acme" → Check Organizations table for matching name or alias +- "acme.com" → Check Organizations table for matching domain ``` -## 3c: Search by Projects and Topics -```bash -executeCommand("ls '{knowledge_folder}/Projects/'") -executeCommand("grep -r -i 'Acme' '{knowledge_folder}/Projects/'") -executeCommand("ls '{knowledge_folder}/Topics/'") +## 3c: Look Up Projects and Topics + +``` +From index, find matches for: +- "the pilot" → Check Projects table for related names +- "SOC 2" → Check Topics table for matching keywords ``` -## 3d: Read Candidate Notes +## 3d: Read Full Notes When Needed -For every note file found in searches, read it to understand context. 
+Only read the full note content when you need details not in the index (e.g., activity logs, open items): +```bash +executeCommand("cat '{knowledge_folder}/People/Sarah Chen.md'") +``` + +**Why read these notes:** +- Find canonical names (David → David Kim) +- Check Aliases fields for known variants +- Understand existing relationships +- See organization context for disambiguation +- Check what's already captured (avoid duplicates) +- Review open items (some might be resolved) +- **Check current status fields (might need updating)** +- **Check current roles (might have changed)** ## 3e: Matching Criteria -Use standard matching criteria for names, emails, and organizations. +Use these criteria to determine if a variant matches an existing note: + +**People matching:** + +| Source has | Note has | Match if | +|------------|----------|----------| +| First name "Sarah" | Full name "Sarah Chen" | Same organization context | +| Email "sarah@acme.com" | Email field | Exact match | +| Email domain "@acme.com" | Organization "Acme Corp" | Domain matches org | +| Role "VP Engineering" | Role field | Same org + same role | +| First name + company context | Full name + Organization | Company matches | +| Any variant | Aliases field | Listed in aliases | + +**Organization matching:** + +| Source has | Note has | Match if | +|------------|----------|----------| +| "Acme" | "Acme Corp" | Substring match | +| "Acme Corporation" | "Acme Corp" | Same root name | +| "@acme.com" | Domain field | Domain matches | +| Any variant | Aliases field | Listed in aliases | + +**Project matching:** + +| Source has | Note has | Match if | +|------------|----------|----------| +| "the pilot" | "Acme Pilot" | Same org context in source | +| "integration project" | "Acme Integration" | Same org + similar type | +| "Series A" | "Series A Fundraise" | Unique identifier match | --- diff --git a/apps/x/packages/core/src/knowledge/note_creation_medium.md 
b/apps/x/packages/core/src/knowledge/note_creation_medium.md index b53ed578..da7bbdf5 100644 --- a/apps/x/packages/core/src/knowledge/note_creation_medium.md +++ b/apps/x/packages/core/src/knowledge/note_creation_medium.md @@ -49,6 +49,24 @@ You have full read access to the existing knowledge directory. Use this extensiv - name: e.g., "Arj" - email: e.g., "arj@rowboat.com" - domain: e.g., "rowboat.com" +4. **knowledge_index**: A pre-built index of all existing notes (provided in the message) + +# Knowledge Base Index + +**IMPORTANT:** You will receive a pre-built index of all existing notes at the start of each request. This index contains: +- All people notes with their names, emails, aliases, and organizations +- All organization notes with their names, domains, and aliases +- All project notes with their names and statuses +- All topic notes with their names and keywords + +**USE THE INDEX for entity resolution instead of grep/search commands.** This is much faster. + +When you need to: +- Check if a person exists → Look up by name/email/alias in the index +- Find an organization → Look up by name/domain in the index +- Resolve "David" to a full name → Check index for people with that name/alias + organization context + +**Only use `cat` to read full note content** when you need details not in the index (e.g., existing activity logs, open items). 
# Tools Available @@ -56,9 +74,6 @@ You have access to `executeCommand` to run shell commands: ``` executeCommand("ls {path}") # List directory contents executeCommand("cat {path}") # Read file contents -executeCommand("grep -r '{pattern}' {path}") # Search across files -executeCommand("grep -r -l '{pattern}' {path}") # List files containing pattern -executeCommand("grep -r -i '{pattern}' {path}") # Case-insensitive search executeCommand("head -50 {path}") # Read first 50 lines executeCommand("write {path} {content}") # Create or overwrite file ``` @@ -66,9 +81,10 @@ executeCommand("write {path} {content}") # Create or overwrite file **Important:** Use shell escaping for paths with spaces: ``` executeCommand("cat 'knowledge_folder/People/Sarah Chen.md'") -executeCommand("grep -r 'David' 'knowledge_folder/People/'") ``` +**NOTE:** Do NOT use grep to search for entities. Use the provided knowledge_index instead. + # Output Either: @@ -260,68 +276,44 @@ Variants found: --- -# Step 3: Search for Existing Notes +# Step 3: Look Up Existing Notes in Index -For each variant identified, search the notes folder thoroughly. +**Use the provided knowledge_index to find existing notes. 
Do NOT use grep commands.** -## 3a: Search by People -```bash -# Search by full name -executeCommand("grep -r -i -l 'Sarah Chen' '{knowledge_folder}/'") +## 3a: Look Up People -# Search by first name in People folder -executeCommand("grep -r -i -l 'Sarah' '{knowledge_folder}/People/'") +For each person variant (name, email, alias), check the index: -# Search by email -executeCommand("grep -r -i -l 'sarah@acme.com' '{knowledge_folder}/'") - -# Search by email domain (finds all people from same company) -executeCommand("grep -r -i -l '@acme.com' '{knowledge_folder}/'") - -# Search Aliases fields -executeCommand("grep -r -i 'Aliases.*Sarah' '{knowledge_folder}/People/'") +``` +From index, find matches for: +- "Sarah Chen" → Check People table for matching name +- "Sarah" → Check People table for matching name or alias +- "sarah@acme.com" → Check People table for matching email +- "@acme.com" → Check People table for matching organization or check Organizations for domain ``` -## 3b: Search by Organizations -```bash -# List all organization notes -executeCommand("ls '{knowledge_folder}/Organizations/'") +## 3b: Look Up Organizations -# Search for organization name -executeCommand("grep -r -i -l 'Acme' '{knowledge_folder}/Organizations/'") - -# Search by domain -executeCommand("grep -r -i 'Domain.*acme.com' '{knowledge_folder}/Organizations/'") - -# Search Aliases -executeCommand("grep -r -i 'Aliases.*Acme' '{knowledge_folder}/Organizations/'") +``` +From index, find matches for: +- "Acme Corp" → Check Organizations table for matching name +- "Acme" → Check Organizations table for matching name or alias +- "acme.com" → Check Organizations table for matching domain ``` -## 3c: Search by Projects and Topics -```bash -# List all projects -executeCommand("ls '{knowledge_folder}/Projects/'") +## 3c: Look Up Projects and Topics -# Search for project references -executeCommand("grep -r -i 'pilot' '{knowledge_folder}/Projects/'") -executeCommand("grep -r -i 'integration' 
'{knowledge_folder}/Projects/'") - -# Search for projects involving the organization -executeCommand("grep -r -i 'Acme' '{knowledge_folder}/Projects/'") - -# List and search topics -executeCommand("ls '{knowledge_folder}/Topics/'") -executeCommand("grep -r -i 'SOC 2' '{knowledge_folder}/Topics/'") +``` +From index, find matches for: +- "the pilot" → Check Projects table for related names +- "SOC 2" → Check Topics table for matching keywords ``` -## 3d: Read Candidate Notes +## 3d: Read Full Notes When Needed -For every note file found in searches, read it to understand context: +Only read the full note content when you need details not in the index (e.g., activity logs, open items): ```bash executeCommand("cat '{knowledge_folder}/People/Sarah Chen.md'") -executeCommand("cat '{knowledge_folder}/People/David Kim.md'") -executeCommand("cat '{knowledge_folder}/Organizations/Acme Corp.md'") -executeCommand("cat '{knowledge_folder}/Projects/Acme Integration.md'") ``` **Why read these notes:**