diff --git a/apps/x/packages/core/src/knowledge/sources/rank_slack_home.test.ts b/apps/x/packages/core/src/knowledge/sources/rank_slack_home.test.ts index 0b725f73..8afbac5f 100644 --- a/apps/x/packages/core/src/knowledge/sources/rank_slack_home.test.ts +++ b/apps/x/packages/core/src/knowledge/sources/rank_slack_home.test.ts @@ -1,47 +1,126 @@ import { describe, expect, it } from 'vitest'; -import { filterSlackHomeCandidatesForRelevance, SlackHomeRankCandidate } from './rank_slack_home.js'; +import { + filterSlackHomeCandidatesForRelevance, + rankSlackHomeMessages, + SlackHomeRankCandidate, +} from './rank_slack_home.js'; function slackTs(dateMs: number): string { return `${Math.floor(dateMs / 1000)}.000000`; } -describe('Slack Home ranking filters', () => { - it('drops stale routine standup logistics but keeps durable updates', () => { - const now = Date.parse('2026-06-04T18:00:00Z'); - const nineHoursAgo = now - 9 * 60 * 60 * 1000; - const twelveHoursAgo = now - 12 * 60 * 60 * 1000; - const thirtyMinutesAgo = now - 30 * 60 * 1000; +const NOW = Date.parse('2026-06-04T18:00:00Z'); +const recent = (text: string, id = text): SlackHomeRankCandidate => ({ + id, + channelName: 'general', + text, + ts: slackTs(NOW - 5 * 60 * 1000), +}); - const candidates: SlackHomeRankCandidate[] = [ - { - id: 'stale-standup-schedule', - channelName: 'general', - text: 'standup at 4pm possible?', - ts: slackTs(nineHoursAgo), - }, - { - id: 'stale-standup-sick', - channelName: 'general', - text: 'ill skip todays standup I am having stomach ache and not feeling well', - ts: slackTs(twelveHoursAgo), - }, - { - id: 'durable-issue-update', - channelName: 'general', - text: 'is the icon issue fixed for windows?', - ts: slackTs(twelveHoursAgo), - }, - { - id: 'recent-standup-schedule', - channelName: 'general', - text: 'standup at 4pm possible?', - ts: slackTs(thirtyMinutesAgo), - }, - ]; +function keptIds(candidates: SlackHomeRankCandidate[]): string[] { + return filterSlackHomeCandidatesForRelevance(candidates, NOW).map(c => c.id); +} - expect(filterSlackHomeCandidatesForRelevance(candidates, now).map(candidate => candidate.id)).toEqual([ - 'durable-issue-update', - 'recent-standup-schedule', - ]); +describe('filterSlackHomeCandidatesForRelevance', () => { + describe('routine standup logistics', () => { + it('drops stale standup logistics but keeps recent ones and durable updates', () => { + const nineHoursAgo = NOW - 9 * 60 * 60 * 1000; + const twelveHoursAgo = NOW - 12 * 60 * 60 * 1000; + const thirtyMinutesAgo = NOW - 30 * 60 * 1000; + + const candidates: SlackHomeRankCandidate[] = [ + { id: 'stale-standup-schedule', channelName: 'general', text: 'standup at 4pm possible?', ts: slackTs(nineHoursAgo) }, + { id: 'stale-standup-sick', channelName: 'general', text: 'ill skip todays standup I am having stomach ache and not feeling well', ts: slackTs(twelveHoursAgo) }, + { id: 'durable-issue-update', channelName: 'general', text: 'is the icon issue fixed for windows?', ts: slackTs(twelveHoursAgo) }, + { id: 'recent-standup-schedule', channelName: 'general', text: 'standup at 4pm possible?', ts: slackTs(thirtyMinutesAgo) }, + ]; + + expect(keptIds(candidates)).toEqual(['durable-issue-update', 'recent-standup-schedule']); + }); + }); + + describe('system / automated messages', () => { + it('drops channel join/leave, topic, rename and call notices', () => { + const candidates = [ + recent('Alex has joined the channel', 'join'), + recent('Sam has left the channel', 'leave'), + recent('Alex set the channel topic: Q3 planning', 'topic'), + recent('Sam renamed the channel to design-team', 'rename'), + recent('Alex started a huddle', 'huddle'), + recent('Real question: can someone review my PR?', 'real'), + ]; + expect(keptIds(candidates)).toEqual(['real']); + }); + + it('keeps a system-shaped message that carries a durable signal', () => { + const candidates = [recent('Priya set the channel topic: incident response war room', 'topic-incident')]; + expect(keptIds(candidates)).toEqual(['topic-incident']); + }); + }); + + describe('emoji / reaction-only', () => { + it('drops emoji-only, shortcode-only and punctuation-only posts', () => { + const candidates = [ + recent('👍', 'thumbs'), + recent('🎉🎉🎉', 'party'), + recent(':tada: :rocket:', 'shortcodes'), + recent('!!!', 'punct'), + recent('🚀 shipping the new pricing page today', 'real'), + ]; + expect(keptIds(candidates)).toEqual(['real']); + }); + }); + + describe('greetings / acknowledgements', () => { + it('drops bare greetings and acks but keeps anything with content', () => { + const candidates = [ + recent('thanks!', 'thanks'), + recent('gm', 'gm'), + recent('lgtm', 'lgtm'), + recent('+1', 'plus1'), + recent('sounds good', 'sg'), + recent('ok', 'ok'), + recent('ok, the deploy is blocked on the migration', 'ok-with-content'), + recent('thanks for fixing the outage', 'thanks-durable'), + ]; + // 'ok-with-content' kept (has content); 'thanks-durable' kept (durable signal). + expect(keptIds(candidates)).toEqual(['ok-with-content', 'thanks-durable']); + }); + }); + + it('drops empty-text candidates', () => { + expect(keptIds([recent(' ', 'blank'), recent('a real message here', 'real')])).toEqual(['real']); + }); +}); + +describe('rankSlackHomeMessages (deterministic)', () => { + it('orders surviving candidates newest-first and caps at the limit', async () => { + const mk = (id: string, minutesAgo: number): SlackHomeRankCandidate => ({ + id, channelName: 'general', text: `update ${id}`, ts: slackTs(NOW - minutesAgo * 60 * 1000), + }); + const candidates = [mk('old', 50), mk('newest', 1), mk('mid', 20)]; + expect(await rankSlackHomeMessages(candidates, 5)).toEqual(['newest', 'mid', 'old']); + expect(await rankSlackHomeMessages(candidates, 2)).toEqual(['newest', 'mid']); + }); + + it('filters noise before ranking', async () => { + const candidates = [ + recent('👍', 'emoji'), + recent('Alex has joined the channel', 'join'), + recent('can you review the pricing proposal?', 'real'), + ]; + expect(await rankSlackHomeMessages(candidates, 5)).toEqual(['real']); + }); + + it('handles a high-volume batch: caps output and preserves recency order', async () => { + const candidates: SlackHomeRankCandidate[] = Array.from({ length: 150 }, (_, i) => ({ + id: `m${i}`, + channelName: 'general', + // i=0 is newest; larger i is older. + text: `status update number ${i}`, + ts: slackTs(NOW - i * 60 * 1000), + })); + const ranked = await rankSlackHomeMessages(candidates, 5); + expect(ranked).toEqual(['m0', 'm1', 'm2', 'm3', 'm4']); }); }); diff --git a/apps/x/packages/core/src/knowledge/sources/rank_slack_home.ts b/apps/x/packages/core/src/knowledge/sources/rank_slack_home.ts index e71f46fd..388a1289 100644 --- a/apps/x/packages/core/src/knowledge/sources/rank_slack_home.ts +++ b/apps/x/packages/core/src/knowledge/sources/rank_slack_home.ts @@ -1,14 +1,3 @@ -import { z } from 'zod'; -import { generateObject } from 'ai'; -import { createProvider } from '../../models/models.js'; -import { - getDefaultModelAndProvider, - getKgModel, - resolveProviderConfig, -} from '../../models/defaults.js'; -import { captureLlmUsage } from '../../analytics/usage.js'; -import { withUseCase } from '../../analytics/use_case.js'; - export type SlackHomeRankCandidate = { id: string; workspaceName?: string; @@ -18,15 +7,32 @@ export type SlackHomeRankCandidate = { ts: string; }; -const RankedSlackMessagesSchema = z.object({ - rankedIds: z.array(z.string()).describe('Message ids in the order they should appear on Home.'), -}); - const EXPIRED_ROUTINE_AGE_MS = 2 * 60 * 60 * 1000; const ROUTINE_EVENT_RE = /\b(stand[-\s]?up|daily\s+(sync|scrum|standup)|scrum|check[-\s]?in)\b/i; const ROUTINE_LOGISTICS_RE = /\b(skip|skipping|miss|missing|can't|cannot|cant|won't|wont|join|attend|possible|move|reschedule|shift|late|running\s+late|stomach|sick|not\s+feeling|headache|doctor|appointment|today|todays|today's|tomorrow|at\s+\d{1,2}(:\d{2})?\s*(am|pm)?)\b/i; + +// Durable signals always win: a message matching any of these is kept even if +// it would otherwise look like noise (a system message, a "done", etc.). const DURABLE_SIGNAL_RE = /\b(blocker|blocked|decision|decided|owner|deadline|shipped|fixed|done|launched|deployed|merged|bug|issue|incident|outage|customer|contract|pricing|proposal|launch|release|handoff|review|approval|approved)\b/i; +// Slack system / automated messages render as plain narration like +// " has joined the channel". They carry no human content, so drop them. +const SYSTEM_MESSAGE_RE = /\b(has joined the channel|has left the channel|was added to|has been added|set the channel (topic|purpose|description)|cleared the channel (topic|purpose)|renamed the channel|archived the channel|un-?archived the channel|pinned a message|joined the (call|huddle)|started a (call|huddle)|set up a call)\b/i; + +// Greetings / acknowledgements with no informational content. Anchored to the +// whole (trimmed) message so "ok" drops but "ok, the deploy is blocked" stays. +const TRIVIAL_RE = /^(hi|hello+|hey+|yo|gm|gn|good\s*(morning|night|evening|afternoon)|morning|thanks?|thank\s*you|ty|thx|tysm|np|no\s*problem|ok(ay)?|k|got\s*it|gotcha|lgtm|\+1|nice|cool|great|awesome|perfect|done|yes+|yep|yup|no+|nope|sure|sounds?\s*good|sg|welcome|congrats?|congratulations)[\s.!?]*$/i; + +const EMOJI_SHORTCODE_RE = /:[a-z0-9_+-]+:/gi; + +function slackTsToMs(ts: string): number | null { + const seconds = Number(ts.split('.')[0]); + if (!Number.isFinite(seconds)) return null; + return seconds * 1000; +} + +// Newest-first recency ordering, capped at limit. The Home card shows "latest +// messages", so recency is the ordering once noise is filtered out. function timeRank(candidates: SlackHomeRankCandidate[], limit: number): string[] { return [...candidates] .sort((a, b) => Number(b.ts) - Number(a.ts)) @@ -34,10 +40,13 @@ function timeRank(candidates: SlackHomeRankCandidate[], limit: number): string[] .map(candidate => candidate.id); } -function slackTsToMs(ts: string): number | null { - const seconds = Number(ts.split('.')[0]); - if (!Number.isFinite(seconds)) return null; - return seconds * 1000; +// What remains after removing :shortcodes:, unicode emoji/symbols, punctuation +// and whitespace. Empty ⇒ the message was emoji/reaction-only. +function strippedToCore(text: string): string { + return text + .replace(EMOJI_SHORTCODE_RE, '') + .replace(/[\s\p{P}\p{S}]/gu, '') + .trim(); } function isExpiredRoutineLogistics(candidate: SlackHomeRankCandidate, nowMs: number): boolean { @@ -52,96 +61,32 @@ function isExpiredRoutineLogistics(candidate: SlackHomeRankCandidate, nowMs: num return ROUTINE_LOGISTICS_RE.test(text); } +// Low-value classes that never belong on Home: empty bodies, Slack system +// messages, emoji/reaction-only posts, and bare greetings/acks. A durable +// signal overrides all of these. +function isLowValueNoise(candidate: SlackHomeRankCandidate): boolean { + const text = candidate.text.replace(/\s+/g, ' ').trim(); + if (!text) return true; + if (DURABLE_SIGNAL_RE.test(text)) return false; + if (SYSTEM_MESSAGE_RE.test(text)) return true; + if (TRIVIAL_RE.test(text)) return true; + return strippedToCore(text).length === 0; +} + export function filterSlackHomeCandidatesForRelevance( candidates: SlackHomeRankCandidate[], nowMs = Date.now(), ): SlackHomeRankCandidate[] { - return candidates.filter(candidate => !isExpiredRoutineLogistics(candidate, nowMs)); -} - -function truncate(value: string, max: number): string { - return value.length <= max ? value : `${value.slice(0, max)}...`; -} - -function buildPrompt(candidates: SlackHomeRankCandidate[], limit: number): string { - const messages = candidates.map((candidate, index) => { - const date = Number.isFinite(Number(candidate.ts)) - ? new Date(Number(candidate.ts.split('.')[0]) * 1000).toISOString() - : candidate.ts; - return [ - `## ${index + 1}. ${candidate.id}`, - `Workspace: ${candidate.workspaceName ?? 'unknown'}`, - `Channel: ${candidate.channelName ?? 'unknown'}`, - `Author: ${candidate.author ?? 'unknown'}`, - `Time: ${date}`, - `Text: ${truncate(candidate.text.replace(/\s+/g, ' ').trim(), 700)}`, - ].join('\n'); - }).join('\n\n'); - - return `Choose up to ${limit} Slack messages to show on the user's Home screen. - -Prioritize messages that are likely useful at a glance: -- direct questions or requests to the user -- decisions, blockers, owners, deadlines, status changes, or shipped/fixed/done updates -- project/customer/product updates -- messages with clear actionability or durable knowledge - -Deprioritize: -- greetings, thanks, jokes, reactions, short acknowledgements, bot noise -- vague chatter without clear project/action relevance -- near-duplicates of the same point -- routine logistics whose value expires quickly, such as standup scheduling, standup attendance, sick notes, lunch/commute coordination, and "can we move this?" chatter once the event is likely past - -Return only ids from the candidate list. You may return fewer than ${limit} ids if fewer messages are useful. Prefer relevance over recency, but use recency as a tiebreaker. - -# Candidates - -${messages}`; + return candidates.filter(candidate => + !isExpiredRoutineLogistics(candidate, nowMs) && !isLowValueNoise(candidate)); } +// Deterministic Home feed: drop noise, then order by recency and cap. No LLM +// call — the filter does the de-noising and recency does the ordering. +// (kept async so the IPC caller's contract is unchanged.) export async function rankSlackHomeMessages( candidates: SlackHomeRankCandidate[], limit: number, ): Promise { - const relevantCandidates = filterSlackHomeCandidatesForRelevance(candidates); - - if (relevantCandidates.length <= limit) { - return timeRank(relevantCandidates, limit); - } - - try { - const modelId = await getKgModel(); - const { provider } = await getDefaultModelAndProvider(); - const config = await resolveProviderConfig(provider); - const model = createProvider(config).languageModel(modelId); - - const result = await withUseCase({ useCase: 'knowledge_sync', subUseCase: 'slack_home_rank' }, () => generateObject({ - model, - system: 'You rank Slack messages for a personal productivity Home screen. Be selective and return valid ids only.', - prompt: buildPrompt(relevantCandidates, limit), - schema: RankedSlackMessagesSchema, - })); - - captureLlmUsage({ - useCase: 'knowledge_sync', - subUseCase: 'slack_home_rank', - model: modelId, - provider, - usage: result.usage, - }); - - const validIds = new Set(relevantCandidates.map(candidate => candidate.id)); - const ranked = result.object.rankedIds.filter(id => validIds.has(id)); - const seen = new Set(); - const deduped = ranked.filter(id => { - if (seen.has(id)) return false; - seen.add(id); - return true; - }); - - return deduped.slice(0, limit); - } catch (error) { - console.warn('[SlackHomeRank] LLM ranking failed, falling back to recency:', error); - return timeRank(relevantCandidates, limit); - } + return timeRank(filterSlackHomeCandidatesForRelevance(candidates), limit); }