feat: rank Slack home feed deterministically by recency

Drop the per-load LLM ranker (cost/latency/model dependency) in favor of a stronger deterministic filter + recency ordering. The filter now removes system messages, emoji/reaction-only posts, bare greetings/acks, and empty bodies, with a durable-signal escape hatch. Expand tests to one describe per noise class plus ordering/cap/volume coverage.
This commit is contained in:
Gagancreates 2026-06-15 01:07:41 +05:30
parent 2421a40886
commit 0c1049154e
2 changed files with 162 additions and 138 deletions

View file

@ -1,47 +1,126 @@
import { describe, expect, it } from 'vitest';
import { filterSlackHomeCandidatesForRelevance, SlackHomeRankCandidate } from './rank_slack_home.js';
import {
filterSlackHomeCandidatesForRelevance,
rankSlackHomeMessages,
SlackHomeRankCandidate,
} from './rank_slack_home.js';
/** Formats epoch milliseconds as a Slack-style "<seconds>.<micros>" timestamp with the micros zeroed. */
function slackTs(dateMs: number): string {
  const wholeSeconds = Math.floor(dateMs / 1000);
  return String(wholeSeconds) + '.000000';
}
describe('Slack Home ranking filters', () => {
it('drops stale routine standup logistics but keeps durable updates', () => {
const now = Date.parse('2026-06-04T18:00:00Z');
const nineHoursAgo = now - 9 * 60 * 60 * 1000;
const twelveHoursAgo = now - 12 * 60 * 60 * 1000;
const thirtyMinutesAgo = now - 30 * 60 * 1000;
const NOW = Date.parse('2026-06-04T18:00:00Z');
// Builds a #general candidate stamped five minutes before NOW. The id falls
// back to the message text when the caller does not pass one.
const recent = (text: string, id = text): SlackHomeRankCandidate => {
  const fiveMinutesMs = 5 * 60 * 1000;
  return { id, channelName: 'general', text, ts: slackTs(NOW - fiveMinutesMs) };
};
const candidates: SlackHomeRankCandidate[] = [
{
id: 'stale-standup-schedule',
channelName: 'general',
text: 'standup at 4pm possible?',
ts: slackTs(nineHoursAgo),
},
{
id: 'stale-standup-sick',
channelName: 'general',
text: 'ill skip todays standup I am having stomach ache and not feeling well',
ts: slackTs(twelveHoursAgo),
},
{
id: 'durable-issue-update',
channelName: 'general',
text: 'is the icon issue fixed for windows?',
ts: slackTs(twelveHoursAgo),
},
{
id: 'recent-standup-schedule',
channelName: 'general',
text: 'standup at 4pm possible?',
ts: slackTs(thirtyMinutesAgo),
},
];
/** Runs the relevance filter as of NOW and returns the ids of the survivors, in the order the filter returned them. */
function keptIds(candidates: SlackHomeRankCandidate[]): string[] {
  const survivors = filterSlackHomeCandidatesForRelevance(candidates, NOW);
  const ids: string[] = [];
  for (const survivor of survivors) {
    ids.push(survivor.id);
  }
  return ids;
}
expect(filterSlackHomeCandidatesForRelevance(candidates, now).map(candidate => candidate.id)).toEqual([
'durable-issue-update',
'recent-standup-schedule',
]);
describe('filterSlackHomeCandidatesForRelevance', () => {
// Age-sensitive filtering: the same standup wording is expected to be dropped
// when old and kept when recent.
describe('routine standup logistics', () => {
it('drops stale standup logistics but keeps recent ones and durable updates', () => {
// All offsets are relative to the fixed NOW, so the test does not depend on the wall clock.
const nineHoursAgo = NOW - 9 * 60 * 60 * 1000;
const twelveHoursAgo = NOW - 12 * 60 * 60 * 1000;
const thirtyMinutesAgo = NOW - 30 * 60 * 1000;
// 'stale-standup-schedule' and 'recent-standup-schedule' share identical text;
// only their timestamps differ, which isolates age as the deciding factor.
const candidates: SlackHomeRankCandidate[] = [
{ id: 'stale-standup-schedule', channelName: 'general', text: 'standup at 4pm possible?', ts: slackTs(nineHoursAgo) },
{ id: 'stale-standup-sick', channelName: 'general', text: 'ill skip todays standup I am having stomach ache and not feeling well', ts: slackTs(twelveHoursAgo) },
{ id: 'durable-issue-update', channelName: 'general', text: 'is the icon issue fixed for windows?', ts: slackTs(twelveHoursAgo) },
{ id: 'recent-standup-schedule', channelName: 'general', text: 'standup at 4pm possible?', ts: slackTs(thirtyMinutesAgo) },
];
// Survivors are expected in input order: the 12h-old issue question, then the 30-minute-old standup ask.
expect(keptIds(candidates)).toEqual(['durable-issue-update', 'recent-standup-schedule']);
});
});
// Slack narration lines ("X has joined the channel", topic changes, huddles).
// Every sample is built with recent(), i.e. five minutes old, so any drop here
// is caused by the text and not by age.
describe('system / automated messages', () => {
it('drops channel join/leave, topic, rename and call notices', () => {
const candidates = [
recent('Alex has joined the channel', 'join'),
recent('Sam has left the channel', 'leave'),
recent('Alex set the channel topic: Q3 planning', 'topic'),
recent('Sam renamed the channel to design-team', 'rename'),
recent('Alex started a huddle', 'huddle'),
// Control: an ordinary human message (it also contains the durable keyword "review").
recent('Real question: can someone review my PR?', 'real'),
];
expect(keptIds(candidates)).toEqual(['real']);
});
it('keeps a system-shaped message that carries a durable signal', () => {
// Same "set the channel topic" shape as the dropped sample in the previous test, but the
// topic text contains "incident", so the message is expected to survive.
const candidates = [recent('Priya set the channel topic: incident response war room', 'topic-incident')];
expect(keptIds(candidates)).toEqual(['topic-incident']);
});
});
// Messages with no letters or digits at all: unicode emoji, :shortcode: tokens,
// or bare punctuation.
describe('emoji / reaction-only', () => {
it('drops emoji-only, shortcode-only and punctuation-only posts', () => {
// NOTE(review): the emoji samples here look like single-code-point emoji; none appears to
// use a variation selector or a zero-width-joiner sequence — consider adding such a case.
const candidates = [
recent('👍', 'thumbs'),
recent('🎉🎉🎉', 'party'),
recent(':tada: :rocket:', 'shortcodes'),
recent('!!!', 'punct'),
// Control: a leading emoji followed by real text must be kept.
recent('🚀 shipping the new pricing page today', 'real'),
];
expect(keptIds(candidates)).toEqual(['real']);
});
});
// Bare greetings and acknowledgements carry no information and are expected to
// be dropped; the same words followed by real content are expected to stay.
describe('greetings / acknowledgements', () => {
  it('drops bare greetings and acks but keeps anything with content', () => {
    const candidates = [
      recent('thanks!', 'thanks'),
      recent('gm', 'gm'),
      recent('lgtm', 'lgtm'),
      recent('+1', 'plus1'),
      recent('sounds good', 'sg'),
      recent('ok', 'ok'),
      // Starts with an ack word, continues with ordinary content, and contains
      // no durable keyword: this is the sample that actually exercises the
      // whole-message anchoring of the greeting/ack pattern.
      recent('ok, see you at lunch', 'ok-plain-content'),
      // Contains "blocked", a durable keyword, so it would be kept regardless
      // of how the greeting/ack pattern is anchored.
      recent('ok, the deploy is blocked on the migration', 'ok-with-content'),
      recent('thanks for fixing the outage', 'thanks-durable'),
    ];
    // 'ok-plain-content' kept (ack word + content, no durable keyword);
    // 'ok-with-content' and 'thanks-durable' kept (durable signal).
    expect(keptIds(candidates)).toEqual(['ok-plain-content', 'ok-with-content', 'thanks-durable']);
  });
});
// A whitespace-only body counts as empty; the second candidate is the control that must survive.
it('drops empty-text candidates', () => {
expect(keptIds([recent(' ', 'blank'), recent('a real message here', 'real')])).toEqual(['real']);
});
});
// End-to-end checks of the exported ranker: noise is filtered out, survivors
// come back newest-first, and the result is capped at `limit`.
// rankSlackHomeMessages is called without a clock argument here, so the
// timestamps derived from NOW only determine relative order between candidates.
describe('rankSlackHomeMessages (deterministic)', () => {
it('orders surviving candidates newest-first and caps at the limit', async () => {
// Helper: a plain "update <id>" message stamped `minutesAgo` minutes before NOW.
const mk = (id: string, minutesAgo: number): SlackHomeRankCandidate => ({
id, channelName: 'general', text: `update ${id}`, ts: slackTs(NOW - minutesAgo * 60 * 1000),
});
// Deliberately supplied out of chronological order.
const candidates = [mk('old', 50), mk('newest', 1), mk('mid', 20)];
expect(await rankSlackHomeMessages(candidates, 5)).toEqual(['newest', 'mid', 'old']);
// A limit below the number of survivors keeps only the newest entries.
expect(await rankSlackHomeMessages(candidates, 2)).toEqual(['newest', 'mid']);
});
it('filters noise before ranking', async () => {
const candidates = [
recent('👍', 'emoji'),
recent('Alex has joined the channel', 'join'),
recent('can you review the pricing proposal?', 'real'),
];
// With limit 5 and three inputs, only filtering can explain the single result.
expect(await rankSlackHomeMessages(candidates, 5)).toEqual(['real']);
});
it('handles a high-volume batch: caps output and preserves recency order', async () => {
// 150 candidates spaced one minute apart.
const candidates: SlackHomeRankCandidate[] = Array.from({ length: 150 }, (_, i) => ({
id: `m${i}`,
channelName: 'general',
// i=0 is newest; larger i is older.
text: `status update number ${i}`,
ts: slackTs(NOW - i * 60 * 1000),
}));
const ranked = await rankSlackHomeMessages(candidates, 5);
expect(ranked).toEqual(['m0', 'm1', 'm2', 'm3', 'm4']);
});
});

View file

@ -1,14 +1,3 @@
import { z } from 'zod';
import { generateObject } from 'ai';
import { createProvider } from '../../models/models.js';
import {
getDefaultModelAndProvider,
getKgModel,
resolveProviderConfig,
} from '../../models/defaults.js';
import { captureLlmUsage } from '../../analytics/usage.js';
import { withUseCase } from '../../analytics/use_case.js';
export type SlackHomeRankCandidate = {
id: string;
workspaceName?: string;
@ -18,15 +7,32 @@ export type SlackHomeRankCandidate = {
ts: string;
};
// Structured-output schema handed to generateObject for the LLM ranker: an
// object with a single `rankedIds` array of message-id strings, in display order.
const RankedSlackMessagesSchema = z.object({
rankedIds: z.array(z.string()).describe('Message ids in the order they should appear on Home.'),
});
// Two hours, in milliseconds.
// NOTE(review): presumably the age beyond which routine logistics count as expired —
// the code that reads this constant is not visible here; confirm in isExpiredRoutineLogistics.
const EXPIRED_ROUTINE_AGE_MS = 2 * 60 * 60 * 1000;
// Mentions of a recurring team ritual: stand-up / standup, daily sync|scrum|standup,
// scrum, check-in. Whole-word, case-insensitive.
const ROUTINE_EVENT_RE = /\b(stand[-\s]?up|daily\s+(sync|scrum|standup)|scrum|check[-\s]?in)\b/i;
// Scheduling, attendance and absence wording (skip, reschedule, late, sick, today,
// tomorrow, ...) plus clock times such as "at 4pm" or "at 16:30". Whole-word, case-insensitive.
const ROUTINE_LOGISTICS_RE = /\b(skip|skipping|miss|missing|can't|cannot|cant|won't|wont|join|attend|possible|move|reschedule|shift|late|running\s+late|stomach|sick|not\s+feeling|headache|doctor|appointment|today|todays|today's|tomorrow|at\s+\d{1,2}(:\d{2})?\s*(am|pm)?)\b/i;
// Durable signals always win: a message matching any of these is kept even if
// it would otherwise look like noise (a system message, a "done", etc.).
// isLowValueNoise tests this right after its empty-text check and before the
// system / trivial / emoji-only checks; that ordering is what makes it an override.
// Matching is whole-word and case-insensitive, anywhere in the message.
const DURABLE_SIGNAL_RE = /\b(blocker|blocked|decision|decided|owner|deadline|shipped|fixed|done|launched|deployed|merged|bug|issue|incident|outage|customer|contract|pricing|proposal|launch|release|handoff|review|approval|approved)\b/i;
// Slack system / automated messages render as plain narration like
// "<name> has joined the channel". They carry no human content, so drop them.
// The pattern is not anchored: it matches the phrase anywhere in the text.
// NOTE(review): the generic alternatives ("was added to", "has been added",
// "pinned a message") can also occur in ordinary human sentences, e.g.
// "a new column was added to the table"; such a message is dropped unless it
// also contains a durable keyword. Consider tightening these if false drops show up.
const SYSTEM_MESSAGE_RE = /\b(has joined the channel|has left the channel|was added to|has been added|set the channel (topic|purpose|description)|cleared the channel (topic|purpose)|renamed the channel|archived the channel|un-?archived the channel|pinned a message|joined the (call|huddle)|started a (call|huddle)|set up a call)\b/i;
// Greetings / acknowledgements with no informational content. Anchored to the
// whole (trimmed) message so "ok" drops but "ok, the deploy is blocked" stays.
// Only trailing whitespace and . ! ? are tolerated after the word or phrase.
// Note: "done" is listed here and in DURABLE_SIGNAL_RE; isLowValueNoise checks the
// durable pattern first, so a bare "done" is kept and never reaches this pattern.
const TRIVIAL_RE = /^(hi|hello+|hey+|yo|gm|gn|good\s*(morning|night|evening|afternoon)|morning|thanks?|thank\s*you|ty|thx|tysm|np|no\s*problem|ok(ay)?|k|got\s*it|gotcha|lgtm|\+1|nice|cool|great|awesome|perfect|done|yes+|yep|yup|no+|nope|sure|sounds?\s*good|sg|welcome|congrats?|congratulations)[\s.!?]*$/i;
// Slack-style emoji shortcodes such as ":tada:" or ":+1:" — a colon-delimited run of
// letters, digits, "_", "+" or "-". Global so a single replace() removes every occurrence.
const EMOJI_SHORTCODE_RE = /:[a-z0-9_+-]+:/gi;
/**
 * Converts a Slack message timestamp ("<seconds>.<suffix>") to epoch milliseconds.
 * Only the part before the first "." is used; the suffix is ignored.
 *
 * @param ts Slack timestamp string.
 * @returns Epoch milliseconds, or null when the seconds part is blank or not a finite number.
 */
function slackTsToMs(ts: string): number | null {
  const [secondsPart = ''] = ts.split('.');
  // Number('') and Number('   ') evaluate to 0, which would silently turn a
  // missing timestamp into 1970-01-01. Treat a blank seconds part as unparseable.
  if (secondsPart.trim() === '') return null;
  const seconds = Number(secondsPart);
  return Number.isFinite(seconds) ? seconds * 1000 : null;
}
// Newest-first recency ordering, capped at limit. The Home card shows "latest
// messages", so recency is the ordering once noise is filtered out.
function timeRank(candidates: SlackHomeRankCandidate[], limit: number): string[] {
return [...candidates]
.sort((a, b) => Number(b.ts) - Number(a.ts))
@ -34,10 +40,13 @@ function timeRank(candidates: SlackHomeRankCandidate[], limit: number): string[]
.map(candidate => candidate.id);
}
function slackTsToMs(ts: string): number | null {
const seconds = Number(ts.split('.')[0]);
if (!Number.isFinite(seconds)) return null;
return seconds * 1000;
/**
 * Returns what is left of `text` after removing :shortcodes:, unicode
 * emoji/symbols, punctuation and whitespace. An empty result means the message
 * was emoji/reaction-only.
 *
 * @param text Message text.
 * @returns The remaining letters/digits, concatenated with no separators.
 */
function strippedToCore(text: string): string {
  return text
    .replace(EMOJI_SHORTCODE_RE, '')
    // \p{M} and \p{Cf} remove the invisible parts of emoji sequences — the
    // variation selector U+FE0F (as in a red heart) and the zero-width joiner
    // U+200D (as in family emoji) — which \p{S} alone leaves behind.
    .replace(/[\s\p{P}\p{S}\p{M}\p{Cf}]/gu, '');
}
function isExpiredRoutineLogistics(candidate: SlackHomeRankCandidate, nowMs: number): boolean {
@ -52,96 +61,32 @@ function isExpiredRoutineLogistics(candidate: SlackHomeRankCandidate, nowMs: num
return ROUTINE_LOGISTICS_RE.test(text);
}
// Decides whether a candidate is noise that should never reach Home.
// Order of checks: blank body -> noise; durable keyword -> keep; Slack system
// narration or a bare greeting/ack -> noise; otherwise noise only when nothing
// but emoji, shortcodes, punctuation and whitespace remains.
function isLowValueNoise(candidate: SlackHomeRankCandidate): boolean {
  // Collapse whitespace runs (including newlines) so the anchored patterns see one line.
  const normalized = candidate.text.replace(/\s+/g, ' ').trim();
  if (normalized === '') return true;
  if (DURABLE_SIGNAL_RE.test(normalized)) return false;
  const automatedOrTrivial = SYSTEM_MESSAGE_RE.test(normalized) || TRIVIAL_RE.test(normalized);
  return automatedOrTrivial || strippedToCore(normalized) === '';
}
export function filterSlackHomeCandidatesForRelevance(
candidates: SlackHomeRankCandidate[],
nowMs = Date.now(),
): SlackHomeRankCandidate[] {
return candidates.filter(candidate => !isExpiredRoutineLogistics(candidate, nowMs));
}
/** Returns `value` unchanged when its length is at most `max`; otherwise its first `max` UTF-16 code units followed by "...". */
function truncate(value: string, max: number): string {
  if (value.length <= max) {
    return value;
  }
  return value.slice(0, max) + '...';
}
function buildPrompt(candidates: SlackHomeRankCandidate[], limit: number): string {
const messages = candidates.map((candidate, index) => {
const date = Number.isFinite(Number(candidate.ts))
? new Date(Number(candidate.ts.split('.')[0]) * 1000).toISOString()
: candidate.ts;
return [
`## ${index + 1}. ${candidate.id}`,
`Workspace: ${candidate.workspaceName ?? 'unknown'}`,
`Channel: ${candidate.channelName ?? 'unknown'}`,
`Author: ${candidate.author ?? 'unknown'}`,
`Time: ${date}`,
`Text: ${truncate(candidate.text.replace(/\s+/g, ' ').trim(), 700)}`,
].join('\n');
}).join('\n\n');
return `Choose up to ${limit} Slack messages to show on the user's Home screen.
Prioritize messages that are likely useful at a glance:
- direct questions or requests to the user
- decisions, blockers, owners, deadlines, status changes, or shipped/fixed/done updates
- project/customer/product updates
- messages with clear actionability or durable knowledge
Deprioritize:
- greetings, thanks, jokes, reactions, short acknowledgements, bot noise
- vague chatter without clear project/action relevance
- near-duplicates of the same point
- routine logistics whose value expires quickly, such as standup scheduling, standup attendance, sick notes, lunch/commute coordination, and "can we move this?" chatter once the event is likely past
Return only ids from the candidate list. You may return fewer than ${limit} ids if fewer messages are useful. Prefer relevance over recency, but use recency as a tiebreaker.
# Candidates
${messages}`;
return candidates.filter(candidate =>
!isExpiredRoutineLogistics(candidate, nowMs) && !isLowValueNoise(candidate));
}
// Deterministic Home feed: drop noise, then order by recency and cap. No LLM
// call — the filter does the de-noising and recency does the ordering.
// (kept async so the IPC caller's contract is unchanged.)
export async function rankSlackHomeMessages(
candidates: SlackHomeRankCandidate[],
limit: number,
): Promise<string[]> {
const relevantCandidates = filterSlackHomeCandidatesForRelevance(candidates);
if (relevantCandidates.length <= limit) {
return timeRank(relevantCandidates, limit);
}
try {
const modelId = await getKgModel();
const { provider } = await getDefaultModelAndProvider();
const config = await resolveProviderConfig(provider);
const model = createProvider(config).languageModel(modelId);
const result = await withUseCase({ useCase: 'knowledge_sync', subUseCase: 'slack_home_rank' }, () => generateObject({
model,
system: 'You rank Slack messages for a personal productivity Home screen. Be selective and return valid ids only.',
prompt: buildPrompt(relevantCandidates, limit),
schema: RankedSlackMessagesSchema,
}));
captureLlmUsage({
useCase: 'knowledge_sync',
subUseCase: 'slack_home_rank',
model: modelId,
provider,
usage: result.usage,
});
const validIds = new Set(relevantCandidates.map(candidate => candidate.id));
const ranked = result.object.rankedIds.filter(id => validIds.has(id));
const seen = new Set<string>();
const deduped = ranked.filter(id => {
if (seen.has(id)) return false;
seen.add(id);
return true;
});
return deduped.slice(0, limit);
} catch (error) {
console.warn('[SlackHomeRank] LLM ranking failed, falling back to recency:', error);
return timeRank(relevantCandidates, limit);
}
return timeRank(filterSlackHomeCandidatesForRelevance(candidates), limit);
}