mirror of
https://github.com/rowboatlabs/rowboat.git
synced 2026-06-15 20:05:16 +02:00
feat: rank Slack home feed deterministically by recency
Drop the per-load LLM ranker (cost/latency/model dependency) in favor of a stronger deterministic filter + recency ordering. The filter now removes system messages, emoji/reaction-only posts, bare greetings/acks, and empty bodies, with a durable-signal escape hatch. Expand tests to one describe per noise class plus ordering/cap/volume coverage.
This commit is contained in:
parent
2421a40886
commit
0c1049154e
2 changed files with 162 additions and 138 deletions
|
|
@ -1,47 +1,126 @@
|
|||
import { describe, expect, it } from 'vitest';
|
||||
import { filterSlackHomeCandidatesForRelevance, SlackHomeRankCandidate } from './rank_slack_home.js';
|
||||
import {
|
||||
filterSlackHomeCandidatesForRelevance,
|
||||
rankSlackHomeMessages,
|
||||
SlackHomeRankCandidate,
|
||||
} from './rank_slack_home.js';
|
||||
|
||||
function slackTs(dateMs: number): string {
|
||||
return `${Math.floor(dateMs / 1000)}.000000`;
|
||||
}
|
||||
|
||||
describe('Slack Home ranking filters', () => {
|
||||
it('drops stale routine standup logistics but keeps durable updates', () => {
|
||||
const now = Date.parse('2026-06-04T18:00:00Z');
|
||||
const nineHoursAgo = now - 9 * 60 * 60 * 1000;
|
||||
const twelveHoursAgo = now - 12 * 60 * 60 * 1000;
|
||||
const thirtyMinutesAgo = now - 30 * 60 * 1000;
|
||||
const NOW = Date.parse('2026-06-04T18:00:00Z');
|
||||
const recent = (text: string, id = text): SlackHomeRankCandidate => ({
|
||||
id,
|
||||
channelName: 'general',
|
||||
text,
|
||||
ts: slackTs(NOW - 5 * 60 * 1000),
|
||||
});
|
||||
|
||||
const candidates: SlackHomeRankCandidate[] = [
|
||||
{
|
||||
id: 'stale-standup-schedule',
|
||||
channelName: 'general',
|
||||
text: 'standup at 4pm possible?',
|
||||
ts: slackTs(nineHoursAgo),
|
||||
},
|
||||
{
|
||||
id: 'stale-standup-sick',
|
||||
channelName: 'general',
|
||||
text: 'ill skip todays standup I am having stomach ache and not feeling well',
|
||||
ts: slackTs(twelveHoursAgo),
|
||||
},
|
||||
{
|
||||
id: 'durable-issue-update',
|
||||
channelName: 'general',
|
||||
text: 'is the icon issue fixed for windows?',
|
||||
ts: slackTs(twelveHoursAgo),
|
||||
},
|
||||
{
|
||||
id: 'recent-standup-schedule',
|
||||
channelName: 'general',
|
||||
text: 'standup at 4pm possible?',
|
||||
ts: slackTs(thirtyMinutesAgo),
|
||||
},
|
||||
];
|
||||
function keptIds(candidates: SlackHomeRankCandidate[]): string[] {
|
||||
return filterSlackHomeCandidatesForRelevance(candidates, NOW).map(c => c.id);
|
||||
}
|
||||
|
||||
expect(filterSlackHomeCandidatesForRelevance(candidates, now).map(candidate => candidate.id)).toEqual([
|
||||
'durable-issue-update',
|
||||
'recent-standup-schedule',
|
||||
]);
|
||||
describe('filterSlackHomeCandidatesForRelevance', () => {
|
||||
describe('routine standup logistics', () => {
|
||||
it('drops stale standup logistics but keeps recent ones and durable updates', () => {
|
||||
const nineHoursAgo = NOW - 9 * 60 * 60 * 1000;
|
||||
const twelveHoursAgo = NOW - 12 * 60 * 60 * 1000;
|
||||
const thirtyMinutesAgo = NOW - 30 * 60 * 1000;
|
||||
|
||||
const candidates: SlackHomeRankCandidate[] = [
|
||||
{ id: 'stale-standup-schedule', channelName: 'general', text: 'standup at 4pm possible?', ts: slackTs(nineHoursAgo) },
|
||||
{ id: 'stale-standup-sick', channelName: 'general', text: 'ill skip todays standup I am having stomach ache and not feeling well', ts: slackTs(twelveHoursAgo) },
|
||||
{ id: 'durable-issue-update', channelName: 'general', text: 'is the icon issue fixed for windows?', ts: slackTs(twelveHoursAgo) },
|
||||
{ id: 'recent-standup-schedule', channelName: 'general', text: 'standup at 4pm possible?', ts: slackTs(thirtyMinutesAgo) },
|
||||
];
|
||||
|
||||
expect(keptIds(candidates)).toEqual(['durable-issue-update', 'recent-standup-schedule']);
|
||||
});
|
||||
});
|
||||
|
||||
describe('system / automated messages', () => {
|
||||
it('drops channel join/leave, topic, rename and call notices', () => {
|
||||
const candidates = [
|
||||
recent('Alex has joined the channel', 'join'),
|
||||
recent('Sam has left the channel', 'leave'),
|
||||
recent('Alex set the channel topic: Q3 planning', 'topic'),
|
||||
recent('Sam renamed the channel to design-team', 'rename'),
|
||||
recent('Alex started a huddle', 'huddle'),
|
||||
recent('Real question: can someone review my PR?', 'real'),
|
||||
];
|
||||
expect(keptIds(candidates)).toEqual(['real']);
|
||||
});
|
||||
|
||||
it('keeps a system-shaped message that carries a durable signal', () => {
|
||||
const candidates = [recent('Priya set the channel topic: incident response war room', 'topic-incident')];
|
||||
expect(keptIds(candidates)).toEqual(['topic-incident']);
|
||||
});
|
||||
});
|
||||
|
||||
describe('emoji / reaction-only', () => {
|
||||
it('drops emoji-only, shortcode-only and punctuation-only posts', () => {
|
||||
const candidates = [
|
||||
recent('👍', 'thumbs'),
|
||||
recent('🎉🎉🎉', 'party'),
|
||||
recent(':tada: :rocket:', 'shortcodes'),
|
||||
recent('!!!', 'punct'),
|
||||
recent('🚀 shipping the new pricing page today', 'real'),
|
||||
];
|
||||
expect(keptIds(candidates)).toEqual(['real']);
|
||||
});
|
||||
});
|
||||
|
||||
describe('greetings / acknowledgements', () => {
|
||||
it('drops bare greetings and acks but keeps anything with content', () => {
|
||||
const candidates = [
|
||||
recent('thanks!', 'thanks'),
|
||||
recent('gm', 'gm'),
|
||||
recent('lgtm', 'lgtm'),
|
||||
recent('+1', 'plus1'),
|
||||
recent('sounds good', 'sg'),
|
||||
recent('ok', 'ok'),
|
||||
recent('ok, the deploy is blocked on the migration', 'ok-with-content'),
|
||||
recent('thanks for fixing the outage', 'thanks-durable'),
|
||||
];
|
||||
// 'ok-with-content' kept (has content); 'thanks-durable' kept (durable signal).
|
||||
expect(keptIds(candidates)).toEqual(['ok-with-content', 'thanks-durable']);
|
||||
});
|
||||
});
|
||||
|
||||
it('drops empty-text candidates', () => {
|
||||
expect(keptIds([recent(' ', 'blank'), recent('a real message here', 'real')])).toEqual(['real']);
|
||||
});
|
||||
});
|
||||
|
||||
describe('rankSlackHomeMessages (deterministic)', () => {
|
||||
it('orders surviving candidates newest-first and caps at the limit', async () => {
|
||||
const mk = (id: string, minutesAgo: number): SlackHomeRankCandidate => ({
|
||||
id, channelName: 'general', text: `update ${id}`, ts: slackTs(NOW - minutesAgo * 60 * 1000),
|
||||
});
|
||||
const candidates = [mk('old', 50), mk('newest', 1), mk('mid', 20)];
|
||||
expect(await rankSlackHomeMessages(candidates, 5)).toEqual(['newest', 'mid', 'old']);
|
||||
expect(await rankSlackHomeMessages(candidates, 2)).toEqual(['newest', 'mid']);
|
||||
});
|
||||
|
||||
it('filters noise before ranking', async () => {
|
||||
const candidates = [
|
||||
recent('👍', 'emoji'),
|
||||
recent('Alex has joined the channel', 'join'),
|
||||
recent('can you review the pricing proposal?', 'real'),
|
||||
];
|
||||
expect(await rankSlackHomeMessages(candidates, 5)).toEqual(['real']);
|
||||
});
|
||||
|
||||
it('handles a high-volume batch: caps output and preserves recency order', async () => {
|
||||
const candidates: SlackHomeRankCandidate[] = Array.from({ length: 150 }, (_, i) => ({
|
||||
id: `m${i}`,
|
||||
channelName: 'general',
|
||||
// i=0 is newest; larger i is older.
|
||||
text: `status update number ${i}`,
|
||||
ts: slackTs(NOW - i * 60 * 1000),
|
||||
}));
|
||||
const ranked = await rankSlackHomeMessages(candidates, 5);
|
||||
expect(ranked).toEqual(['m0', 'm1', 'm2', 'm3', 'm4']);
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -1,14 +1,3 @@
|
|||
import { z } from 'zod';
|
||||
import { generateObject } from 'ai';
|
||||
import { createProvider } from '../../models/models.js';
|
||||
import {
|
||||
getDefaultModelAndProvider,
|
||||
getKgModel,
|
||||
resolveProviderConfig,
|
||||
} from '../../models/defaults.js';
|
||||
import { captureLlmUsage } from '../../analytics/usage.js';
|
||||
import { withUseCase } from '../../analytics/use_case.js';
|
||||
|
||||
export type SlackHomeRankCandidate = {
|
||||
id: string;
|
||||
workspaceName?: string;
|
||||
|
|
@ -18,15 +7,32 @@ export type SlackHomeRankCandidate = {
|
|||
ts: string;
|
||||
};
|
||||
|
||||
const RankedSlackMessagesSchema = z.object({
|
||||
rankedIds: z.array(z.string()).describe('Message ids in the order they should appear on Home.'),
|
||||
});
|
||||
|
||||
const EXPIRED_ROUTINE_AGE_MS = 2 * 60 * 60 * 1000;
|
||||
const ROUTINE_EVENT_RE = /\b(stand[-\s]?up|daily\s+(sync|scrum|standup)|scrum|check[-\s]?in)\b/i;
|
||||
const ROUTINE_LOGISTICS_RE = /\b(skip|skipping|miss|missing|can't|cannot|cant|won't|wont|join|attend|possible|move|reschedule|shift|late|running\s+late|stomach|sick|not\s+feeling|headache|doctor|appointment|today|todays|today's|tomorrow|at\s+\d{1,2}(:\d{2})?\s*(am|pm)?)\b/i;
|
||||
|
||||
// Durable signals always win: a message matching any of these is kept even if
|
||||
// it would otherwise look like noise (a system message, a "done", etc.).
|
||||
const DURABLE_SIGNAL_RE = /\b(blocker|blocked|decision|decided|owner|deadline|shipped|fixed|done|launched|deployed|merged|bug|issue|incident|outage|customer|contract|pricing|proposal|launch|release|handoff|review|approval|approved)\b/i;
|
||||
|
||||
// Slack system / automated messages render as plain narration like
|
||||
// "<name> has joined the channel". They carry no human content, so drop them.
|
||||
const SYSTEM_MESSAGE_RE = /\b(has joined the channel|has left the channel|was added to|has been added|set the channel (topic|purpose|description)|cleared the channel (topic|purpose)|renamed the channel|archived the channel|un-?archived the channel|pinned a message|joined the (call|huddle)|started a (call|huddle)|set up a call)\b/i;
|
||||
|
||||
// Greetings / acknowledgements with no informational content. Anchored to the
|
||||
// whole (trimmed) message so "ok" drops but "ok, the deploy is blocked" stays.
|
||||
const TRIVIAL_RE = /^(hi|hello+|hey+|yo|gm|gn|good\s*(morning|night|evening|afternoon)|morning|thanks?|thank\s*you|ty|thx|tysm|np|no\s*problem|ok(ay)?|k|got\s*it|gotcha|lgtm|\+1|nice|cool|great|awesome|perfect|done|yes+|yep|yup|no+|nope|sure|sounds?\s*good|sg|welcome|congrats?|congratulations)[\s.!?]*$/i;
|
||||
|
||||
const EMOJI_SHORTCODE_RE = /:[a-z0-9_+-]+:/gi;
|
||||
|
||||
/**
 * Converts a Slack-style timestamp string to epoch milliseconds.
 *
 * Only the part before the first "." is used; it is interpreted as whole
 * seconds since the Unix epoch, so any fractional suffix is ignored.
 *
 * @param ts Timestamp string such as "1780596000.000000".
 * @returns Milliseconds since the epoch, or `null` when the seconds part is
 *          blank or does not parse to a finite number.
 */
function slackTsToMs(ts: string): number | null {
    const [rawSeconds = ''] = ts.split('.');
    const secondsPart = rawSeconds.trim();
    // Number('') evaluates to 0, which would silently turn a blank timestamp
    // into 1970-01-01 and make the message look decades old. Treat it as
    // unparseable instead.
    if (secondsPart === '') return null;
    const seconds = Number(secondsPart);
    return Number.isFinite(seconds) ? seconds * 1000 : null;
}
|
||||
|
||||
// Newest-first recency ordering, capped at limit. The Home card shows "latest
|
||||
// messages", so recency is the ordering once noise is filtered out.
|
||||
function timeRank(candidates: SlackHomeRankCandidate[], limit: number): string[] {
|
||||
return [...candidates]
|
||||
.sort((a, b) => Number(b.ts) - Number(a.ts))
|
||||
|
|
@ -34,10 +40,13 @@ function timeRank(candidates: SlackHomeRankCandidate[], limit: number): string[]
|
|||
.map(candidate => candidate.id);
|
||||
}
|
||||
|
||||
function slackTsToMs(ts: string): number | null {
|
||||
const seconds = Number(ts.split('.')[0]);
|
||||
if (!Number.isFinite(seconds)) return null;
|
||||
return seconds * 1000;
|
||||
// What remains after removing :shortcodes:, unicode emoji/symbols, punctuation,
// whitespace, and the invisible code points that hold emoji sequences together.
// Empty ⇒ the message was emoji/reaction-only.
function strippedToCore(text: string): string {
    return text
        .replace(EMOJI_SHORTCODE_RE, '')
        // \p{P}/\p{S} alone leave residue for many real-world emoji:
        //  - \p{M} removes variation selectors (U+FE0F in "❤️") and the
        //    enclosing keycap mark (U+20E3);
        //  - \p{Cf} removes the zero-width joiner (U+200D in family/profession
        //    sequences) and the tag characters used by subdivision flags.
        // Messages with real words still keep their letters/digits, so they
        // remain non-empty. \s already removes all whitespace, so no trim().
        .replace(/[\s\p{P}\p{S}\p{M}\p{Cf}]/gu, '');
}
|
||||
|
||||
function isExpiredRoutineLogistics(candidate: SlackHomeRankCandidate, nowMs: number): boolean {
|
||||
|
|
@ -52,96 +61,32 @@ function isExpiredRoutineLogistics(candidate: SlackHomeRankCandidate, nowMs: num
|
|||
return ROUTINE_LOGISTICS_RE.test(text);
|
||||
}
|
||||
|
||||
// Decides whether a candidate is noise that should never reach Home. Noise
// means: an empty body, a Slack system/automated notice, a bare greeting or
// acknowledgement, or a post with nothing left once emoji and punctuation are
// stripped. Any durable signal in the text overrides every noise class except
// the empty-body check, which runs first.
function isLowValueNoise(candidate: SlackHomeRankCandidate): boolean {
    // Collapse whitespace runs so the anchored patterns see one clean line.
    const normalized = candidate.text.replace(/\s+/g, ' ').trim();
    if (normalized.length === 0) {
        return true;
    }

    // Escape hatch: a durable keyword keeps the message regardless of shape.
    if (DURABLE_SIGNAL_RE.test(normalized)) {
        return false;
    }

    const automatedOrTrivial = SYSTEM_MESSAGE_RE.test(normalized) || TRIVIAL_RE.test(normalized);
    return automatedOrTrivial || strippedToCore(normalized).length === 0;
}
|
||||
|
||||
export function filterSlackHomeCandidatesForRelevance(
|
||||
candidates: SlackHomeRankCandidate[],
|
||||
nowMs = Date.now(),
|
||||
): SlackHomeRankCandidate[] {
|
||||
return candidates.filter(candidate => !isExpiredRoutineLogistics(candidate, nowMs));
|
||||
}
|
||||
|
||||
function truncate(value: string, max: number): string {
|
||||
return value.length <= max ? value : `${value.slice(0, max)}...`;
|
||||
}
|
||||
|
||||
function buildPrompt(candidates: SlackHomeRankCandidate[], limit: number): string {
|
||||
const messages = candidates.map((candidate, index) => {
|
||||
const date = Number.isFinite(Number(candidate.ts))
|
||||
? new Date(Number(candidate.ts.split('.')[0]) * 1000).toISOString()
|
||||
: candidate.ts;
|
||||
return [
|
||||
`## ${index + 1}. ${candidate.id}`,
|
||||
`Workspace: ${candidate.workspaceName ?? 'unknown'}`,
|
||||
`Channel: ${candidate.channelName ?? 'unknown'}`,
|
||||
`Author: ${candidate.author ?? 'unknown'}`,
|
||||
`Time: ${date}`,
|
||||
`Text: ${truncate(candidate.text.replace(/\s+/g, ' ').trim(), 700)}`,
|
||||
].join('\n');
|
||||
}).join('\n\n');
|
||||
|
||||
return `Choose up to ${limit} Slack messages to show on the user's Home screen.
|
||||
|
||||
Prioritize messages that are likely useful at a glance:
|
||||
- direct questions or requests to the user
|
||||
- decisions, blockers, owners, deadlines, status changes, or shipped/fixed/done updates
|
||||
- project/customer/product updates
|
||||
- messages with clear actionability or durable knowledge
|
||||
|
||||
Deprioritize:
|
||||
- greetings, thanks, jokes, reactions, short acknowledgements, bot noise
|
||||
- vague chatter without clear project/action relevance
|
||||
- near-duplicates of the same point
|
||||
- routine logistics whose value expires quickly, such as standup scheduling, standup attendance, sick notes, lunch/commute coordination, and "can we move this?" chatter once the event is likely past
|
||||
|
||||
Return only ids from the candidate list. You may return fewer than ${limit} ids if fewer messages are useful. Prefer relevance over recency, but use recency as a tiebreaker.
|
||||
|
||||
# Candidates
|
||||
|
||||
${messages}`;
|
||||
return candidates.filter(candidate =>
|
||||
!isExpiredRoutineLogistics(candidate, nowMs) && !isLowValueNoise(candidate));
|
||||
}
|
||||
|
||||
// Deterministic Home feed: drop noise, then order by recency and cap. No LLM
|
||||
// call — the filter does the de-noising and recency does the ordering.
|
||||
// (kept async so the IPC caller's contract is unchanged.)
|
||||
export async function rankSlackHomeMessages(
|
||||
candidates: SlackHomeRankCandidate[],
|
||||
limit: number,
|
||||
): Promise<string[]> {
|
||||
const relevantCandidates = filterSlackHomeCandidatesForRelevance(candidates);
|
||||
|
||||
if (relevantCandidates.length <= limit) {
|
||||
return timeRank(relevantCandidates, limit);
|
||||
}
|
||||
|
||||
try {
|
||||
const modelId = await getKgModel();
|
||||
const { provider } = await getDefaultModelAndProvider();
|
||||
const config = await resolveProviderConfig(provider);
|
||||
const model = createProvider(config).languageModel(modelId);
|
||||
|
||||
const result = await withUseCase({ useCase: 'knowledge_sync', subUseCase: 'slack_home_rank' }, () => generateObject({
|
||||
model,
|
||||
system: 'You rank Slack messages for a personal productivity Home screen. Be selective and return valid ids only.',
|
||||
prompt: buildPrompt(relevantCandidates, limit),
|
||||
schema: RankedSlackMessagesSchema,
|
||||
}));
|
||||
|
||||
captureLlmUsage({
|
||||
useCase: 'knowledge_sync',
|
||||
subUseCase: 'slack_home_rank',
|
||||
model: modelId,
|
||||
provider,
|
||||
usage: result.usage,
|
||||
});
|
||||
|
||||
const validIds = new Set(relevantCandidates.map(candidate => candidate.id));
|
||||
const ranked = result.object.rankedIds.filter(id => validIds.has(id));
|
||||
const seen = new Set<string>();
|
||||
const deduped = ranked.filter(id => {
|
||||
if (seen.has(id)) return false;
|
||||
seen.add(id);
|
||||
return true;
|
||||
});
|
||||
|
||||
return deduped.slice(0, limit);
|
||||
} catch (error) {
|
||||
console.warn('[SlackHomeRank] LLM ranking failed, falling back to recency:', error);
|
||||
return timeRank(relevantCandidates, limit);
|
||||
}
|
||||
return timeRank(filterSlackHomeCandidatesForRelevance(candidates), limit);
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue