mirror of
https://github.com/VectifyAI/PageIndex.git
synced 2026-04-24 23:56:21 +02:00
Refactor issue dedup system to use claude-code-action with /dedupe command
Replace the Copilot-generated inline search logic with a claude-code-action based architecture inspired by anthropic/claude-code's approach:
- Add .claude/commands/dedupe.md with a 5-parallel-search strategy
- Add scripts/comment-on-duplicates.sh with a 3-day grace period warning
- Rewrite issue-dedupe.yml to use claude-code-action and the /dedupe command
- Rewrite the autoclose script to check bot comments, human activity, and thumbs-down reactions
- Rewrite the backfill to trigger the dedupe workflow per issue with rate limiting
- Add concurrency control, a timeout, input validation, and rate-limit retry
- Remove gh.sh (unnecessary) and backfill-dedupe.js (replaced by the workflow trigger)
This commit is contained in:
parent
b3cb9531a4
commit
fd9330c434
8 changed files with 413 additions and 752 deletions
|
|
@ -1,34 +1,32 @@
|
|||
/**
|
||||
* scripts/autoclose-labeled-issues.js
|
||||
*
|
||||
* Closes open issues that carry the "autoclose" label and have been inactive
|
||||
* (no updates) for more than INACTIVITY_DAYS days.
|
||||
* Auto-closes issues that have a bot "possible duplicate" comment older than
|
||||
* 3 days, unless:
|
||||
* - A human has commented after the bot's duplicate comment
|
||||
* - The author reacted with thumbs-down on the duplicate comment
|
||||
*
|
||||
* Required environment variables:
|
||||
* GITHUB_TOKEN – GitHub Actions token (or PAT with repo:issues write access)
|
||||
* REPO_OWNER – Repository owner (e.g. VectifyAI)
|
||||
* REPO_NAME – Repository name (e.g. PageIndex)
|
||||
* GITHUB_TOKEN - GitHub Actions token
|
||||
* REPO_OWNER - Repository owner
|
||||
* REPO_NAME - Repository name
|
||||
*
|
||||
* Optional environment variables:
|
||||
* INACTIVITY_DAYS – Days of inactivity before closing (default: 7)
|
||||
* DRY_RUN – If "true", report but do not close issues (default: false)
|
||||
* Optional:
|
||||
* DRY_RUN - If "true", report but do not close (default: false)
|
||||
*/
|
||||
|
||||
'use strict';
|
||||
|
||||
const https = require('https');
|
||||
|
||||
// ── Configuration ─────────────────────────────────────────────────────────────
|
||||
// Settings read once from the environment. main() reports any missing
// required variable before doing network work.
const GITHUB_TOKEN = process.env.GITHUB_TOKEN;
const REPO_OWNER = process.env.REPO_OWNER;
const REPO_NAME = process.env.REPO_NAME;
// Inactivity window in days (default 7), read from INACTIVITY_DAYS.
const INACTIVITY_DAYS = parseInt(process.env.INACTIVITY_DAYS || '7', 10);
// When DRY_RUN is exactly "true", issues are reported but not closed.
const DRY_RUN = process.env.DRY_RUN === 'true';

// Grace period, in milliseconds, between the bot's duplicate notice and closing.
const THREE_DAYS_MS = 3 * 24 * 60 * 60 * 1000;
|
||||
|
||||
// ── HTTP helper ───────────────────────────────────────────────────────────────
|
||||
|
||||
function githubRequest(method, path, body = null) {
|
||||
function githubRequest(method, path, body = null, retried = false) {
|
||||
return new Promise((resolve, reject) => {
|
||||
const payload = body ? JSON.stringify(body) : null;
|
||||
const options = {
|
||||
|
|
@ -37,29 +35,31 @@ function githubRequest(method, path, body = null) {
|
|||
method,
|
||||
headers: {
|
||||
'Authorization': `Bearer ${GITHUB_TOKEN}`,
|
||||
'Accept': 'application/vnd.github+json',
|
||||
'User-Agent': 'PageIndex-Autoclose-Script/1.0',
|
||||
'Accept': 'application/vnd.github+json',
|
||||
'User-Agent': 'PageIndex-Autoclose/1.0',
|
||||
'X-GitHub-Api-Version': '2022-11-28',
|
||||
...(payload ? {
|
||||
'Content-Type': 'application/json',
|
||||
'Content-Length': Buffer.byteLength(payload),
|
||||
} : {}),
|
||||
...(payload ? { 'Content-Type': 'application/json', 'Content-Length': Buffer.byteLength(payload) } : {}),
|
||||
},
|
||||
};
|
||||
|
||||
const req = https.request(options, (res) => {
|
||||
let data = '';
|
||||
res.on('data', chunk => (data += chunk));
|
||||
res.on('end', () => {
|
||||
if (res.statusCode >= 400) {
|
||||
reject(new Error(`GitHub API ${method} ${path} → ${res.statusCode}: ${data}`));
|
||||
res.on('end', async () => {
|
||||
if ((res.statusCode === 403 || res.statusCode === 429) && !retried) {
|
||||
const retryAfter = parseInt(res.headers['retry-after'] || '60', 10);
|
||||
console.log(` Rate limited on ${method} ${path}, retrying after ${retryAfter}s...`);
|
||||
await sleep(retryAfter * 1000);
|
||||
try { resolve(await githubRequest(method, path, body, true)); }
|
||||
catch (err) { reject(err); }
|
||||
return;
|
||||
}
|
||||
try {
|
||||
resolve(data ? JSON.parse(data) : {});
|
||||
} catch {
|
||||
resolve({});
|
||||
if (res.statusCode >= 400) {
|
||||
reject(new Error(`GitHub API ${method} ${path} -> ${res.statusCode}: ${data}`));
|
||||
return;
|
||||
}
|
||||
try { resolve(data ? JSON.parse(data) : {}); }
|
||||
catch { resolve({}); }
|
||||
});
|
||||
});
|
||||
req.on('error', reject);
|
||||
|
|
@ -68,113 +68,162 @@ function githubRequest(method, path, body = null) {
|
|||
});
|
||||
}
|
||||
|
||||
/** Simple sleep helper for rate-limiting. */
|
||||
const sleep = (ms) => new Promise((resolve) => { setTimeout(resolve, ms); });
|
||||
|
||||
// ── Core logic ────────────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Fetches open issues with the "duplicate" label, paginating as needed.
 *
 * Pages of 100 are requested until an empty or short page comes back. Entries
 * carrying a `pull_request` field are dropped, and only issues created more
 * than THREE_DAYS_MS ago are returned.
 *
 * @returns {Promise<object[]>} Matching issues as returned by the GitHub API.
 */
async function fetchDuplicateIssues() {
  const issues = [];
  for (let page = 1; ; page++) {
    const data = await githubRequest(
      'GET',
      `/repos/${REPO_OWNER}/${REPO_NAME}/issues?state=open&labels=duplicate&per_page=100&page=${page}`
    );
    if (!Array.isArray(data) || data.length === 0) break;
    // Filter out any pull requests that may surface
    issues.push(...data.filter((i) => !i.pull_request));
    // A short page is the last page.
    if (data.length < 100) break;
  }

  const cutoff = new Date(Date.now() - THREE_DAYS_MS);
  return issues.filter((i) => new Date(i.created_at) < cutoff);
}
|
||||
|
||||
/**
 * Finds the bot's duplicate comment on an issue (contains "possible duplicate").
 *
 * A comment qualifies when its author has type "Bot" or the login
 * "github-actions[bot]" and its body contains the phrase "possible duplicate".
 * Comments with no author or no body are skipped instead of throwing.
 *
 * @param {object[]} comments - Issue comments as returned by the GitHub API.
 * @returns {object|undefined} The first matching comment, or undefined.
 */
function findDuplicateComment(comments) {
  return comments.find((c) => {
    const author = c.user;
    if (!author || typeof c.body !== 'string') return false;
    const isBot = author.type === 'Bot' || author.login === 'github-actions[bot]';
    return isBot && c.body.includes('possible duplicate');
  });
}
|
||||
|
||||
/**
 * Checks if there are human comments after the duplicate comment.
 *
 * A comment is treated as automated when its author has type "Bot", a login
 * ending in "[bot]", or the login "github-actions". A comment with no author
 * object is counted as human so that it blocks auto-closing rather than
 * crashing the run.
 *
 * @param {object[]} comments - Issue comments as returned by the GitHub API.
 * @param {Date} afterDate - Only comments created strictly after this count.
 * @returns {boolean} True when at least one non-bot comment is newer.
 */
function hasHumanCommentAfter(comments, afterDate) {
  return comments.some((c) => {
    const author = c.user;
    const isBot = Boolean(author) && (
      author.type === 'Bot' ||
      author.login.endsWith('[bot]') ||
      author.login === 'github-actions'
    );
    if (isBot) return false;
    return new Date(c.created_at) > afterDate;
  });
}
|
||||
|
||||
/**
 * Checks if the duplicate comment has a thumbs-down reaction.
 *
 * The request asks the API for "-1" reactions only, with a page size of 100,
 * so a thumbs-down is not hidden behind other reactions on a later page.
 * A thumbs-down from any user counts; the reacting user is not compared with
 * the issue author.
 *
 * @param {number} commentId - ID of the bot's duplicate comment.
 * @returns {Promise<boolean>} True when at least one "-1" reaction exists.
 */
async function hasThumbsDownReaction(commentId) {
  const reactions = await githubRequest(
    'GET',
    `/repos/${REPO_OWNER}/${REPO_NAME}/issues/comments/${commentId}/reactions?content=-1&per_page=100`
  );
  // Keep the client-side check in case the response is not filtered.
  return Array.isArray(reactions) && reactions.some((r) => r.content === '-1');
}
|
||||
|
||||
/**
 * Closes an issue as duplicate with a comment.
 *
 * Three requests are made in order: post the explanatory comment, close the
 * issue, then add the "duplicate" label.
 *
 * @param {number} issueNumber - Number of the issue to close.
 * @returns {Promise<void>}
 */
async function closeAsDuplicate(issueNumber) {
  const issuePath = `/repos/${REPO_OWNER}/${REPO_NAME}/issues/${issueNumber}`;
  const body =
    'This issue has been automatically closed as a duplicate. ' +
    'No human activity or objection was received within the 3-day grace period.\n\n' +
    'If you believe this was closed in error, please reopen the issue and leave a comment.';

  // Post closing comment first
  await githubRequest('POST', `${issuePath}/comments`, { body });

  // Close the issue. A duplicate was not resolved by this issue, so it is
  // closed as "not_planned" rather than "completed".
  await githubRequest('PATCH', issuePath, { state: 'closed', state_reason: 'not_planned' });

  await githubRequest('POST', `${issuePath}/labels`, { labels: ['duplicate'] });
}
|
||||
|
||||
// ── Entry point ───────────────────────────────────────────────────────────────
|
||||
/**
 * Decides whether a single duplicate-labelled issue should be closed, and
 * closes it unless DRY_RUN is set.
 *
 * The issue is skipped when no bot duplicate comment exists, when that comment
 * is younger than THREE_DAYS_MS, when a human commented after it, or when it
 * has a thumbs-down reaction. All comment pages are read so that activity on
 * long threads is not missed.
 *
 * @param {object} issue - Issue object; `number` and `title` are read.
 * @returns {Promise<boolean>} True when the issue was (or, in a dry run,
 *   would have been) closed; false when it was skipped.
 */
async function processIssue(issue) {
  const num = issue.number;
  console.log(`\nChecking issue #${num}: ${issue.title}`);

  const comments = [];
  for (let page = 1; ; page++) {
    const batch = await githubRequest(
      'GET',
      `/repos/${REPO_OWNER}/${REPO_NAME}/issues/${num}/comments?per_page=100&page=${page}`
    );
    if (!Array.isArray(batch)) {
      console.log(` -> Could not fetch comments, skipping.`);
      return false;
    }
    comments.push(...batch);
    // A short page is the last page.
    if (batch.length < 100) break;
  }

  const dupeComment = findDuplicateComment(comments);
  if (!dupeComment) {
    console.log(` -> No duplicate comment found, skipping.`);
    return false;
  }

  const commentDate = new Date(dupeComment.created_at);
  const ageMs = Date.now() - commentDate.getTime();

  if (ageMs < THREE_DAYS_MS) {
    const daysLeft = Math.ceil((THREE_DAYS_MS - ageMs) / (24 * 60 * 60 * 1000));
    console.log(` -> Duplicate comment is less than 3 days old (${daysLeft}d remaining), skipping.`);
    return false;
  }

  if (hasHumanCommentAfter(comments, commentDate)) {
    console.log(` -> Human commented after duplicate comment, skipping.`);
    return false;
  }

  if (await hasThumbsDownReaction(dupeComment.id)) {
    console.log(` -> Author reacted with thumbs-down, skipping.`);
    return false;
  }

  if (DRY_RUN) {
    console.log(` [DRY RUN] Would close issue #${num}`);
    return true;
  }

  await closeAsDuplicate(num);
  console.log(` -> Closed issue #${num} as duplicate`);
  return true;
}
|
||||
|
||||
/**
 * Entry point: validates configuration, fetches duplicate-labelled issues and
 * processes them one at a time with a one-second pause between issues.
 *
 * Exits with status 1 when a required environment variable is missing. A
 * failure on one issue is logged and does not stop the remaining issues.
 *
 * @returns {Promise<void>}
 */
async function main() {
  // Validate required env vars
  const missing = ['GITHUB_TOKEN', 'REPO_OWNER', 'REPO_NAME'].filter((k) => !process.env[k]);
  if (missing.length) {
    console.error(`Missing required environment variables: ${missing.join(', ')}`);
    process.exit(1);
  }

  console.log('Auto-close duplicate issues');
  console.log(` Repository: ${REPO_OWNER}/${REPO_NAME}`);
  console.log(` Dry run: ${DRY_RUN}`);

  const issues = await fetchDuplicateIssues();
  console.log(`\nFound ${issues.length} duplicate-labeled issue(s) older than 3 days.`);

  let closedCount = 0;
  let failedCount = 0;

  for (const issue of issues) {
    try {
      const closed = await processIssue(issue);
      if (closed) closedCount++;
    } catch (err) {
      // Keep going: one bad issue should not block the rest of the batch.
      failedCount++;
      console.error(` -> Failed to process issue #${issue.number}: ${err.message}`);
    }
    // Respect GitHub's secondary rate limit
    await sleep(1000);
  }

  console.log(`\nSummary: ${closedCount} issue(s) closed.`);
  if (failedCount > 0) {
    console.log(`${failedCount} issue(s) could not be processed.`);
  }
}
|
||||
|
||||
main().catch(err => {
|
||||
|
|
|
|||
|
|
@ -1,370 +0,0 @@
|
|||
/**
|
||||
* scripts/backfill-dedupe.js
|
||||
*
|
||||
* Backfills duplicate detection for historical issues.
|
||||
* Fetches issues created within the last DAYS_BACK days, searches for
|
||||
* candidate duplicates via the GitHub Search API, and asks the Anthropic
|
||||
* API to determine whether each issue is a duplicate.
|
||||
*
|
||||
* Required environment variables:
|
||||
* GITHUB_TOKEN – GitHub Actions token (or PAT with repo access)
|
||||
* ANTHROPIC_API_KEY – Anthropic API key (mapped from AUTHROPIC_API_KEY secret)
|
||||
* REPO_OWNER – Repository owner (e.g. VectifyAI)
|
||||
* REPO_NAME – Repository name (e.g. PageIndex)
|
||||
*
|
||||
* Optional environment variables:
|
||||
* DAYS_BACK – How many days back to process (default: 30)
|
||||
* DRY_RUN – If "true", analyse but do not write to GitHub (default: false)
|
||||
*/
|
||||
|
||||
'use strict';
|
||||
|
||||
const https = require('https');
|
||||
|
||||
// ── Configuration ─────────────────────────────────────────────────────────────
|
||||
|
||||
const GITHUB_TOKEN = process.env.GITHUB_TOKEN;
|
||||
const ANTHROPIC_API_KEY = process.env.ANTHROPIC_API_KEY;
|
||||
const REPO_OWNER = process.env.REPO_OWNER;
|
||||
const REPO_NAME = process.env.REPO_NAME;
|
||||
const DAYS_BACK = parseInt(process.env.DAYS_BACK || '30', 10);
|
||||
const DRY_RUN = process.env.DRY_RUN === 'true';
|
||||
|
||||
const STOP_WORDS = new Set([
|
||||
'a','an','the','is','in','on','at','to','for','of','and','or','but','not',
|
||||
'with','this','that','it','be','are','was','has','have','does','do','how',
|
||||
'why','when','where','what','which','who','will','can','could','should',
|
||||
'would','may','might','must','get','got','use','using','used','error',
|
||||
'issue','bug','feature','request','problem','question','please','just',
|
||||
'after','before','during','about','from','into','also','then','than',
|
||||
]);
|
||||
|
||||
// ── HTTP helpers ──────────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Makes an authenticated GitHub REST API request.
 *
 * Rejects on any response with status >= 400; the error message contains the
 * method, path, status and response body, and the status is also exposed as
 * `err.statusCode`. A successful response whose body is empty or is not valid
 * JSON resolves to an empty object.
 *
 * @param {string} method HTTP method
 * @param {string} path API path (e.g. '/repos/owner/repo/issues')
 * @param {object|null} body Request body (will be JSON-encoded)
 * @returns {Promise<object>}
 */
function githubRequest(method, path, body = null) {
  return new Promise((resolve, reject) => {
    const payload = body ? JSON.stringify(body) : null;
    const options = {
      hostname: 'api.github.com',
      path,
      method,
      headers: {
        'Authorization': `Bearer ${GITHUB_TOKEN}`,
        'Accept': 'application/vnd.github+json',
        'User-Agent': 'PageIndex-Backfill-Script/1.0',
        'X-GitHub-Api-Version': '2022-11-28',
        ...(payload ? { 'Content-Type': 'application/json', 'Content-Length': Buffer.byteLength(payload) } : {}),
      },
    };

    const req = https.request(options, (res) => {
      // Decode as a stream so a multi-byte character split across two chunks
      // is not corrupted by per-chunk string conversion.
      res.setEncoding('utf8');
      let data = '';
      res.on('data', (chunk) => { data += chunk; });
      res.on('error', reject);
      res.on('end', () => {
        if (res.statusCode >= 400) {
          const err = new Error(`GitHub API ${method} ${path} → ${res.statusCode}: ${data}`);
          err.statusCode = res.statusCode;
          reject(err);
          return;
        }
        try {
          resolve(data ? JSON.parse(data) : {});
        } catch {
          // Best effort: a non-JSON success body is treated as empty.
          resolve({});
        }
      });
    });
    req.on('error', reject);
    if (payload) req.write(payload);
    req.end();
  });
}
|
||||
|
||||
/**
 * Calls the Anthropic Messages API and returns Claude's text response.
 *
 * The text of every `text` content block in the reply is concatenated.
 * Rejects when the response is not valid JSON, when it carries an `error`
 * object, or when the HTTP status is >= 400.
 *
 * @param {string} prompt User prompt
 * @returns {Promise<string>}
 */
function callClaude(prompt) {
  return new Promise((resolve, reject) => {
    const body = JSON.stringify({
      model: 'claude-haiku-4-5',
      max_tokens: 1024,
      messages: [{ role: 'user', content: prompt }],
    });

    const options = {
      hostname: 'api.anthropic.com',
      path: '/v1/messages',
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        'Content-Length': Buffer.byteLength(body),
        'x-api-key': ANTHROPIC_API_KEY,
        'anthropic-version': '2023-06-01',
      },
    };

    const req = https.request(options, (res) => {
      // Decode as a stream so a multi-byte character split across two chunks
      // is not corrupted by per-chunk string conversion.
      res.setEncoding('utf8');
      let data = '';
      res.on('data', (chunk) => { data += chunk; });
      res.on('error', reject);
      res.on('end', () => {
        try {
          const parsed = JSON.parse(data);
          if (parsed.error) {
            reject(new Error(`Anthropic API error: ${parsed.error.message}`));
            return;
          }
          if (res.statusCode >= 400) {
            // Error status without an `error` object: do not report it as an empty reply.
            reject(new Error(`Anthropic API error: HTTP ${res.statusCode}`));
            return;
          }
          const text = (parsed.content || [])
            .filter((b) => b.type === 'text')
            .map((b) => b.text)
            .join('');
          resolve(text);
        } catch (err) {
          reject(new Error(`Failed to parse Anthropic response: ${err.message}`));
        }
      });
    });
    req.on('error', reject);
    req.write(body);
    req.end();
  });
}
|
||||
|
||||
/** Simple sleep helper for rate-limiting. */
|
||||
const sleep = (ms) => new Promise((resolve) => { setTimeout(resolve, ms); });
|
||||
|
||||
// ── Core logic ────────────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Fetches open issues created since `since` (ISO 8601 string), paginating as needed.
 *
 * The API's `since` parameter selects by last update, so older issues that
 * were merely touched recently also come back; they are dropped here by
 * comparing `created_at` with `since`. Entries carrying a `pull_request`
 * field are dropped as well.
 *
 * @param {string} since - ISO 8601 timestamp marking the start of the window.
 * @returns {Promise<object[]>} Issues created at or after `since`.
 */
async function fetchIssuesSince(since) {
  const cutoffMs = new Date(since).getTime();
  const sinceParam = encodeURIComponent(since);
  const issues = [];
  for (let page = 1; ; page++) {
    const data = await githubRequest(
      'GET',
      `/repos/${REPO_OWNER}/${REPO_NAME}/issues?state=open&sort=created&direction=desc&since=${sinceParam}&per_page=100&page=${page}`
    );
    if (!Array.isArray(data) || data.length === 0) break;
    // Filter out pull requests
    issues.push(...data.filter((i) => !i.pull_request));
    // A short page is the last page.
    if (data.length < 100) break;
  }
  // An unparseable `since` cannot be compared against; return the API result as-is.
  if (Number.isNaN(cutoffMs)) return issues;
  return issues.filter((i) => new Date(i.created_at).getTime() >= cutoffMs);
}
|
||||
|
||||
/**
 * Searches for up to 10 candidate duplicate issues for the given issue.
 *
 * Keywords are taken from the title: lower-cased, punctuation removed, words
 * of two characters or fewer and STOP_WORDS dropped, first six kept. Letters
 * and digits of any script are preserved so non-English titles still produce
 * keywords. Returns an empty list when no keyword survives.
 *
 * @param {object} issue - Issue object; `title` and `number` are read.
 * @returns {Promise<object[]>} Search hits excluding the issue itself and
 *   pull requests.
 */
async function findCandidates(issue) {
  const keywords = (issue.title || '')
    .toLowerCase()
    .replace(/[^\p{L}\p{N}\s]/gu, ' ')
    .split(/\s+/)
    .filter((w) => w.length > 2 && !STOP_WORDS.has(w))
    .slice(0, 6)
    .join(' ');

  if (!keywords) return [];

  const q = encodeURIComponent(
    `repo:${REPO_OWNER}/${REPO_NAME} is:issue state:open ${keywords}`
  );

  const data = await githubRequest('GET', `/search/issues?q=${q}&per_page=15`);
  const items = data && Array.isArray(data.items) ? data.items : [];
  return items
    .filter((item) => item.number !== issue.number && !item.pull_request)
    .slice(0, 10);
}
|
||||
|
||||
/**
 * Builds the duplicate-detection prompt for Claude.
 *
 * The issue body is truncated to 3000 characters and each candidate body to
 * 500 characters. The prompt asks for a bare JSON object with `is_duplicate`,
 * `duplicate_issues` and `explanation` fields.
 *
 * @param {object} issue - Issue under review; `number`, `title`, `body` are read.
 * @param {object[]} candidates - Candidate issues; `number`, `title`,
 *   `html_url`, `body` are read.
 * @returns {string} The complete prompt text.
 */
function buildPrompt(issue, candidates) {
  const candidatesText = candidates
    .map((c) => `#${c.number}: ${c.title}\nURL: ${c.html_url}\n${(c.body || '').substring(0, 500)}`)
    .join('\n---\n');
  const issueBody = (issue.body || '(no body)').substring(0, 3000);

  return [
    'You are a GitHub issue triage assistant.',
    '',
    'Analyze whether the following open issue is a duplicate of any of the candidate issues listed below.',
    '',
    `== NEW ISSUE #${issue.number} ==`,
    `Title: ${issue.title}`,
    'Body:',
    issueBody,
    '',
    '== CANDIDATE ISSUES (up to 10) ==',
    candidatesText,
    '',
    'RULES:',
    '- Only flag as a duplicate if you are at least 85% confident.',
    '- A minor difference in wording does NOT make an issue non-duplicate if they describe the same underlying problem or feature request.',
    '',
    'Respond with ONLY a JSON object (no markdown, no other text):',
    '{',
    '  "is_duplicate": true or false,',
    '  "duplicate_issues": [array of integer issue numbers that this is a duplicate of, empty if none],',
    '  "explanation": "one or two sentences explaining your reasoning"',
    '}',
  ].join('\n');
}
|
||||
|
||||
/**
 * Parses Claude's JSON response robustly.
 *
 * The first "{" through the last "}" of the text is parsed as JSON.
 * `is_duplicate` is true only for a literal JSON `true`; `duplicate_issues`
 * keeps only positive integers (numbers or numeric strings), without repeats.
 *
 * @param {string} text - Raw reply text.
 * @returns {{is_duplicate: boolean, duplicate_issues: number[], explanation: string}|null}
 *   The normalized result, or null when no JSON object can be extracted.
 */
function parseClaudeResponse(text) {
  if (typeof text !== 'string') return null;

  // Try to extract a JSON object from the response
  const jsonMatch = text.match(/\{[\s\S]*\}/);
  if (!jsonMatch) return null;

  let parsed;
  try {
    parsed = JSON.parse(jsonMatch[0]);
  } catch {
    return null;
  }

  const rawNumbers = Array.isArray(parsed.duplicate_issues) ? parsed.duplicate_issues : [];
  const numbers = rawNumbers
    // Only numbers and numeric strings are candidates; Number(true) or
    // Number(null) would otherwise turn into 1 and 0.
    .map((n) => (typeof n === 'number' || typeof n === 'string' ? Number(n) : NaN))
    .filter((n) => Number.isInteger(n) && n > 0);

  return {
    // Boolean("false") is true, so require a real boolean.
    is_duplicate: parsed.is_duplicate === true,
    duplicate_issues: [...new Set(numbers)],
    explanation: String(parsed.explanation || ''),
  };
}
|
||||
|
||||
/**
 * Posts a duplicate-found comment on the issue.
 *
 * The comment lists the duplicate issues, repeats the explanation, and ends
 * with a hidden `DEDUPE_RESULT` HTML comment holding the issue numbers as
 * JSON. Repeated issue numbers are listed once.
 *
 * @param {number} issueNumber - Issue to comment on.
 * @param {number[]} duplicateIssueNumbers - Issues this one duplicates.
 * @param {string} explanation - Short reasoning shown to the reader.
 * @returns {Promise<void>}
 */
async function postDuplicateComment(issueNumber, duplicateIssueNumbers, explanation) {
  const uniqueNumbers = [...new Set(duplicateIssueNumbers)];
  const links = uniqueNumbers.map((n) => `- #${n}`).join('\n');

  const paragraphs = [
    '👋 Thank you for taking the time to open this issue!',
    'After automated analysis, this issue appears to be a duplicate of:',
    links,
    explanation,
    'Please subscribe to the original issue(s) above to follow updates. ' +
      'This issue will be automatically closed after a short inactivity period.',
    `<!-- DEDUPE_RESULT: {"is_duplicate":true,"issues":${JSON.stringify(uniqueNumbers)}} -->`,
  ];
  const body = paragraphs.join('\n\n');

  await githubRequest(
    'POST',
    `/repos/${REPO_OWNER}/${REPO_NAME}/issues/${issueNumber}/comments`,
    { body }
  );
}
|
||||
|
||||
/**
 * Adds labels to an issue, creating them if they do not exist.
 *
 * Each label is looked up first and created only when the lookup fails with
 * 404. Any other lookup failure is rethrown instead of being mistaken for
 * "label missing".
 *
 * @param {number} issueNumber - Issue to label.
 * @param {string[]} labelNames - Labels to ensure and apply.
 * @returns {Promise<void>}
 */
async function ensureLabelAndApply(issueNumber, labelNames) {
  const knownLabels = {
    duplicate: { color: 'cfd3d7', description: 'This issue or pull request already exists' },
    autoclose: { color: 'e4e669', description: 'Will be auto-closed after a period of inactivity' },
  };
  // Recognizes a failure with the given HTTP status, either from a
  // `statusCode` property or from the "→ <status>:" part of the error message.
  const hasStatus = (err, status) =>
    Boolean(err) && (
      err.statusCode === status ||
      new RegExp(`(?:→|->) ${status}:`).test(String(err.message))
    );

  for (const name of labelNames) {
    try {
      await githubRequest('GET', `/repos/${REPO_OWNER}/${REPO_NAME}/labels/${encodeURIComponent(name)}`);
    } catch (err) {
      if (!hasStatus(err, 404)) throw err;
      const meta = knownLabels[name] || { color: 'ededed', description: '' };
      await githubRequest('POST', `/repos/${REPO_OWNER}/${REPO_NAME}/labels`, { name, ...meta });
    }
  }

  await githubRequest(
    'POST',
    `/repos/${REPO_OWNER}/${REPO_NAME}/issues/${issueNumber}/labels`,
    { labels: labelNames }
  );
}
|
||||
|
||||
/**
 * Processes a single issue: finds candidates, asks Claude, and acts on the result.
 *
 * Issues already labelled "duplicate" are skipped. Issue numbers in Claude's
 * answer are accepted only if they were among the candidates, so an invented
 * number or the issue's own number never reaches the comment. In a dry run
 * nothing is written.
 *
 * @param {object} issue - Issue object; `number`, `title`, `labels` are read.
 * @returns {Promise<void>}
 */
async function processIssue(issue) {
  const num = issue.number;
  console.log(`\nProcessing issue #${num}: ${issue.title}`);

  // Skip already-labelled issues
  const existingLabels = (issue.labels || []).map((l) => (typeof l === 'string' ? l : l.name));
  if (existingLabels.includes('duplicate')) {
    console.log(` → Already labelled as duplicate, skipping.`);
    return;
  }

  const candidates = await findCandidates(issue);
  if (candidates.length === 0) {
    console.log(` → No candidates found, skipping.`);
    return;
  }
  console.log(` → Found ${candidates.length} candidate(s): ${candidates.map((c) => `#${c.number}`).join(', ')}`);

  const prompt = buildPrompt(issue, candidates);
  const rawReply = await callClaude(prompt);
  const result = parseClaudeResponse(rawReply);

  if (!result) {
    console.warn(` ⚠️ Could not parse Claude response for #${num}. Raw:\n${String(rawReply).substring(0, 300)}`);
    return;
  }

  console.log(` → is_duplicate=${result.is_duplicate}, issues=${JSON.stringify(result.duplicate_issues)}`);
  console.log(` ${result.explanation}`);

  // Keep only numbers that were actually offered as candidates.
  const candidateNumbers = new Set(candidates.map((c) => c.number));
  const confirmed = result.duplicate_issues.filter((n) => candidateNumbers.has(n));

  if (!result.is_duplicate || confirmed.length === 0) {
    console.log(` → Not a duplicate.`);
    return;
  }

  if (DRY_RUN) {
    console.log(` [DRY RUN] Would post comment and apply labels to #${num}`);
    return;
  }

  await postDuplicateComment(num, confirmed, result.explanation);
  await ensureLabelAndApply(num, ['duplicate', 'autoclose']);
  console.log(` ✅ Commented and labelled #${num}`);
}
|
||||
|
||||
// ── Entry point ───────────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Entry point: validates configuration, fetches issues from the last
 * DAYS_BACK days and processes them one at a time, pausing 2.5 seconds
 * between issues.
 *
 * Exits with status 1 when a required environment variable is missing. A
 * failure on one issue is logged and does not stop the remaining issues.
 *
 * @returns {Promise<void>}
 */
async function main() {
  // Validate required env vars
  const missing = ['GITHUB_TOKEN', 'ANTHROPIC_API_KEY', 'REPO_OWNER', 'REPO_NAME']
    .filter((k) => !process.env[k]);
  if (missing.length) {
    console.error(`Missing required environment variables: ${missing.join(', ')}`);
    process.exit(1);
  }

  const since = new Date(Date.now() - DAYS_BACK * 24 * 60 * 60 * 1000).toISOString();

  console.log(`Backfilling duplicate detection`);
  console.log(` Repository: ${REPO_OWNER}/${REPO_NAME}`);
  console.log(` Days back: ${DAYS_BACK} (since ${since})`);
  console.log(` Dry run: ${DRY_RUN}`);

  const issues = await fetchIssuesSince(since);
  console.log(`\nFetched ${issues.length} open issue(s) to process.`);

  let failedCount = 0;
  for (const issue of issues) {
    try {
      await processIssue(issue);
    } catch (err) {
      // Keep going: one bad issue should not end the whole backfill.
      failedCount++;
      console.error(` ❌ Failed to process #${issue.number}: ${err.message}`);
    }
    // Respect GitHub and Anthropic rate limits
    await sleep(2500);
  }

  console.log('\nBackfill complete.');
  if (failedCount > 0) {
    console.log(`${failedCount} issue(s) could not be processed.`);
  }
}

main().catch((err) => {
  console.error('Fatal error:', err.message);
  process.exit(1);
});
|
||||
106
scripts/comment-on-duplicates.sh
Executable file
106
scripts/comment-on-duplicates.sh
Executable file
|
|
@ -0,0 +1,106 @@
|
|||
#!/usr/bin/env bash
#
# comment-on-duplicates.sh - Posts a duplicate issue comment with auto-close warning.
#
# Usage:
#   ./scripts/comment-on-duplicates.sh --base-issue 123 --potential-duplicates 456 789
#
# Uses the GitHub CLI (gh) and reads the target repository from
# GITHUB_REPOSITORY. At most three duplicates are listed. The base issue
# itself and repeated numbers are ignored.
#
set -euo pipefail

REPO="${GITHUB_REPOSITORY:-}"
if [ -z "$REPO" ]; then
  echo "Error: GITHUB_REPOSITORY is not set" >&2
  exit 1
fi

BASE_ISSUE=""
DUPLICATES=()

# Parse arguments
while [[ $# -gt 0 ]]; do
  case "$1" in
    --base-issue)
      # Without this check a missing value trips `set -u` with an unbound $2.
      if [[ $# -lt 2 ]]; then
        echo "Error: --base-issue requires a value" >&2
        exit 1
      fi
      BASE_ISSUE="$2"
      shift 2
      ;;
    --potential-duplicates)
      shift
      # Collect every following argument up to the next --option.
      while [[ $# -gt 0 && ! "$1" =~ ^-- ]]; do
        DUPLICATES+=("$1")
        shift
      done
      ;;
    *)
      echo "Error: Unknown argument: $1" >&2
      exit 1
      ;;
  esac
done

# Validate inputs
if [ -z "$BASE_ISSUE" ]; then
  echo "Error: --base-issue is required" >&2
  exit 1
fi

if ! [[ "$BASE_ISSUE" =~ ^[0-9]+$ ]]; then
  echo "Error: --base-issue must be a number, got: $BASE_ISSUE" >&2
  exit 1
fi

if [ ${#DUPLICATES[@]} -eq 0 ]; then
  echo "Error: --potential-duplicates requires at least one issue number" >&2
  exit 1
fi

# Validate each number, then drop the base issue itself and repeated entries.
UNIQUE=()
SEEN=" "
for dup in "${DUPLICATES[@]}"; do
  if ! [[ "$dup" =~ ^[0-9]+$ ]]; then
    echo "Error: duplicate issue must be a number, got: $dup" >&2
    exit 1
  fi
  if [ "$dup" = "$BASE_ISSUE" ]; then
    echo "Warning: Issue #$dup is the base issue itself, skipping" >&2
    continue
  fi
  if [[ "$SEEN" == *" $dup "* ]]; then
    continue
  fi
  SEEN="${SEEN}${dup} "
  UNIQUE+=("$dup")
done

if [ ${#UNIQUE[@]} -eq 0 ]; then
  echo "Error: --potential-duplicates requires at least one issue number other than the base issue" >&2
  exit 1
fi

# Limit to 3 duplicates max
if [ ${#UNIQUE[@]} -gt 3 ]; then
  echo "Warning: Limiting to first 3 duplicates" >&2
  UNIQUE=("${UNIQUE[@]:0:3}")
fi

# Validate that the base issue exists and is open
BASE_STATE="$(gh issue view "$BASE_ISSUE" --repo "$REPO" --json state -q '.state' 2>/dev/null || true)"
if ! [[ "$BASE_STATE" =~ ^[Oo][Pp][Ee][Nn]$ ]]; then
  echo "Error: Issue #$BASE_ISSUE is not open or does not exist" >&2
  exit 1
fi

# Build the duplicate links list
LINKS=""
COUNT=0
for dup in "${UNIQUE[@]}"; do
  # Validate duplicate issue exists
  if gh issue view "$dup" --repo "$REPO" --json number -q '.number' > /dev/null 2>&1; then
    COUNT=$((COUNT + 1))
    LINKS="${LINKS}${COUNT}. https://github.com/${REPO}/issues/${dup}
"
  else
    echo "Warning: Issue #$dup does not exist, skipping" >&2
  fi
done

if [ "$COUNT" -eq 0 ]; then
  echo "Error: None of the specified duplicate issues exist" >&2
  exit 1
fi

# Build and post the comment
COMMENT="Found ${COUNT} possible duplicate issue(s):

${LINKS}
This issue will be automatically closed as a duplicate in 3 days.
- To prevent auto-closure, add a comment or react with :thumbsdown: on this comment."

gh issue comment "$BASE_ISSUE" --repo "$REPO" --body "$COMMENT"

# Add the duplicate label
gh issue edit "$BASE_ISSUE" --repo "$REPO" --add-label "duplicate"

echo "Posted duplicate comment on issue #$BASE_ISSUE with $COUNT potential duplicate(s)"
|
||||
Loading…
Add table
Add a link
Reference in a new issue