Refactor issue dedup system to use claude-code-action with /dedupe command

Replace the copilot-generated inline search logic with a claude-code-action
based architecture inspired by anthropic/claude-code's approach:

- Add .claude/commands/dedupe.md with 5-parallel-search strategy
- Add scripts/comment-on-duplicates.sh with 3-day grace period warning
- Rewrite issue-dedupe.yml to use claude-code-action + /dedupe command
- Rewrite autoclose script to check bot comments, human activity, and thumbsdown
- Rewrite backfill to trigger dedupe workflow per issue with rate limiting
- Add concurrency control, timeout, input validation, and rate limit retry
- Remove gh.sh (unnecessary), backfill-dedupe.js (replaced by workflow trigger)
This commit is contained in:
BukeLy 2026-03-02 17:05:44 +08:00
parent b3cb9531a4
commit fd9330c434
8 changed files with 413 additions and 752 deletions

View file

@ -1,34 +1,32 @@
/**
* scripts/autoclose-labeled-issues.js
*
* Closes open issues that carry the "autoclose" label and have been inactive
* (no updates) for more than INACTIVITY_DAYS days.
* Auto-closes issues that have a bot "possible duplicate" comment older than
* 3 days, unless:
* - A human has commented after the bot's duplicate comment
* - The author reacted with thumbs-down on the duplicate comment
*
* Required environment variables:
* GITHUB_TOKEN GitHub Actions token (or PAT with repo:issues write access)
* REPO_OWNER Repository owner (e.g. VectifyAI)
* REPO_NAME Repository name (e.g. PageIndex)
* GITHUB_TOKEN - GitHub Actions token
* REPO_OWNER - Repository owner
* REPO_NAME - Repository name
*
* Optional environment variables:
* INACTIVITY_DAYS Days of inactivity before closing (default: 7)
* DRY_RUN If "true", report but do not close issues (default: false)
* Optional:
* DRY_RUN - If "true", report but do not close (default: false)
*/
'use strict';
const https = require('https');
// ── Configuration ─────────────────────────────────────────────────────────────
const GITHUB_TOKEN = process.env.GITHUB_TOKEN;
const REPO_OWNER = process.env.REPO_OWNER;
const REPO_NAME = process.env.REPO_NAME;
const DRY_RUN = process.env.DRY_RUN === 'true';
const GITHUB_TOKEN = process.env.GITHUB_TOKEN;
const REPO_OWNER = process.env.REPO_OWNER;
const REPO_NAME = process.env.REPO_NAME;
const INACTIVITY_DAYS = parseInt(process.env.INACTIVITY_DAYS || '7', 10);
const DRY_RUN = process.env.DRY_RUN === 'true';
const THREE_DAYS_MS = 3 * 24 * 60 * 60 * 1000;
// ── HTTP helper ───────────────────────────────────────────────────────────────
function githubRequest(method, path, body = null) {
function githubRequest(method, path, body = null, retried = false) {
return new Promise((resolve, reject) => {
const payload = body ? JSON.stringify(body) : null;
const options = {
@ -37,29 +35,31 @@ function githubRequest(method, path, body = null) {
method,
headers: {
'Authorization': `Bearer ${GITHUB_TOKEN}`,
'Accept': 'application/vnd.github+json',
'User-Agent': 'PageIndex-Autoclose-Script/1.0',
'Accept': 'application/vnd.github+json',
'User-Agent': 'PageIndex-Autoclose/1.0',
'X-GitHub-Api-Version': '2022-11-28',
...(payload ? {
'Content-Type': 'application/json',
'Content-Length': Buffer.byteLength(payload),
} : {}),
...(payload ? { 'Content-Type': 'application/json', 'Content-Length': Buffer.byteLength(payload) } : {}),
},
};
const req = https.request(options, (res) => {
let data = '';
res.on('data', chunk => (data += chunk));
res.on('end', () => {
if (res.statusCode >= 400) {
reject(new Error(`GitHub API ${method} ${path}${res.statusCode}: ${data}`));
res.on('end', async () => {
if ((res.statusCode === 403 || res.statusCode === 429) && !retried) {
const retryAfter = parseInt(res.headers['retry-after'] || '60', 10);
console.log(` Rate limited on ${method} ${path}, retrying after ${retryAfter}s...`);
await sleep(retryAfter * 1000);
try { resolve(await githubRequest(method, path, body, true)); }
catch (err) { reject(err); }
return;
}
try {
resolve(data ? JSON.parse(data) : {});
} catch {
resolve({});
if (res.statusCode >= 400) {
reject(new Error(`GitHub API ${method} ${path} -> ${res.statusCode}: ${data}`));
return;
}
try { resolve(data ? JSON.parse(data) : {}); }
catch { resolve({}); }
});
});
req.on('error', reject);
@ -68,113 +68,162 @@ function githubRequest(method, path, body = null) {
});
}
/**
 * Returns a promise that settles (with no value) once `ms` milliseconds
 * have elapsed. Used to space out API requests.
 */
const sleep = (ms) => new Promise((resolve) => {
  setTimeout(resolve, ms);
});
// ── Core logic ────────────────────────────────────────────────────────────────
/**
* Fetches all open issues with the "autoclose" label, paginating as needed.
* Fetches open issues with the "duplicate" label, paginating as needed.
* Only returns issues created more than 3 days ago.
*/
async function fetchAutocloseIssues() {
async function fetchDuplicateIssues() {
// NOTE(review): two signatures, two request paths and two return statements
// appear back to back in this span; it reads like the before/after lines of a
// change shown together — confirm which set is current before editing.
const issues = [];
let page = 1;
// Walk the paginated issue list, requesting 100 items per page.
while (true) {
const data = await githubRequest(
'GET',
`/repos/${REPO_OWNER}/${REPO_NAME}/issues?state=open&labels=autoclose&per_page=100&page=${page}`
`/repos/${REPO_OWNER}/${REPO_NAME}/issues?state=open&labels=duplicate&per_page=100&page=${page}`
);
// Stop on an empty page or on any response that is not an array.
if (!Array.isArray(data) || data.length === 0) break;
// Filter out any pull requests that may surface
issues.push(...data.filter(i => !i.pull_request));
// A page shorter than the requested size is treated as the last one.
if (data.length < 100) break;
page++;
}
return issues;
// Keep only issues whose created_at is more than THREE_DAYS_MS before now.
const cutoff = new Date(Date.now() - THREE_DAYS_MS);
return issues.filter(i => new Date(i.created_at) < cutoff);
}
/**
* Closes a single issue with a polite explanatory comment.
* Finds the bot's duplicate comment on an issue (contains "possible duplicate").
*/
async function closeIssue(issueNumber, inactivityDays) {
const body =
`This issue has been automatically closed because it was marked as a **duplicate** ` +
`and has had no new activity for ${inactivityDays} day(s).\n\n` +
`If you believe this was closed in error, please reopen the issue and leave a comment. ` +
`New human activity will prevent automatic closure in the future.\n\n` +
`Thank you for your contribution! 🙏`;
/**
 * Finds the bot's duplicate notice among an issue's comments.
 *
 * A comment qualifies when its author is a bot (user type "Bot", or the
 * login "github-actions[bot]") and its body contains the phrase
 * "possible duplicate". The first qualifying comment in array order wins.
 *
 * @param {Array<object>} comments - Issue comments as returned by the API.
 * @returns {object|undefined} The matching comment, or undefined if none.
 */
function findDuplicateComment(comments) {
  return comments.find((c) => {
    // A comment without author data or without a text body cannot be
    // identified as the bot's notice; skip it instead of throwing.
    const author = c.user;
    if (!author || typeof c.body !== 'string') return false;

    const isBot = author.type === 'Bot' || author.login === 'github-actions[bot]';
    return isBot && c.body.includes('possible duplicate');
  });
}
/**
 * Checks if there are human comments after the duplicate comment.
 *
 * An author counts as a bot when the user type is "Bot", the login ends in
 * "[bot]", or the login is exactly "github-actions". A comment that carries
 * no author data is counted as human, so that missing data blocks an
 * automatic close rather than permitting one.
 *
 * @param {Array<object>} comments - Issue comments as returned by the API.
 * @param {Date} afterDate - Only comments created strictly later than this count.
 * @returns {boolean} True if at least one non-bot comment is newer than afterDate.
 */
function hasHumanCommentAfter(comments, afterDate) {
  return comments.some((c) => {
    // Tolerate a missing `user` object or login instead of throwing.
    const author = c.user || {};
    const login = typeof author.login === 'string' ? author.login : '';

    const isBot =
      author.type === 'Bot' || login.endsWith('[bot]') || login === 'github-actions';
    if (isBot) return false;

    return new Date(c.created_at) > afterDate;
  });
}
/**
 * Checks if the duplicate comment has a thumbs-down reaction.
 *
 * Any "-1" reaction counts, regardless of which user left it; the reacting
 * user is not inspected here.
 *
 * @param {number|string} commentId - ID of the issue comment to inspect.
 * @returns {Promise<boolean>} True when at least one "-1" reaction exists.
 */
async function hasThumbsDownReaction(commentId) {
  // Ask the API for "-1" reactions only, so a thumbs-down can never be hidden
  // behind a first page filled with other reaction types. per_page=100 keeps
  // the check meaningful even if the content filter were not applied.
  const reactions = await githubRequest(
    'GET',
    `/repos/${REPO_OWNER}/${REPO_NAME}/issues/comments/${commentId}/reactions?content=-1&per_page=100`
  );
  // An error payload or other non-array response is treated as "no reaction".
  return Array.isArray(reactions) && reactions.some(r => r.content === '-1');
}
/**
* Closes an issue as duplicate with a comment.
*/
async function closeAsDuplicate(issueNumber) {
// Explanation posted on the issue before it is closed.
const body =
'This issue has been automatically closed as a duplicate. ' +
'No human activity or objection was received within the 3-day grace period.\n\n' +
'If you believe this was closed in error, please reopen the issue and leave a comment.';
// Post closing comment first
await githubRequest(
'POST',
`/repos/${REPO_OWNER}/${REPO_NAME}/issues/${issueNumber}/comments`,
{ body }
);
// Close the issue
await githubRequest(
'PATCH',
`/repos/${REPO_OWNER}/${REPO_NAME}/issues/${issueNumber}`,
// NOTE(review): two payload lines ('not_planned' and 'completed') appear here
// back to back; only one can belong to the real call — confirm which is
// current, and whether 'completed' is the intended reason for a duplicate.
{ state: 'closed', state_reason: 'not_planned' }
{ state: 'closed', state_reason: 'completed' }
);
// Add the "duplicate" label to the issue that was just closed.
await githubRequest(
'POST',
`/repos/${REPO_OWNER}/${REPO_NAME}/issues/${issueNumber}/labels`,
{ labels: ['duplicate'] }
);
}
// ── Entry point ───────────────────────────────────────────────────────────────
/**
 * Evaluates a single issue and closes it as a duplicate when all checks pass.
 *
 * The issue is closed only if a bot "possible duplicate" comment exists, that
 * comment is at least THREE_DAYS_MS old, no human has commented after it, and
 * it carries no thumbs-down reaction. With DRY_RUN set, the issue is reported
 * but not closed.
 *
 * Failures while talking to the API are logged and reported as "not closed"
 * so that one problematic issue does not abort the rest of the batch.
 *
 * @param {object} issue - Issue object; `number` and `title` are read.
 * @returns {Promise<boolean>} True if the issue was closed (or would be, in a
 *   dry run); false if it was skipped or processing failed.
 */
async function processIssue(issue) {
  const num = issue.number;
  console.log(`\nChecking issue #${num}: ${issue.title}`);

  try {
    // Read every page of comments: a human reply beyond the first 100
    // comments must still be able to block the auto-close.
    const comments = [];
    for (let page = 1; ; page++) {
      const batch = await githubRequest(
        'GET',
        `/repos/${REPO_OWNER}/${REPO_NAME}/issues/${num}/comments?per_page=100&page=${page}`
      );
      if (!Array.isArray(batch)) {
        console.log(` -> Could not fetch comments, skipping.`);
        return false;
      }
      comments.push(...batch);
      // A page shorter than the requested size is the last one.
      if (batch.length < 100) break;
    }

    const dupeComment = findDuplicateComment(comments);
    if (!dupeComment) {
      console.log(` -> No duplicate comment found, skipping.`);
      return false;
    }

    // The grace period is measured from the bot's comment, not from the issue.
    const commentDate = new Date(dupeComment.created_at);
    const ageMs = Date.now() - commentDate.getTime();
    if (ageMs < THREE_DAYS_MS) {
      const daysLeft = Math.ceil((THREE_DAYS_MS - ageMs) / (24 * 60 * 60 * 1000));
      console.log(` -> Duplicate comment is less than 3 days old (${daysLeft}d remaining), skipping.`);
      return false;
    }

    if (hasHumanCommentAfter(comments, commentDate)) {
      console.log(` -> Human commented after duplicate comment, skipping.`);
      return false;
    }

    if (await hasThumbsDownReaction(dupeComment.id)) {
      console.log(` -> Author reacted with thumbs-down, skipping.`);
      return false;
    }

    if (DRY_RUN) {
      console.log(` [DRY RUN] Would close issue #${num}`);
      return true;
    }

    await closeAsDuplicate(num);
    console.log(` -> Closed issue #${num} as duplicate`);
    return true;
  } catch (err) {
    // Keep the batch going; the caller counts this issue as not closed.
    console.error(` -> Failed to process issue #${num}: ${err.message}`);
    return false;
  }
}
async function main() {
// NOTE(review): this span shows two versions of several statements back to
// back (two `missing` declarations, two banner blocks, two `issues` fetches,
// two loop bodies, two summaries); it reads like before/after lines of a
// change shown together — confirm which set is current before editing.
// Validate required env vars
const missing = ['GITHUB_TOKEN', 'REPO_OWNER', 'REPO_NAME']
.filter(k => !process.env[k]);
const missing = ['GITHUB_TOKEN', 'REPO_OWNER', 'REPO_NAME'].filter(k => !process.env[k]);
// Exit with status 1 when any required variable is unset or empty.
if (missing.length) {
console.error(`Missing required environment variables: ${missing.join(', ')}`);
process.exit(1);
}
const cutoff = new Date(Date.now() - INACTIVITY_DAYS * 24 * 60 * 60 * 1000);
console.log('Auto-close duplicate issues');
console.log(` Repository: ${REPO_OWNER}/${REPO_NAME}`);
console.log(` Dry run: ${DRY_RUN}`);
console.log(`Auto-close inactive labelled issues`);
console.log(` Repository: ${REPO_OWNER}/${REPO_NAME}`);
console.log(` Inactivity days: ${INACTIVITY_DAYS} (cutoff: ${cutoff.toISOString()})`);
console.log(` Dry run: ${DRY_RUN}`);
const issues = await fetchAutocloseIssues();
console.log(`\nFound ${issues.length} open issue(s) with "autoclose" label.`);
const issues = await fetchDuplicateIssues();
console.log(`\nFound ${issues.length} duplicate-labeled issue(s) older than 3 days.`);
let closedCount = 0;
let skippedCount = 0;
// Issues are handled one at a time, in the order they were fetched.
for (const issue of issues) {
const lastActivity = new Date(issue.updated_at);
const inactive = lastActivity < cutoff;
const daysSince = Math.floor((Date.now() - lastActivity.getTime()) / (1000 * 60 * 60 * 24));
if (!inactive) {
console.log(` #${issue.number} — active ${daysSince}d ago, skipping.`);
skippedCount++;
continue;
}
console.log(` #${issue.number} — inactive for ${daysSince}d: "${issue.title}"`);
if (DRY_RUN) {
console.log(` [DRY RUN] Would close issue #${issue.number}`);
closedCount++;
continue;
}
try {
await closeIssue(issue.number, INACTIVITY_DAYS);
console.log(` ✅ Closed issue #${issue.number}`);
closedCount++;
} catch (err) {
console.error(` ❌ Failed to close #${issue.number}: ${err.message}`);
}
// Respect GitHub's secondary rate limit
// processIssue returns true when the issue was closed (or would be, in a dry run).
const closed = await processIssue(issue);
if (closed) closedCount++;
// One-second pause before moving on to the next issue.
await sleep(1000);
}
console.log(`\nSummary: ${closedCount} closed, ${skippedCount} still active.`);
console.log(`\nSummary: ${closedCount} issue(s) closed.`);
}
main().catch(err => {