Add GitHub Actions workflows for issue deduplication and auto-close

Co-authored-by: BukeLy <19304666+BukeLy@users.noreply.github.com>
This commit is contained in:
copilot-swe-agent[bot] 2026-03-02 03:54:18 +00:00
parent f56261cee1
commit b3cb9531a4
7 changed files with 1013 additions and 0 deletions

View file

@ -0,0 +1,183 @@
/**
* scripts/autoclose-labeled-issues.js
*
* Closes open issues that carry the "autoclose" label and have been inactive
* (no updates) for more than INACTIVITY_DAYS days.
*
* Required environment variables:
* GITHUB_TOKEN GitHub Actions token (or PAT with repo:issues write access)
* REPO_OWNER Repository owner (e.g. VectifyAI)
* REPO_NAME Repository name (e.g. PageIndex)
*
* Optional environment variables:
* INACTIVITY_DAYS Days of inactivity before closing (default: 7)
* DRY_RUN If "true", report but do not close issues (default: false)
*/
'use strict';
const https = require('https');
// ── Configuration ─────────────────────────────────────────────────────────────
// Credentials and repository coordinates are read once from the environment.
const { GITHUB_TOKEN, REPO_OWNER, REPO_NAME } = process.env;
// Inactivity threshold in days; an unset or empty variable falls back to 7.
const INACTIVITY_DAYS = parseInt(process.env.INACTIVITY_DAYS || '7', 10);
// Only the exact string "true" switches dry-run mode on.
const DRY_RUN = process.env.DRY_RUN === 'true';
// ── HTTP helper ───────────────────────────────────────────────────────────────
/**
 * Sends an authenticated request to the GitHub REST API.
 *
 * @param {string} method HTTP method (e.g. 'GET', 'POST', 'PATCH')
 * @param {string} path API path, including any query string
 * @param {object|null} body Request body; JSON-encoded when truthy
 * @returns {Promise<object>} Parsed JSON response, or `{}` when the response
 *   body is empty or is not valid JSON. Rejects with an Error when the status
 *   code is 400 or higher, or when the request or response stream fails.
 */
function githubRequest(method, path, body = null) {
  return new Promise((resolve, reject) => {
    const payload = body ? JSON.stringify(body) : null;
    const options = {
      hostname: 'api.github.com',
      path,
      method,
      headers: {
        'Authorization': `Bearer ${GITHUB_TOKEN}`,
        'Accept': 'application/vnd.github+json',
        'User-Agent': 'PageIndex-Autoclose-Script/1.0',
        'X-GitHub-Api-Version': '2022-11-28',
        ...(payload ? {
          'Content-Type': 'application/json',
          'Content-Length': Buffer.byteLength(payload),
        } : {}),
      },
    };
    const req = https.request(options, (res) => {
      // Keep the raw bytes and decode once at the end: converting each chunk
      // to a string separately corrupts any multi-byte UTF-8 character that
      // happens to straddle a chunk boundary.
      const chunks = [];
      res.on('data', (chunk) => {
        chunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk));
      });
      res.on('error', reject);
      res.on('end', () => {
        const data = Buffer.concat(chunks).toString('utf8');
        if (res.statusCode >= 400) {
          reject(new Error(`GitHub API ${method} ${path} → ${res.statusCode}: ${data}`));
          return;
        }
        try {
          resolve(data ? JSON.parse(data) : {});
        } catch {
          // Best effort: a successful response whose body is not JSON is
          // treated the same as an empty body.
          resolve({});
        }
      });
    });
    req.on('error', reject);
    if (payload) req.write(payload);
    req.end();
  });
}
/** Returns a promise that settles after `ms` milliseconds; used to pace API calls. */
const sleep = (ms) => new Promise((resolve) => {
  setTimeout(resolve, ms);
});
// ── Core logic ────────────────────────────────────────────────────────────────
/**
 * Collects every open issue labelled "autoclose", walking the paginated
 * listing 100 entries at a time. Entries that carry a `pull_request` field
 * are dropped. Paging stops at the first empty, non-array, or short page.
 *
 * @returns {Promise<object[]>} Issue objects as returned by the GitHub API
 */
async function fetchAutocloseIssues() {
  const collected = [];
  for (let pageNo = 1; ; pageNo += 1) {
    const batch = await githubRequest(
      'GET',
      `/repos/${REPO_OWNER}/${REPO_NAME}/issues?state=open&labels=autoclose&per_page=100&page=${pageNo}`
    );
    if (!Array.isArray(batch) || batch.length === 0) {
      return collected;
    }
    for (const entry of batch) {
      // The listing can include pull requests; keep plain issues only.
      if (!entry.pull_request) {
        collected.push(entry);
      }
    }
    // A page shorter than the page size is the last one.
    if (batch.length < 100) {
      return collected;
    }
  }
}
/**
 * Posts an explanatory comment on an issue and then closes it with the
 * "not_planned" state reason. The comment is sent before the close request.
 *
 * @param {number} issueNumber Number of the issue to close
 * @param {number} inactivityDays Inactivity threshold quoted in the comment
 * @returns {Promise<void>}
 */
async function closeIssue(issueNumber, inactivityDays) {
  const issuePath = `/repos/${REPO_OWNER}/${REPO_NAME}/issues/${issueNumber}`;
  // Three paragraphs separated by blank lines.
  const notice = [
    `This issue has been automatically closed because it was marked as a **duplicate** ` +
      `and has had no new activity for ${inactivityDays} day(s).`,
    `If you believe this was closed in error, please reopen the issue and leave a comment. ` +
      `New human activity will prevent automatic closure in the future.`,
    `Thank you for your contribution! 🙏`,
  ].join('\n\n');

  await githubRequest('POST', `${issuePath}/comments`, { body: notice });
  await githubRequest('PATCH', issuePath, { state: 'closed', state_reason: 'not_planned' });
}
// ── Entry point ───────────────────────────────────────────────────────────────
/**
 * Entry point: validates configuration, lists open issues labelled
 * "autoclose", and closes those whose `updated_at` is older than the
 * inactivity cutoff. In dry-run mode the issues are only reported.
 *
 * Exits the process with code 1 when a required environment variable is
 * missing or INACTIVITY_DAYS is not a non-negative integer. A failure to
 * close an individual issue is logged and does not stop the run.
 *
 * @returns {Promise<void>}
 */
async function main() {
  // Validate required env vars
  const missing = ['GITHUB_TOKEN', 'REPO_OWNER', 'REPO_NAME']
    .filter(k => !process.env[k]);
  if (missing.length) {
    console.error(`Missing required environment variables: ${missing.join(', ')}`);
    process.exit(1);
  }
  // A non-numeric INACTIVITY_DAYS parses to NaN (an invalid cutoff date), and
  // a negative value moves the cutoff into the future, which would close
  // every labelled issue immediately. Refuse to run in either case.
  if (!Number.isInteger(INACTIVITY_DAYS) || INACTIVITY_DAYS < 0) {
    console.error(
      `INACTIVITY_DAYS must be a non-negative integer (got "${process.env.INACTIVITY_DAYS}")`
    );
    process.exit(1);
  }
  const cutoff = new Date(Date.now() - INACTIVITY_DAYS * 24 * 60 * 60 * 1000);
  console.log(`Auto-close inactive labelled issues`);
  console.log(` Repository: ${REPO_OWNER}/${REPO_NAME}`);
  console.log(` Inactivity days: ${INACTIVITY_DAYS} (cutoff: ${cutoff.toISOString()})`);
  console.log(` Dry run: ${DRY_RUN}`);
  const issues = await fetchAutocloseIssues();
  console.log(`\nFound ${issues.length} open issue(s) with "autoclose" label.`);
  let closedCount = 0;
  let skippedCount = 0;
  for (const issue of issues) {
    const lastActivity = new Date(issue.updated_at);
    const inactive = lastActivity < cutoff;
    const daysSince = Math.floor((Date.now() - lastActivity.getTime()) / (1000 * 60 * 60 * 24));
    if (!inactive) {
      console.log(` #${issue.number} — active ${daysSince}d ago, skipping.`);
      skippedCount++;
      continue;
    }
    console.log(` #${issue.number} — inactive for ${daysSince}d: "${issue.title}"`);
    if (DRY_RUN) {
      // Dry-run hits are counted as "closed" in the summary below.
      console.log(` [DRY RUN] Would close issue #${issue.number}`);
      closedCount++;
      continue;
    }
    try {
      await closeIssue(issue.number, INACTIVITY_DAYS);
      console.log(` ✅ Closed issue #${issue.number}`);
      closedCount++;
    } catch (err) {
      // Keep going: one failed close should not block the remaining issues.
      console.error(` ❌ Failed to close #${issue.number}: ${err.message}`);
    }
    // Respect GitHub's secondary rate limit
    await sleep(1000);
  }
  console.log(`\nSummary: ${closedCount} closed, ${skippedCount} still active.`);
}
// Start the run; any rejection that escapes main() is reported and mapped to
// a non-zero exit code.
main().catch((fatal) => {
  console.error('Fatal error:', fatal.message);
  process.exit(1);
});

370
scripts/backfill-dedupe.js Normal file
View file

@ -0,0 +1,370 @@
/**
* scripts/backfill-dedupe.js
*
* Backfills duplicate detection for historical issues.
* Fetches issues created within the last DAYS_BACK days, searches for
* candidate duplicates via the GitHub Search API, and asks the Anthropic
* API to determine whether each issue is a duplicate.
*
* Required environment variables:
* GITHUB_TOKEN GitHub Actions token (or PAT with repo access)
* ANTHROPIC_API_KEY Anthropic API key (mapped from AUTHROPIC_API_KEY secret)
* REPO_OWNER Repository owner (e.g. VectifyAI)
* REPO_NAME Repository name (e.g. PageIndex)
*
* Optional environment variables:
* DAYS_BACK How many days back to process (default: 30)
* DRY_RUN If "true", analyse but do not write to GitHub (default: false)
*/
'use strict';
const https = require('https');
// ── Configuration ─────────────────────────────────────────────────────────────
// Credentials and repository coordinates are read once from the environment.
const {
  GITHUB_TOKEN,
  ANTHROPIC_API_KEY,
  REPO_OWNER,
  REPO_NAME,
} = process.env;
// Look-back window in days; an unset or empty variable falls back to 30.
const DAYS_BACK = parseInt(process.env.DAYS_BACK || '30', 10);
// Only the exact string "true" switches dry-run mode on.
const DRY_RUN = process.env.DRY_RUN === 'true';
// Filler words that are dropped from an issue title before it is turned into
// a search query.
const STOP_WORDS = new Set(
  [
    'a an the is in on at to for of and or but not',
    'with this that it be are was has have does do how',
    'why when where what which who will can could should',
    'would may might must get got use using used error',
    'issue bug feature request problem question please just',
    'after before during about from into also then than',
  ].flatMap((row) => row.split(' '))
);
// ── HTTP helpers ──────────────────────────────────────────────────────────────
/**
 * Makes an authenticated GitHub REST API request.
 *
 * @param {string} method HTTP method
 * @param {string} path API path (e.g. '/repos/owner/repo/issues')
 * @param {object|null} body Request body (JSON-encoded when truthy)
 * @returns {Promise<object>} Parsed JSON response, or `{}` when the response
 *   body is empty or is not valid JSON. Rejects with an Error when the status
 *   code is 400 or higher, or when the request or response stream fails.
 */
function githubRequest(method, path, body = null) {
  return new Promise((resolve, reject) => {
    const payload = body ? JSON.stringify(body) : null;
    const options = {
      hostname: 'api.github.com',
      path,
      method,
      headers: {
        'Authorization': `Bearer ${GITHUB_TOKEN}`,
        'Accept': 'application/vnd.github+json',
        'User-Agent': 'PageIndex-Backfill-Script/1.0',
        'X-GitHub-Api-Version': '2022-11-28',
        ...(payload ? { 'Content-Type': 'application/json', 'Content-Length': Buffer.byteLength(payload) } : {}),
      },
    };
    const req = https.request(options, (res) => {
      // Keep the raw bytes and decode once at the end: converting each chunk
      // to a string separately corrupts any multi-byte UTF-8 character that
      // happens to straddle a chunk boundary.
      const chunks = [];
      res.on('data', (chunk) => {
        chunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk));
      });
      res.on('error', reject);
      res.on('end', () => {
        const data = Buffer.concat(chunks).toString('utf8');
        if (res.statusCode >= 400) {
          reject(new Error(`GitHub API ${method} ${path} → ${res.statusCode}: ${data}`));
          return;
        }
        try {
          resolve(data ? JSON.parse(data) : {});
        } catch {
          // Best effort: a successful response whose body is not JSON is
          // treated the same as an empty body.
          resolve({});
        }
      });
    });
    req.on('error', reject);
    if (payload) req.write(payload);
    req.end();
  });
}
/**
 * Calls the Anthropic Messages API and returns the concatenated text of all
 * `text` content blocks in the reply.
 *
 * @param {string} prompt User prompt
 * @returns {Promise<string>} Reply text. Rejects with an Error when the
 *   response carries an `error` object, when the HTTP status is 400 or
 *   higher, when the body is not valid JSON, or when the request fails.
 */
function callClaude(prompt) {
  return new Promise((resolve, reject) => {
    const body = JSON.stringify({
      model: 'claude-haiku-4-5',
      max_tokens: 1024,
      messages: [{ role: 'user', content: prompt }],
    });
    const options = {
      hostname: 'api.anthropic.com',
      path: '/v1/messages',
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        'Content-Length': Buffer.byteLength(body),
        'x-api-key': ANTHROPIC_API_KEY,
        'anthropic-version': '2023-06-01',
      },
    };
    const req = https.request(options, (res) => {
      // Keep the raw bytes and decode once at the end so a multi-byte UTF-8
      // character split across two chunks is not corrupted.
      const chunks = [];
      res.on('data', (chunk) => {
        chunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk));
      });
      res.on('error', reject);
      res.on('end', () => {
        try {
          const parsed = JSON.parse(Buffer.concat(chunks).toString('utf8'));
          if (parsed.error) {
            reject(new Error(`Anthropic API error: ${parsed.error.message}`));
            return;
          }
          // An error status without an `error` object must not be mistaken
          // for a successful (empty) reply.
          if (res.statusCode >= 400) {
            reject(new Error(`Anthropic API error: HTTP ${res.statusCode}`));
            return;
          }
          const text = (parsed.content || [])
            .filter(b => b.type === 'text')
            .map(b => b.text)
            .join('');
          resolve(text);
        } catch (err) {
          reject(new Error(`Failed to parse Anthropic response: ${err.message}`));
        }
      });
    });
    req.on('error', reject);
    req.write(body);
    req.end();
  });
}
/** Returns a promise that settles after `ms` milliseconds; used to pace API calls. */
const sleep = (ms) => new Promise((resolve) => {
  setTimeout(resolve, ms);
});
// ── Core logic ────────────────────────────────────────────────────────────────
/**
 * Fetches open issues created at or after `since`, paginating as needed.
 *
 * The GitHub `since` query parameter filters by last-updated time, not
 * creation time, so it only narrows the listing; the creation-time check is
 * applied here against each item's `created_at`. Because the listing is
 * requested newest-created first, paging stops as soon as an item older than
 * the cutoff is seen.
 *
 * @param {string} since ISO 8601 timestamp; when it cannot be parsed, no
 *   client-side creation filter is applied
 * @returns {Promise<object[]>} Issue objects (pull requests excluded)
 */
async function fetchIssuesSince(since) {
  const cutoffMs = Date.parse(since);
  const filterByCreation = !Number.isNaN(cutoffMs);
  const issues = [];
  let page = 1;
  while (true) {
    const data = await githubRequest(
      'GET',
      `/repos/${REPO_OWNER}/${REPO_NAME}/issues?state=open&sort=created&direction=desc&since=${encodeURIComponent(since)}&per_page=100&page=${page}`
    );
    if (!Array.isArray(data) || data.length === 0) break;
    let reachedOlder = false;
    for (const item of data) {
      if (filterByCreation && Date.parse(item.created_at) < cutoffMs) {
        // Updated recently but created before the window: not in scope.
        reachedOlder = true;
        continue;
      }
      // Filter out pull requests
      if (!item.pull_request) issues.push(item);
    }
    // Results are sorted by creation time descending, so everything after
    // the first too-old item is older still.
    if (reachedOlder || data.length < 100) break;
    page++;
  }
  return issues;
}
/**
 * Searches for up to 10 candidate originals for the given issue.
 *
 * Up to six keywords are taken from the title (lower-cased, ASCII letters
 * and digits only, longer than two characters, stop words removed) and used
 * in a GitHub issue search restricted to open issues of this repository.
 *
 * Only issues with a lower number than `issue` are returned: an issue can
 * only duplicate one that already existed, and without this restriction two
 * issues can each be flagged as a duplicate of the other and both end up
 * closed.
 *
 * @param {object} issue Issue object; `number` and `title` are read
 * @returns {Promise<object[]>} At most 10 search result items
 */
async function findCandidates(issue) {
  const keywords = (issue.title || '')
    .toLowerCase()
    .replace(/[^a-z0-9\s]/g, ' ')
    .split(/\s+/)
    .filter(w => w.length > 2 && !STOP_WORDS.has(w))
    .slice(0, 6)
    .join(' ');
  if (!keywords) return [];
  const q = encodeURIComponent(
    `repo:${REPO_OWNER}/${REPO_NAME} is:issue state:open ${keywords}`
  );
  // Ask for more than 10 hits so that dropping newer issues and the issue
  // itself still leaves a useful number of candidates.
  const data = await githubRequest('GET', `/search/issues?q=${q}&per_page=30`);
  return (data.items || [])
    .filter(item => item.number < issue.number && !item.pull_request)
    .slice(0, 10);
}
/**
 * Assembles the duplicate-detection prompt sent to Claude.
 *
 * The issue body is cut to 3000 characters ("(no body)" when absent) and
 * each candidate body to 500 characters; candidates are separated by a
 * `---` line.
 *
 * @param {object} issue Issue under analysis (`number`, `title`, `body`)
 * @param {object[]} candidates Candidate issues (`number`, `title`, `html_url`, `body`)
 * @returns {string} Prompt text
 */
function buildPrompt(issue, candidates) {
  const describeCandidate = (candidate) => {
    const excerpt = (candidate.body || '').substring(0, 500);
    return `#${candidate.number}: ${candidate.title}\nURL: ${candidate.html_url}\n${excerpt}`;
  };
  const candidatesText = candidates.map(describeCandidate).join('\n---\n');
  const issueBody = (issue.body || '(no body)').substring(0, 3000);
  return `You are a GitHub issue triage assistant.
Analyze whether the following open issue is a duplicate of any of the candidate issues listed below.
== NEW ISSUE #${issue.number} ==
Title: ${issue.title}
Body:
${issueBody}
== CANDIDATE ISSUES (up to 10) ==
${candidatesText}
RULES:
- Only flag as a duplicate if you are at least 85% confident.
- A minor difference in wording does NOT make an issue non-duplicate if they describe the same underlying problem or feature request.
Respond with ONLY a JSON object (no markdown, no other text):
{
"is_duplicate": true or false,
"duplicate_issues": [array of integer issue numbers that this is a duplicate of, empty if none],
"explanation": "one or two sentences explaining your reasoning"
}`;
}
/**
 * Parses Claude's JSON reply robustly.
 *
 * The first `{` through the last `}` of the text is parsed as JSON, so
 * surrounding prose or markdown fences are tolerated.
 *
 * @param {string} text Raw reply text
 * @returns {{is_duplicate: boolean, duplicate_issues: number[], explanation: string}|null}
 *   Normalised result, or null when `text` is not a string or contains no
 *   parseable JSON object. `is_duplicate` is true only for the boolean
 *   `true` or the string "true"; `duplicate_issues` keeps only positive
 *   integers (a leading "#" on string entries is accepted) without repeats.
 */
function parseClaudeResponse(text) {
  if (typeof text !== 'string') return null;
  // Try to extract a JSON object from the response
  const jsonMatch = text.match(/\{[\s\S]*\}/);
  if (!jsonMatch) return null;
  try {
    const parsed = JSON.parse(jsonMatch[0]);
    const flag = parsed.is_duplicate;
    // Boolean("false") is true, so string values are compared explicitly.
    const isDuplicate =
      flag === true || (typeof flag === 'string' && flag.trim().toLowerCase() === 'true');
    const rawNumbers = Array.isArray(parsed.duplicate_issues) ? parsed.duplicate_issues : [];
    const issueNumbers = rawNumbers
      .map((entry) => {
        if (typeof entry === 'number') return entry;
        if (typeof entry === 'string') return Number(entry.trim().replace(/^#/, ''));
        return Number.NaN;
      })
      // Drops NaN, fractions, zero and negatives so no "#NaN" link is ever posted.
      .filter((n) => Number.isInteger(n) && n > 0);
    return {
      is_duplicate: isDuplicate,
      duplicate_issues: [...new Set(issueNumbers)],
      explanation: String(parsed.explanation || ''),
    };
  } catch {
    return null;
  }
}
/**
 * Posts the "appears to be a duplicate" comment on an issue.
 *
 * The comment lists the suspected originals, includes the explanation, and
 * ends with a hidden `DEDUPE_RESULT` HTML comment carrying the issue numbers
 * as JSON.
 *
 * @param {number} issueNumber Issue to comment on
 * @param {number[]} duplicateIssueNumbers Issues this one appears to duplicate
 * @param {string} explanation Short reasoning shown to the reporter
 * @returns {Promise<void>}
 */
async function postDuplicateComment(issueNumber, duplicateIssueNumbers, explanation) {
  const bullets = [];
  for (const original of duplicateIssueNumbers) {
    bullets.push(`- #${original}`);
  }
  const marker =
    `<!-- DEDUPE_RESULT: {"is_duplicate":true,"issues":${JSON.stringify(duplicateIssueNumbers)}} -->`;
  // Paragraphs are separated by blank lines.
  const paragraphs = [
    `👋 Thank you for taking the time to open this issue!`,
    `After automated analysis, this issue appears to be a duplicate of:`,
    bullets.join('\n'),
    `${explanation}`,
    `Please subscribe to the original issue(s) above to follow updates. ` +
      `This issue will be automatically closed after a short inactivity period.`,
    marker,
  ];
  const commentsPath = `/repos/${REPO_OWNER}/${REPO_NAME}/issues/${issueNumber}/comments`;
  await githubRequest('POST', commentsPath, { body: paragraphs.join('\n\n') });
}
/**
 * Applies labels to an issue, first creating any label whose lookup fails.
 *
 * Every label is looked up individually; if the lookup rejects for any
 * reason the label is created, using the predefined colour and description
 * for "duplicate" and "autoclose" and a neutral grey default otherwise.
 * Finally all labels are added to the issue in one request.
 *
 * @param {number} issueNumber Issue to label
 * @param {string[]} labelNames Labels to apply
 * @returns {Promise<void>}
 */
async function ensureLabelAndApply(issueNumber, labelNames) {
  const repoPath = `/repos/${REPO_OWNER}/${REPO_NAME}`;
  const labelDefaults = {
    duplicate: { color: 'cfd3d7', description: 'This issue or pull request already exists' },
    autoclose: { color: 'e4e669', description: 'Will be auto-closed after a period of inactivity' },
  };
  // A rejected lookup is taken to mean the label does not exist yet.
  const labelExists = async (label) => {
    try {
      await githubRequest('GET', `${repoPath}/labels/${encodeURIComponent(label)}`);
      return true;
    } catch {
      return false;
    }
  };
  for (const label of labelNames) {
    if (await labelExists(label)) {
      continue;
    }
    const meta = labelDefaults[label] || { color: 'ededed', description: '' };
    await githubRequest('POST', `${repoPath}/labels`, { name: label, ...meta });
  }
  await githubRequest('POST', `${repoPath}/issues/${issueNumber}/labels`, { labels: labelNames });
}
/**
 * Processes a single issue: finds candidates, asks Claude, and acts on the
 * result.
 *
 * Issues already labelled "duplicate" and issues with no search candidates
 * are skipped. Issue numbers returned by the model are only trusted when
 * they belong to the candidate set that was actually shown to it (and are
 * not the issue itself); anything else is dropped so the posted comment
 * never links to an unrelated or non-existent issue. In dry-run mode nothing
 * is written to GitHub.
 *
 * @param {object} issue Issue object (`number`, `title`, `labels`, `body`)
 * @returns {Promise<void>}
 */
async function processIssue(issue) {
  const num = issue.number;
  console.log(`\nProcessing issue #${num}: ${issue.title}`);
  // Skip already-labelled issues
  const existingLabels = (issue.labels || []).map(l => l.name);
  if (existingLabels.includes('duplicate')) {
    console.log(` → Already labelled as duplicate, skipping.`);
    return;
  }
  const candidates = await findCandidates(issue);
  if (candidates.length === 0) {
    console.log(` → No candidates found, skipping.`);
    return;
  }
  console.log(` → Found ${candidates.length} candidate(s): ${candidates.map(c => `#${c.number}`).join(', ')}`);
  const prompt = buildPrompt(issue, candidates);
  const rawReply = await callClaude(prompt);
  const result = parseClaudeResponse(rawReply);
  if (!result) {
    console.warn(` ⚠️ Could not parse Claude response for #${num}. Raw:\n${rawReply.substring(0, 300)}`);
    return;
  }
  console.log(` → is_duplicate=${result.is_duplicate}, issues=${JSON.stringify(result.duplicate_issues)}`);
  console.log(` ${result.explanation}`);
  // Keep only numbers the model was actually offered as candidates.
  const candidateNumbers = new Set(candidates.map(c => c.number));
  const confirmed = result.duplicate_issues.filter(n => n !== num && candidateNumbers.has(n));
  if (confirmed.length < result.duplicate_issues.length) {
    const dropped = result.duplicate_issues.filter(n => !confirmed.includes(n));
    console.warn(` ⚠️ Ignoring issue number(s) outside the candidate set: ${JSON.stringify(dropped)}`);
  }
  if (!result.is_duplicate || confirmed.length === 0) {
    console.log(` → Not a duplicate.`);
    return;
  }
  if (DRY_RUN) {
    console.log(` [DRY RUN] Would post comment and apply labels to #${num}`);
    return;
  }
  await postDuplicateComment(num, confirmed, result.explanation);
  await ensureLabelAndApply(num, ['duplicate', 'autoclose']);
  console.log(` ✅ Commented and labelled #${num}`);
}
// ── Entry point ───────────────────────────────────────────────────────────────
/**
 * Entry point: validates configuration, fetches the issues in the look-back
 * window, and runs duplicate detection on each of them in turn.
 *
 * Exits the process with code 1 when a required environment variable is
 * missing or DAYS_BACK is not a non-negative integer. A failure while
 * processing one issue is logged and the run continues with the next issue;
 * if any issue failed, the process exit code is set to 1 once the run ends.
 *
 * @returns {Promise<void>}
 */
async function main() {
  // Validate required env vars
  const missing = ['GITHUB_TOKEN', 'ANTHROPIC_API_KEY', 'REPO_OWNER', 'REPO_NAME']
    .filter(k => !process.env[k]);
  if (missing.length) {
    console.error(`Missing required environment variables: ${missing.join(', ')}`);
    process.exit(1);
  }
  // A non-numeric DAYS_BACK parses to NaN, which makes the date arithmetic
  // below throw an opaque "Invalid time value"; a negative value would put
  // the window in the future. Fail with a clear message instead.
  if (!Number.isInteger(DAYS_BACK) || DAYS_BACK < 0) {
    console.error(`DAYS_BACK must be a non-negative integer (got "${process.env.DAYS_BACK}")`);
    process.exit(1);
  }
  const since = new Date(Date.now() - DAYS_BACK * 24 * 60 * 60 * 1000).toISOString();
  console.log(`Backfilling duplicate detection`);
  console.log(` Repository: ${REPO_OWNER}/${REPO_NAME}`);
  console.log(` Days back: ${DAYS_BACK} (since ${since})`);
  console.log(` Dry run: ${DRY_RUN}`);
  const issues = await fetchIssuesSince(since);
  console.log(`\nFetched ${issues.length} open issue(s) to process.`);
  let failedCount = 0;
  for (const issue of issues) {
    try {
      await processIssue(issue);
    } catch (err) {
      // One bad issue (API hiccup, unexpected payload) must not abort the
      // rest of the backfill.
      failedCount++;
      console.error(` ❌ Failed to process #${issue.number}: ${err.message}`);
    }
    // Respect GitHub and Anthropic rate limits
    await sleep(2500);
  }
  console.log('\nBackfill complete.');
  if (failedCount > 0) {
    console.error(`${failedCount} issue(s) could not be processed.`);
    // Still signal failure to the caller (e.g. the CI job) after finishing.
    process.exitCode = 1;
  }
}
// Start the run; any rejection that escapes main() is reported and mapped to
// a non-zero exit code.
main().catch((fatal) => {
  console.error('Fatal error:', fatal.message);
  process.exit(1);
});