Add GitHub Actions workflows for issue deduplication and auto-close

Co-authored-by: BukeLy <19304666+BukeLy@users.noreply.github.com>
This commit is contained in:
copilot-swe-agent[bot] 2026-03-02 03:54:18 +00:00
parent f56261cee1
commit b3cb9531a4
7 changed files with 1013 additions and 0 deletions

View file

@ -0,0 +1,45 @@
# Closes open issues that carry the "autoclose" label and have been inactive
# for more than INACTIVITY_DAYS days. Runs on a daily schedule and can also
# be triggered manually.
name: Auto-close Inactive Labeled Issues

on:
  schedule:
    # Runs every day at 01:00 UTC
    - cron: '0 1 * * *'
  workflow_dispatch:
    inputs:
      inactivity_days:
        description: 'Days of inactivity before closing (default: 7)'
        required: false
        default: '7'
        type: number
      dry_run:
        description: 'Dry run report but do not actually close issues'
        required: false
        default: 'false'
        type: choice
        options:
          - 'false'
          - 'true'

# The job checks out the repository and comments on / closes issues.
permissions:
  issues: write
  contents: read

jobs:
  autoclose:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Close inactive autoclose-labeled issues
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          REPO_OWNER: ${{ github.repository_owner }}
          # workflow_dispatch overrides the default; schedule uses the default (7)
          INACTIVITY_DAYS: ${{ inputs.inactivity_days || '7' }}
          DRY_RUN: ${{ inputs.dry_run || 'false' }}
        run: |
          # REPO_NAME is derived from GITHUB_REPOSITORY ("owner/name") rather
          # than from github.event.repository.name: the latter comes from the
          # webhook payload, which scheduled runs are not guaranteed to carry
          # (TODO confirm), and the script aborts when REPO_NAME is empty.
          export REPO_NAME="${GITHUB_REPOSITORY#*/}"
          node scripts/autoclose-labeled-issues.js

67
.github/workflows/backfill-dedupe.yml vendored Normal file
View file

@ -0,0 +1,67 @@
# Backfills duplicate detection for historical issues.
# Triggered manually via workflow_dispatch.
name: Backfill Duplicate Detection

on:
  workflow_dispatch:
    inputs:
      days_back:
        description: 'How many days back to look for issues (default: 30)'
        required: false
        default: '30'
        type: number
      dry_run:
        description: 'Dry run analyze but do not post comments or apply labels'
        required: false
        default: 'false'
        type: choice
        options:
          - 'false'
          - 'true'

permissions:
  issues: write
  contents: read

jobs:
  backfill:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      # Create the "duplicate" and "autoclose" labels when the lookup says
      # they are missing (HTTP 404); any other lookup error is ignored.
      - name: Ensure required labels exist
        uses: actions/github-script@v7
        with:
          script: |
            const { owner, repo } = context.repo;
            const required = [
              { name: 'duplicate', color: 'cfd3d7', description: 'This issue or pull request already exists' },
              { name: 'autoclose', color: 'e4e669', description: 'Will be auto-closed after a period of inactivity' },
            ];
            for (const { name, color, description } of required) {
              try {
                await github.rest.issues.getLabel({ owner, repo, name });
              } catch (err) {
                if (err.status !== 404) continue;
                await github.rest.issues.createLabel({ owner, repo, name, color, description });
                core.info(`Created label: ${name}`);
              }
            }

      # The script reads all of its settings from the environment.
      - name: Run backfill script
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          ANTHROPIC_API_KEY: ${{ secrets.AUTHROPIC_API_KEY }}
          REPO_OWNER: ${{ github.repository_owner }}
          REPO_NAME: ${{ github.event.repository.name }}
          DAYS_BACK: ${{ inputs.days_back }}
          DRY_RUN: ${{ inputs.dry_run }}
        run: node scripts/backfill-dedupe.js

235
.github/workflows/issue-dedupe.yml vendored Normal file
View file

@ -0,0 +1,235 @@
# Detects duplicate issues using Claude Code.
# Triggered automatically when a new issue is opened, or manually for a single issue.
#
# Pipeline: ensure labels -> collect the issue and up to 10 search candidates ->
# write them to files under /tmp -> let Claude comment when it finds a duplicate ->
# read the comment marker back and apply the "duplicate" and "autoclose" labels.
name: Issue Duplicate Detection
on:
issues:
types: [opened]
workflow_dispatch:
inputs:
issue_number:
description: 'Issue number to check for duplicates'
required: true
type: number
permissions:
issues: write
contents: read
jobs:
detect-duplicate:
runs-on: ubuntu-latest
# Skip pull-requests that surface as issues and bot-opened issues
# (manual workflow_dispatch runs always pass this condition).
if: >
(github.event_name == 'workflow_dispatch') ||
(github.event.issue.pull_request == null &&
!endsWith(github.actor, '[bot]') &&
github.actor != 'github-actions')
steps:
# ── 1. Ensure required labels exist ─────────────────────────────────────
- name: Ensure labels exist
uses: actions/github-script@v7
with:
script: |
// Labels this workflow applies; each is created only when the lookup returns 404.
const labels = [
{ name: 'duplicate', color: 'cfd3d7', description: 'This issue or pull request already exists' },
{ name: 'autoclose', color: 'e4e669', description: 'Will be auto-closed after a period of inactivity' },
];
for (const label of labels) {
try {
await github.rest.issues.getLabel({
owner: context.repo.owner,
repo: context.repo.repo,
name: label.name,
});
} catch (err) {
// Lookup errors other than 404 are ignored and the loop moves on.
if (err.status === 404) {
await github.rest.issues.createLabel({
owner: context.repo.owner,
repo: context.repo.repo,
name: label.name,
color: label.color,
description: label.description,
});
core.info(`Created label: ${label.name}`);
}
}
}
# ── 2. Gather issue data and find candidate duplicates ──────────────────
# Sets the step output "skip" to 'true' whenever later steps should not run.
- name: Gather issue data and candidates
id: data
uses: actions/github-script@v7
with:
script: |
// Issue number comes from the event payload for "issues" events,
// otherwise from the workflow_dispatch input.
const issueNumber =
context.eventName === 'issues'
? context.payload.issue.number
: parseInt(core.getInput('issue_number') || '${{ inputs.issue_number }}');
const { data: issue } = await github.rest.issues.get({
owner: context.repo.owner,
repo: context.repo.repo,
issue_number: issueNumber,
});
// Skip already-closed or already-labelled issues
if (issue.state === 'closed') {
core.setOutput('skip', 'true');
core.info('Issue is already closed skipping.');
return;
}
if (issue.labels.some(l => l.name === 'duplicate')) {
core.setOutput('skip', 'true');
core.info('Issue already has "duplicate" label skipping.');
return;
}
// Extract meaningful keywords from the title
const stopWords = new Set([
'a','an','the','is','in','on','at','to','for','of','and','or','but','not',
'with','this','that','it','be','are','was','has','have','does','do','how',
'why','when','where','what','which','who','will','can','could','should',
'would','may','might','must','get','got','use','using','used','error',
'issue','bug','feature','request','problem','question','please','just',
'after','before','during','about','from','into','also','then','than',
]);
// Lowercase the title, replace everything except a-z, 0-9 and whitespace
// with spaces, then keep the first six words that are longer than two
// characters and are not stop words.
const keywords = issue.title
.toLowerCase()
.replace(/[^a-z0-9\s]/g, ' ')
.split(/\s+/)
.filter(w => w.length > 2 && !stopWords.has(w))
.slice(0, 6)
.join(' ');
let candidates = [];
if (keywords) {
try {
// Search open issues in this repository for the extracted keywords.
const q = `repo:${context.repo.owner}/${context.repo.repo} is:issue state:open ${keywords}`;
const { data: results } = await github.rest.search.issuesAndPullRequests({
q,
per_page: 15,
});
// Drop the issue itself and any pull requests; keep at most 10 results.
candidates = results.items
.filter(item => item.number !== issueNumber && !item.pull_request)
.slice(0, 10);
} catch (err) {
// A failed search is only warned about; candidates stays empty.
core.warning('GitHub search failed: ' + err.message);
}
}
if (candidates.length === 0) {
core.setOutput('skip', 'true');
core.info('No candidate issues found skipping Claude analysis.');
return;
}
// Outputs read by later steps; bodies are truncated (3000 characters for
// the issue, 500 per candidate).
core.setOutput('skip', 'false');
core.setOutput('issue_number', String(issueNumber));
core.setOutput('issue_title', issue.title);
core.setOutput('issue_body', (issue.body || '').substring(0, 3000));
core.setOutput('candidates',
JSON.stringify(candidates.map(c => ({
number: c.number,
title: c.title,
url: c.html_url,
body: (c.body || '').substring(0, 500),
})))
);
# ── 3. Write data files (avoids YAML-injection of arbitrary text) ───────
# The values are passed through environment variables and written with printf.
- name: Write issue data to files
if: steps.data.outputs.skip == 'false'
env:
ISSUE_TITLE: ${{ steps.data.outputs.issue_title }}
ISSUE_BODY: ${{ steps.data.outputs.issue_body }}
CANDIDATES: ${{ steps.data.outputs.candidates }}
run: |
printf '%s' "$ISSUE_TITLE" > /tmp/issue-title.txt
printf '%s' "$ISSUE_BODY" > /tmp/issue-body.txt
printf '%s' "$CANDIDATES" > /tmp/issue-candidates.json
# ── 4. Ask Claude to decide whether this is a duplicate ─────────────────
# NOTE(review): the secret is read as AUTHROPIC_API_KEY (sic); confirm this
# spelling matches the secret configured in the repository settings.
- name: Run Claude duplicate analysis
if: steps.data.outputs.skip == 'false'
uses: anthropics/claude-code-action@v1
with:
anthropic_api_key: ${{ secrets.AUTHROPIC_API_KEY }}
github_token: ${{ secrets.GITHUB_TOKEN }}
track_progress: 'false'
prompt: |
You are a GitHub issue triage assistant.
Analyze whether issue #${{ steps.data.outputs.issue_number }} in this repository
is a duplicate of any existing open issues.
The issue data is stored in temporary files on this runner:
- /tmp/issue-title.txt — title of the new issue
- /tmp/issue-body.txt — body of the new issue
- /tmp/issue-candidates.json — JSON array of up to 10 candidate issues
(each has: number, title, url, body)
Read those files first, then follow these rules:
1. Compare the new issue against every candidate.
Focus on whether they describe the *same underlying problem or request*.
2. Only flag as a duplicate if you are at least 85 % confident.
Superficial wording differences do NOT make an issue non-duplicate.
3. IF the new issue IS a duplicate:
a. Post a friendly, helpful comment on issue #${{ steps.data.outputs.issue_number }}.
The comment must:
- Thank the reporter
- Explain which existing issue(s) it duplicates and why (include markdown links)
- Invite them to subscribe to the original for updates
b. The LAST line of the comment must be exactly (fill in real numbers):
<!-- DEDUPE_RESULT: {"is_duplicate":true,"issues":[ISSUE_NUMBERS]} -->
Example: <!-- DEDUPE_RESULT: {"is_duplicate":true,"issues":[42,73]} -->
4. IF the issue is NOT a duplicate, or you are unsure:
- Do NOT post any comment.
- Do NOT take any other action.
# ── 5. Parse Claude's comment and apply labels ──────────────────────────
# The DEDUPE_RESULT marker requested in the prompt above is the only signal
# this step reads.
- name: Apply labels if duplicate found
if: steps.data.outputs.skip == 'false'
uses: actions/github-script@v7
with:
script: |
const issueNumber = parseInt('${{ steps.data.outputs.issue_number }}');
// Allow a moment for the comment to land
await new Promise(r => setTimeout(r, 5000));
// Only a single page of up to 50 comments is requested.
const { data: comments } = await github.rest.issues.listComments({
owner: context.repo.owner,
repo: context.repo.repo,
issue_number: issueNumber,
per_page: 50,
});
// Captures the JSON object inside the HTML comment marker.
const pattern = /<!--\s*DEDUPE_RESULT:\s*(\{[^}]+\})\s*-->/;
let isDuplicate = false;
// Scan from the end of the returned list and stop at the first comment
// that carries a marker, whether or not its JSON parses.
for (const comment of [...comments].reverse()) {
const m = comment.body.match(pattern);
if (m) {
try {
const result = JSON.parse(m[1]);
isDuplicate = result.is_duplicate === true;
} catch (err) {
core.warning('Failed to parse DEDUPE_RESULT JSON: ' + err.message);
}
break;
}
}
if (isDuplicate) {
await github.rest.issues.addLabels({
owner: context.repo.owner,
repo: context.repo.repo,
issue_number: issueNumber,
labels: ['duplicate', 'autoclose'],
});
core.info(`✅ Applied "duplicate" and "autoclose" labels to #${issueNumber}`);
} else {
core.info(` Issue #${issueNumber} is not a duplicate no labels applied.`);
}

View file

@ -0,0 +1,48 @@
# Drops the "autoclose" label as soon as a non-bot account comments on an
# issue that carries it, so the issue is no longer picked up for auto-closing.
name: Remove Autoclose Label on Human Activity

on:
  issue_comment:
    types: [created]

permissions:
  issues: write

jobs:
  remove-autoclose:
    # issue_comment also fires for pull requests; only handle real issues.
    if: ${{ github.event.issue.pull_request == null }}
    runs-on: ubuntu-latest
    steps:
      - name: Remove autoclose label if human commented
        uses: actions/github-script@v7
        with:
          script: |
            const actor = context.actor;

            // Accounts ending in "[bot]" and "github-actions" count as bots.
            const isBot = actor.endsWith('[bot]') || actor === 'github-actions';
            if (isBot) {
              core.info(`Skipping bot comment from ${actor}`);
              return;
            }

            // The label list is taken from the event payload.
            const { issue } = context.payload;
            const labelNames = (issue.labels || []).map(label => label.name);
            const hasAutoclose = labelNames.includes('autoclose');
            if (!hasAutoclose) {
              core.info('Issue does not have "autoclose" label nothing to do.');
              return;
            }

            const { owner, repo } = context.repo;
            await github.rest.issues.removeLabel({
              owner,
              repo,
              issue_number: issue.number,
              name: 'autoclose',
            });
            core.info(`Removed "autoclose" label from #${issue.number} after human activity by ${actor}`);

View file

@ -267,4 +267,69 @@ Leave us a star 🌟 if you like our project. Thank you!
---
## 🤖 GitHub Automation
This repository uses automated GitHub Actions workflows to keep the issue tracker tidy.
### Overview
| Workflow | Trigger | Purpose |
|---|---|---|
| `issue-dedupe.yml` | Issue opened · `workflow_dispatch` | Detects duplicate issues using Claude and labels them |
| `backfill-dedupe.yml` | `workflow_dispatch` | Runs duplicate detection over historical issues |
| `autoclose-labeled-issues.yml` | Daily schedule · `workflow_dispatch` | Closes issues labelled `autoclose` after N days of inactivity |
| `remove-autoclose-label.yml` | Issue comment created | Removes the `autoclose` label when a human posts a new comment |
### Required Secrets
Add the following secret to the repository (**Settings → Secrets and variables → Actions**):
| Secret | Description |
|---|---|
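| `AUTHROPIC_API_KEY` | Your Anthropic API key (used by `anthropics/claude-code-action` and by the backfill script). Note the spelling: the workflows read the secret under exactly this name (`AUTHROPIC`, not `ANTHROPIC`). |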
`GITHUB_TOKEN` is provided automatically by GitHub Actions and does not need to be added manually.
### Labels
The workflows create the following labels automatically if they do not exist:
| Label | Description |
|---|---|
| `duplicate` | Marks issues identified as duplicates |
| `autoclose` | Marks issues that will be automatically closed after inactivity |
### Running the Backfill
To scan historical issues for duplicates, trigger the **Backfill Duplicate Detection** workflow manually from the **Actions** tab:
- **`days_back`** (default `30`) — how many days into the past to scan
- **`dry_run`** (default `false`) — set to `true` to preview results without modifying issues
```
Actions → Backfill Duplicate Detection → Run workflow
```
### Changing the Inactivity Threshold
The default inactivity period before an `autoclose`-labelled issue is closed is **7 days**.
To change it for a one-off run, trigger **Auto-close Inactive Labeled Issues** with the `inactivity_days` input.
To change the default permanently, edit the `INACTIVITY_DAYS` env variable default in `.github/workflows/autoclose-labeled-issues.yml`:
```yaml
INACTIVITY_DAYS: ${{ inputs.inactivity_days || '7' }} # ← change '7' here
```
### How Duplicate Detection Works
1. When a new issue is opened, keywords from the title are used to search for the top 10 most relevant existing open issues via the GitHub Search API.
2. The issue title, body, and candidate list are passed to **Claude** (`anthropics/claude-code-action`) with a structured prompt.
3. Claude posts a comment on the issue (if it is highly confident it is a duplicate), including links to the original issue(s) and a brief explanation.
4. A follow-up step reads the comment, extracts the machine-readable result, and applies the `duplicate` and `autoclose` labels.
5. If Claude is not confident, no comment is posted and no labels are applied.
---
© 2025 [Vectify AI](https://vectify.ai)

View file

@ -0,0 +1,183 @@
/**
* scripts/autoclose-labeled-issues.js
*
* Closes open issues that carry the "autoclose" label and have been inactive
* (no updates) for more than INACTIVITY_DAYS days.
*
* Required environment variables:
* GITHUB_TOKEN GitHub Actions token (or PAT with repo:issues write access)
* REPO_OWNER Repository owner (e.g. VectifyAI)
* REPO_NAME Repository name (e.g. PageIndex)
*
* Optional environment variables:
* INACTIVITY_DAYS Days of inactivity before closing (default: 7)
* DRY_RUN If "true", report but do not close issues (default: false)
*/
'use strict';
const https = require('https');
// ── Configuration ─────────────────────────────────────────────────────────────
// All settings are read from the environment once, when the script is loaded.
const GITHUB_TOKEN = process.env.GITHUB_TOKEN;
const REPO_OWNER = process.env.REPO_OWNER;
const REPO_NAME = process.env.REPO_NAME;
// Falls back to 7 when unset or empty; a non-numeric value parses to NaN.
const INACTIVITY_DAYS = parseInt(process.env.INACTIVITY_DAYS || '7', 10);
// Only the exact string "true" enables dry-run mode.
const DRY_RUN = process.env.DRY_RUN === 'true';
// ── HTTP helper ───────────────────────────────────────────────────────────────
/**
 * Sends an authenticated request to the GitHub REST API.
 *
 * @param {string} method HTTP method (e.g. 'GET', 'POST', 'PATCH')
 * @param {string} path API path including any query string
 * @param {object|null} body Optional request body; JSON-encoded when given
 * @returns {Promise<object>} Parsed JSON response, or {} when the response
 *   body is empty or is not valid JSON
 * @throws {Error} Rejects on network errors and on HTTP status >= 400; in the
 *   latter case the error carries the status code in its `status` property.
 */
function githubRequest(method, path, body = null) {
  return new Promise((resolve, reject) => {
    const payload = body ? JSON.stringify(body) : null;
    const options = {
      hostname: 'api.github.com',
      path,
      method,
      headers: {
        'Authorization': `Bearer ${GITHUB_TOKEN}`,
        'Accept': 'application/vnd.github+json',
        'User-Agent': 'PageIndex-Autoclose-Script/1.0',
        'X-GitHub-Api-Version': '2022-11-28',
        ...(payload ? {
          'Content-Type': 'application/json',
          'Content-Length': Buffer.byteLength(payload),
        } : {}),
      },
    };
    const req = https.request(options, (res) => {
      // Decode at the stream level so a multi-byte UTF-8 character that is
      // split across two chunks is not corrupted by per-chunk conversion.
      res.setEncoding('utf8');
      let data = '';
      res.on('data', chunk => (data += chunk));
      res.on('error', reject);
      res.on('end', () => {
        if (res.statusCode >= 400) {
          const err = new Error(`GitHub API ${method} ${path} → ${res.statusCode}: ${data}`);
          // Expose the status so callers can tell e.g. a 404 from a 403.
          err.status = res.statusCode;
          reject(err);
          return;
        }
        try {
          resolve(data ? JSON.parse(data) : {});
        } catch {
          // Best effort: a successful response with an unparseable body is
          // treated like an empty one.
          resolve({});
        }
      });
    });
    req.on('error', reject);
    if (payload) req.write(payload);
    req.end();
  });
}
/** Returns a promise that resolves after `ms` milliseconds; used to pace API calls. */
const sleep = (ms) => new Promise((resolve) => {
  setTimeout(resolve, ms);
});
// ── Core logic ────────────────────────────────────────────────────────────────
/**
 * Collects every open issue that carries the "autoclose" label.
 *
 * Pages through the issues endpoint 100 items at a time and stops at the
 * first empty, non-array or short page. Entries with a `pull_request` field
 * are dropped.
 *
 * @returns {Promise<object[]>} The matching issues
 */
async function fetchAutocloseIssues() {
  const PAGE_SIZE = 100;
  const collected = [];
  for (let page = 1; ; page++) {
    const batch = await githubRequest(
      'GET',
      `/repos/${REPO_OWNER}/${REPO_NAME}/issues?state=open&labels=autoclose&per_page=100&page=${page}`
    );
    if (!Array.isArray(batch) || batch.length === 0) break;
    for (const entry of batch) {
      // Pull requests can appear in this listing; keep real issues only.
      if (!entry.pull_request) collected.push(entry);
    }
    if (batch.length < PAGE_SIZE) break;
  }
  return collected;
}
/**
 * Posts an explanatory comment on an issue and then closes it with the
 * state reason "not_planned".
 *
 * @param {number} issueNumber Number of the issue to close
 * @param {number} inactivityDays Inactivity period quoted in the comment
 * @returns {Promise<void>}
 */
async function closeIssue(issueNumber, inactivityDays) {
  const issuePath = `/repos/${REPO_OWNER}/${REPO_NAME}/issues/${issueNumber}`;
  const message = [
    `This issue has been automatically closed because it was marked as a **duplicate** `,
    `and has had no new activity for ${inactivityDays} day(s).\n\n`,
    `If you believe this was closed in error, please reopen the issue and leave a comment. `,
    `New human activity will prevent automatic closure in the future.\n\n`,
    `Thank you for your contribution! 🙏`,
  ].join('');

  // The comment goes out first, the state change second.
  await githubRequest('POST', `${issuePath}/comments`, { body: message });
  await githubRequest('PATCH', issuePath, { state: 'closed', state_reason: 'not_planned' });
}
// ── Entry point ───────────────────────────────────────────────────────────────
/**
 * Entry point: closes every open "autoclose" issue whose `updated_at` is
 * older than INACTIVITY_DAYS days (or only reports them when DRY_RUN is set).
 *
 * Exits with status 1 when a required environment variable is missing or
 * INACTIVITY_DAYS is not a non-negative integer, and sets a non-zero exit
 * code when at least one issue could not be closed.
 */
async function main() {
  // Validate required env vars
  const missing = ['GITHUB_TOKEN', 'REPO_OWNER', 'REPO_NAME']
    .filter(k => !process.env[k]);
  if (missing.length) {
    console.error(`Missing required environment variables: ${missing.join(', ')}`);
    process.exit(1);
  }
  // A NaN threshold would silently skip every issue and a negative one would
  // put the cutoff in the future and close everything, so refuse both.
  if (!Number.isInteger(INACTIVITY_DAYS) || INACTIVITY_DAYS < 0) {
    console.error(`INACTIVITY_DAYS must be a non-negative integer, got "${process.env.INACTIVITY_DAYS}"`);
    process.exit(1);
  }

  const cutoff = new Date(Date.now() - INACTIVITY_DAYS * 24 * 60 * 60 * 1000);
  console.log(`Auto-close inactive labelled issues`);
  console.log(` Repository: ${REPO_OWNER}/${REPO_NAME}`);
  console.log(` Inactivity days: ${INACTIVITY_DAYS} (cutoff: ${cutoff.toISOString()})`);
  console.log(` Dry run: ${DRY_RUN}`);

  const issues = await fetchAutocloseIssues();
  console.log(`\nFound ${issues.length} open issue(s) with "autoclose" label.`);

  let closedCount = 0;
  let skippedCount = 0;
  let failedCount = 0;
  for (const issue of issues) {
    const lastActivity = new Date(issue.updated_at);
    const inactive = lastActivity < cutoff;
    const daysSince = Math.floor((Date.now() - lastActivity.getTime()) / (1000 * 60 * 60 * 24));
    if (!inactive) {
      console.log(` #${issue.number} — active ${daysSince}d ago, skipping.`);
      skippedCount++;
      continue;
    }
    console.log(` #${issue.number} — inactive for ${daysSince}d: "${issue.title}"`);
    if (DRY_RUN) {
      console.log(` [DRY RUN] Would close issue #${issue.number}`);
      closedCount++;
      continue;
    }
    try {
      await closeIssue(issue.number, INACTIVITY_DAYS);
      console.log(` ✅ Closed issue #${issue.number}`);
      closedCount++;
    } catch (err) {
      // Keep going so one bad issue does not block the rest, but remember it.
      failedCount++;
      console.error(` ❌ Failed to close #${issue.number}: ${err.message}`);
    }
    // Respect GitHub's secondary rate limit
    await sleep(1000);
  }

  const closedLabel = DRY_RUN ? 'would be closed' : 'closed';
  console.log(`\nSummary: ${closedCount} ${closedLabel}, ${skippedCount} still active, ${failedCount} failed.`);
  // Surface partial failures to the workflow instead of reporting success.
  if (failedCount > 0) {
    process.exitCode = 1;
  }
}
// Run the script; any rejection that escapes main() is logged and turned
// into exit status 1.
main().catch(err => {
console.error('Fatal error:', err.message);
process.exit(1);
});

370
scripts/backfill-dedupe.js Normal file
View file

@ -0,0 +1,370 @@
/**
* scripts/backfill-dedupe.js
*
* Backfills duplicate detection for historical issues.
* Fetches issues created within the last DAYS_BACK days, searches for
* candidate duplicates via the GitHub Search API, and asks the Anthropic
* API to determine whether each issue is a duplicate.
*
* Required environment variables:
* GITHUB_TOKEN GitHub Actions token (or PAT with repo access)
* ANTHROPIC_API_KEY Anthropic API key (mapped from AUTHROPIC_API_KEY secret)
* REPO_OWNER Repository owner (e.g. VectifyAI)
* REPO_NAME Repository name (e.g. PageIndex)
*
* Optional environment variables:
* DAYS_BACK How many days back to process (default: 30)
* DRY_RUN If "true", analyse but do not write to GitHub (default: false)
*/
'use strict';
const https = require('https');
// ── Configuration ─────────────────────────────────────────────────────────────
// All settings are read from the environment once, when the script is loaded.
const GITHUB_TOKEN = process.env.GITHUB_TOKEN;
const ANTHROPIC_API_KEY = process.env.ANTHROPIC_API_KEY;
const REPO_OWNER = process.env.REPO_OWNER;
const REPO_NAME = process.env.REPO_NAME;
// Falls back to 30 when unset or empty; a non-numeric value parses to NaN.
const DAYS_BACK = parseInt(process.env.DAYS_BACK || '30', 10);
// Only the exact string "true" enables dry-run mode.
const DRY_RUN = process.env.DRY_RUN === 'true';
// Words dropped from issue titles before building the candidate search query.
const STOP_WORDS = new Set([
'a','an','the','is','in','on','at','to','for','of','and','or','but','not',
'with','this','that','it','be','are','was','has','have','does','do','how',
'why','when','where','what','which','who','will','can','could','should',
'would','may','might','must','get','got','use','using','used','error',
'issue','bug','feature','request','problem','question','please','just',
'after','before','during','about','from','into','also','then','than',
]);
// ── HTTP helpers ──────────────────────────────────────────────────────────────
/**
 * Makes an authenticated GitHub REST API request.
 * @param {string} method HTTP method
 * @param {string} path API path (e.g. '/repos/owner/repo/issues')
 * @param {object|null} body Request body (will be JSON-encoded)
 * @returns {Promise<object>} Parsed JSON response, or {} when the response
 *   body is empty or is not valid JSON
 * @throws {Error} Rejects on network errors and on HTTP status >= 400; in the
 *   latter case the error carries the status code in its `status` property.
 */
function githubRequest(method, path, body = null) {
  return new Promise((resolve, reject) => {
    const payload = body ? JSON.stringify(body) : null;
    const options = {
      hostname: 'api.github.com',
      path,
      method,
      headers: {
        'Authorization': `Bearer ${GITHUB_TOKEN}`,
        'Accept': 'application/vnd.github+json',
        'User-Agent': 'PageIndex-Backfill-Script/1.0',
        'X-GitHub-Api-Version': '2022-11-28',
        ...(payload ? { 'Content-Type': 'application/json', 'Content-Length': Buffer.byteLength(payload) } : {}),
      },
    };
    const req = https.request(options, (res) => {
      // Decode at the stream level so a multi-byte UTF-8 character that is
      // split across two chunks is not corrupted by per-chunk conversion.
      res.setEncoding('utf8');
      let data = '';
      res.on('data', chunk => (data += chunk));
      res.on('error', reject);
      res.on('end', () => {
        if (res.statusCode >= 400) {
          const err = new Error(`GitHub API ${method} ${path} → ${res.statusCode}: ${data}`);
          // Expose the status so callers can tell e.g. a 404 from a 403.
          err.status = res.statusCode;
          reject(err);
          return;
        }
        try {
          resolve(data ? JSON.parse(data) : {});
        } catch {
          // Best effort: a successful response with an unparseable body is
          // treated like an empty one.
          resolve({});
        }
      });
    });
    req.on('error', reject);
    if (payload) req.write(payload);
    req.end();
  });
}
/**
 * Calls the Anthropic Messages API and returns Claude's text response.
 *
 * The text of all `text` content blocks is concatenated in order.
 *
 * @param {string} prompt User prompt
 * @returns {Promise<string>}
 * @throws {Error} Rejects on network errors, on a response that is not valid
 *   JSON, on a response carrying an `error` object, and on any HTTP status
 *   >= 400.
 */
function callClaude(prompt) {
  return new Promise((resolve, reject) => {
    const body = JSON.stringify({
      model: 'claude-haiku-4-5',
      max_tokens: 1024,
      messages: [{ role: 'user', content: prompt }],
    });
    const options = {
      hostname: 'api.anthropic.com',
      path: '/v1/messages',
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        'Content-Length': Buffer.byteLength(body),
        'x-api-key': ANTHROPIC_API_KEY,
        'anthropic-version': '2023-06-01',
      },
    };
    const req = https.request(options, (res) => {
      // Decode at the stream level so multi-byte characters split across
      // chunks are not corrupted.
      res.setEncoding('utf8');
      let data = '';
      res.on('data', chunk => (data += chunk));
      res.on('error', reject);
      res.on('end', () => {
        let parsed;
        try {
          parsed = JSON.parse(data);
        } catch (err) {
          reject(new Error(`Failed to parse Anthropic response (HTTP ${res.statusCode}): ${err.message}`));
          return;
        }
        if (parsed && parsed.error) {
          reject(new Error(`Anthropic API error: ${parsed.error.message}`));
          return;
        }
        // An error status without an `error` object must not be mistaken
        // for an empty but successful answer.
        if (res.statusCode >= 400) {
          reject(new Error(`Anthropic API error: HTTP ${res.statusCode}`));
          return;
        }
        const blocks = (parsed && Array.isArray(parsed.content)) ? parsed.content : [];
        const text = blocks
          .filter(b => b.type === 'text')
          .map(b => b.text)
          .join('');
        resolve(text);
      });
    });
    req.on('error', reject);
    req.write(body);
    req.end();
  });
}
/** Returns a promise that resolves after `ms` milliseconds; used to pace API calls. */
const sleep = (ms) => new Promise((resolve) => {
  setTimeout(resolve, ms);
});
// ── Core logic ────────────────────────────────────────────────────────────────
/**
 * Fetches open issues created at or after `since`, paginating as needed.
 *
 * The `since` query parameter of the issues endpoint filters on the time of
 * the last update, so on its own it also returns old issues that were merely
 * touched recently. The creation time is therefore checked here as well.
 * Results are requested newest-created first, so paging stops at the first
 * entry created before the cutoff.
 *
 * @param {string} since Cutoff as an ISO 8601 timestamp
 * @returns {Promise<object[]>} Matching issues, pull requests excluded
 */
async function fetchIssuesSince(since) {
  const cutoffMs = Date.parse(since);
  const sinceParam = encodeURIComponent(since);
  const issues = [];
  let page = 1;
  while (true) {
    const data = await githubRequest(
      'GET',
      `/repos/${REPO_OWNER}/${REPO_NAME}/issues?state=open&sort=created&direction=desc&since=${sinceParam}&per_page=100&page=${page}`
    );
    if (!Array.isArray(data) || data.length === 0) break;

    let reachedOlder = false;
    for (const item of data) {
      // Everything after this entry was created even earlier.
      if (Date.parse(item.created_at) < cutoffMs) {
        reachedOlder = true;
        break;
      }
      // Filter out pull requests
      if (!item.pull_request) issues.push(item);
    }
    if (reachedOlder || data.length < 100) break;
    page++;
  }
  return issues;
}
/**
 * Searches for up to 10 candidate issues that the given issue may duplicate.
 *
 * Keywords are the first six title words that are longer than two characters
 * and not stop words. Only issues with a LOWER number, i.e. issues opened
 * earlier, are returned. Without that restriction two issues that duplicate
 * each other would each be flagged as a duplicate of the other during a
 * backfill, and both would end up labelled for auto-closing.
 *
 * @param {object} issue Issue object with at least `number` and `title`
 * @returns {Promise<object[]>} Candidate issues; empty when the title yields
 *   no keywords
 */
async function findCandidates(issue) {
  const keywords = (issue.title || '')
    .toLowerCase()
    .replace(/[^a-z0-9\s]/g, ' ')
    .split(/\s+/)
    .filter(w => w.length > 2 && !STOP_WORDS.has(w))
    .slice(0, 6)
    .join(' ');
  if (!keywords) return [];

  const q = encodeURIComponent(
    `repo:${REPO_OWNER}/${REPO_NAME} is:issue state:open ${keywords}`
  );
  const data = await githubRequest('GET', `/search/issues?q=${q}&per_page=15`);
  return (data.items || [])
    // `number < issue.number` also excludes the issue itself.
    .filter(item => !item.pull_request && item.number < issue.number)
    .slice(0, 10);
}
/**
 * Assembles the duplicate-detection prompt sent to Claude.
 *
 * The issue body is cut to 3000 characters ("(no body)" when absent) and each
 * candidate body to 500 characters; candidates are separated by "---" lines.
 *
 * @param {object} issue Issue under review (`number`, `title`, `body`)
 * @param {object[]} candidates Candidate issues (`number`, `title`, `html_url`, `body`)
 * @returns {string} The prompt text
 */
function buildPrompt(issue, candidates) {
  const renderCandidate = (c) => {
    const excerpt = (c.body || '').substring(0, 500);
    return `#${c.number}: ${c.title}\nURL: ${c.html_url}\n${excerpt}`;
  };
  const candidatesText = candidates.map(renderCandidate).join('\n---\n');
  const issueBody = (issue.body || '(no body)').substring(0, 3000);

  const lines = [
    'You are a GitHub issue triage assistant.',
    'Analyze whether the following open issue is a duplicate of any of the candidate issues listed below.',
    `== NEW ISSUE #${issue.number} ==`,
    `Title: ${issue.title}`,
    'Body:',
    issueBody,
    '== CANDIDATE ISSUES (up to 10) ==',
    candidatesText,
    'RULES:',
    '- Only flag as a duplicate if you are at least 85% confident.',
    '- A minor difference in wording does NOT make an issue non-duplicate if they describe the same underlying problem or feature request.',
    'Respond with ONLY a JSON object (no markdown, no other text):',
    '{',
    '"is_duplicate": true or false,',
    '"duplicate_issues": [array of integer issue numbers that this is a duplicate of, empty if none],',
    '"explanation": "one or two sentences explaining your reasoning"',
    '}',
  ];
  return lines.join('\n');
}
/**
 * Parses Claude's JSON response robustly.
 *
 * The first "{" through the last "}" of the text is parsed as JSON, so
 * surrounding prose or code fences are tolerated.
 *
 * - `is_duplicate` is true only for the JSON boolean `true`; truthy values
 *   such as the string "false" do not count.
 * - `duplicate_issues` keeps positive integers only, without repeats.
 *
 * @param {string} text Raw model reply
 * @returns {{is_duplicate: boolean, duplicate_issues: number[], explanation: string}|null}
 *   The normalized result, or null when no JSON object can be extracted
 */
function parseClaudeResponse(text) {
  if (typeof text !== 'string') return null;
  // Try to extract a JSON object from the response
  const jsonMatch = text.match(/\{[\s\S]*\}/);
  if (!jsonMatch) return null;
  try {
    const parsed = JSON.parse(jsonMatch[0]);
    const rawNumbers = Array.isArray(parsed.duplicate_issues) ? parsed.duplicate_issues : [];
    const numbers = rawNumbers
      .map(Number)
      .filter(n => Number.isInteger(n) && n > 0);
    return {
      is_duplicate: parsed.is_duplicate === true,
      duplicate_issues: [...new Set(numbers)],
      explanation: String(parsed.explanation || ''),
    };
  } catch {
    return null;
  }
}
/**
 * Comments on an issue to say which issue(s) it appears to duplicate.
 *
 * The comment ends with a machine-readable DEDUPE_RESULT HTML comment that
 * lists the duplicate issue numbers.
 *
 * @param {number} issueNumber Issue to comment on
 * @param {number[]} duplicateIssueNumbers Issues it is considered a duplicate of
 * @param {string} explanation Short reasoning included in the comment
 * @returns {Promise<void>}
 */
async function postDuplicateComment(issueNumber, duplicateIssueNumbers, explanation) {
  const bulletList = duplicateIssueNumbers.map((n) => `- #${n}`).join('\n');
  const marker = `<!-- DEDUPE_RESULT: {"is_duplicate":true,"issues":${JSON.stringify(duplicateIssueNumbers)}} -->`;
  const body = [
    `👋 Thank you for taking the time to open this issue!\n\n`,
    `After automated analysis, this issue appears to be a duplicate of:\n\n`,
    `${bulletList}\n\n`,
    `${explanation}\n\n`,
    `Please subscribe to the original issue(s) above to follow updates. `,
    `This issue will be automatically closed after a short inactivity period.\n\n`,
    marker,
  ].join('');

  const commentsPath = `/repos/${REPO_OWNER}/${REPO_NAME}/issues/${issueNumber}/comments`;
  await githubRequest('POST', commentsPath, { body });
}
/**
 * Applies the given labels to an issue, creating each label first when
 * looking it up fails.
 *
 * Any failure of the lookup (not only "not found") triggers the create call,
 * and a failing create call propagates to the caller.
 *
 * @param {number} issueNumber Issue to label
 * @param {string[]} labelNames Labels to apply
 * @returns {Promise<void>}
 */
async function ensureLabelAndApply(issueNumber, labelNames) {
  const repoPath = `/repos/${REPO_OWNER}/${REPO_NAME}`;
  // Color and description used when one of the known labels must be created.
  const knownLabels = {
    duplicate: { color: 'cfd3d7', description: 'This issue or pull request already exists' },
    autoclose: { color: 'e4e669', description: 'Will be auto-closed after a period of inactivity' },
  };
  const fallbackMeta = { color: 'ededed', description: '' };

  for (const name of labelNames) {
    let exists = true;
    try {
      await githubRequest('GET', `${repoPath}/labels/${encodeURIComponent(name)}`);
    } catch {
      exists = false;
    }
    if (!exists) {
      const meta = knownLabels[name] || fallbackMeta;
      await githubRequest('POST', `${repoPath}/labels`, { name, ...meta });
    }
  }

  await githubRequest('POST', `${repoPath}/issues/${issueNumber}/labels`, { labels: labelNames });
}
/**
 * Processes a single issue: finds candidates, asks Claude, and acts on the result.
 *
 * Issues already labelled "duplicate" and issues without candidates are
 * skipped. Issue numbers returned by Claude are accepted only when they
 * belong to the candidate list that was actually sent, so an invented number
 * (or the issue's own number) is never posted as a duplicate link. In
 * dry-run mode the decision is logged and nothing is written to GitHub.
 *
 * @param {object} issue Issue object as returned by the issues endpoint
 * @returns {Promise<void>}
 */
async function processIssue(issue) {
  const num = issue.number;
  console.log(`\nProcessing issue #${num}: ${issue.title}`);

  // Skip already-labelled issues
  const existingLabels = (issue.labels || []).map(l => l.name);
  if (existingLabels.includes('duplicate')) {
    console.log(` → Already labelled as duplicate, skipping.`);
    return;
  }

  const candidates = await findCandidates(issue);
  if (candidates.length === 0) {
    console.log(` → No candidates found, skipping.`);
    return;
  }
  console.log(` → Found ${candidates.length} candidate(s): ${candidates.map(c => `#${c.number}`).join(', ')}`);

  const prompt = buildPrompt(issue, candidates);
  const rawReply = await callClaude(prompt);
  const result = parseClaudeResponse(rawReply);
  if (!result) {
    console.warn(` ⚠️ Could not parse Claude response for #${num}. Raw:\n${rawReply.substring(0, 300)}`);
    return;
  }
  console.log(` → is_duplicate=${result.is_duplicate}, issues=${JSON.stringify(result.duplicate_issues)}`);
  console.log(` ${result.explanation}`);

  // Trust only numbers that refer to a candidate Claude was shown.
  const candidateNumbers = new Set(candidates.map(c => c.number));
  const confirmed = result.duplicate_issues.filter(n => candidateNumbers.has(n));
  const unknown = result.duplicate_issues.filter(n => !candidateNumbers.has(n));
  if (unknown.length > 0) {
    console.warn(` ⚠️ Ignoring issue number(s) not in the candidate list: ${JSON.stringify(unknown)}`);
  }

  if (!result.is_duplicate || confirmed.length === 0) {
    console.log(` → Not a duplicate.`);
    return;
  }
  if (DRY_RUN) {
    console.log(` [DRY RUN] Would post comment and apply labels to #${num}`);
    return;
  }
  await postDuplicateComment(num, confirmed, result.explanation);
  await ensureLabelAndApply(num, ['duplicate', 'autoclose']);
  console.log(` ✅ Commented and labelled #${num}`);
}
// ── Entry point ───────────────────────────────────────────────────────────────
/**
 * Entry point: runs duplicate detection over the open issues created within
 * the last DAYS_BACK days.
 *
 * Exits with status 1 when a required environment variable is missing or
 * DAYS_BACK is not a non-negative integer. A failure while processing one
 * issue is logged and the run continues with the next issue; the exit code
 * is set to non-zero at the end when any issue failed.
 */
async function main() {
  // Validate required env vars
  const missing = ['GITHUB_TOKEN', 'ANTHROPIC_API_KEY', 'REPO_OWNER', 'REPO_NAME']
    .filter(k => !process.env[k]);
  if (missing.length) {
    console.error(`Missing required environment variables: ${missing.join(', ')}`);
    process.exit(1);
  }
  // A NaN value would otherwise surface later as an opaque date error.
  if (!Number.isInteger(DAYS_BACK) || DAYS_BACK < 0) {
    console.error(`DAYS_BACK must be a non-negative integer, got "${process.env.DAYS_BACK}"`);
    process.exit(1);
  }

  const since = new Date(Date.now() - DAYS_BACK * 24 * 60 * 60 * 1000).toISOString();
  console.log(`Backfilling duplicate detection`);
  console.log(` Repository: ${REPO_OWNER}/${REPO_NAME}`);
  console.log(` Days back: ${DAYS_BACK} (since ${since})`);
  console.log(` Dry run: ${DRY_RUN}`);

  const issues = await fetchIssuesSince(since);
  console.log(`\nFetched ${issues.length} open issue(s) to process.`);

  let failedCount = 0;
  for (const issue of issues) {
    try {
      await processIssue(issue);
    } catch (err) {
      // One failing issue (API hiccup, rate limit) must not abort the run.
      failedCount++;
      console.error(` ❌ Failed to process #${issue.number}: ${err.message}`);
    }
    // Respect GitHub and Anthropic rate limits
    await sleep(2500);
  }

  console.log('\nBackfill complete.');
  if (failedCount > 0) {
    console.error(`${failedCount} issue(s) could not be processed.`);
    process.exitCode = 1;
  }
}
// Run the script; any rejection that escapes main() is logged and turned
// into exit status 1.
main().catch(err => {
console.error('Fatal error:', err.message);
process.exit(1);
});