From f56261cee1c8530d633b8535a4bf224946d2b019 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 2 Mar 2026 03:42:51 +0000 Subject: [PATCH 1/6] Initial plan From b3cb9531a4f57831f377df44f526931b72e6eb11 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 2 Mar 2026 03:54:18 +0000 Subject: [PATCH 2/6] Add GitHub Actions workflows for issue deduplication and auto-close Co-authored-by: BukeLy <19304666+BukeLy@users.noreply.github.com> --- .../workflows/autoclose-labeled-issues.yml | 45 +++ .github/workflows/backfill-dedupe.yml | 67 ++++ .github/workflows/issue-dedupe.yml | 235 +++++++++++ .github/workflows/remove-autoclose-label.yml | 48 +++ README.md | 65 +++ scripts/autoclose-labeled-issues.js | 183 +++++++++ scripts/backfill-dedupe.js | 370 ++++++++++++++++++ 7 files changed, 1013 insertions(+) create mode 100644 .github/workflows/autoclose-labeled-issues.yml create mode 100644 .github/workflows/backfill-dedupe.yml create mode 100644 .github/workflows/issue-dedupe.yml create mode 100644 .github/workflows/remove-autoclose-label.yml create mode 100644 scripts/autoclose-labeled-issues.js create mode 100644 scripts/backfill-dedupe.js diff --git a/.github/workflows/autoclose-labeled-issues.yml b/.github/workflows/autoclose-labeled-issues.yml new file mode 100644 index 0000000..158d8e7 --- /dev/null +++ b/.github/workflows/autoclose-labeled-issues.yml @@ -0,0 +1,45 @@ +# Closes open issues that carry the "autoclose" label and have been inactive +# for more than INACTIVITY_DAYS days. Runs on a daily schedule and can also +# be triggered manually. 
+name: Auto-close Inactive Labeled Issues + +on: + schedule: + # Runs every day at 01:00 UTC + - cron: '0 1 * * *' + workflow_dispatch: + inputs: + inactivity_days: + description: 'Days of inactivity before closing (default: 7)' + required: false + default: '7' + type: number + dry_run: + description: 'Dry run – report but do not actually close issues' + required: false + default: 'false' + type: choice + options: + - 'false' + - 'true' + +permissions: + issues: write + contents: read + +jobs: + autoclose: + runs-on: ubuntu-latest + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Close inactive autoclose-labeled issues + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + REPO_OWNER: ${{ github.repository_owner }} + REPO_NAME: ${{ github.event.repository.name }} + # workflow_dispatch overrides the default; schedule uses the default (7) + INACTIVITY_DAYS: ${{ inputs.inactivity_days || '7' }} + DRY_RUN: ${{ inputs.dry_run || 'false' }} + run: node scripts/autoclose-labeled-issues.js diff --git a/.github/workflows/backfill-dedupe.yml b/.github/workflows/backfill-dedupe.yml new file mode 100644 index 0000000..5a85f91 --- /dev/null +++ b/.github/workflows/backfill-dedupe.yml @@ -0,0 +1,67 @@ +# Backfills duplicate detection for historical issues. +# Triggered manually via workflow_dispatch. 
+name: Backfill Duplicate Detection + +on: + workflow_dispatch: + inputs: + days_back: + description: 'How many days back to look for issues (default: 30)' + required: false + default: '30' + type: number + dry_run: + description: 'Dry run – analyze but do not post comments or apply labels' + required: false + default: 'false' + type: choice + options: + - 'false' + - 'true' + +permissions: + issues: write + contents: read + +jobs: + backfill: + runs-on: ubuntu-latest + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Ensure required labels exist + uses: actions/github-script@v7 + with: + script: | + const labels = [ + { name: 'duplicate', color: 'cfd3d7', description: 'This issue or pull request already exists' }, + { name: 'autoclose', color: 'e4e669', description: 'Will be auto-closed after a period of inactivity' }, + ]; + for (const label of labels) { + try { + await github.rest.issues.getLabel({ + owner: context.repo.owner, + repo: context.repo.repo, + name: label.name, + }); + } catch (err) { + if (err.status === 404) { + await github.rest.issues.createLabel({ + owner: context.repo.owner, repo: context.repo.repo, + name: label.name, color: label.color, description: label.description, + }); + core.info(`Created label: ${label.name}`); + } + } + } + + - name: Run backfill script + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + ANTHROPIC_API_KEY: ${{ secrets.AUTHROPIC_API_KEY }} + REPO_OWNER: ${{ github.repository_owner }} + REPO_NAME: ${{ github.event.repository.name }} + DAYS_BACK: ${{ inputs.days_back }} + DRY_RUN: ${{ inputs.dry_run }} + run: node scripts/backfill-dedupe.js diff --git a/.github/workflows/issue-dedupe.yml b/.github/workflows/issue-dedupe.yml new file mode 100644 index 0000000..58d627d --- /dev/null +++ b/.github/workflows/issue-dedupe.yml @@ -0,0 +1,235 @@ +# Detects duplicate issues using Claude Code. +# Triggered automatically when a new issue is opened, or manually for a single issue. 
+name: Issue Duplicate Detection + +on: + issues: + types: [opened] + workflow_dispatch: + inputs: + issue_number: + description: 'Issue number to check for duplicates' + required: true + type: number + +permissions: + issues: write + contents: read + +jobs: + detect-duplicate: + runs-on: ubuntu-latest + # Skip pull-requests that surface as issues and bot-opened issues + if: > + (github.event_name == 'workflow_dispatch') || + (github.event.issue.pull_request == null && + !endsWith(github.actor, '[bot]') && + github.actor != 'github-actions') + steps: + # ── 1. Ensure required labels exist ───────────────────────────────────── + - name: Ensure labels exist + uses: actions/github-script@v7 + with: + script: | + const labels = [ + { name: 'duplicate', color: 'cfd3d7', description: 'This issue or pull request already exists' }, + { name: 'autoclose', color: 'e4e669', description: 'Will be auto-closed after a period of inactivity' }, + ]; + for (const label of labels) { + try { + await github.rest.issues.getLabel({ + owner: context.repo.owner, + repo: context.repo.repo, + name: label.name, + }); + } catch (err) { + if (err.status === 404) { + await github.rest.issues.createLabel({ + owner: context.repo.owner, + repo: context.repo.repo, + name: label.name, + color: label.color, + description: label.description, + }); + core.info(`Created label: ${label.name}`); + } + } + } + + # ── 2. Gather issue data and find candidate duplicates ────────────────── + - name: Gather issue data and candidates + id: data + uses: actions/github-script@v7 + with: + script: | + const issueNumber = + context.eventName === 'issues' + ? 
context.payload.issue.number + : parseInt(core.getInput('issue_number') || '${{ inputs.issue_number }}'); + + const { data: issue } = await github.rest.issues.get({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: issueNumber, + }); + + // Skip already-closed or already-labelled issues + if (issue.state === 'closed') { + core.setOutput('skip', 'true'); + core.info('Issue is already closed – skipping.'); + return; + } + if (issue.labels.some(l => l.name === 'duplicate')) { + core.setOutput('skip', 'true'); + core.info('Issue already has "duplicate" label – skipping.'); + return; + } + + // Extract meaningful keywords from the title + const stopWords = new Set([ + 'a','an','the','is','in','on','at','to','for','of','and','or','but','not', + 'with','this','that','it','be','are','was','has','have','does','do','how', + 'why','when','where','what','which','who','will','can','could','should', + 'would','may','might','must','get','got','use','using','used','error', + 'issue','bug','feature','request','problem','question','please','just', + 'after','before','during','about','from','into','also','then','than', + ]); + const keywords = issue.title + .toLowerCase() + .replace(/[^a-z0-9\s]/g, ' ') + .split(/\s+/) + .filter(w => w.length > 2 && !stopWords.has(w)) + .slice(0, 6) + .join(' '); + + let candidates = []; + if (keywords) { + try { + const q = `repo:${context.repo.owner}/${context.repo.repo} is:issue state:open ${keywords}`; + const { data: results } = await github.rest.search.issuesAndPullRequests({ + q, + per_page: 15, + }); + candidates = results.items + .filter(item => item.number !== issueNumber && !item.pull_request) + .slice(0, 10); + } catch (err) { + core.warning('GitHub search failed: ' + err.message); + } + } + + if (candidates.length === 0) { + core.setOutput('skip', 'true'); + core.info('No candidate issues found – skipping Claude analysis.'); + return; + } + + core.setOutput('skip', 'false'); + core.setOutput('issue_number', 
String(issueNumber)); + core.setOutput('issue_title', issue.title); + core.setOutput('issue_body', (issue.body || '').substring(0, 3000)); + core.setOutput('candidates', + JSON.stringify(candidates.map(c => ({ + number: c.number, + title: c.title, + url: c.html_url, + body: (c.body || '').substring(0, 500), + }))) + ); + + # ── 3. Write data files (avoids YAML-injection of arbitrary text) ─────── + - name: Write issue data to files + if: steps.data.outputs.skip == 'false' + env: + ISSUE_TITLE: ${{ steps.data.outputs.issue_title }} + ISSUE_BODY: ${{ steps.data.outputs.issue_body }} + CANDIDATES: ${{ steps.data.outputs.candidates }} + run: | + printf '%s' "$ISSUE_TITLE" > /tmp/issue-title.txt + printf '%s' "$ISSUE_BODY" > /tmp/issue-body.txt + printf '%s' "$CANDIDATES" > /tmp/issue-candidates.json + + # ── 4. Ask Claude to decide whether this is a duplicate ───────────────── + - name: Run Claude duplicate analysis + if: steps.data.outputs.skip == 'false' + uses: anthropics/claude-code-action@v1 + with: + anthropic_api_key: ${{ secrets.AUTHROPIC_API_KEY }} + github_token: ${{ secrets.GITHUB_TOKEN }} + track_progress: 'false' + prompt: | + You are a GitHub issue triage assistant. + + Analyze whether issue #${{ steps.data.outputs.issue_number }} in this repository + is a duplicate of any existing open issues. + + The issue data is stored in temporary files on this runner: + - /tmp/issue-title.txt — title of the new issue + - /tmp/issue-body.txt — body of the new issue + - /tmp/issue-candidates.json — JSON array of up to 10 candidate issues + (each has: number, title, url, body) + + Read those files first, then follow these rules: + + 1. Compare the new issue against every candidate. + Focus on whether they describe the *same underlying problem or request*. + 2. Only flag as a duplicate if you are at least 85 % confident. + Superficial wording differences do NOT make an issue non-duplicate. + 3. IF the new issue IS a duplicate: + a. 
Post a friendly, helpful comment on issue #${{ steps.data.outputs.issue_number }}. + The comment must: + - Thank the reporter + - Explain which existing issue(s) it duplicates and why (include markdown links) + - Invite them to subscribe to the original for updates + b. The LAST line of the comment must be exactly (fill in real numbers): + + Example: + 4. IF the issue is NOT a duplicate, or you are unsure: + - Do NOT post any comment. + - Do NOT take any other action. + + # ── 5. Parse Claude's comment and apply labels ────────────────────────── + - name: Apply labels if duplicate found + if: steps.data.outputs.skip == 'false' + uses: actions/github-script@v7 + with: + script: | + const issueNumber = parseInt('${{ steps.data.outputs.issue_number }}'); + + // Allow a moment for the comment to land + await new Promise(r => setTimeout(r, 5000)); + + const { data: comments } = await github.rest.issues.listComments({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: issueNumber, + per_page: 50, + }); + + const pattern = //; + let isDuplicate = false; + + for (const comment of [...comments].reverse()) { + const m = comment.body.match(pattern); + if (m) { + try { + const result = JSON.parse(m[1]); + isDuplicate = result.is_duplicate === true; + } catch (err) { + core.warning('Failed to parse DEDUPE_RESULT JSON: ' + err.message); + } + break; + } + } + + if (isDuplicate) { + await github.rest.issues.addLabels({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: issueNumber, + labels: ['duplicate', 'autoclose'], + }); + core.info(`✅ Applied "duplicate" and "autoclose" labels to #${issueNumber}`); + } else { + core.info(`ℹ️ Issue #${issueNumber} is not a duplicate – no labels applied.`); + } diff --git a/.github/workflows/remove-autoclose-label.yml b/.github/workflows/remove-autoclose-label.yml new file mode 100644 index 0000000..38fc8ee --- /dev/null +++ b/.github/workflows/remove-autoclose-label.yml @@ -0,0 +1,48 @@ +# Removes the 
"autoclose" label whenever a human (non-bot) posts a new comment +# on an issue that carries the label. This resets the inactivity clock. +name: Remove Autoclose Label on Human Activity + +on: + issue_comment: + types: [created] + +permissions: + issues: write + +jobs: + remove-autoclose: + # Only run for issue comments (not PR comments) + if: ${{ github.event.issue.pull_request == null }} + runs-on: ubuntu-latest + steps: + - name: Remove autoclose label if human commented + uses: actions/github-script@v7 + with: + script: | + const actor = context.actor; + + // Ignore bot accounts + if (actor.endsWith('[bot]') || actor === 'github-actions') { + core.info(`Skipping bot comment from ${actor}`); + return; + } + + const issue = context.payload.issue; + const labels = (issue.labels || []).map(l => l.name); + + if (!labels.includes('autoclose')) { + core.info('Issue does not have "autoclose" label – nothing to do.'); + return; + } + + await github.rest.issues.removeLabel({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: issue.number, + name: 'autoclose', + }); + + core.info( + `Removed "autoclose" label from #${issue.number} ` + + `after human activity by ${actor}` + ); diff --git a/README.md b/README.md index 7180efd..131cded 100644 --- a/README.md +++ b/README.md @@ -267,4 +267,69 @@ Leave us a star 🌟 if you like our project. Thank you! --- +## 🤖 GitHub Automation + +This repository uses automated GitHub Actions workflows to keep the issue tracker tidy. 
+ +### Overview + +| Workflow | Trigger | Purpose | +|---|---|---| +| `issue-dedupe.yml` | Issue opened · `workflow_dispatch` | Detects duplicate issues using Claude and labels them | +| `backfill-dedupe.yml` | `workflow_dispatch` | Runs duplicate detection over historical issues | +| `autoclose-labeled-issues.yml` | Daily schedule · `workflow_dispatch` | Closes issues labelled `autoclose` after N days of inactivity | +| `remove-autoclose-label.yml` | Issue comment created | Removes the `autoclose` label when a human posts a new comment | + +### Required Secrets + +Add the following secret to the repository (**Settings → Secrets and variables → Actions**): + +| Secret | Description | +|---|---| +| `AUTHROPIC_API_KEY` | Your Anthropic API key (used by `anthropics/claude-code-action`) | + +`GITHUB_TOKEN` is provided automatically by GitHub Actions and does not need to be added manually. + +### Labels + +The workflows create the following labels automatically if they do not exist: + +| Label | Description | +|---|---| +| `duplicate` | Marks issues identified as duplicates | +| `autoclose` | Marks issues that will be automatically closed after inactivity | + +### Running the Backfill + +To scan historical issues for duplicates, trigger the **Backfill Duplicate Detection** workflow manually from the **Actions** tab: + +- **`days_back`** (default `30`) — how many days into the past to scan +- **`dry_run`** (default `false`) — set to `true` to preview results without modifying issues + +``` +Actions → Backfill Duplicate Detection → Run workflow +``` + +### Changing the Inactivity Threshold + +The default inactivity period before an `autoclose`-labelled issue is closed is **7 days**. + +To change it for a one-off run, trigger **Auto-close Inactive Labeled Issues** with the `inactivity_days` input. 
+ +To change the default permanently, edit the `INACTIVITY_DAYS` env variable default in `.github/workflows/autoclose-labeled-issues.yml`: + +```yaml +INACTIVITY_DAYS: ${{ inputs.inactivity_days || '7' }} # ← change '7' here +``` + +### How Duplicate Detection Works + +1. When a new issue is opened, keywords from the title are used to search for the top 10 most relevant existing open issues via the GitHub Search API. +2. The issue title, body, and candidate list are passed to **Claude** (`anthropics/claude-code-action`) with a structured prompt. +3. Claude posts a comment on the issue (if it is highly confident it is a duplicate), including links to the original issue(s) and a brief explanation. +4. A follow-up step reads the comment, extracts the machine-readable result, and applies the `duplicate` and `autoclose` labels. +5. If Claude is not confident, no comment or labels are applied. + +--- + © 2025 [Vectify AI](https://vectify.ai) diff --git a/scripts/autoclose-labeled-issues.js b/scripts/autoclose-labeled-issues.js new file mode 100644 index 0000000..8e85da8 --- /dev/null +++ b/scripts/autoclose-labeled-issues.js @@ -0,0 +1,183 @@ +/** + * scripts/autoclose-labeled-issues.js + * + * Closes open issues that carry the "autoclose" label and have been inactive + * (no updates) for more than INACTIVITY_DAYS days. + * + * Required environment variables: + * GITHUB_TOKEN – GitHub Actions token (or PAT with repo:issues write access) + * REPO_OWNER – Repository owner (e.g. VectifyAI) + * REPO_NAME – Repository name (e.g. 
PageIndex) + * + * Optional environment variables: + * INACTIVITY_DAYS – Days of inactivity before closing (default: 7) + * DRY_RUN – If "true", report but do not close issues (default: false) + */ + +'use strict'; + +const https = require('https'); + +// ── Configuration ───────────────────────────────────────────────────────────── + +const GITHUB_TOKEN = process.env.GITHUB_TOKEN; +const REPO_OWNER = process.env.REPO_OWNER; +const REPO_NAME = process.env.REPO_NAME; +const INACTIVITY_DAYS = parseInt(process.env.INACTIVITY_DAYS || '7', 10); +const DRY_RUN = process.env.DRY_RUN === 'true'; + +// ── HTTP helper ─────────────────────────────────────────────────────────────── + +function githubRequest(method, path, body = null) { + return new Promise((resolve, reject) => { + const payload = body ? JSON.stringify(body) : null; + const options = { + hostname: 'api.github.com', + path, + method, + headers: { + 'Authorization': `Bearer ${GITHUB_TOKEN}`, + 'Accept': 'application/vnd.github+json', + 'User-Agent': 'PageIndex-Autoclose-Script/1.0', + 'X-GitHub-Api-Version': '2022-11-28', + ...(payload ? { + 'Content-Type': 'application/json', + 'Content-Length': Buffer.byteLength(payload), + } : {}), + }, + }; + + const req = https.request(options, (res) => { + let data = ''; + res.on('data', chunk => (data += chunk)); + res.on('end', () => { + if (res.statusCode >= 400) { + reject(new Error(`GitHub API ${method} ${path} → ${res.statusCode}: ${data}`)); + return; + } + try { + resolve(data ? JSON.parse(data) : {}); + } catch { + resolve({}); + } + }); + }); + req.on('error', reject); + if (payload) req.write(payload); + req.end(); + }); +} + +/** Simple sleep helper for rate-limiting. */ +const sleep = (ms) => new Promise(r => setTimeout(r, ms)); + +// ── Core logic ──────────────────────────────────────────────────────────────── + +/** + * Fetches all open issues with the "autoclose" label, paginating as needed. 
+ */ +async function fetchAutocloseIssues() { + const issues = []; + let page = 1; + while (true) { + const data = await githubRequest( + 'GET', + `/repos/${REPO_OWNER}/${REPO_NAME}/issues?state=open&labels=autoclose&per_page=100&page=${page}` + ); + if (!Array.isArray(data) || data.length === 0) break; + // Filter out any pull requests that may surface + issues.push(...data.filter(i => !i.pull_request)); + if (data.length < 100) break; + page++; + } + return issues; +} + +/** + * Closes a single issue with a polite explanatory comment. + */ +async function closeIssue(issueNumber, inactivityDays) { + const body = + `This issue has been automatically closed because it was marked as a **duplicate** ` + + `and has had no new activity for ${inactivityDays} day(s).\n\n` + + `If you believe this was closed in error, please reopen the issue and leave a comment. ` + + `New human activity will prevent automatic closure in the future.\n\n` + + `Thank you for your contribution! 🙏`; + + // Post closing comment first + await githubRequest( + 'POST', + `/repos/${REPO_OWNER}/${REPO_NAME}/issues/${issueNumber}/comments`, + { body } + ); + + // Close the issue + await githubRequest( + 'PATCH', + `/repos/${REPO_OWNER}/${REPO_NAME}/issues/${issueNumber}`, + { state: 'closed', state_reason: 'not_planned' } + ); +} + +// ── Entry point ─────────────────────────────────────────────────────────────── + +async function main() { + // Validate required env vars + const missing = ['GITHUB_TOKEN', 'REPO_OWNER', 'REPO_NAME'] + .filter(k => !process.env[k]); + if (missing.length) { + console.error(`Missing required environment variables: ${missing.join(', ')}`); + process.exit(1); + } + + const cutoff = new Date(Date.now() - INACTIVITY_DAYS * 24 * 60 * 60 * 1000); + + console.log(`Auto-close inactive labelled issues`); + console.log(` Repository: ${REPO_OWNER}/${REPO_NAME}`); + console.log(` Inactivity days: ${INACTIVITY_DAYS} (cutoff: ${cutoff.toISOString()})`); + console.log(` Dry run: 
${DRY_RUN}`); + + const issues = await fetchAutocloseIssues(); + console.log(`\nFound ${issues.length} open issue(s) with "autoclose" label.`); + + let closedCount = 0; + let skippedCount = 0; + + for (const issue of issues) { + const lastActivity = new Date(issue.updated_at); + const inactive = lastActivity < cutoff; + const daysSince = Math.floor((Date.now() - lastActivity.getTime()) / (1000 * 60 * 60 * 24)); + + if (!inactive) { + console.log(` #${issue.number} — active ${daysSince}d ago, skipping.`); + skippedCount++; + continue; + } + + console.log(` #${issue.number} — inactive for ${daysSince}d: "${issue.title}"`); + + if (DRY_RUN) { + console.log(` [DRY RUN] Would close issue #${issue.number}`); + closedCount++; + continue; + } + + try { + await closeIssue(issue.number, INACTIVITY_DAYS); + console.log(` ✅ Closed issue #${issue.number}`); + closedCount++; + } catch (err) { + console.error(` ❌ Failed to close #${issue.number}: ${err.message}`); + } + + // Respect GitHub's secondary rate limit + await sleep(1000); + } + + console.log(`\nSummary: ${closedCount} closed, ${skippedCount} still active.`); +} + +main().catch(err => { + console.error('Fatal error:', err.message); + process.exit(1); +}); diff --git a/scripts/backfill-dedupe.js b/scripts/backfill-dedupe.js new file mode 100644 index 0000000..ade1039 --- /dev/null +++ b/scripts/backfill-dedupe.js @@ -0,0 +1,370 @@ +/** + * scripts/backfill-dedupe.js + * + * Backfills duplicate detection for historical issues. + * Fetches issues created within the last DAYS_BACK days, searches for + * candidate duplicates via the GitHub Search API, and asks the Anthropic + * API to determine whether each issue is a duplicate. + * + * Required environment variables: + * GITHUB_TOKEN – GitHub Actions token (or PAT with repo access) + * ANTHROPIC_API_KEY – Anthropic API key (mapped from AUTHROPIC_API_KEY secret) + * REPO_OWNER – Repository owner (e.g. VectifyAI) + * REPO_NAME – Repository name (e.g. 
PageIndex) + * + * Optional environment variables: + * DAYS_BACK – How many days back to process (default: 30) + * DRY_RUN – If "true", analyse but do not write to GitHub (default: false) + */ + +'use strict'; + +const https = require('https'); + +// ── Configuration ───────────────────────────────────────────────────────────── + +const GITHUB_TOKEN = process.env.GITHUB_TOKEN; +const ANTHROPIC_API_KEY = process.env.ANTHROPIC_API_KEY; +const REPO_OWNER = process.env.REPO_OWNER; +const REPO_NAME = process.env.REPO_NAME; +const DAYS_BACK = parseInt(process.env.DAYS_BACK || '30', 10); +const DRY_RUN = process.env.DRY_RUN === 'true'; + +const STOP_WORDS = new Set([ + 'a','an','the','is','in','on','at','to','for','of','and','or','but','not', + 'with','this','that','it','be','are','was','has','have','does','do','how', + 'why','when','where','what','which','who','will','can','could','should', + 'would','may','might','must','get','got','use','using','used','error', + 'issue','bug','feature','request','problem','question','please','just', + 'after','before','during','about','from','into','also','then','than', +]); + +// ── HTTP helpers ────────────────────────────────────────────────────────────── + +/** + * Makes an authenticated GitHub REST API request. + * @param {string} method HTTP method + * @param {string} path API path (e.g. '/repos/owner/repo/issues') + * @param {object|null} body Request body (will be JSON-encoded) + * @returns {Promise} + */ +function githubRequest(method, path, body = null) { + return new Promise((resolve, reject) => { + const payload = body ? JSON.stringify(body) : null; + const options = { + hostname: 'api.github.com', + path, + method, + headers: { + 'Authorization': `Bearer ${GITHUB_TOKEN}`, + 'Accept': 'application/vnd.github+json', + 'User-Agent': 'PageIndex-Backfill-Script/1.0', + 'X-GitHub-Api-Version': '2022-11-28', + ...(payload ? 
{ 'Content-Type': 'application/json', 'Content-Length': Buffer.byteLength(payload) } : {}), + }, + }; + + const req = https.request(options, (res) => { + let data = ''; + res.on('data', chunk => (data += chunk)); + res.on('end', () => { + if (res.statusCode >= 400) { + reject(new Error(`GitHub API ${method} ${path} → ${res.statusCode}: ${data}`)); + return; + } + try { + resolve(data ? JSON.parse(data) : {}); + } catch { + resolve({}); + } + }); + }); + req.on('error', reject); + if (payload) req.write(payload); + req.end(); + }); +} + +/** + * Calls the Anthropic Messages API and returns Claude's text response. + * @param {string} prompt User prompt + * @returns {Promise} + */ +function callClaude(prompt) { + return new Promise((resolve, reject) => { + const body = JSON.stringify({ + model: 'claude-haiku-4-5', + max_tokens: 1024, + messages: [{ role: 'user', content: prompt }], + }); + + const options = { + hostname: 'api.anthropic.com', + path: '/v1/messages', + method: 'POST', + headers: { + 'Content-Type': 'application/json', + 'Content-Length': Buffer.byteLength(body), + 'x-api-key': ANTHROPIC_API_KEY, + 'anthropic-version': '2023-06-01', + }, + }; + + const req = https.request(options, (res) => { + let data = ''; + res.on('data', chunk => (data += chunk)); + res.on('end', () => { + try { + const parsed = JSON.parse(data); + if (parsed.error) { + reject(new Error(`Anthropic API error: ${parsed.error.message}`)); + return; + } + const text = (parsed.content || []) + .filter(b => b.type === 'text') + .map(b => b.text) + .join(''); + resolve(text); + } catch (err) { + reject(new Error(`Failed to parse Anthropic response: ${err.message}`)); + } + }); + }); + req.on('error', reject); + req.write(body); + req.end(); + }); +} + +/** Simple sleep helper for rate-limiting. 
*/ +const sleep = (ms) => new Promise(r => setTimeout(r, ms)); + +// ── Core logic ──────────────────────────────────────────────────────────────── + +/** + * Fetches open issues created since `since` (ISO 8601 string), paginating as needed. + */ +async function fetchIssuesSince(since) { + const issues = []; + let page = 1; + while (true) { + const data = await githubRequest( + 'GET', + `/repos/${REPO_OWNER}/${REPO_NAME}/issues?state=open&sort=created&direction=desc&since=${since}&per_page=100&page=${page}` + ); + if (!Array.isArray(data) || data.length === 0) break; + // Filter out pull requests + issues.push(...data.filter(i => !i.pull_request)); + if (data.length < 100) break; + page++; + } + return issues; +} + +/** + * Searches for up to 10 candidate duplicate issues for the given issue. + */ +async function findCandidates(issue) { + const keywords = (issue.title || '') + .toLowerCase() + .replace(/[^a-z0-9\s]/g, ' ') + .split(/\s+/) + .filter(w => w.length > 2 && !STOP_WORDS.has(w)) + .slice(0, 6) + .join(' '); + + if (!keywords) return []; + + const q = encodeURIComponent( + `repo:${REPO_OWNER}/${REPO_NAME} is:issue state:open ${keywords}` + ); + + const data = await githubRequest('GET', `/search/issues?q=${q}&per_page=15`); + return (data.items || []) + .filter(item => item.number !== issue.number && !item.pull_request) + .slice(0, 10); +} + +/** + * Builds the duplicate-detection prompt for Claude. + */ +function buildPrompt(issue, candidates) { + const candidatesText = candidates + .map(c => `#${c.number}: ${c.title}\nURL: ${c.html_url}\n${(c.body || '').substring(0, 500)}`) + .join('\n---\n'); + + return `You are a GitHub issue triage assistant. + +Analyze whether the following open issue is a duplicate of any of the candidate issues listed below. 
+ +== NEW ISSUE #${issue.number} == +Title: ${issue.title} +Body: +${(issue.body || '(no body)').substring(0, 3000)} + +== CANDIDATE ISSUES (up to 10) == +${candidatesText} + +RULES: +- Only flag as a duplicate if you are at least 85% confident. +- A minor difference in wording does NOT make an issue non-duplicate if they describe the same underlying problem or feature request. + +Respond with ONLY a JSON object (no markdown, no other text): +{ + "is_duplicate": true or false, + "duplicate_issues": [array of integer issue numbers that this is a duplicate of, empty if none], + "explanation": "one or two sentences explaining your reasoning" +}`; +} + +/** + * Parses Claude's JSON response robustly. + * Returns { is_duplicate, duplicate_issues, explanation } or null on failure. + */ +function parseClaudeResponse(text) { + // Try to extract a JSON object from the response + const jsonMatch = text.match(/\{[\s\S]*\}/); + if (!jsonMatch) return null; + try { + const parsed = JSON.parse(jsonMatch[0]); + return { + is_duplicate: Boolean(parsed.is_duplicate), + duplicate_issues: Array.isArray(parsed.duplicate_issues) ? parsed.duplicate_issues.map(Number) : [], + explanation: String(parsed.explanation || ''), + }; + } catch { + return null; + } +} + +/** + * Posts a duplicate-found comment on the issue. + */ +async function postDuplicateComment(issueNumber, duplicateIssueNumbers, explanation) { + const links = duplicateIssueNumbers + .map(n => `- #${n}`) + .join('\n'); + + const body = + `👋 Thank you for taking the time to open this issue!\n\n` + + `After automated analysis, this issue appears to be a duplicate of:\n\n` + + `${links}\n\n` + + `${explanation}\n\n` + + `Please subscribe to the original issue(s) above to follow updates. 
` + + `This issue will be automatically closed after a short inactivity period.\n\n` + + ``; + + await githubRequest( + 'POST', + `/repos/${REPO_OWNER}/${REPO_NAME}/issues/${issueNumber}/comments`, + { body } + ); +} + +/** + * Adds labels to an issue, creating them if they do not exist. + */ +async function ensureLabelAndApply(issueNumber, labelNames) { + const knownLabels = { + duplicate: { color: 'cfd3d7', description: 'This issue or pull request already exists' }, + autoclose: { color: 'e4e669', description: 'Will be auto-closed after a period of inactivity' }, + }; + + for (const name of labelNames) { + try { + await githubRequest('GET', `/repos/${REPO_OWNER}/${REPO_NAME}/labels/${encodeURIComponent(name)}`); + } catch { + const meta = knownLabels[name] || { color: 'ededed', description: '' }; + await githubRequest('POST', `/repos/${REPO_OWNER}/${REPO_NAME}/labels`, { name, ...meta }); + } + } + + await githubRequest( + 'POST', + `/repos/${REPO_OWNER}/${REPO_NAME}/issues/${issueNumber}/labels`, + { labels: labelNames } + ); +} + +/** + * Processes a single issue: finds candidates, asks Claude, and acts on the result. + */ +async function processIssue(issue) { + const num = issue.number; + console.log(`\nProcessing issue #${num}: ${issue.title}`); + + // Skip already-labelled issues + const existingLabels = (issue.labels || []).map(l => l.name); + if (existingLabels.includes('duplicate')) { + console.log(` → Already labelled as duplicate, skipping.`); + return; + } + + const candidates = await findCandidates(issue); + if (candidates.length === 0) { + console.log(` → No candidates found, skipping.`); + return; + } + console.log(` → Found ${candidates.length} candidate(s): ${candidates.map(c => `#${c.number}`).join(', ')}`); + + const prompt = buildPrompt(issue, candidates); + const rawReply = await callClaude(prompt); + const result = parseClaudeResponse(rawReply); + + if (!result) { + console.warn(` ⚠️ Could not parse Claude response for #${num}. 
Raw:\n${rawReply.substring(0, 300)}`); + return; + } + + console.log(` → is_duplicate=${result.is_duplicate}, issues=${JSON.stringify(result.duplicate_issues)}`); + console.log(` ${result.explanation}`); + + if (!result.is_duplicate || result.duplicate_issues.length === 0) { + console.log(` → Not a duplicate.`); + return; + } + + if (DRY_RUN) { + console.log(` [DRY RUN] Would post comment and apply labels to #${num}`); + return; + } + + await postDuplicateComment(num, result.duplicate_issues, result.explanation); + await ensureLabelAndApply(num, ['duplicate', 'autoclose']); + console.log(` ✅ Commented and labelled #${num}`); +} + +// ── Entry point ─────────────────────────────────────────────────────────────── + +async function main() { + // Validate required env vars + const missing = ['GITHUB_TOKEN', 'ANTHROPIC_API_KEY', 'REPO_OWNER', 'REPO_NAME'] + .filter(k => !process.env[k]); + if (missing.length) { + console.error(`Missing required environment variables: ${missing.join(', ')}`); + process.exit(1); + } + + const since = new Date(Date.now() - DAYS_BACK * 24 * 60 * 60 * 1000).toISOString(); + + console.log(`Backfilling duplicate detection`); + console.log(` Repository: ${REPO_OWNER}/${REPO_NAME}`); + console.log(` Days back: ${DAYS_BACK} (since ${since})`); + console.log(` Dry run: ${DRY_RUN}`); + + const issues = await fetchIssuesSince(since); + console.log(`\nFetched ${issues.length} open issue(s) to process.`); + + for (const issue of issues) { + await processIssue(issue); + // Respect GitHub and Anthropic rate limits + await sleep(2500); + } + + console.log('\nBackfill complete.'); +} + +main().catch(err => { + console.error('Fatal error:', err.message); + process.exit(1); +}); From fd9330c4349a6ed5a3b0fb8229a13755808118db Mon Sep 17 00:00:00 2001 From: BukeLy Date: Mon, 2 Mar 2026 17:05:44 +0800 Subject: [PATCH 3/6] Refactor issue dedup system to use claude-code-action with /dedupe command Replace the copilot-generated inline search logic with a 
claude-code-action based architecture inspired by anthropic/claude-code's approach: - Add .claude/commands/dedupe.md with 5-parallel-search strategy - Add scripts/comment-on-duplicates.sh with 3-day grace period warning - Rewrite issue-dedupe.yml to use claude-code-action + /dedupe command - Rewrite autoclose script to check bot comments, human activity, and thumbsdown - Rewrite backfill to trigger dedupe workflow per issue with rate limiting - Add concurrency control, timeout, input validation, and rate limit retry - Remove gh.sh (unnecessary), backfill-dedupe.js (replaced by workflow trigger) --- .claude/commands/dedupe.md | 69 ++++ .../workflows/autoclose-labeled-issues.yml | 30 +- .github/workflows/backfill-dedupe.yml | 89 ++--- .github/workflows/issue-dedupe.yml | 229 ++--------- .github/workflows/remove-autoclose-label.yml | 41 +- scripts/autoclose-labeled-issues.js | 231 ++++++----- scripts/backfill-dedupe.js | 370 ------------------ scripts/comment-on-duplicates.sh | 106 +++++ 8 files changed, 413 insertions(+), 752 deletions(-) create mode 100644 .claude/commands/dedupe.md delete mode 100644 scripts/backfill-dedupe.js create mode 100755 scripts/comment-on-duplicates.sh diff --git a/.claude/commands/dedupe.md b/.claude/commands/dedupe.md new file mode 100644 index 0000000..d649bb1 --- /dev/null +++ b/.claude/commands/dedupe.md @@ -0,0 +1,69 @@ +--- +allowed-tools: + - Bash(gh:*) + - Bash(./scripts/comment-on-duplicates.sh:*) +--- + +You are a GitHub issue deduplication assistant. Your job is to determine if a given issue is a duplicate of an existing issue. + +## Input + +The issue to check: $ARGUMENTS + +## Steps + +### 1. Pre-checks + +First, check if the issue should be skipped: + +``` +gh issue view --json state,labels,title,body,comments +``` + +Skip if: +- The issue is already closed +- The issue already has a `duplicate` label +- The issue already has a dedupe comment (check comments for "possible duplicate") + +### 2. 
Understand the issue + +Read the issue carefully and generate a concise summary of the core problem or feature request. Extract 3-5 key technical terms or concepts. + +### 3. Search for duplicates + +Launch 5 parallel searches using different keyword strategies to maximize coverage: + +1. **Exact terms**: Use the most specific technical terms from the issue title +2. **Synonyms**: Use alternative phrasings for the core problem +3. **Error messages**: If the issue contains error messages, search for those +4. **Component names**: Search by the specific component/module mentioned +5. **Broad category**: Search by the general category of the issue + +For each search, use: +``` +gh search issues "" --repo $REPOSITORY --limit 20 +``` + +### 4. Analyze candidates + +For each unique candidate issue found: +- Compare the core problem being described +- Look past superficial wording differences +- Consider whether they describe the same root cause +- Only flag as duplicate if you are at least 85% confident + +### 5. Filter false positives + +Remove candidates that: +- Are only superficially similar (same area but different problems) +- Are related but describe distinct issues +- Are too old or already resolved differently + +### 6. Report results + +If you found duplicates (max 3), call: +``` +./scripts/comment-on-duplicates.sh --base-issue --potential-duplicates ... +``` + +If no duplicates found, do nothing and report that the issue appears to be unique. diff --git a/.github/workflows/autoclose-labeled-issues.yml b/.github/workflows/autoclose-labeled-issues.yml index 158d8e7..8499dbd 100644 --- a/.github/workflows/autoclose-labeled-issues.yml +++ b/.github/workflows/autoclose-labeled-issues.yml @@ -1,21 +1,14 @@ -# Closes open issues that carry the "autoclose" label and have been inactive -# for more than INACTIVITY_DAYS days. Runs on a daily schedule and can also -# be triggered manually. 
-name: Auto-close Inactive Labeled Issues +# Auto-closes duplicate issues after 3 days if no human activity or thumbs-down reaction. +# Runs daily at 09:00 UTC. +name: Auto-close Duplicate Issues on: schedule: - # Runs every day at 01:00 UTC - - cron: '0 1 * * *' + - cron: '0 9 * * *' workflow_dispatch: inputs: - inactivity_days: - description: 'Days of inactivity before closing (default: 7)' - required: false - default: '7' - type: number dry_run: - description: 'Dry run – report but do not actually close issues' + description: 'Dry run - report but do not close issues' required: false default: 'false' type: choice @@ -30,16 +23,15 @@ permissions: jobs: autoclose: runs-on: ubuntu-latest + timeout-minutes: 10 steps: - name: Checkout repository uses: actions/checkout@v4 - - name: Close inactive autoclose-labeled issues + - name: Close inactive duplicate issues env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - REPO_OWNER: ${{ github.repository_owner }} - REPO_NAME: ${{ github.event.repository.name }} - # workflow_dispatch overrides the default; schedule uses the default (7) - INACTIVITY_DAYS: ${{ inputs.inactivity_days || '7' }} - DRY_RUN: ${{ inputs.dry_run || 'false' }} + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + REPO_OWNER: ${{ github.repository_owner }} + REPO_NAME: ${{ github.event.repository.name }} + DRY_RUN: ${{ inputs.dry_run || 'false' }} run: node scripts/autoclose-labeled-issues.js diff --git a/.github/workflows/backfill-dedupe.yml b/.github/workflows/backfill-dedupe.yml index 5a85f91..72c49e9 100644 --- a/.github/workflows/backfill-dedupe.yml +++ b/.github/workflows/backfill-dedupe.yml @@ -1,4 +1,4 @@ -# Backfills duplicate detection for historical issues. +# Backfills duplicate detection for historical issues using Claude Code. # Triggered manually via workflow_dispatch. 
name: Backfill Duplicate Detection @@ -10,58 +10,55 @@ on: required: false default: '30' type: number - dry_run: - description: 'Dry run – analyze but do not post comments or apply labels' - required: false - default: 'false' - type: choice - options: - - 'false' - - 'true' permissions: - issues: write contents: read + issues: write + actions: write jobs: backfill: runs-on: ubuntu-latest + timeout-minutes: 10 steps: - - name: Checkout repository - uses: actions/checkout@v4 + - uses: actions/checkout@v4 - - name: Ensure required labels exist - uses: actions/github-script@v7 - with: - script: | - const labels = [ - { name: 'duplicate', color: 'cfd3d7', description: 'This issue or pull request already exists' }, - { name: 'autoclose', color: 'e4e669', description: 'Will be auto-closed after a period of inactivity' }, - ]; - for (const label of labels) { - try { - await github.rest.issues.getLabel({ - owner: context.repo.owner, - repo: context.repo.repo, - name: label.name, - }); - } catch (err) { - if (err.status === 404) { - await github.rest.issues.createLabel({ - owner: context.repo.owner, repo: context.repo.repo, - name: label.name, color: label.color, description: label.description, - }); - core.info(`Created label: ${label.name}`); - } - } - } - - - name: Run backfill script + - name: Fetch issues and run dedupe env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - ANTHROPIC_API_KEY: ${{ secrets.AUTHROPIC_API_KEY }} - REPO_OWNER: ${{ github.repository_owner }} - REPO_NAME: ${{ github.event.repository.name }} - DAYS_BACK: ${{ inputs.days_back }} - DRY_RUN: ${{ inputs.dry_run }} - run: node scripts/backfill-dedupe.js + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + REPO: ${{ github.repository }} + DAYS_BACK: ${{ inputs.days_back || '30' }} + run: | + if ! 
[[ "$DAYS_BACK" =~ ^[0-9]+$ ]]; then + echo "Error: days_back must be a number" + exit 1 + fi + + SINCE=$(date -u -d "$DAYS_BACK days ago" +%Y-%m-%dT%H:%M:%SZ 2>/dev/null || date -u -v-${DAYS_BACK}d +%Y-%m-%dT%H:%M:%SZ) + echo "Fetching open issues since $SINCE" + + # Get open issues, filter out PRs and already-labeled ones + ISSUES=$(gh issue list --repo "$REPO" --state open --limit 200 --json number,title,labels,createdAt \ + --jq "[.[] | select(.createdAt >= \"$SINCE\") | select([.labels[].name] | index(\"duplicate\") | not)] | .[].number") + + if [ -z "$ISSUES" ]; then + echo "No issues to process" + exit 0 + fi + + BATCH_SIZE=10 + COUNT=0 + echo "Issues to process: $ISSUES" + for NUMBER in $ISSUES; do + echo "Triggering dedupe for issue #$NUMBER" + gh workflow run issue-dedupe.yml --repo "$REPO" -f issue_number="$NUMBER" + COUNT=$((COUNT + 1)) + if [ $((COUNT % BATCH_SIZE)) -eq 0 ]; then + echo "Pausing 60s after $COUNT issues..." + sleep 60 + else + sleep 5 + fi + done + + echo "Backfill triggered for $COUNT issues" diff --git a/.github/workflows/issue-dedupe.yml b/.github/workflows/issue-dedupe.yml index 58d627d..88981c2 100644 --- a/.github/workflows/issue-dedupe.yml +++ b/.github/workflows/issue-dedupe.yml @@ -1,4 +1,4 @@ -# Detects duplicate issues using Claude Code. +# Detects duplicate issues using Claude Code with the /dedupe command. # Triggered automatically when a new issue is opened, or manually for a single issue. 
name: Issue Duplicate Detection @@ -10,15 +10,20 @@ on: issue_number: description: 'Issue number to check for duplicates' required: true - type: number + type: string permissions: - issues: write contents: read + issues: write + +concurrency: + group: dedupe-${{ github.event.issue.number || inputs.issue_number }} + cancel-in-progress: true jobs: detect-duplicate: runs-on: ubuntu-latest + timeout-minutes: 10 # Skip pull-requests that surface as issues and bot-opened issues if: > (github.event_name == 'workflow_dispatch') || @@ -26,210 +31,26 @@ jobs: !endsWith(github.actor, '[bot]') && github.actor != 'github-actions') steps: - # ── 1. Ensure required labels exist ───────────────────────────────────── - - name: Ensure labels exist - uses: actions/github-script@v7 - with: - script: | - const labels = [ - { name: 'duplicate', color: 'cfd3d7', description: 'This issue or pull request already exists' }, - { name: 'autoclose', color: 'e4e669', description: 'Will be auto-closed after a period of inactivity' }, - ]; - for (const label of labels) { - try { - await github.rest.issues.getLabel({ - owner: context.repo.owner, - repo: context.repo.repo, - name: label.name, - }); - } catch (err) { - if (err.status === 404) { - await github.rest.issues.createLabel({ - owner: context.repo.owner, - repo: context.repo.repo, - name: label.name, - color: label.color, - description: label.description, - }); - core.info(`Created label: ${label.name}`); - } - } - } + - uses: actions/checkout@v4 - # ── 2. Gather issue data and find candidate duplicates ────────────────── - - name: Gather issue data and candidates - id: data - uses: actions/github-script@v7 - with: - script: | - const issueNumber = - context.eventName === 'issues' - ? 
context.payload.issue.number - : parseInt(core.getInput('issue_number') || '${{ inputs.issue_number }}'); - - const { data: issue } = await github.rest.issues.get({ - owner: context.repo.owner, - repo: context.repo.repo, - issue_number: issueNumber, - }); - - // Skip already-closed or already-labelled issues - if (issue.state === 'closed') { - core.setOutput('skip', 'true'); - core.info('Issue is already closed – skipping.'); - return; - } - if (issue.labels.some(l => l.name === 'duplicate')) { - core.setOutput('skip', 'true'); - core.info('Issue already has "duplicate" label – skipping.'); - return; - } - - // Extract meaningful keywords from the title - const stopWords = new Set([ - 'a','an','the','is','in','on','at','to','for','of','and','or','but','not', - 'with','this','that','it','be','are','was','has','have','does','do','how', - 'why','when','where','what','which','who','will','can','could','should', - 'would','may','might','must','get','got','use','using','used','error', - 'issue','bug','feature','request','problem','question','please','just', - 'after','before','during','about','from','into','also','then','than', - ]); - const keywords = issue.title - .toLowerCase() - .replace(/[^a-z0-9\s]/g, ' ') - .split(/\s+/) - .filter(w => w.length > 2 && !stopWords.has(w)) - .slice(0, 6) - .join(' '); - - let candidates = []; - if (keywords) { - try { - const q = `repo:${context.repo.owner}/${context.repo.repo} is:issue state:open ${keywords}`; - const { data: results } = await github.rest.search.issuesAndPullRequests({ - q, - per_page: 15, - }); - candidates = results.items - .filter(item => item.number !== issueNumber && !item.pull_request) - .slice(0, 10); - } catch (err) { - core.warning('GitHub search failed: ' + err.message); - } - } - - if (candidates.length === 0) { - core.setOutput('skip', 'true'); - core.info('No candidate issues found – skipping Claude analysis.'); - return; - } - - core.setOutput('skip', 'false'); - core.setOutput('issue_number', 
String(issueNumber)); - core.setOutput('issue_title', issue.title); - core.setOutput('issue_body', (issue.body || '').substring(0, 3000)); - core.setOutput('candidates', - JSON.stringify(candidates.map(c => ({ - number: c.number, - title: c.title, - url: c.html_url, - body: (c.body || '').substring(0, 500), - }))) - ); - - # ── 3. Write data files (avoids YAML-injection of arbitrary text) ─────── - - name: Write issue data to files - if: steps.data.outputs.skip == 'false' + - name: Determine issue number + id: issue env: - ISSUE_TITLE: ${{ steps.data.outputs.issue_title }} - ISSUE_BODY: ${{ steps.data.outputs.issue_body }} - CANDIDATES: ${{ steps.data.outputs.candidates }} + EVENT_NAME: ${{ github.event_name }} + INPUT_NUMBER: ${{ inputs.issue_number }} + ISSUE_NUMBER: ${{ github.event.issue.number }} run: | - printf '%s' "$ISSUE_TITLE" > /tmp/issue-title.txt - printf '%s' "$ISSUE_BODY" > /tmp/issue-body.txt - printf '%s' "$CANDIDATES" > /tmp/issue-candidates.json + if [ "$EVENT_NAME" = "workflow_dispatch" ]; then + echo "number=$INPUT_NUMBER" >> "$GITHUB_OUTPUT" + else + echo "number=$ISSUE_NUMBER" >> "$GITHUB_OUTPUT" + fi - # ── 4. Ask Claude to decide whether this is a duplicate ───────────────── - - name: Run Claude duplicate analysis - if: steps.data.outputs.skip == 'false' - uses: anthropics/claude-code-action@v1 + - uses: anthropics/claude-code-action@v1 + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} with: + prompt: "/dedupe ${{ github.repository }}/issues/${{ steps.issue.outputs.number }}" anthropic_api_key: ${{ secrets.AUTHROPIC_API_KEY }} - github_token: ${{ secrets.GITHUB_TOKEN }} - track_progress: 'false' - prompt: | - You are a GitHub issue triage assistant. - - Analyze whether issue #${{ steps.data.outputs.issue_number }} in this repository - is a duplicate of any existing open issues. 
- - The issue data is stored in temporary files on this runner: - - /tmp/issue-title.txt — title of the new issue - - /tmp/issue-body.txt — body of the new issue - - /tmp/issue-candidates.json — JSON array of up to 10 candidate issues - (each has: number, title, url, body) - - Read those files first, then follow these rules: - - 1. Compare the new issue against every candidate. - Focus on whether they describe the *same underlying problem or request*. - 2. Only flag as a duplicate if you are at least 85 % confident. - Superficial wording differences do NOT make an issue non-duplicate. - 3. IF the new issue IS a duplicate: - a. Post a friendly, helpful comment on issue #${{ steps.data.outputs.issue_number }}. - The comment must: - - Thank the reporter - - Explain which existing issue(s) it duplicates and why (include markdown links) - - Invite them to subscribe to the original for updates - b. The LAST line of the comment must be exactly (fill in real numbers): - - Example: - 4. IF the issue is NOT a duplicate, or you are unsure: - - Do NOT post any comment. - - Do NOT take any other action. - - # ── 5. 
Parse Claude's comment and apply labels ────────────────────────── - - name: Apply labels if duplicate found - if: steps.data.outputs.skip == 'false' - uses: actions/github-script@v7 - with: - script: | - const issueNumber = parseInt('${{ steps.data.outputs.issue_number }}'); - - // Allow a moment for the comment to land - await new Promise(r => setTimeout(r, 5000)); - - const { data: comments } = await github.rest.issues.listComments({ - owner: context.repo.owner, - repo: context.repo.repo, - issue_number: issueNumber, - per_page: 50, - }); - - const pattern = //; - let isDuplicate = false; - - for (const comment of [...comments].reverse()) { - const m = comment.body.match(pattern); - if (m) { - try { - const result = JSON.parse(m[1]); - isDuplicate = result.is_duplicate === true; - } catch (err) { - core.warning('Failed to parse DEDUPE_RESULT JSON: ' + err.message); - } - break; - } - } - - if (isDuplicate) { - await github.rest.issues.addLabels({ - owner: context.repo.owner, - repo: context.repo.repo, - issue_number: issueNumber, - labels: ['duplicate', 'autoclose'], - }); - core.info(`✅ Applied "duplicate" and "autoclose" labels to #${issueNumber}`); - } else { - core.info(`ℹ️ Issue #${issueNumber} is not a duplicate – no labels applied.`); - } + github_token: ${{ secrets.GITHUB_TOKEN }} + claude_args: "--model claude-sonnet-4-5-20250929" diff --git a/.github/workflows/remove-autoclose-label.yml b/.github/workflows/remove-autoclose-label.yml index 38fc8ee..5411b19 100644 --- a/.github/workflows/remove-autoclose-label.yml +++ b/.github/workflows/remove-autoclose-label.yml @@ -1,6 +1,8 @@ -# Removes the "autoclose" label whenever a human (non-bot) posts a new comment -# on an issue that carries the label. This resets the inactivity clock. -name: Remove Autoclose Label on Human Activity +# Removes the "duplicate" label when a human (non-bot) comments on a +# duplicate-flagged issue, signaling that the issue needs re-evaluation. 
+# The auto-close script also independently checks for human activity, +# so this provides an additional visible signal. +name: Remove Duplicate Label on Human Activity on: issue_comment: @@ -10,39 +12,34 @@ permissions: issues: write jobs: - remove-autoclose: + remove-label: # Only run for issue comments (not PR comments) - if: ${{ github.event.issue.pull_request == null }} + if: > + github.event.issue.pull_request == null && + !endsWith(github.actor, '[bot]') && + github.actor != 'github-actions' runs-on: ubuntu-latest steps: - - name: Remove autoclose label if human commented + - name: Remove duplicate label if human commented uses: actions/github-script@v7 with: script: | - const actor = context.actor; - - // Ignore bot accounts - if (actor.endsWith('[bot]') || actor === 'github-actions') { - core.info(`Skipping bot comment from ${actor}`); - return; - } - - const issue = context.payload.issue; + const issue = context.payload.issue; const labels = (issue.labels || []).map(l => l.name); - if (!labels.includes('autoclose')) { - core.info('Issue does not have "autoclose" label – nothing to do.'); + if (!labels.includes('duplicate')) { + core.info('Issue does not have "duplicate" label - nothing to do.'); return; } await github.rest.issues.removeLabel({ - owner: context.repo.owner, - repo: context.repo.repo, + owner: context.repo.owner, + repo: context.repo.repo, issue_number: issue.number, - name: 'autoclose', + name: 'duplicate', }); core.info( - `Removed "autoclose" label from #${issue.number} ` + - `after human activity by ${actor}` + `Removed "duplicate" label from #${issue.number} ` + + `after human comment by ${context.actor}` ); diff --git a/scripts/autoclose-labeled-issues.js b/scripts/autoclose-labeled-issues.js index 8e85da8..e3c07f8 100644 --- a/scripts/autoclose-labeled-issues.js +++ b/scripts/autoclose-labeled-issues.js @@ -1,34 +1,32 @@ /** * scripts/autoclose-labeled-issues.js * - * Closes open issues that carry the "autoclose" label and have been 
inactive - * (no updates) for more than INACTIVITY_DAYS days. + * Auto-closes issues that have a bot "possible duplicate" comment older than + * 3 days, unless: + * - A human has commented after the bot's duplicate comment + * - The author reacted with thumbs-down on the duplicate comment * * Required environment variables: - * GITHUB_TOKEN – GitHub Actions token (or PAT with repo:issues write access) - * REPO_OWNER – Repository owner (e.g. VectifyAI) - * REPO_NAME – Repository name (e.g. PageIndex) + * GITHUB_TOKEN - GitHub Actions token + * REPO_OWNER - Repository owner + * REPO_NAME - Repository name * - * Optional environment variables: - * INACTIVITY_DAYS – Days of inactivity before closing (default: 7) - * DRY_RUN – If "true", report but do not close issues (default: false) + * Optional: + * DRY_RUN - If "true", report but do not close (default: false) */ 'use strict'; const https = require('https'); -// ── Configuration ───────────────────────────────────────────────────────────── +const GITHUB_TOKEN = process.env.GITHUB_TOKEN; +const REPO_OWNER = process.env.REPO_OWNER; +const REPO_NAME = process.env.REPO_NAME; +const DRY_RUN = process.env.DRY_RUN === 'true'; -const GITHUB_TOKEN = process.env.GITHUB_TOKEN; -const REPO_OWNER = process.env.REPO_OWNER; -const REPO_NAME = process.env.REPO_NAME; -const INACTIVITY_DAYS = parseInt(process.env.INACTIVITY_DAYS || '7', 10); -const DRY_RUN = process.env.DRY_RUN === 'true'; +const THREE_DAYS_MS = 3 * 24 * 60 * 60 * 1000; -// ── HTTP helper ─────────────────────────────────────────────────────────────── - -function githubRequest(method, path, body = null) { +function githubRequest(method, path, body = null, retried = false) { return new Promise((resolve, reject) => { const payload = body ? 
JSON.stringify(body) : null; const options = { @@ -37,29 +35,31 @@ function githubRequest(method, path, body = null) { method, headers: { 'Authorization': `Bearer ${GITHUB_TOKEN}`, - 'Accept': 'application/vnd.github+json', - 'User-Agent': 'PageIndex-Autoclose-Script/1.0', + 'Accept': 'application/vnd.github+json', + 'User-Agent': 'PageIndex-Autoclose/1.0', 'X-GitHub-Api-Version': '2022-11-28', - ...(payload ? { - 'Content-Type': 'application/json', - 'Content-Length': Buffer.byteLength(payload), - } : {}), + ...(payload ? { 'Content-Type': 'application/json', 'Content-Length': Buffer.byteLength(payload) } : {}), }, }; const req = https.request(options, (res) => { let data = ''; res.on('data', chunk => (data += chunk)); - res.on('end', () => { - if (res.statusCode >= 400) { - reject(new Error(`GitHub API ${method} ${path} → ${res.statusCode}: ${data}`)); + res.on('end', async () => { + if ((res.statusCode === 403 || res.statusCode === 429) && !retried) { + const retryAfter = parseInt(res.headers['retry-after'] || '60', 10); + console.log(` Rate limited on ${method} ${path}, retrying after ${retryAfter}s...`); + await sleep(retryAfter * 1000); + try { resolve(await githubRequest(method, path, body, true)); } + catch (err) { reject(err); } return; } - try { - resolve(data ? JSON.parse(data) : {}); - } catch { - resolve({}); + if (res.statusCode >= 400) { + reject(new Error(`GitHub API ${method} ${path} -> ${res.statusCode}: ${data}`)); + return; } + try { resolve(data ? JSON.parse(data) : {}); } + catch { resolve({}); } }); }); req.on('error', reject); @@ -68,113 +68,162 @@ function githubRequest(method, path, body = null) { }); } -/** Simple sleep helper for rate-limiting. */ const sleep = (ms) => new Promise(r => setTimeout(r, ms)); -// ── Core logic ──────────────────────────────────────────────────────────────── - /** - * Fetches all open issues with the "autoclose" label, paginating as needed. 
+ * Fetches open issues with the "duplicate" label, paginating as needed. + * Only returns issues created more than 3 days ago. */ -async function fetchAutocloseIssues() { +async function fetchDuplicateIssues() { const issues = []; let page = 1; while (true) { const data = await githubRequest( 'GET', - `/repos/${REPO_OWNER}/${REPO_NAME}/issues?state=open&labels=autoclose&per_page=100&page=${page}` + `/repos/${REPO_OWNER}/${REPO_NAME}/issues?state=open&labels=duplicate&per_page=100&page=${page}` ); if (!Array.isArray(data) || data.length === 0) break; - // Filter out any pull requests that may surface issues.push(...data.filter(i => !i.pull_request)); if (data.length < 100) break; page++; } - return issues; + + const cutoff = new Date(Date.now() - THREE_DAYS_MS); + return issues.filter(i => new Date(i.created_at) < cutoff); } /** - * Closes a single issue with a polite explanatory comment. + * Finds the bot's duplicate comment on an issue (contains "possible duplicate"). */ -async function closeIssue(issueNumber, inactivityDays) { - const body = - `This issue has been automatically closed because it was marked as a **duplicate** ` + - `and has had no new activity for ${inactivityDays} day(s).\n\n` + - `If you believe this was closed in error, please reopen the issue and leave a comment. ` + - `New human activity will prevent automatic closure in the future.\n\n` + - `Thank you for your contribution! 🙏`; +function findDuplicateComment(comments) { + return comments.find(c => + (c.user.type === 'Bot' || c.user.login === 'github-actions[bot]') && + c.body.includes('possible duplicate') + ); +} + +/** + * Checks if there are human comments after the duplicate comment. 
+ */ +function hasHumanCommentAfter(comments, afterDate) { + return comments.some(c => { + if (c.user.type === 'Bot' || c.user.login.endsWith('[bot]') || c.user.login === 'github-actions') { + return false; + } + return new Date(c.created_at) > afterDate; + }); +} + +/** + * Checks if the duplicate comment has a thumbs-down reaction. + */ +async function hasThumbsDownReaction(commentId) { + const reactions = await githubRequest( + 'GET', + `/repos/${REPO_OWNER}/${REPO_NAME}/issues/comments/${commentId}/reactions` + ); + return Array.isArray(reactions) && reactions.some(r => r.content === '-1'); +} + +/** + * Closes an issue as duplicate with a comment. + */ +async function closeAsDuplicate(issueNumber) { + const body = + 'This issue has been automatically closed as a duplicate. ' + + 'No human activity or objection was received within the 3-day grace period.\n\n' + + 'If you believe this was closed in error, please reopen the issue and leave a comment.'; - // Post closing comment first await githubRequest( 'POST', `/repos/${REPO_OWNER}/${REPO_NAME}/issues/${issueNumber}/comments`, { body } ); - // Close the issue await githubRequest( 'PATCH', `/repos/${REPO_OWNER}/${REPO_NAME}/issues/${issueNumber}`, - { state: 'closed', state_reason: 'not_planned' } + { state: 'closed', state_reason: 'completed' } + ); + + await githubRequest( + 'POST', + `/repos/${REPO_OWNER}/${REPO_NAME}/issues/${issueNumber}/labels`, + { labels: ['duplicate'] } ); } -// ── Entry point ─────────────────────────────────────────────────────────────── +async function processIssue(issue) { + const num = issue.number; + console.log(`\nChecking issue #${num}: ${issue.title}`); + + const comments = await githubRequest( + 'GET', + `/repos/${REPO_OWNER}/${REPO_NAME}/issues/${num}/comments?per_page=100` + ); + + if (!Array.isArray(comments)) { + console.log(` -> Could not fetch comments, skipping.`); + return false; + } + + const dupeComment = findDuplicateComment(comments); + if (!dupeComment) { + 
console.log(` -> No duplicate comment found, skipping.`); + return false; + } + + const commentDate = new Date(dupeComment.created_at); + const ageMs = Date.now() - commentDate.getTime(); + + if (ageMs < THREE_DAYS_MS) { + const daysLeft = Math.ceil((THREE_DAYS_MS - ageMs) / (24 * 60 * 60 * 1000)); + console.log(` -> Duplicate comment is less than 3 days old (${daysLeft}d remaining), skipping.`); + return false; + } + + if (hasHumanCommentAfter(comments, commentDate)) { + console.log(` -> Human commented after duplicate comment, skipping.`); + return false; + } + + if (await hasThumbsDownReaction(dupeComment.id)) { + console.log(` -> Author reacted with thumbs-down, skipping.`); + return false; + } + + if (DRY_RUN) { + console.log(` [DRY RUN] Would close issue #${num}`); + return true; + } + + await closeAsDuplicate(num); + console.log(` -> Closed issue #${num} as duplicate`); + return true; +} async function main() { - // Validate required env vars - const missing = ['GITHUB_TOKEN', 'REPO_OWNER', 'REPO_NAME'] - .filter(k => !process.env[k]); + const missing = ['GITHUB_TOKEN', 'REPO_OWNER', 'REPO_NAME'].filter(k => !process.env[k]); if (missing.length) { console.error(`Missing required environment variables: ${missing.join(', ')}`); process.exit(1); } - const cutoff = new Date(Date.now() - INACTIVITY_DAYS * 24 * 60 * 60 * 1000); + console.log('Auto-close duplicate issues'); + console.log(` Repository: ${REPO_OWNER}/${REPO_NAME}`); + console.log(` Dry run: ${DRY_RUN}`); - console.log(`Auto-close inactive labelled issues`); - console.log(` Repository: ${REPO_OWNER}/${REPO_NAME}`); - console.log(` Inactivity days: ${INACTIVITY_DAYS} (cutoff: ${cutoff.toISOString()})`); - console.log(` Dry run: ${DRY_RUN}`); - - const issues = await fetchAutocloseIssues(); - console.log(`\nFound ${issues.length} open issue(s) with "autoclose" label.`); + const issues = await fetchDuplicateIssues(); + console.log(`\nFound ${issues.length} duplicate-labeled issue(s) older than 3 days.`); 
let closedCount = 0; - let skippedCount = 0; - for (const issue of issues) { - const lastActivity = new Date(issue.updated_at); - const inactive = lastActivity < cutoff; - const daysSince = Math.floor((Date.now() - lastActivity.getTime()) / (1000 * 60 * 60 * 24)); - - if (!inactive) { - console.log(` #${issue.number} — active ${daysSince}d ago, skipping.`); - skippedCount++; - continue; - } - - console.log(` #${issue.number} — inactive for ${daysSince}d: "${issue.title}"`); - - if (DRY_RUN) { - console.log(` [DRY RUN] Would close issue #${issue.number}`); - closedCount++; - continue; - } - - try { - await closeIssue(issue.number, INACTIVITY_DAYS); - console.log(` ✅ Closed issue #${issue.number}`); - closedCount++; - } catch (err) { - console.error(` ❌ Failed to close #${issue.number}: ${err.message}`); - } - - // Respect GitHub's secondary rate limit + const closed = await processIssue(issue); + if (closed) closedCount++; await sleep(1000); } - console.log(`\nSummary: ${closedCount} closed, ${skippedCount} still active.`); + console.log(`\nSummary: ${closedCount} issue(s) closed.`); } main().catch(err => { diff --git a/scripts/backfill-dedupe.js b/scripts/backfill-dedupe.js deleted file mode 100644 index ade1039..0000000 --- a/scripts/backfill-dedupe.js +++ /dev/null @@ -1,370 +0,0 @@ -/** - * scripts/backfill-dedupe.js - * - * Backfills duplicate detection for historical issues. - * Fetches issues created within the last DAYS_BACK days, searches for - * candidate duplicates via the GitHub Search API, and asks the Anthropic - * API to determine whether each issue is a duplicate. - * - * Required environment variables: - * GITHUB_TOKEN – GitHub Actions token (or PAT with repo access) - * ANTHROPIC_API_KEY – Anthropic API key (mapped from AUTHROPIC_API_KEY secret) - * REPO_OWNER – Repository owner (e.g. VectifyAI) - * REPO_NAME – Repository name (e.g. 
PageIndex) - * - * Optional environment variables: - * DAYS_BACK – How many days back to process (default: 30) - * DRY_RUN – If "true", analyse but do not write to GitHub (default: false) - */ - -'use strict'; - -const https = require('https'); - -// ── Configuration ───────────────────────────────────────────────────────────── - -const GITHUB_TOKEN = process.env.GITHUB_TOKEN; -const ANTHROPIC_API_KEY = process.env.ANTHROPIC_API_KEY; -const REPO_OWNER = process.env.REPO_OWNER; -const REPO_NAME = process.env.REPO_NAME; -const DAYS_BACK = parseInt(process.env.DAYS_BACK || '30', 10); -const DRY_RUN = process.env.DRY_RUN === 'true'; - -const STOP_WORDS = new Set([ - 'a','an','the','is','in','on','at','to','for','of','and','or','but','not', - 'with','this','that','it','be','are','was','has','have','does','do','how', - 'why','when','where','what','which','who','will','can','could','should', - 'would','may','might','must','get','got','use','using','used','error', - 'issue','bug','feature','request','problem','question','please','just', - 'after','before','during','about','from','into','also','then','than', -]); - -// ── HTTP helpers ────────────────────────────────────────────────────────────── - -/** - * Makes an authenticated GitHub REST API request. - * @param {string} method HTTP method - * @param {string} path API path (e.g. '/repos/owner/repo/issues') - * @param {object|null} body Request body (will be JSON-encoded) - * @returns {Promise} - */ -function githubRequest(method, path, body = null) { - return new Promise((resolve, reject) => { - const payload = body ? JSON.stringify(body) : null; - const options = { - hostname: 'api.github.com', - path, - method, - headers: { - 'Authorization': `Bearer ${GITHUB_TOKEN}`, - 'Accept': 'application/vnd.github+json', - 'User-Agent': 'PageIndex-Backfill-Script/1.0', - 'X-GitHub-Api-Version': '2022-11-28', - ...(payload ? 
{ 'Content-Type': 'application/json', 'Content-Length': Buffer.byteLength(payload) } : {}), - }, - }; - - const req = https.request(options, (res) => { - let data = ''; - res.on('data', chunk => (data += chunk)); - res.on('end', () => { - if (res.statusCode >= 400) { - reject(new Error(`GitHub API ${method} ${path} → ${res.statusCode}: ${data}`)); - return; - } - try { - resolve(data ? JSON.parse(data) : {}); - } catch { - resolve({}); - } - }); - }); - req.on('error', reject); - if (payload) req.write(payload); - req.end(); - }); -} - -/** - * Calls the Anthropic Messages API and returns Claude's text response. - * @param {string} prompt User prompt - * @returns {Promise} - */ -function callClaude(prompt) { - return new Promise((resolve, reject) => { - const body = JSON.stringify({ - model: 'claude-haiku-4-5', - max_tokens: 1024, - messages: [{ role: 'user', content: prompt }], - }); - - const options = { - hostname: 'api.anthropic.com', - path: '/v1/messages', - method: 'POST', - headers: { - 'Content-Type': 'application/json', - 'Content-Length': Buffer.byteLength(body), - 'x-api-key': ANTHROPIC_API_KEY, - 'anthropic-version': '2023-06-01', - }, - }; - - const req = https.request(options, (res) => { - let data = ''; - res.on('data', chunk => (data += chunk)); - res.on('end', () => { - try { - const parsed = JSON.parse(data); - if (parsed.error) { - reject(new Error(`Anthropic API error: ${parsed.error.message}`)); - return; - } - const text = (parsed.content || []) - .filter(b => b.type === 'text') - .map(b => b.text) - .join(''); - resolve(text); - } catch (err) { - reject(new Error(`Failed to parse Anthropic response: ${err.message}`)); - } - }); - }); - req.on('error', reject); - req.write(body); - req.end(); - }); -} - -/** Simple sleep helper for rate-limiting. 
*/ -const sleep = (ms) => new Promise(r => setTimeout(r, ms)); - -// ── Core logic ──────────────────────────────────────────────────────────────── - -/** - * Fetches open issues created since `since` (ISO 8601 string), paginating as needed. - */ -async function fetchIssuesSince(since) { - const issues = []; - let page = 1; - while (true) { - const data = await githubRequest( - 'GET', - `/repos/${REPO_OWNER}/${REPO_NAME}/issues?state=open&sort=created&direction=desc&since=${since}&per_page=100&page=${page}` - ); - if (!Array.isArray(data) || data.length === 0) break; - // Filter out pull requests - issues.push(...data.filter(i => !i.pull_request)); - if (data.length < 100) break; - page++; - } - return issues; -} - -/** - * Searches for up to 10 candidate duplicate issues for the given issue. - */ -async function findCandidates(issue) { - const keywords = (issue.title || '') - .toLowerCase() - .replace(/[^a-z0-9\s]/g, ' ') - .split(/\s+/) - .filter(w => w.length > 2 && !STOP_WORDS.has(w)) - .slice(0, 6) - .join(' '); - - if (!keywords) return []; - - const q = encodeURIComponent( - `repo:${REPO_OWNER}/${REPO_NAME} is:issue state:open ${keywords}` - ); - - const data = await githubRequest('GET', `/search/issues?q=${q}&per_page=15`); - return (data.items || []) - .filter(item => item.number !== issue.number && !item.pull_request) - .slice(0, 10); -} - -/** - * Builds the duplicate-detection prompt for Claude. - */ -function buildPrompt(issue, candidates) { - const candidatesText = candidates - .map(c => `#${c.number}: ${c.title}\nURL: ${c.html_url}\n${(c.body || '').substring(0, 500)}`) - .join('\n---\n'); - - return `You are a GitHub issue triage assistant. - -Analyze whether the following open issue is a duplicate of any of the candidate issues listed below. 
- -== NEW ISSUE #${issue.number} == -Title: ${issue.title} -Body: -${(issue.body || '(no body)').substring(0, 3000)} - -== CANDIDATE ISSUES (up to 10) == -${candidatesText} - -RULES: -- Only flag as a duplicate if you are at least 85% confident. -- A minor difference in wording does NOT make an issue non-duplicate if they describe the same underlying problem or feature request. - -Respond with ONLY a JSON object (no markdown, no other text): -{ - "is_duplicate": true or false, - "duplicate_issues": [array of integer issue numbers that this is a duplicate of, empty if none], - "explanation": "one or two sentences explaining your reasoning" -}`; -} - -/** - * Parses Claude's JSON response robustly. - * Returns { is_duplicate, duplicate_issues, explanation } or null on failure. - */ -function parseClaudeResponse(text) { - // Try to extract a JSON object from the response - const jsonMatch = text.match(/\{[\s\S]*\}/); - if (!jsonMatch) return null; - try { - const parsed = JSON.parse(jsonMatch[0]); - return { - is_duplicate: Boolean(parsed.is_duplicate), - duplicate_issues: Array.isArray(parsed.duplicate_issues) ? parsed.duplicate_issues.map(Number) : [], - explanation: String(parsed.explanation || ''), - }; - } catch { - return null; - } -} - -/** - * Posts a duplicate-found comment on the issue. - */ -async function postDuplicateComment(issueNumber, duplicateIssueNumbers, explanation) { - const links = duplicateIssueNumbers - .map(n => `- #${n}`) - .join('\n'); - - const body = - `👋 Thank you for taking the time to open this issue!\n\n` + - `After automated analysis, this issue appears to be a duplicate of:\n\n` + - `${links}\n\n` + - `${explanation}\n\n` + - `Please subscribe to the original issue(s) above to follow updates. 
` + - `This issue will be automatically closed after a short inactivity period.\n\n` + - ``; - - await githubRequest( - 'POST', - `/repos/${REPO_OWNER}/${REPO_NAME}/issues/${issueNumber}/comments`, - { body } - ); -} - -/** - * Adds labels to an issue, creating them if they do not exist. - */ -async function ensureLabelAndApply(issueNumber, labelNames) { - const knownLabels = { - duplicate: { color: 'cfd3d7', description: 'This issue or pull request already exists' }, - autoclose: { color: 'e4e669', description: 'Will be auto-closed after a period of inactivity' }, - }; - - for (const name of labelNames) { - try { - await githubRequest('GET', `/repos/${REPO_OWNER}/${REPO_NAME}/labels/${encodeURIComponent(name)}`); - } catch { - const meta = knownLabels[name] || { color: 'ededed', description: '' }; - await githubRequest('POST', `/repos/${REPO_OWNER}/${REPO_NAME}/labels`, { name, ...meta }); - } - } - - await githubRequest( - 'POST', - `/repos/${REPO_OWNER}/${REPO_NAME}/issues/${issueNumber}/labels`, - { labels: labelNames } - ); -} - -/** - * Processes a single issue: finds candidates, asks Claude, and acts on the result. - */ -async function processIssue(issue) { - const num = issue.number; - console.log(`\nProcessing issue #${num}: ${issue.title}`); - - // Skip already-labelled issues - const existingLabels = (issue.labels || []).map(l => l.name); - if (existingLabels.includes('duplicate')) { - console.log(` → Already labelled as duplicate, skipping.`); - return; - } - - const candidates = await findCandidates(issue); - if (candidates.length === 0) { - console.log(` → No candidates found, skipping.`); - return; - } - console.log(` → Found ${candidates.length} candidate(s): ${candidates.map(c => `#${c.number}`).join(', ')}`); - - const prompt = buildPrompt(issue, candidates); - const rawReply = await callClaude(prompt); - const result = parseClaudeResponse(rawReply); - - if (!result) { - console.warn(` ⚠️ Could not parse Claude response for #${num}. 
Raw:\n${rawReply.substring(0, 300)}`); - return; - } - - console.log(` → is_duplicate=${result.is_duplicate}, issues=${JSON.stringify(result.duplicate_issues)}`); - console.log(` ${result.explanation}`); - - if (!result.is_duplicate || result.duplicate_issues.length === 0) { - console.log(` → Not a duplicate.`); - return; - } - - if (DRY_RUN) { - console.log(` [DRY RUN] Would post comment and apply labels to #${num}`); - return; - } - - await postDuplicateComment(num, result.duplicate_issues, result.explanation); - await ensureLabelAndApply(num, ['duplicate', 'autoclose']); - console.log(` ✅ Commented and labelled #${num}`); -} - -// ── Entry point ─────────────────────────────────────────────────────────────── - -async function main() { - // Validate required env vars - const missing = ['GITHUB_TOKEN', 'ANTHROPIC_API_KEY', 'REPO_OWNER', 'REPO_NAME'] - .filter(k => !process.env[k]); - if (missing.length) { - console.error(`Missing required environment variables: ${missing.join(', ')}`); - process.exit(1); - } - - const since = new Date(Date.now() - DAYS_BACK * 24 * 60 * 60 * 1000).toISOString(); - - console.log(`Backfilling duplicate detection`); - console.log(` Repository: ${REPO_OWNER}/${REPO_NAME}`); - console.log(` Days back: ${DAYS_BACK} (since ${since})`); - console.log(` Dry run: ${DRY_RUN}`); - - const issues = await fetchIssuesSince(since); - console.log(`\nFetched ${issues.length} open issue(s) to process.`); - - for (const issue of issues) { - await processIssue(issue); - // Respect GitHub and Anthropic rate limits - await sleep(2500); - } - - console.log('\nBackfill complete.'); -} - -main().catch(err => { - console.error('Fatal error:', err.message); - process.exit(1); -}); diff --git a/scripts/comment-on-duplicates.sh b/scripts/comment-on-duplicates.sh new file mode 100755 index 0000000..6f3ff36 --- /dev/null +++ b/scripts/comment-on-duplicates.sh @@ -0,0 +1,106 @@ +#!/usr/bin/env bash +# +# comment-on-duplicates.sh - Posts a duplicate issue comment 
with auto-close warning. +# +# Usage: +# ./scripts/comment-on-duplicates.sh --base-issue 123 --potential-duplicates 456 789 +# +set -euo pipefail + +REPO="${GITHUB_REPOSITORY:-}" +if [ -z "$REPO" ]; then + echo "Error: GITHUB_REPOSITORY is not set" >&2 + exit 1 +fi + +BASE_ISSUE="" +DUPLICATES=() + +# Parse arguments +while [[ $# -gt 0 ]]; do + case "$1" in + --base-issue) + BASE_ISSUE="$2" + shift 2 + ;; + --potential-duplicates) + shift + while [[ $# -gt 0 && ! "$1" =~ ^-- ]]; do + DUPLICATES+=("$1") + shift + done + ;; + *) + echo "Error: Unknown argument: $1" >&2 + exit 1 + ;; + esac +done + +# Validate inputs +if [ -z "$BASE_ISSUE" ]; then + echo "Error: --base-issue is required" >&2 + exit 1 +fi + +if ! [[ "$BASE_ISSUE" =~ ^[0-9]+$ ]]; then + echo "Error: --base-issue must be a number, got: $BASE_ISSUE" >&2 + exit 1 +fi + +if [ ${#DUPLICATES[@]} -eq 0 ]; then + echo "Error: --potential-duplicates requires at least one issue number" >&2 + exit 1 +fi + +for dup in "${DUPLICATES[@]}"; do + if ! [[ "$dup" =~ ^[0-9]+$ ]]; then + echo "Error: duplicate issue must be a number, got: $dup" >&2 + exit 1 + fi +done + +# Limit to 3 duplicates max +if [ ${#DUPLICATES[@]} -gt 3 ]; then + echo "Warning: Limiting to first 3 duplicates" >&2 + DUPLICATES=("${DUPLICATES[@]:0:3}") +fi + +# Validate that the base issue exists and is open +if ! gh issue view "$BASE_ISSUE" --repo "$REPO" --json state -q '.state' | grep -qi 'open'; then + echo "Error: Issue #$BASE_ISSUE is not open or does not exist" >&2 + exit 1 +fi + +# Build the duplicate links list +LINKS="" +COUNT=0 +for dup in "${DUPLICATES[@]}"; do + # Validate duplicate issue exists + if gh issue view "$dup" --repo "$REPO" --json number -q '.number' > /dev/null 2>&1; then + COUNT=$((COUNT + 1)) + LINKS="${LINKS}${COUNT}. 
https://github.com/${REPO}/issues/${dup} +" + else + echo "Warning: Issue #$dup does not exist, skipping" >&2 + fi +done + +if [ "$COUNT" -eq 0 ]; then + echo "Error: None of the specified duplicate issues exist" >&2 + exit 1 +fi + +# Build and post the comment +COMMENT="Found ${COUNT} possible duplicate issue(s): + +${LINKS} +This issue will be automatically closed as a duplicate in 3 days. +- To prevent auto-closure, add a comment or react with :thumbsdown: on this comment." + +gh issue comment "$BASE_ISSUE" --repo "$REPO" --body "$COMMENT" + +# Add the duplicate label +gh issue edit "$BASE_ISSUE" --repo "$REPO" --add-label "duplicate" + +echo "Posted duplicate comment on issue #$BASE_ISSUE with $COUNT potential duplicate(s)" From 7df8510bde1ab0ac3a28fe23c0c18fad7c8a112e Mon Sep 17 00:00:00 2001 From: BukeLy Date: Mon, 2 Mar 2026 17:23:33 +0800 Subject: [PATCH 4/6] Simplify scripts: unify bot detection, remove redundant API calls and TOCTOU checks --- .claude/commands/dedupe.md | 2 +- README.md | 65 ----------------------------- scripts/autoclose-labeled-issues.js | 17 +++----- scripts/comment-on-duplicates.sh | 26 ++---------- 4 files changed, 11 insertions(+), 99 deletions(-) diff --git a/.claude/commands/dedupe.md b/.claude/commands/dedupe.md index d649bb1..8a07908 100644 --- a/.claude/commands/dedupe.md +++ b/.claude/commands/dedupe.md @@ -41,7 +41,7 @@ Launch 5 parallel searches using different keyword strategies to maximize covera For each search, use: ``` -gh search issues "" --repo $REPOSITORY --limit 20 +gh search issues " state:open" --repo $REPOSITORY --limit 20 ``` ### 4. Analyze candidates diff --git a/README.md b/README.md index 131cded..7180efd 100644 --- a/README.md +++ b/README.md @@ -267,69 +267,4 @@ Leave us a star 🌟 if you like our project. Thank you! --- -## 🤖 GitHub Automation - -This repository uses automated GitHub Actions workflows to keep the issue tracker tidy. 
- -### Overview - -| Workflow | Trigger | Purpose | -|---|---|---| -| `issue-dedupe.yml` | Issue opened · `workflow_dispatch` | Detects duplicate issues using Claude and labels them | -| `backfill-dedupe.yml` | `workflow_dispatch` | Runs duplicate detection over historical issues | -| `autoclose-labeled-issues.yml` | Daily schedule · `workflow_dispatch` | Closes issues labelled `autoclose` after N days of inactivity | -| `remove-autoclose-label.yml` | Issue comment created | Removes the `autoclose` label when a human posts a new comment | - -### Required Secrets - -Add the following secret to the repository (**Settings → Secrets and variables → Actions**): - -| Secret | Description | -|---|---| -| `AUTHROPIC_API_KEY` | Your Anthropic API key (used by `anthropics/claude-code-action`) | - -`GITHUB_TOKEN` is provided automatically by GitHub Actions and does not need to be added manually. - -### Labels - -The workflows create the following labels automatically if they do not exist: - -| Label | Description | -|---|---| -| `duplicate` | Marks issues identified as duplicates | -| `autoclose` | Marks issues that will be automatically closed after inactivity | - -### Running the Backfill - -To scan historical issues for duplicates, trigger the **Backfill Duplicate Detection** workflow manually from the **Actions** tab: - -- **`days_back`** (default `30`) — how many days into the past to scan -- **`dry_run`** (default `false`) — set to `true` to preview results without modifying issues - -``` -Actions → Backfill Duplicate Detection → Run workflow -``` - -### Changing the Inactivity Threshold - -The default inactivity period before an `autoclose`-labelled issue is closed is **7 days**. - -To change it for a one-off run, trigger **Auto-close Inactive Labeled Issues** with the `inactivity_days` input. 
- -To change the default permanently, edit the `INACTIVITY_DAYS` env variable default in `.github/workflows/autoclose-labeled-issues.yml`: - -```yaml -INACTIVITY_DAYS: ${{ inputs.inactivity_days || '7' }} # ← change '7' here -``` - -### How Duplicate Detection Works - -1. When a new issue is opened, keywords from the title are used to search for the top 10 most relevant existing open issues via the GitHub Search API. -2. The issue title, body, and candidate list are passed to **Claude** (`anthropics/claude-code-action`) with a structured prompt. -3. Claude posts a comment on the issue (if it is highly confident it is a duplicate), including links to the original issue(s) and a brief explanation. -4. A follow-up step reads the comment, extracts the machine-readable result, and applies the `duplicate` and `autoclose` labels. -5. If Claude is not confident, no comment or labels are applied. - ---- - © 2025 [Vectify AI](https://vectify.ai) diff --git a/scripts/autoclose-labeled-issues.js b/scripts/autoclose-labeled-issues.js index e3c07f8..615d146 100644 --- a/scripts/autoclose-labeled-issues.js +++ b/scripts/autoclose-labeled-issues.js @@ -92,13 +92,16 @@ async function fetchDuplicateIssues() { return issues.filter(i => new Date(i.created_at) < cutoff); } +function isBot(user) { + return user.type === 'Bot' || user.login.endsWith('[bot]') || user.login === 'github-actions'; +} + /** * Finds the bot's duplicate comment on an issue (contains "possible duplicate"). 
*/ function findDuplicateComment(comments) { return comments.find(c => - (c.user.type === 'Bot' || c.user.login === 'github-actions[bot]') && - c.body.includes('possible duplicate') + isBot(c.user) && c.body.includes('possible duplicate') ); } @@ -107,9 +110,7 @@ function findDuplicateComment(comments) { */ function hasHumanCommentAfter(comments, afterDate) { return comments.some(c => { - if (c.user.type === 'Bot' || c.user.login.endsWith('[bot]') || c.user.login === 'github-actions') { - return false; - } + if (isBot(c.user)) return false; return new Date(c.created_at) > afterDate; }); } @@ -145,12 +146,6 @@ async function closeAsDuplicate(issueNumber) { `/repos/${REPO_OWNER}/${REPO_NAME}/issues/${issueNumber}`, { state: 'closed', state_reason: 'completed' } ); - - await githubRequest( - 'POST', - `/repos/${REPO_OWNER}/${REPO_NAME}/issues/${issueNumber}/labels`, - { labels: ['duplicate'] } - ); } async function processIssue(issue) { diff --git a/scripts/comment-on-duplicates.sh b/scripts/comment-on-duplicates.sh index 6f3ff36..05c93d9 100755 --- a/scripts/comment-on-duplicates.sh +++ b/scripts/comment-on-duplicates.sh @@ -66,32 +66,16 @@ if [ ${#DUPLICATES[@]} -gt 3 ]; then DUPLICATES=("${DUPLICATES[@]:0:3}") fi -# Validate that the base issue exists and is open -if ! gh issue view "$BASE_ISSUE" --repo "$REPO" --json state -q '.state' | grep -qi 'open'; then - echo "Error: Issue #$BASE_ISSUE is not open or does not exist" >&2 - exit 1 -fi - # Build the duplicate links list -LINKS="" COUNT=0 +LINKS="" for dup in "${DUPLICATES[@]}"; do - # Validate duplicate issue exists - if gh issue view "$dup" --repo "$REPO" --json number -q '.number' > /dev/null 2>&1; then - COUNT=$((COUNT + 1)) - LINKS="${LINKS}${COUNT}. https://github.com/${REPO}/issues/${dup} + COUNT=$((COUNT + 1)) + LINKS="${LINKS}${COUNT}. 
https://github.com/${REPO}/issues/${dup} " - else - echo "Warning: Issue #$dup does not exist, skipping" >&2 - fi done -if [ "$COUNT" -eq 0 ]; then - echo "Error: None of the specified duplicate issues exist" >&2 - exit 1 -fi - -# Build and post the comment +# Build and post the comment — if the issue is closed or doesn't exist, gh will error out COMMENT="Found ${COUNT} possible duplicate issue(s): ${LINKS} @@ -99,8 +83,6 @@ This issue will be automatically closed as a duplicate in 3 days. - To prevent auto-closure, add a comment or react with :thumbsdown: on this comment." gh issue comment "$BASE_ISSUE" --repo "$REPO" --body "$COMMENT" - -# Add the duplicate label gh issue edit "$BASE_ISSUE" --repo "$REPO" --add-label "duplicate" echo "Posted duplicate comment on issue #$BASE_ISSUE with $COUNT potential duplicate(s)" From 5fa180744da22f09e32b754065a4cd13a5e618e3 Mon Sep 17 00:00:00 2001 From: BukeLy Date: Mon, 2 Mar 2026 17:45:57 +0800 Subject: [PATCH 5/6] Fix issues from Copilot review: 403 retry, comments pagination, backfill pagination - Only retry 403 when rate-limit headers indicate throttling, not permission errors - Add fetchAllComments() with pagination for issues with 100+ comments - Add pagination loop in backfill workflow to handle repos with 200+ open issues --- .github/workflows/backfill-dedupe.yml | 16 ++++++++-- scripts/autoclose-labeled-issues.js | 43 +++++++++++++++++++++++---- 2 files changed, 50 insertions(+), 9 deletions(-) diff --git a/.github/workflows/backfill-dedupe.yml b/.github/workflows/backfill-dedupe.yml index 72c49e9..10060f0 100644 --- a/.github/workflows/backfill-dedupe.yml +++ b/.github/workflows/backfill-dedupe.yml @@ -37,9 +37,19 @@ jobs: SINCE=$(date -u -d "$DAYS_BACK days ago" +%Y-%m-%dT%H:%M:%SZ 2>/dev/null || date -u -v-${DAYS_BACK}d +%Y-%m-%dT%H:%M:%SZ) echo "Fetching open issues since $SINCE" - # Get open issues, filter out PRs and already-labeled ones - ISSUES=$(gh issue list --repo "$REPO" --state open --limit 200 --json 
number,title,labels,createdAt \ - --jq "[.[] | select(.createdAt >= \"$SINCE\") | select([.labels[].name] | index(\"duplicate\") | not)] | .[].number") + # Get open issues with pagination, filter out PRs and already-labeled ones + ISSUES="" + PAGE=1 + while true; do + BATCH=$(gh issue list --repo "$REPO" --state open --limit 100 --page "$PAGE" --json number,labels,createdAt \ + --jq "[.[] | select(.createdAt >= \"$SINCE\") | select([.labels[].name] | index(\"duplicate\") | not)] | .[].number") + + [ -z "$BATCH" ] && break + ISSUES="$ISSUES $BATCH" + [ $(echo "$BATCH" | wc -w) -lt 100 ] && break + PAGE=$((PAGE + 1)) + done + ISSUES=$(echo "$ISSUES" | xargs) if [ -z "$ISSUES" ]; then echo "No issues to process" diff --git a/scripts/autoclose-labeled-issues.js b/scripts/autoclose-labeled-issues.js index 615d146..3628410 100644 --- a/scripts/autoclose-labeled-issues.js +++ b/scripts/autoclose-labeled-issues.js @@ -46,7 +46,8 @@ function githubRequest(method, path, body = null, retried = false) { let data = ''; res.on('data', chunk => (data += chunk)); res.on('end', async () => { - if ((res.statusCode === 403 || res.statusCode === 429) && !retried) { + // 429: always retry (rate limit) + if (res.statusCode === 429 && !retried) { const retryAfter = parseInt(res.headers['retry-after'] || '60', 10); console.log(` Rate limited on ${method} ${path}, retrying after ${retryAfter}s...`); await sleep(retryAfter * 1000); @@ -54,6 +55,19 @@ function githubRequest(method, path, body = null, retried = false) { catch (err) { reject(err); } return; } + // 403: retry only when it is rate-limit related + if (res.statusCode === 403 && !retried) { + const rateLimitRemaining = res.headers['x-ratelimit-remaining']; + const hasRetryAfter = res.headers['retry-after']; + if (rateLimitRemaining === '0' || hasRetryAfter) { + const retryAfter = parseInt(hasRetryAfter || '60', 10); + console.log(` Rate limited (403) on ${method} ${path}, retrying after ${retryAfter}s...`); + await sleep(retryAfter * 1000); + try { 
resolve(await githubRequest(method, path, body, true)); } + catch (err) { reject(err); } + return; + } + } if (res.statusCode >= 400) { reject(new Error(`GitHub API ${method} ${path} -> ${res.statusCode}: ${data}`)); return; @@ -115,6 +129,26 @@ function hasHumanCommentAfter(comments, afterDate) { }); } +/** + * Fetches all comments for an issue, handling pagination. + * Requests per_page=100 and loops until we get fewer than 100 or an empty array. + */ +async function fetchAllComments(issueNumber) { + const allComments = []; + let page = 1; + while (true) { + const comments = await githubRequest( + 'GET', + `/repos/${REPO_OWNER}/${REPO_NAME}/issues/${issueNumber}/comments?per_page=100&page=${page}` + ); + if (!Array.isArray(comments) || comments.length === 0) break; + allComments.push(...comments); + if (comments.length < 100) break; + page++; + } + return allComments; +} + /** * Checks if the duplicate comment has a thumbs-down reaction. */ @@ -152,12 +186,9 @@ async function processIssue(issue) { const num = issue.number; console.log(`\nChecking issue #${num}: ${issue.title}`); - const comments = await githubRequest( - 'GET', - `/repos/${REPO_OWNER}/${REPO_NAME}/issues/${num}/comments?per_page=100` - ); + const comments = await fetchAllComments(num); - if (!Array.isArray(comments)) { + if (!Array.isArray(comments) || comments.length === 0) { console.log(` -> Could not fetch comments, skipping.`); return false; } From e388e1b8b310f1c5e17c5dd6985ba6d98ca83434 Mon Sep 17 00:00:00 2001 From: BukeLy Date: Mon, 2 Mar 2026 18:01:34 +0800 Subject: [PATCH 6/6] Fix backfill pagination: use raw count instead of filtered count The pagination loop was breaking early because it checked the count of jq-filtered results rather than the raw API response count. 
--- .github/workflows/backfill-dedupe.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/backfill-dedupe.yml b/.github/workflows/backfill-dedupe.yml index 10060f0..0c47b4b 100644 --- a/.github/workflows/backfill-dedupe.yml +++ b/.github/workflows/backfill-dedupe.yml @@ -41,12 +41,12 @@ jobs: ISSUES="" PAGE=1 while true; do + RAW_COUNT=$(gh issue list --repo "$REPO" --state open --limit 100 --page "$PAGE" --json number | jq 'length') BATCH=$(gh issue list --repo "$REPO" --state open --limit 100 --page "$PAGE" --json number,labels,createdAt \ --jq "[.[] | select(.createdAt >= \"$SINCE\") | select([.labels[].name] | index(\"duplicate\") | not)] | .[].number") - [ -z "$BATCH" ] && break - ISSUES="$ISSUES $BATCH" - [ $(echo "$BATCH" | wc -w) -lt 100 ] && break + [ -n "$BATCH" ] && ISSUES="$ISSUES $BATCH" + [ "$RAW_COUNT" -lt 100 ] && break PAGE=$((PAGE + 1)) done ISSUES=$(echo "$ISSUES" | xargs)