diff --git a/.claude/commands/dedupe.md b/.claude/commands/dedupe.md new file mode 100644 index 0000000..8a07908 --- /dev/null +++ b/.claude/commands/dedupe.md @@ -0,0 +1,69 @@ +--- +allowed-tools: + - Bash(gh:*) + - Bash(./scripts/comment-on-duplicates.sh:*) +--- + +You are a GitHub issue deduplication assistant. Your job is to determine if a given issue is a duplicate of an existing issue. + +## Input + +The issue to check: $ARGUMENTS + +## Steps + +### 1. Pre-checks + +First, check if the issue should be skipped: + +``` +gh issue view <issue-number> --json state,labels,title,body,comments +``` + +Skip if: +- The issue is already closed +- The issue already has a `duplicate` label +- The issue already has a dedupe comment (check comments for "possible duplicate") + +### 2. Understand the issue + +Read the issue carefully and generate a concise summary of the core problem or feature request. Extract 3-5 key technical terms or concepts. + +### 3. Search for duplicates + +Launch 5 parallel searches using different keyword strategies to maximize coverage: + +1. **Exact terms**: Use the most specific technical terms from the issue title +2. **Synonyms**: Use alternative phrasings for the core problem +3. **Error messages**: If the issue contains error messages, search for those +4. **Component names**: Search by the specific component/module mentioned +5. **Broad category**: Search by the general category of the issue + +For each search, use: +``` +gh search issues "<keywords> state:open" --repo $GITHUB_REPOSITORY --limit 20 +``` + +### 4. Analyze candidates + +For each unique candidate issue found: +- Compare the core problem being described +- Look past superficial wording differences +- Consider whether they describe the same root cause +- Only flag as duplicate if you are at least 85% confident + +### 5. 
Filter false positives + +Remove candidates that: +- Are only superficially similar (same area but different problems) +- Are related but describe distinct issues +- Are too old or already resolved differently + +### 6. Report results + +If you found duplicates (max 3), call: +``` +./scripts/comment-on-duplicates.sh --base-issue <issue-number> --potential-duplicates <dup-1> <dup-2> ... +``` + +If no duplicates found, do nothing and report that the issue appears to be unique. diff --git a/.github/workflows/autoclose-labeled-issues.yml b/.github/workflows/autoclose-labeled-issues.yml new file mode 100644 index 0000000..8499dbd --- /dev/null +++ b/.github/workflows/autoclose-labeled-issues.yml @@ -0,0 +1,37 @@ +# Auto-closes duplicate issues after 3 days if no human activity or thumbs-down reaction. +# Runs daily at 09:00 UTC. +name: Auto-close Duplicate Issues + +on: + schedule: + - cron: '0 9 * * *' + workflow_dispatch: + inputs: + dry_run: + description: 'Dry run - report but do not close issues' + required: false + default: 'false' + type: choice + options: + - 'false' + - 'true' + +permissions: + issues: write + contents: read + +jobs: + autoclose: + runs-on: ubuntu-latest + timeout-minutes: 10 + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Close inactive duplicate issues + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + REPO_OWNER: ${{ github.repository_owner }} + REPO_NAME: ${{ github.event.repository.name }} # may be empty on schedule-triggered runs; the run step then falls back to GITHUB_REPOSITORY + DRY_RUN: ${{ inputs.dry_run || 'false' }} + run: REPO_NAME="${REPO_NAME:-${GITHUB_REPOSITORY#*/}}" node scripts/autoclose-labeled-issues.js diff --git a/.github/workflows/backfill-dedupe.yml b/.github/workflows/backfill-dedupe.yml new file mode 100644 index 0000000..0c47b4b --- /dev/null +++ b/.github/workflows/backfill-dedupe.yml @@ -0,0 +1,74 @@ +# Backfills duplicate detection for historical issues using Claude Code. +# Triggered manually via workflow_dispatch. 
+name: Backfill Duplicate Detection + +on: + workflow_dispatch: + inputs: + days_back: + description: 'How many days back to look for issues (default: 30)' + required: false + default: '30' + type: number + +permissions: + contents: read + issues: write + actions: write + +jobs: + backfill: + runs-on: ubuntu-latest + timeout-minutes: 60 # dispatching sleeps roughly 10s per issue on average, so 10 minutes cut a backfill off after about 50 issues + steps: + - uses: actions/checkout@v4 + + - name: Fetch issues and run dedupe + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + REPO: ${{ github.repository }} + DAYS_BACK: ${{ inputs.days_back || '30' }} + run: | + if ! [[ "$DAYS_BACK" =~ ^[0-9]+$ ]]; then + echo "Error: days_back must be a number" + exit 1 + fi + + SINCE=$(date -u -d "$DAYS_BACK days ago" +%Y-%m-%dT%H:%M:%SZ 2>/dev/null || date -u -v-${DAYS_BACK}d +%Y-%m-%dT%H:%M:%SZ) + echo "Fetching open issues since $SINCE" + + # Get open issues created since $SINCE that are not already labeled + # "duplicate". `gh issue list` has no --page flag (passing one aborts the + # step); it paginates internally up to --limit and never returns pull + # requests, so a single call replaces the manual page loop. + + # Upper bound on issues scanned per run; raise it for larger backlogs. + MAX_ISSUES=1000 + ISSUES=$(gh issue list --repo "$REPO" --state open --limit "$MAX_ISSUES" \ + --json number,labels,createdAt \ + --jq "[.[] | select(.createdAt >= \"$SINCE\") | select([.labels[].name] | index(\"duplicate\") | not)] | .[].number") + + # Collapse the newline-separated numbers into one space-separated list. + ISSUES=$(echo "$ISSUES" | xargs) + + if [ -z "$ISSUES" ]; then + echo "No issues to process" + exit 0 + fi + + BATCH_SIZE=10 + COUNT=0 + echo "Issues to process: $ISSUES" + for NUMBER in $ISSUES; do + echo "Triggering dedupe for issue #$NUMBER" + gh workflow run issue-dedupe.yml --repo "$REPO" -f issue_number="$NUMBER" + COUNT=$((COUNT + 1)) + if [ $((COUNT % BATCH_SIZE)) -eq 0 ]; then + echo "Pausing 60s after $COUNT issues..." 
+ sleep 60 + else + sleep 5 + fi + done + + echo "Backfill triggered for $COUNT issues" diff --git a/.github/workflows/issue-dedupe.yml b/.github/workflows/issue-dedupe.yml new file mode 100644 index 0000000..88981c2 --- /dev/null +++ b/.github/workflows/issue-dedupe.yml @@ -0,0 +1,56 @@ +# Detects duplicate issues using Claude Code with the /dedupe command. +# Triggered automatically when a new issue is opened, or manually for a single issue. +name: Issue Duplicate Detection + +on: + issues: + types: [opened] + workflow_dispatch: + inputs: + issue_number: + description: 'Issue number to check for duplicates' + required: true + type: string + +permissions: + contents: read + issues: write + +concurrency: + group: dedupe-${{ github.event.issue.number || inputs.issue_number }} + cancel-in-progress: true + +jobs: + detect-duplicate: + runs-on: ubuntu-latest + timeout-minutes: 10 + # Skip pull-requests that surface as issues and bot-opened issues + if: > + (github.event_name == 'workflow_dispatch') || + (github.event.issue.pull_request == null && + !endsWith(github.actor, '[bot]') && + github.actor != 'github-actions') + steps: + - uses: actions/checkout@v4 + + - name: Determine issue number + id: issue + env: + EVENT_NAME: ${{ github.event_name }} + INPUT_NUMBER: ${{ inputs.issue_number }} + ISSUE_NUMBER: ${{ github.event.issue.number }} + run: | + if [ "$EVENT_NAME" = "workflow_dispatch" ]; then + echo "number=$INPUT_NUMBER" >> "$GITHUB_OUTPUT" + else + echo "number=$ISSUE_NUMBER" >> "$GITHUB_OUTPUT" + fi + + - uses: anthropics/claude-code-action@v1 + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + with: + prompt: "/dedupe ${{ github.repository }}/issues/${{ steps.issue.outputs.number }}" + anthropic_api_key: ${{ secrets.ANTHROPIC_API_KEY }} # the repository secret must be stored under exactly this name + github_token: ${{ secrets.GITHUB_TOKEN }} + claude_args: "--model claude-sonnet-4-5-20250929" diff --git a/.github/workflows/remove-autoclose-label.yml b/.github/workflows/remove-autoclose-label.yml new file mode 100644 index 
0000000..5411b19 --- /dev/null +++ b/.github/workflows/remove-autoclose-label.yml @@ -0,0 +1,45 @@ +# Removes the "duplicate" label when a human (non-bot) comments on a +# duplicate-flagged issue, signaling that the issue needs re-evaluation. +# The auto-close script also independently checks for human activity, +# so this provides an additional visible signal. +name: Remove Duplicate Label on Human Activity + +on: + issue_comment: + types: [created] + +permissions: + issues: write + +jobs: + remove-label: + # Only run for issue comments (not PR comments) made by non-bot accounts + if: > + github.event.issue.pull_request == null && + !endsWith(github.actor, '[bot]') && + github.actor != 'github-actions' + runs-on: ubuntu-latest + steps: + - name: Remove duplicate label if human commented + uses: actions/github-script@v7 + with: + script: | + const issue = context.payload.issue; + const labels = (issue.labels || []).map(l => l.name); + + if (!labels.includes('duplicate')) { + core.info('Issue does not have "duplicate" label - nothing to do.'); + return; + } + + await github.rest.issues.removeLabel({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: issue.number, + name: 'duplicate', + }); + + core.info( + `Removed "duplicate" label from #${issue.number} ` + + `after human comment by ${context.actor}` + ); diff --git a/scripts/autoclose-labeled-issues.js b/scripts/autoclose-labeled-issues.js new file mode 100644 index 0000000..3628410 --- /dev/null +++ b/scripts/autoclose-labeled-issues.js @@ -0,0 +1,258 @@ +/** + * scripts/autoclose-labeled-issues.js + * + * Auto-closes open, "duplicate"-labeled issues created more than 3 days ago + * whose bot "possible duplicate" comment is also older than 3 days, unless: + * - A human has commented after the bot's duplicate comment + * - Anyone has reacted with thumbs-down on the duplicate comment + * + * Required environment variables: + * GITHUB_TOKEN - GitHub Actions token + * REPO_OWNER - Repository owner + * REPO_NAME - Repository name + * + * Optional: + * DRY_RUN - If "true", 
report but do not close (default: false) + */ + +'use strict'; + +const https = require('https'); + +const GITHUB_TOKEN = process.env.GITHUB_TOKEN; +const REPO_OWNER = process.env.REPO_OWNER; +const REPO_NAME = process.env.REPO_NAME; +const DRY_RUN = process.env.DRY_RUN === 'true'; + +const THREE_DAYS_MS = 3 * 24 * 60 * 60 * 1000; + +function githubRequest(method, path, body = null, retried = false) { + return new Promise((resolve, reject) => { + const payload = body ? JSON.stringify(body) : null; + const options = { + hostname: 'api.github.com', + path, + method, + headers: { + 'Authorization': `Bearer ${GITHUB_TOKEN}`, + 'Accept': 'application/vnd.github+json', + 'User-Agent': 'PageIndex-Autoclose/1.0', + 'X-GitHub-Api-Version': '2022-11-28', + ...(payload ? { 'Content-Type': 'application/json', 'Content-Length': Buffer.byteLength(payload) } : {}), + }, + }; + + const req = https.request(options, (res) => { + let data = ''; + res.on('data', chunk => (data += chunk)); + res.on('end', async () => { + // 429: always treated as a rate limit - retry once after the Retry-After delay (60s if the header is absent) + if (res.statusCode === 429 && !retried) { + const retryAfter = parseInt(res.headers['retry-after'] || '60', 10); + console.log(` Rate limited on ${method} ${path}, retrying after ${retryAfter}s...`); + await sleep(retryAfter * 1000); + try { resolve(await githubRequest(method, path, body, true)); } + catch (err) { reject(err); } + return; + } + // 403: retry once only when it is rate-limit related (x-ratelimit-remaining is 0 or Retry-After is present) + if (res.statusCode === 403 && !retried) { + const rateLimitRemaining = res.headers['x-ratelimit-remaining']; + const hasRetryAfter = res.headers['retry-after']; + if (rateLimitRemaining === '0' || hasRetryAfter) { + const retryAfter = parseInt(hasRetryAfter || '60', 10); + console.log(` Rate limited (403) on ${method} ${path}, retrying after ${retryAfter}s...`); + await sleep(retryAfter * 1000); + try { resolve(await githubRequest(method, path, body, true)); } + catch (err) { reject(err); } + return; + } + } + if (res.statusCode >= 400) { + reject(new Error(`GitHub API 
${method} ${path} -> ${res.statusCode}: ${data}`)); + return; + } + try { resolve(data ? JSON.parse(data) : {}); } + catch { resolve({}); } + }); + }); + req.on('error', reject); + if (payload) req.write(payload); + req.end(); + }); +} + +const sleep = (ms) => new Promise(r => setTimeout(r, ms)); + +/** + * Fetches open issues with the "duplicate" label, paginating as needed. + * Only returns issues created more than 3 days ago. + */ +async function fetchDuplicateIssues() { + const issues = []; + let page = 1; + while (true) { + const data = await githubRequest( + 'GET', + `/repos/${REPO_OWNER}/${REPO_NAME}/issues?state=open&labels=duplicate&per_page=100&page=${page}` + ); + if (!Array.isArray(data) || data.length === 0) break; + issues.push(...data.filter(i => !i.pull_request)); + if (data.length < 100) break; + page++; + } + + const cutoff = new Date(Date.now() - THREE_DAYS_MS); + return issues.filter(i => new Date(i.created_at) < cutoff); +} + +function isBot(user) { + return Boolean(user) && (user.type === 'Bot' || user.login.endsWith('[bot]') || user.login === 'github-actions'); // user is nullable in the REST schema; a missing user is treated as non-bot instead of throwing +} + +/** + * Finds the bot's duplicate comment on an issue (contains "possible duplicate"). + */ +function findDuplicateComment(comments) { + return comments.find(c => + isBot(c.user) && (c.body || '').includes('possible duplicate') // tolerate a comment without a body + ); +} + +/** + * Checks if there are human comments after the duplicate comment. + */ +function hasHumanCommentAfter(comments, afterDate) { + return comments.some(c => { + if (isBot(c.user)) return false; + return new Date(c.created_at) > afterDate; + }); +} + +/** + * Fetches all comments for an issue, handling pagination. + * Requests per_page=100 and loops until we get fewer than 100 or an empty array. 
+ */ +async function fetchAllComments(issueNumber) { + const allComments = []; + let page = 1; + while (true) { + const comments = await githubRequest( + 'GET', + `/repos/${REPO_OWNER}/${REPO_NAME}/issues/${issueNumber}/comments?per_page=100&page=${page}` + ); + if (!Array.isArray(comments) || comments.length === 0) break; + allComments.push(...comments); + if (comments.length < 100) break; + page++; + } + return allComments; +} + +/** + * Checks if the duplicate comment has a thumbs-down reaction from any user. + */ +async function hasThumbsDownReaction(commentId) { + const reactions = await githubRequest( + 'GET', + `/repos/${REPO_OWNER}/${REPO_NAME}/issues/comments/${commentId}/reactions?content=-1&per_page=100` // ask only for thumbs-down so one is not missed beyond the default first page + ); + return Array.isArray(reactions) && reactions.some(r => r.content === '-1'); +} + +/** + * Posts a closing notice on the issue, then closes it with reason "not_planned". + */ +async function closeAsDuplicate(issueNumber) { + const body = + 'This issue has been automatically closed as a duplicate. ' + + 'No human activity or objection was received within the 3-day grace period.\n\n' + + 'If you believe this was closed in error, please reopen the issue and leave a comment.'; + + await githubRequest( + 'POST', + `/repos/${REPO_OWNER}/${REPO_NAME}/issues/${issueNumber}/comments`, + { body } + ); + + await githubRequest( + 'PATCH', + `/repos/${REPO_OWNER}/${REPO_NAME}/issues/${issueNumber}`, + { state: 'closed', state_reason: 'not_planned' } // a duplicate is not completed work + ); +} + +async function processIssue(issue) { + const num = issue.number; + console.log(`\nChecking issue #${num}: ${issue.title}`); + + const comments = await fetchAllComments(num); + + if (!Array.isArray(comments) || comments.length === 0) { + console.log(` -> Could not fetch comments, skipping.`); + return false; + } + + const dupeComment = findDuplicateComment(comments); + if (!dupeComment) { + console.log(` -> No duplicate comment found, skipping.`); + return false; + } + + const commentDate = new Date(dupeComment.created_at); + const ageMs = Date.now() - commentDate.getTime(); + + 
if (ageMs < THREE_DAYS_MS) { + const daysLeft = Math.ceil((THREE_DAYS_MS - ageMs) / (24 * 60 * 60 * 1000)); + console.log(` -> Duplicate comment is less than 3 days old (${daysLeft}d remaining), skipping.`); + return false; + } + + if (hasHumanCommentAfter(comments, commentDate)) { + console.log(` -> Human commented after duplicate comment, skipping.`); + return false; + } + + if (await hasThumbsDownReaction(dupeComment.id)) { + console.log(` -> Duplicate comment has a thumbs-down reaction, skipping.`); // the check accepts a reaction from any user, not only the author + return false; + } + + if (DRY_RUN) { + console.log(` [DRY RUN] Would close issue #${num}`); + return true; + } + + await closeAsDuplicate(num); + console.log(` -> Closed issue #${num} as duplicate`); + return true; +} + +async function main() { + const missing = ['GITHUB_TOKEN', 'REPO_OWNER', 'REPO_NAME'].filter(k => !process.env[k]); + if (missing.length) { + console.error(`Missing required environment variables: ${missing.join(', ')}`); + process.exit(1); + } + + console.log('Auto-close duplicate issues'); + console.log(` Repository: ${REPO_OWNER}/${REPO_NAME}`); + console.log(` Dry run: ${DRY_RUN}`); + + const issues = await fetchDuplicateIssues(); + console.log(`\nFound ${issues.length} duplicate-labeled issue(s) older than 3 days.`); + + let closedCount = 0; + for (const issue of issues) { + const closed = await processIssue(issue); + if (closed) closedCount++; + await sleep(1000); + } + + console.log(`\nSummary: ${closedCount} issue(s) ${DRY_RUN ? 'would be closed (dry run)' : 'closed'}.`); // processIssue also returns true in dry-run mode, where nothing is closed +} + +main().catch(err => { + console.error('Fatal error:', err.message); + process.exit(1); +}); diff --git a/scripts/comment-on-duplicates.sh b/scripts/comment-on-duplicates.sh new file mode 100755 index 0000000..05c93d9 --- /dev/null +++ b/scripts/comment-on-duplicates.sh @@ -0,0 +1,88 @@ +#!/usr/bin/env bash +# +# comment-on-duplicates.sh - Posts a duplicate issue comment with auto-close warning and adds the "duplicate" label. 
+# +# Usage: +# ./scripts/comment-on-duplicates.sh --base-issue 123 --potential-duplicates 456 789 +# +set -euo pipefail + +REPO="${GITHUB_REPOSITORY:-}" +if [ -z "$REPO" ]; then + echo "Error: GITHUB_REPOSITORY is not set" >&2 + exit 1 +fi + +BASE_ISSUE="" +DUPLICATES=() + +# Parse arguments +while [[ $# -gt 0 ]]; do + case "$1" in + --base-issue) + BASE_ISSUE="${2:?Error: --base-issue requires a value}" # clear message instead of a bare "unbound variable" under set -u + shift 2 + ;; + --potential-duplicates) + shift + while [[ $# -gt 0 && ! "$1" =~ ^-- ]]; do + DUPLICATES+=("$1") + shift + done + ;; + *) + echo "Error: Unknown argument: $1" >&2 + exit 1 + ;; + esac +done + +# Validate inputs +if [ -z "$BASE_ISSUE" ]; then + echo "Error: --base-issue is required" >&2 + exit 1 +fi + +if ! [[ "$BASE_ISSUE" =~ ^[0-9]+$ ]]; then + echo "Error: --base-issue must be a number, got: $BASE_ISSUE" >&2 + exit 1 +fi + +if [ ${#DUPLICATES[@]} -eq 0 ]; then + echo "Error: --potential-duplicates requires at least one issue number" >&2 + exit 1 +fi + +for dup in "${DUPLICATES[@]}"; do + if ! [[ "$dup" =~ ^[0-9]+$ ]] || (( 10#$dup == 10#$BASE_ISSUE )); then # an issue cannot be a duplicate of itself; 10# forces decimal so leading zeros compare correctly + echo "Error: duplicate issue must be a number other than the base issue #$BASE_ISSUE, got: $dup" >&2 + exit 1 + fi +done + +# Limit to 3 duplicates max +if [ ${#DUPLICATES[@]} -gt 3 ]; then + echo "Warning: Limiting to first 3 duplicates" >&2 + DUPLICATES=("${DUPLICATES[@]:0:3}") +fi + +# Build the duplicate links list +COUNT=0 +LINKS="" +for dup in "${DUPLICATES[@]}"; do + COUNT=$((COUNT + 1)) + LINKS="${LINKS}${COUNT}. https://github.com/${REPO}/issues/${dup} +" +done + +# Build and post the comment — gh errors out if the issue doesn't exist (commenting on a closed issue still succeeds) +COMMENT="Found ${COUNT} possible duplicate issue(s): + +${LINKS} +This issue will be automatically closed as a duplicate in 3 days. +- To prevent auto-closure, add a comment or react with :thumbsdown: on this comment." + +gh issue comment "$BASE_ISSUE" --repo "$REPO" --body "$COMMENT" +gh issue edit "$BASE_ISSUE" --repo "$REPO" --add-label "duplicate" + +echo "Posted duplicate comment on issue #$BASE_ISSUE with $COUNT potential duplicate(s)"