Refactor issue dedup system to use claude-code-action with /dedupe command

Replace the Copilot-generated inline search logic with a claude-code-action-based
architecture inspired by anthropic/claude-code's approach:

- Add .claude/commands/dedupe.md with 5-parallel-search strategy
- Add scripts/comment-on-duplicates.sh with 3-day grace period warning
- Rewrite issue-dedupe.yml to use claude-code-action + /dedupe command
- Rewrite autoclose script to check bot comments, human activity, and thumbs-down reactions
- Rewrite backfill to trigger dedupe workflow per issue with rate limiting
- Add concurrency control, timeout, input validation, and rate limit retry
- Remove gh.sh (unnecessary) and backfill-dedupe.js (replaced by workflow trigger)
This commit is contained in:
BukeLy 2026-03-02 17:05:44 +08:00
parent b3cb9531a4
commit fd9330c434
8 changed files with 413 additions and 752 deletions

View file

@ -1,21 +1,14 @@
# Closes open issues that carry the "autoclose" label and have been inactive
# for more than INACTIVITY_DAYS days. Runs on a daily schedule and can also
# be triggered manually.
name: Auto-close Inactive Labeled Issues
# Auto-closes duplicate issues after 3 days if no human activity or thumbs-down reaction.
# Runs daily at 09:00 UTC.
name: Auto-close Duplicate Issues
on:
schedule:
# Runs every day at 01:00 UTC
- cron: '0 1 * * *'
- cron: '0 9 * * *'
workflow_dispatch:
inputs:
inactivity_days:
description: 'Days of inactivity before closing (default: 7)'
required: false
default: '7'
type: number
dry_run:
description: 'Dry run report but do not actually close issues'
description: 'Dry run - report but do not close issues'
required: false
default: 'false'
type: choice
@ -30,16 +23,15 @@ permissions:
jobs:
autoclose:
runs-on: ubuntu-latest
timeout-minutes: 10
steps:
- name: Checkout repository
uses: actions/checkout@v4
- name: Close inactive autoclose-labeled issues
- name: Close inactive duplicate issues
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
REPO_OWNER: ${{ github.repository_owner }}
REPO_NAME: ${{ github.event.repository.name }}
# workflow_dispatch overrides the default; schedule uses the default (7)
INACTIVITY_DAYS: ${{ inputs.inactivity_days || '7' }}
DRY_RUN: ${{ inputs.dry_run || 'false' }}
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
REPO_OWNER: ${{ github.repository_owner }}
REPO_NAME: ${{ github.event.repository.name }}
DRY_RUN: ${{ inputs.dry_run || 'false' }}
run: node scripts/autoclose-labeled-issues.js

View file

@ -1,4 +1,4 @@
# Backfills duplicate detection for historical issues.
# Backfills duplicate detection for historical issues using Claude Code.
# Triggered manually via workflow_dispatch.
name: Backfill Duplicate Detection
@ -10,58 +10,55 @@ on:
required: false
default: '30'
type: number
dry_run:
description: 'Dry run analyze but do not post comments or apply labels'
required: false
default: 'false'
type: choice
options:
- 'false'
- 'true'
permissions:
issues: write
contents: read
issues: write
actions: write
jobs:
backfill:
runs-on: ubuntu-latest
timeout-minutes: 10
steps:
- name: Checkout repository
uses: actions/checkout@v4
- uses: actions/checkout@v4
- name: Ensure required labels exist
uses: actions/github-script@v7
with:
script: |
const labels = [
{ name: 'duplicate', color: 'cfd3d7', description: 'This issue or pull request already exists' },
{ name: 'autoclose', color: 'e4e669', description: 'Will be auto-closed after a period of inactivity' },
];
for (const label of labels) {
try {
await github.rest.issues.getLabel({
owner: context.repo.owner,
repo: context.repo.repo,
name: label.name,
});
} catch (err) {
if (err.status === 404) {
await github.rest.issues.createLabel({
owner: context.repo.owner, repo: context.repo.repo,
name: label.name, color: label.color, description: label.description,
});
core.info(`Created label: ${label.name}`);
}
}
}
- name: Run backfill script
- name: Fetch issues and run dedupe
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
ANTHROPIC_API_KEY: ${{ secrets.AUTHROPIC_API_KEY }}
REPO_OWNER: ${{ github.repository_owner }}
REPO_NAME: ${{ github.event.repository.name }}
DAYS_BACK: ${{ inputs.days_back }}
DRY_RUN: ${{ inputs.dry_run }}
run: node scripts/backfill-dedupe.js
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
REPO: ${{ github.repository }}
DAYS_BACK: ${{ inputs.days_back || '30' }}
run: |
# Backfill driver: dispatches the issue-dedupe workflow once for every recent
# open issue that is not yet labeled "duplicate".
#
# Environment:
#   REPO       owner/name of the repository to scan
#   DAYS_BACK  how many days of history to cover (non-negative integer)
#
# Exits non-zero when DAYS_BACK is invalid or when any dispatch still fails
# after all retries; the remaining issues are processed regardless.
if ! [[ "$DAYS_BACK" =~ ^[0-9]+$ ]]; then
  echo "Error: days_back must be a number"
  exit 1
fi
# Try GNU date syntax first, then fall back to the BSD form.
SINCE=$(date -u -d "$DAYS_BACK days ago" +%Y-%m-%dT%H:%M:%SZ 2>/dev/null || date -u -v-${DAYS_BACK}d +%Y-%m-%dT%H:%M:%SZ)
echo "Fetching open issues since $SINCE"
# `gh issue list` returns issues only (no pull requests). Keep those created
# on or after SINCE that do not already carry the "duplicate" label.
ISSUES=$(gh issue list --repo "$REPO" --state open --limit 200 --json number,title,labels,createdAt \
  --jq "[.[] | select(.createdAt >= \"$SINCE\") | select([.labels[].name] | index(\"duplicate\") | not)] | .[].number")
if [ -z "$ISSUES" ]; then
  echo "No issues to process"
  exit 0
fi

# Dispatch the dedupe workflow for one issue, retrying with exponential
# backoff so a transient API or rate-limit error does not abort the backfill.
trigger_dedupe() {
  local number=$1
  local delay=10
  local attempt
  for attempt in 1 2 3; do
    if gh workflow run issue-dedupe.yml --repo "$REPO" -f issue_number="$number"; then
      return 0
    fi
    if [ "$attempt" -lt 3 ]; then
      echo "Attempt $attempt failed for issue #$number; retrying in ${delay}s..."
      sleep "$delay"
      delay=$((delay * 2))
    fi
  done
  return 1
}

BATCH_SIZE=10
COUNT=0      # successfully dispatched
FAILED=0     # gave up after retries
PROCESSED=0  # attempted so far (drives pacing)
mapfile -t ISSUE_LIST <<< "$ISSUES"
TOTAL=${#ISSUE_LIST[@]}
echo "Issues to process: $ISSUES"
# NOTE(review): with 5s between issues and 60s after every 10th, roughly 55
# issues fit inside the job's 10-minute timeout; raise timeout-minutes if
# larger backfills are expected.
for NUMBER in "${ISSUE_LIST[@]}"; do
  echo "Triggering dedupe for issue #$NUMBER"
  if trigger_dedupe "$NUMBER"; then
    COUNT=$((COUNT + 1))
  else
    echo "::warning::Failed to trigger dedupe for issue #$NUMBER after 3 attempts"
    FAILED=$((FAILED + 1))
  fi
  PROCESSED=$((PROCESSED + 1))
  # Nothing left to pace against after the final issue.
  if [ "$PROCESSED" -eq "$TOTAL" ]; then
    break
  fi
  if [ $((PROCESSED % BATCH_SIZE)) -eq 0 ]; then
    echo "Pausing 60s after $PROCESSED issues..."
    sleep 60
  else
    sleep 5
  fi
done
echo "Backfill triggered for $COUNT issues"
if [ "$FAILED" -gt 0 ]; then
  echo "Error: failed to trigger dedupe for $FAILED issue(s)"
  exit 1
fi

View file

@ -1,4 +1,4 @@
# Detects duplicate issues using Claude Code.
# Detects duplicate issues using Claude Code with the /dedupe command.
# Triggered automatically when a new issue is opened, or manually for a single issue.
name: Issue Duplicate Detection
@ -10,15 +10,20 @@ on:
issue_number:
description: 'Issue number to check for duplicates'
required: true
type: number
type: string
permissions:
issues: write
contents: read
issues: write
concurrency:
group: dedupe-${{ github.event.issue.number || inputs.issue_number }}
cancel-in-progress: true
jobs:
detect-duplicate:
runs-on: ubuntu-latest
timeout-minutes: 10
# Skip pull-requests that surface as issues and bot-opened issues
if: >
(github.event_name == 'workflow_dispatch') ||
@ -26,210 +31,26 @@ jobs:
!endsWith(github.actor, '[bot]') &&
github.actor != 'github-actions')
steps:
# ── 1. Ensure required labels exist ─────────────────────────────────────
- name: Ensure labels exist
uses: actions/github-script@v7
with:
script: |
const labels = [
{ name: 'duplicate', color: 'cfd3d7', description: 'This issue or pull request already exists' },
{ name: 'autoclose', color: 'e4e669', description: 'Will be auto-closed after a period of inactivity' },
];
for (const label of labels) {
try {
await github.rest.issues.getLabel({
owner: context.repo.owner,
repo: context.repo.repo,
name: label.name,
});
} catch (err) {
if (err.status === 404) {
await github.rest.issues.createLabel({
owner: context.repo.owner,
repo: context.repo.repo,
name: label.name,
color: label.color,
description: label.description,
});
core.info(`Created label: ${label.name}`);
}
}
}
- uses: actions/checkout@v4
# ── 2. Gather issue data and find candidate duplicates ──────────────────
- name: Gather issue data and candidates
id: data
uses: actions/github-script@v7
with:
script: |
const issueNumber =
context.eventName === 'issues'
? context.payload.issue.number
: parseInt(core.getInput('issue_number') || '${{ inputs.issue_number }}');
const { data: issue } = await github.rest.issues.get({
owner: context.repo.owner,
repo: context.repo.repo,
issue_number: issueNumber,
});
// Skip already-closed or already-labelled issues
if (issue.state === 'closed') {
core.setOutput('skip', 'true');
core.info('Issue is already closed skipping.');
return;
}
if (issue.labels.some(l => l.name === 'duplicate')) {
core.setOutput('skip', 'true');
core.info('Issue already has "duplicate" label skipping.');
return;
}
// Extract meaningful keywords from the title
const stopWords = new Set([
'a','an','the','is','in','on','at','to','for','of','and','or','but','not',
'with','this','that','it','be','are','was','has','have','does','do','how',
'why','when','where','what','which','who','will','can','could','should',
'would','may','might','must','get','got','use','using','used','error',
'issue','bug','feature','request','problem','question','please','just',
'after','before','during','about','from','into','also','then','than',
]);
const keywords = issue.title
.toLowerCase()
.replace(/[^a-z0-9\s]/g, ' ')
.split(/\s+/)
.filter(w => w.length > 2 && !stopWords.has(w))
.slice(0, 6)
.join(' ');
let candidates = [];
if (keywords) {
try {
const q = `repo:${context.repo.owner}/${context.repo.repo} is:issue state:open ${keywords}`;
const { data: results } = await github.rest.search.issuesAndPullRequests({
q,
per_page: 15,
});
candidates = results.items
.filter(item => item.number !== issueNumber && !item.pull_request)
.slice(0, 10);
} catch (err) {
core.warning('GitHub search failed: ' + err.message);
}
}
if (candidates.length === 0) {
core.setOutput('skip', 'true');
core.info('No candidate issues found skipping Claude analysis.');
return;
}
core.setOutput('skip', 'false');
core.setOutput('issue_number', String(issueNumber));
core.setOutput('issue_title', issue.title);
core.setOutput('issue_body', (issue.body || '').substring(0, 3000));
core.setOutput('candidates',
JSON.stringify(candidates.map(c => ({
number: c.number,
title: c.title,
url: c.html_url,
body: (c.body || '').substring(0, 500),
})))
);
# ── 3. Write data files (avoids YAML-injection of arbitrary text) ───────
- name: Write issue data to files
if: steps.data.outputs.skip == 'false'
- name: Determine issue number
id: issue
env:
ISSUE_TITLE: ${{ steps.data.outputs.issue_title }}
ISSUE_BODY: ${{ steps.data.outputs.issue_body }}
CANDIDATES: ${{ steps.data.outputs.candidates }}
EVENT_NAME: ${{ github.event_name }}
INPUT_NUMBER: ${{ inputs.issue_number }}
ISSUE_NUMBER: ${{ github.event.issue.number }}
run: |
printf '%s' "$ISSUE_TITLE" > /tmp/issue-title.txt
printf '%s' "$ISSUE_BODY" > /tmp/issue-body.txt
printf '%s' "$CANDIDATES" > /tmp/issue-candidates.json
# Resolve the issue number from the manual dispatch input or from the
# triggering issue event, and expose it as the step output "number".
if [ "$EVENT_NAME" = "workflow_dispatch" ]; then
  NUMBER="$INPUT_NUMBER"
else
  NUMBER="$ISSUE_NUMBER"
fi
# The dispatch input is a free-form string that is later interpolated into
# the Claude prompt and written to $GITHUB_OUTPUT, so accept only a positive
# integer (this also rejects empty and multi-line values).
if ! [[ "$NUMBER" =~ ^[1-9][0-9]*$ ]]; then
  echo "Error: issue number must be a positive integer, got '$NUMBER'"
  exit 1
fi
echo "number=$NUMBER" >> "$GITHUB_OUTPUT"
# ── 4. Ask Claude to decide whether this is a duplicate ─────────────────
- name: Run Claude duplicate analysis
if: steps.data.outputs.skip == 'false'
uses: anthropics/claude-code-action@v1
- uses: anthropics/claude-code-action@v1
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
with:
prompt: "/dedupe ${{ github.repository }}/issues/${{ steps.issue.outputs.number }}"
anthropic_api_key: ${{ secrets.AUTHROPIC_API_KEY }}
github_token: ${{ secrets.GITHUB_TOKEN }}
track_progress: 'false'
prompt: |
You are a GitHub issue triage assistant.
Analyze whether issue #${{ steps.data.outputs.issue_number }} in this repository
is a duplicate of any existing open issues.
The issue data is stored in temporary files on this runner:
- /tmp/issue-title.txt — title of the new issue
- /tmp/issue-body.txt — body of the new issue
- /tmp/issue-candidates.json — JSON array of up to 10 candidate issues
(each has: number, title, url, body)
Read those files first, then follow these rules:
1. Compare the new issue against every candidate.
Focus on whether they describe the *same underlying problem or request*.
2. Only flag as a duplicate if you are at least 85 % confident.
Superficial wording differences do NOT make an issue non-duplicate.
3. IF the new issue IS a duplicate:
a. Post a friendly, helpful comment on issue #${{ steps.data.outputs.issue_number }}.
The comment must:
- Thank the reporter
- Explain which existing issue(s) it duplicates and why (include markdown links)
- Invite them to subscribe to the original for updates
b. The LAST line of the comment must be exactly (fill in real numbers):
<!-- DEDUPE_RESULT: {"is_duplicate":true,"issues":[ISSUE_NUMBERS]} -->
Example: <!-- DEDUPE_RESULT: {"is_duplicate":true,"issues":[42,73]} -->
4. IF the issue is NOT a duplicate, or you are unsure:
- Do NOT post any comment.
- Do NOT take any other action.
# ── 5. Parse Claude's comment and apply labels ──────────────────────────
- name: Apply labels if duplicate found
if: steps.data.outputs.skip == 'false'
uses: actions/github-script@v7
with:
script: |
const issueNumber = parseInt('${{ steps.data.outputs.issue_number }}');
// Allow a moment for the comment to land
await new Promise(r => setTimeout(r, 5000));
const { data: comments } = await github.rest.issues.listComments({
owner: context.repo.owner,
repo: context.repo.repo,
issue_number: issueNumber,
per_page: 50,
});
const pattern = /<!--\s*DEDUPE_RESULT:\s*(\{[^}]+\})\s*-->/;
let isDuplicate = false;
for (const comment of [...comments].reverse()) {
const m = comment.body.match(pattern);
if (m) {
try {
const result = JSON.parse(m[1]);
isDuplicate = result.is_duplicate === true;
} catch (err) {
core.warning('Failed to parse DEDUPE_RESULT JSON: ' + err.message);
}
break;
}
}
if (isDuplicate) {
await github.rest.issues.addLabels({
owner: context.repo.owner,
repo: context.repo.repo,
issue_number: issueNumber,
labels: ['duplicate', 'autoclose'],
});
core.info(`✅ Applied "duplicate" and "autoclose" labels to #${issueNumber}`);
} else {
core.info(` Issue #${issueNumber} is not a duplicate no labels applied.`);
}
github_token: ${{ secrets.GITHUB_TOKEN }}
claude_args: "--model claude-sonnet-4-5-20250929"

View file

@ -1,6 +1,8 @@
# Removes the "autoclose" label whenever a human (non-bot) posts a new comment
# on an issue that carries the label. This resets the inactivity clock.
name: Remove Autoclose Label on Human Activity
# Removes the "duplicate" label when a human (non-bot) comments on a
# duplicate-flagged issue, signaling that the issue needs re-evaluation.
# The auto-close script also independently checks for human activity,
# so this provides an additional visible signal.
name: Remove Duplicate Label on Human Activity
on:
issue_comment:
@ -10,39 +12,34 @@ permissions:
issues: write
jobs:
remove-autoclose:
remove-label:
# Only run for issue comments (not PR comments)
if: ${{ github.event.issue.pull_request == null }}
if: >
github.event.issue.pull_request == null &&
!endsWith(github.actor, '[bot]') &&
github.actor != 'github-actions'
runs-on: ubuntu-latest
steps:
- name: Remove autoclose label if human commented
- name: Remove duplicate label if human commented
uses: actions/github-script@v7
with:
script: |
const actor = context.actor;
// Ignore bot accounts
if (actor.endsWith('[bot]') || actor === 'github-actions') {
core.info(`Skipping bot comment from ${actor}`);
return;
}
const issue = context.payload.issue;
const issue = context.payload.issue;
const labels = (issue.labels || []).map(l => l.name);
if (!labels.includes('autoclose')) {
core.info('Issue does not have "autoclose" label nothing to do.');
if (!labels.includes('duplicate')) {
core.info('Issue does not have "duplicate" label - nothing to do.');
return;
}
await github.rest.issues.removeLabel({
owner: context.repo.owner,
repo: context.repo.repo,
owner: context.repo.owner,
repo: context.repo.repo,
issue_number: issue.number,
name: 'autoclose',
name: 'duplicate',
});
core.info(
`Removed "autoclose" label from #${issue.number} ` +
`after human activity by ${actor}`
`Removed "duplicate" label from #${issue.number} ` +
`after human comment by ${context.actor}`
);