mirror of
https://github.com/VectifyAI/PageIndex.git
synced 2026-04-25 08:06:22 +02:00
Refactor issue dedup system to use claude-code-action with /dedupe command
Replace the copilot-generated inline search logic with a claude-code-action based architecture inspired by anthropic/claude-code's approach: - Add .claude/commands/dedupe.md with 5-parallel-search strategy - Add scripts/comment-on-duplicates.sh with 3-day grace period warning - Rewrite issue-dedupe.yml to use claude-code-action + /dedupe command - Rewrite autoclose script to check bot comments, human activity, and thumbsdown - Rewrite backfill to trigger dedupe workflow per issue with rate limiting - Add concurrency control, timeout, input validation, and rate limit retry - Remove gh.sh (unnecessary), backfill-dedupe.js (replaced by workflow trigger)
This commit is contained in:
parent
b3cb9531a4
commit
fd9330c434
8 changed files with 413 additions and 752 deletions
30
.github/workflows/autoclose-labeled-issues.yml
vendored
30
.github/workflows/autoclose-labeled-issues.yml
vendored
|
|
@ -1,21 +1,14 @@
|
|||
# Closes open issues that carry the "autoclose" label and have been inactive
|
||||
# for more than INACTIVITY_DAYS days. Runs on a daily schedule and can also
|
||||
# be triggered manually.
|
||||
name: Auto-close Inactive Labeled Issues
|
||||
# Auto-closes duplicate issues after 3 days if no human activity or thumbs-down reaction.
|
||||
# Runs daily at 09:00 UTC.
|
||||
name: Auto-close Duplicate Issues
|
||||
|
||||
on:
|
||||
schedule:
|
||||
# Runs every day at 01:00 UTC
|
||||
- cron: '0 1 * * *'
|
||||
- cron: '0 9 * * *'
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
inactivity_days:
|
||||
description: 'Days of inactivity before closing (default: 7)'
|
||||
required: false
|
||||
default: '7'
|
||||
type: number
|
||||
dry_run:
|
||||
description: 'Dry run – report but do not actually close issues'
|
||||
description: 'Dry run - report but do not close issues'
|
||||
required: false
|
||||
default: 'false'
|
||||
type: choice
|
||||
|
|
@ -30,16 +23,15 @@ permissions:
|
|||
jobs:
|
||||
autoclose:
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 10
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Close inactive autoclose-labeled issues
|
||||
- name: Close inactive duplicate issues
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
REPO_OWNER: ${{ github.repository_owner }}
|
||||
REPO_NAME: ${{ github.event.repository.name }}
|
||||
# workflow_dispatch overrides the default; schedule uses the default (7)
|
||||
INACTIVITY_DAYS: ${{ inputs.inactivity_days || '7' }}
|
||||
DRY_RUN: ${{ inputs.dry_run || 'false' }}
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
REPO_OWNER: ${{ github.repository_owner }}
|
||||
REPO_NAME: ${{ github.event.repository.name }}
|
||||
DRY_RUN: ${{ inputs.dry_run || 'false' }}
|
||||
run: node scripts/autoclose-labeled-issues.js
|
||||
|
|
|
|||
89
.github/workflows/backfill-dedupe.yml
vendored
89
.github/workflows/backfill-dedupe.yml
vendored
|
|
@ -1,4 +1,4 @@
|
|||
# Backfills duplicate detection for historical issues.
|
||||
# Backfills duplicate detection for historical issues using Claude Code.
|
||||
# Triggered manually via workflow_dispatch.
|
||||
name: Backfill Duplicate Detection
|
||||
|
||||
|
|
@ -10,58 +10,55 @@ on:
|
|||
required: false
|
||||
default: '30'
|
||||
type: number
|
||||
dry_run:
|
||||
description: 'Dry run – analyze but do not post comments or apply labels'
|
||||
required: false
|
||||
default: 'false'
|
||||
type: choice
|
||||
options:
|
||||
- 'false'
|
||||
- 'true'
|
||||
|
||||
permissions:
|
||||
issues: write
|
||||
contents: read
|
||||
issues: write
|
||||
actions: write
|
||||
|
||||
jobs:
|
||||
backfill:
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 10
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v4
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Ensure required labels exist
|
||||
uses: actions/github-script@v7
|
||||
with:
|
||||
script: |
|
||||
const labels = [
|
||||
{ name: 'duplicate', color: 'cfd3d7', description: 'This issue or pull request already exists' },
|
||||
{ name: 'autoclose', color: 'e4e669', description: 'Will be auto-closed after a period of inactivity' },
|
||||
];
|
||||
for (const label of labels) {
|
||||
try {
|
||||
await github.rest.issues.getLabel({
|
||||
owner: context.repo.owner,
|
||||
repo: context.repo.repo,
|
||||
name: label.name,
|
||||
});
|
||||
} catch (err) {
|
||||
if (err.status === 404) {
|
||||
await github.rest.issues.createLabel({
|
||||
owner: context.repo.owner, repo: context.repo.repo,
|
||||
name: label.name, color: label.color, description: label.description,
|
||||
});
|
||||
core.info(`Created label: ${label.name}`);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
- name: Run backfill script
|
||||
- name: Fetch issues and run dedupe
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
ANTHROPIC_API_KEY: ${{ secrets.AUTHROPIC_API_KEY }}
|
||||
REPO_OWNER: ${{ github.repository_owner }}
|
||||
REPO_NAME: ${{ github.event.repository.name }}
|
||||
DAYS_BACK: ${{ inputs.days_back }}
|
||||
DRY_RUN: ${{ inputs.dry_run }}
|
||||
run: node scripts/backfill-dedupe.js
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
REPO: ${{ github.repository }}
|
||||
DAYS_BACK: ${{ inputs.days_back || '30' }}
|
||||
run: |
|
||||
if ! [[ "$DAYS_BACK" =~ ^[0-9]+$ ]]; then
|
||||
echo "Error: days_back must be a number"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
SINCE=$(date -u -d "$DAYS_BACK days ago" +%Y-%m-%dT%H:%M:%SZ 2>/dev/null || date -u -v-${DAYS_BACK}d +%Y-%m-%dT%H:%M:%SZ)
|
||||
echo "Fetching open issues since $SINCE"
|
||||
|
||||
# Get open issues, filter out PRs and already-labeled ones
|
||||
ISSUES=$(gh issue list --repo "$REPO" --state open --limit 200 --json number,title,labels,createdAt \
|
||||
--jq "[.[] | select(.createdAt >= \"$SINCE\") | select([.labels[].name] | index(\"duplicate\") | not)] | .[].number")
|
||||
|
||||
if [ -z "$ISSUES" ]; then
|
||||
echo "No issues to process"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
BATCH_SIZE=10
|
||||
COUNT=0
|
||||
echo "Issues to process: $ISSUES"
|
||||
for NUMBER in $ISSUES; do
|
||||
echo "Triggering dedupe for issue #$NUMBER"
|
||||
gh workflow run issue-dedupe.yml --repo "$REPO" -f issue_number="$NUMBER"
|
||||
COUNT=$((COUNT + 1))
|
||||
if [ $((COUNT % BATCH_SIZE)) -eq 0 ]; then
|
||||
echo "Pausing 60s after $COUNT issues..."
|
||||
sleep 60
|
||||
else
|
||||
sleep 5
|
||||
fi
|
||||
done
|
||||
|
||||
echo "Backfill triggered for $COUNT issues"
|
||||
|
|
|
|||
229
.github/workflows/issue-dedupe.yml
vendored
229
.github/workflows/issue-dedupe.yml
vendored
|
|
@ -1,4 +1,4 @@
|
|||
# Detects duplicate issues using Claude Code.
|
||||
# Detects duplicate issues using Claude Code with the /dedupe command.
|
||||
# Triggered automatically when a new issue is opened, or manually for a single issue.
|
||||
name: Issue Duplicate Detection
|
||||
|
||||
|
|
@ -10,15 +10,20 @@ on:
|
|||
issue_number:
|
||||
description: 'Issue number to check for duplicates'
|
||||
required: true
|
||||
type: number
|
||||
type: string
|
||||
|
||||
permissions:
|
||||
issues: write
|
||||
contents: read
|
||||
issues: write
|
||||
|
||||
concurrency:
|
||||
group: dedupe-${{ github.event.issue.number || inputs.issue_number }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
detect-duplicate:
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 10
|
||||
# Skip pull-requests that surface as issues and bot-opened issues
|
||||
if: >
|
||||
(github.event_name == 'workflow_dispatch') ||
|
||||
|
|
@ -26,210 +31,26 @@ jobs:
|
|||
!endsWith(github.actor, '[bot]') &&
|
||||
github.actor != 'github-actions')
|
||||
steps:
|
||||
# ── 1. Ensure required labels exist ─────────────────────────────────────
|
||||
- name: Ensure labels exist
|
||||
uses: actions/github-script@v7
|
||||
with:
|
||||
script: |
|
||||
const labels = [
|
||||
{ name: 'duplicate', color: 'cfd3d7', description: 'This issue or pull request already exists' },
|
||||
{ name: 'autoclose', color: 'e4e669', description: 'Will be auto-closed after a period of inactivity' },
|
||||
];
|
||||
for (const label of labels) {
|
||||
try {
|
||||
await github.rest.issues.getLabel({
|
||||
owner: context.repo.owner,
|
||||
repo: context.repo.repo,
|
||||
name: label.name,
|
||||
});
|
||||
} catch (err) {
|
||||
if (err.status === 404) {
|
||||
await github.rest.issues.createLabel({
|
||||
owner: context.repo.owner,
|
||||
repo: context.repo.repo,
|
||||
name: label.name,
|
||||
color: label.color,
|
||||
description: label.description,
|
||||
});
|
||||
core.info(`Created label: ${label.name}`);
|
||||
}
|
||||
}
|
||||
}
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
# ── 2. Gather issue data and find candidate duplicates ──────────────────
|
||||
- name: Gather issue data and candidates
|
||||
id: data
|
||||
uses: actions/github-script@v7
|
||||
with:
|
||||
script: |
|
||||
const issueNumber =
|
||||
context.eventName === 'issues'
|
||||
? context.payload.issue.number
|
||||
: parseInt(core.getInput('issue_number') || '${{ inputs.issue_number }}');
|
||||
|
||||
const { data: issue } = await github.rest.issues.get({
|
||||
owner: context.repo.owner,
|
||||
repo: context.repo.repo,
|
||||
issue_number: issueNumber,
|
||||
});
|
||||
|
||||
// Skip already-closed or already-labelled issues
|
||||
if (issue.state === 'closed') {
|
||||
core.setOutput('skip', 'true');
|
||||
core.info('Issue is already closed – skipping.');
|
||||
return;
|
||||
}
|
||||
if (issue.labels.some(l => l.name === 'duplicate')) {
|
||||
core.setOutput('skip', 'true');
|
||||
core.info('Issue already has "duplicate" label – skipping.');
|
||||
return;
|
||||
}
|
||||
|
||||
// Extract meaningful keywords from the title
|
||||
const stopWords = new Set([
|
||||
'a','an','the','is','in','on','at','to','for','of','and','or','but','not',
|
||||
'with','this','that','it','be','are','was','has','have','does','do','how',
|
||||
'why','when','where','what','which','who','will','can','could','should',
|
||||
'would','may','might','must','get','got','use','using','used','error',
|
||||
'issue','bug','feature','request','problem','question','please','just',
|
||||
'after','before','during','about','from','into','also','then','than',
|
||||
]);
|
||||
const keywords = issue.title
|
||||
.toLowerCase()
|
||||
.replace(/[^a-z0-9\s]/g, ' ')
|
||||
.split(/\s+/)
|
||||
.filter(w => w.length > 2 && !stopWords.has(w))
|
||||
.slice(0, 6)
|
||||
.join(' ');
|
||||
|
||||
let candidates = [];
|
||||
if (keywords) {
|
||||
try {
|
||||
const q = `repo:${context.repo.owner}/${context.repo.repo} is:issue state:open ${keywords}`;
|
||||
const { data: results } = await github.rest.search.issuesAndPullRequests({
|
||||
q,
|
||||
per_page: 15,
|
||||
});
|
||||
candidates = results.items
|
||||
.filter(item => item.number !== issueNumber && !item.pull_request)
|
||||
.slice(0, 10);
|
||||
} catch (err) {
|
||||
core.warning('GitHub search failed: ' + err.message);
|
||||
}
|
||||
}
|
||||
|
||||
if (candidates.length === 0) {
|
||||
core.setOutput('skip', 'true');
|
||||
core.info('No candidate issues found – skipping Claude analysis.');
|
||||
return;
|
||||
}
|
||||
|
||||
core.setOutput('skip', 'false');
|
||||
core.setOutput('issue_number', String(issueNumber));
|
||||
core.setOutput('issue_title', issue.title);
|
||||
core.setOutput('issue_body', (issue.body || '').substring(0, 3000));
|
||||
core.setOutput('candidates',
|
||||
JSON.stringify(candidates.map(c => ({
|
||||
number: c.number,
|
||||
title: c.title,
|
||||
url: c.html_url,
|
||||
body: (c.body || '').substring(0, 500),
|
||||
})))
|
||||
);
|
||||
|
||||
# ── 3. Write data files (avoids YAML-injection of arbitrary text) ───────
|
||||
- name: Write issue data to files
|
||||
if: steps.data.outputs.skip == 'false'
|
||||
- name: Determine issue number
|
||||
id: issue
|
||||
env:
|
||||
ISSUE_TITLE: ${{ steps.data.outputs.issue_title }}
|
||||
ISSUE_BODY: ${{ steps.data.outputs.issue_body }}
|
||||
CANDIDATES: ${{ steps.data.outputs.candidates }}
|
||||
EVENT_NAME: ${{ github.event_name }}
|
||||
INPUT_NUMBER: ${{ inputs.issue_number }}
|
||||
ISSUE_NUMBER: ${{ github.event.issue.number }}
|
||||
run: |
|
||||
printf '%s' "$ISSUE_TITLE" > /tmp/issue-title.txt
|
||||
printf '%s' "$ISSUE_BODY" > /tmp/issue-body.txt
|
||||
printf '%s' "$CANDIDATES" > /tmp/issue-candidates.json
|
||||
if [ "$EVENT_NAME" = "workflow_dispatch" ]; then
|
||||
echo "number=$INPUT_NUMBER" >> "$GITHUB_OUTPUT"
|
||||
else
|
||||
echo "number=$ISSUE_NUMBER" >> "$GITHUB_OUTPUT"
|
||||
fi
|
||||
|
||||
# ── 4. Ask Claude to decide whether this is a duplicate ─────────────────
|
||||
- name: Run Claude duplicate analysis
|
||||
if: steps.data.outputs.skip == 'false'
|
||||
uses: anthropics/claude-code-action@v1
|
||||
- uses: anthropics/claude-code-action@v1
|
||||
env:
|
||||
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
with:
|
||||
prompt: "/dedupe ${{ github.repository }}/issues/${{ steps.issue.outputs.number }}"
|
||||
anthropic_api_key: ${{ secrets.AUTHROPIC_API_KEY }}
|
||||
github_token: ${{ secrets.GITHUB_TOKEN }}
|
||||
track_progress: 'false'
|
||||
prompt: |
|
||||
You are a GitHub issue triage assistant.
|
||||
|
||||
Analyze whether issue #${{ steps.data.outputs.issue_number }} in this repository
|
||||
is a duplicate of any existing open issues.
|
||||
|
||||
The issue data is stored in temporary files on this runner:
|
||||
- /tmp/issue-title.txt — title of the new issue
|
||||
- /tmp/issue-body.txt — body of the new issue
|
||||
- /tmp/issue-candidates.json — JSON array of up to 10 candidate issues
|
||||
(each has: number, title, url, body)
|
||||
|
||||
Read those files first, then follow these rules:
|
||||
|
||||
1. Compare the new issue against every candidate.
|
||||
Focus on whether they describe the *same underlying problem or request*.
|
||||
2. Only flag as a duplicate if you are at least 85 % confident.
|
||||
Superficial wording differences do NOT make an issue non-duplicate.
|
||||
3. IF the new issue IS a duplicate:
|
||||
a. Post a friendly, helpful comment on issue #${{ steps.data.outputs.issue_number }}.
|
||||
The comment must:
|
||||
- Thank the reporter
|
||||
- Explain which existing issue(s) it duplicates and why (include markdown links)
|
||||
- Invite them to subscribe to the original for updates
|
||||
b. The LAST line of the comment must be exactly (fill in real numbers):
|
||||
<!-- DEDUPE_RESULT: {"is_duplicate":true,"issues":[ISSUE_NUMBERS]} -->
|
||||
Example: <!-- DEDUPE_RESULT: {"is_duplicate":true,"issues":[42,73]} -->
|
||||
4. IF the issue is NOT a duplicate, or you are unsure:
|
||||
- Do NOT post any comment.
|
||||
- Do NOT take any other action.
|
||||
|
||||
# ── 5. Parse Claude's comment and apply labels ──────────────────────────
|
||||
- name: Apply labels if duplicate found
|
||||
if: steps.data.outputs.skip == 'false'
|
||||
uses: actions/github-script@v7
|
||||
with:
|
||||
script: |
|
||||
const issueNumber = parseInt('${{ steps.data.outputs.issue_number }}');
|
||||
|
||||
// Allow a moment for the comment to land
|
||||
await new Promise(r => setTimeout(r, 5000));
|
||||
|
||||
const { data: comments } = await github.rest.issues.listComments({
|
||||
owner: context.repo.owner,
|
||||
repo: context.repo.repo,
|
||||
issue_number: issueNumber,
|
||||
per_page: 50,
|
||||
});
|
||||
|
||||
const pattern = /<!--\s*DEDUPE_RESULT:\s*(\{[^}]+\})\s*-->/;
|
||||
let isDuplicate = false;
|
||||
|
||||
for (const comment of [...comments].reverse()) {
|
||||
const m = comment.body.match(pattern);
|
||||
if (m) {
|
||||
try {
|
||||
const result = JSON.parse(m[1]);
|
||||
isDuplicate = result.is_duplicate === true;
|
||||
} catch (err) {
|
||||
core.warning('Failed to parse DEDUPE_RESULT JSON: ' + err.message);
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (isDuplicate) {
|
||||
await github.rest.issues.addLabels({
|
||||
owner: context.repo.owner,
|
||||
repo: context.repo.repo,
|
||||
issue_number: issueNumber,
|
||||
labels: ['duplicate', 'autoclose'],
|
||||
});
|
||||
core.info(`✅ Applied "duplicate" and "autoclose" labels to #${issueNumber}`);
|
||||
} else {
|
||||
core.info(`ℹ️ Issue #${issueNumber} is not a duplicate – no labels applied.`);
|
||||
}
|
||||
github_token: ${{ secrets.GITHUB_TOKEN }}
|
||||
claude_args: "--model claude-sonnet-4-5-20250929"
|
||||
|
|
|
|||
41
.github/workflows/remove-autoclose-label.yml
vendored
41
.github/workflows/remove-autoclose-label.yml
vendored
|
|
@ -1,6 +1,8 @@
|
|||
# Removes the "autoclose" label whenever a human (non-bot) posts a new comment
|
||||
# on an issue that carries the label. This resets the inactivity clock.
|
||||
name: Remove Autoclose Label on Human Activity
|
||||
# Removes the "duplicate" label when a human (non-bot) comments on a
|
||||
# duplicate-flagged issue, signaling that the issue needs re-evaluation.
|
||||
# The auto-close script also independently checks for human activity,
|
||||
# so this provides an additional visible signal.
|
||||
name: Remove Duplicate Label on Human Activity
|
||||
|
||||
on:
|
||||
issue_comment:
|
||||
|
|
@ -10,39 +12,34 @@ permissions:
|
|||
issues: write
|
||||
|
||||
jobs:
|
||||
remove-autoclose:
|
||||
remove-label:
|
||||
# Only run for issue comments (not PR comments)
|
||||
if: ${{ github.event.issue.pull_request == null }}
|
||||
if: >
|
||||
github.event.issue.pull_request == null &&
|
||||
!endsWith(github.actor, '[bot]') &&
|
||||
github.actor != 'github-actions'
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Remove autoclose label if human commented
|
||||
- name: Remove duplicate label if human commented
|
||||
uses: actions/github-script@v7
|
||||
with:
|
||||
script: |
|
||||
const actor = context.actor;
|
||||
|
||||
// Ignore bot accounts
|
||||
if (actor.endsWith('[bot]') || actor === 'github-actions') {
|
||||
core.info(`Skipping bot comment from ${actor}`);
|
||||
return;
|
||||
}
|
||||
|
||||
const issue = context.payload.issue;
|
||||
const issue = context.payload.issue;
|
||||
const labels = (issue.labels || []).map(l => l.name);
|
||||
|
||||
if (!labels.includes('autoclose')) {
|
||||
core.info('Issue does not have "autoclose" label – nothing to do.');
|
||||
if (!labels.includes('duplicate')) {
|
||||
core.info('Issue does not have "duplicate" label - nothing to do.');
|
||||
return;
|
||||
}
|
||||
|
||||
await github.rest.issues.removeLabel({
|
||||
owner: context.repo.owner,
|
||||
repo: context.repo.repo,
|
||||
owner: context.repo.owner,
|
||||
repo: context.repo.repo,
|
||||
issue_number: issue.number,
|
||||
name: 'autoclose',
|
||||
name: 'duplicate',
|
||||
});
|
||||
|
||||
core.info(
|
||||
`Removed "autoclose" label from #${issue.number} ` +
|
||||
`after human activity by ${actor}`
|
||||
`Removed "duplicate" label from #${issue.number} ` +
|
||||
`after human comment by ${context.actor}`
|
||||
);
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue