mirror of
https://github.com/VectifyAI/PageIndex.git
synced 2026-04-25 08:06:22 +02:00
Refactor issue dedup system to use claude-code-action with /dedupe command
Replace the Copilot-generated inline search logic with a claude-code-action-based architecture inspired by anthropic/claude-code's approach:

- Add .claude/commands/dedupe.md with 5-parallel-search strategy
- Add scripts/comment-on-duplicates.sh with 3-day grace period warning
- Rewrite issue-dedupe.yml to use claude-code-action + /dedupe command
- Rewrite autoclose script to check bot comments, human activity, and thumbs-down reactions
- Rewrite backfill to trigger dedupe workflow per issue with rate limiting
- Add concurrency control, timeout, input validation, and rate limit retry
- Remove gh.sh (unnecessary) and backfill-dedupe.js (replaced by workflow trigger)
This commit is contained in:
parent
b3cb9531a4
commit
fd9330c434
8 changed files with 413 additions and 752 deletions
89
.github/workflows/backfill-dedupe.yml
vendored
89
.github/workflows/backfill-dedupe.yml
vendored
|
|
@ -1,4 +1,4 @@
|
|||
# Backfills duplicate detection for historical issues.
|
||||
# Backfills duplicate detection for historical issues using Claude Code.
|
||||
# Triggered manually via workflow_dispatch.
|
||||
name: Backfill Duplicate Detection
|
||||
|
||||
|
|
@ -10,58 +10,55 @@ on:
|
|||
required: false
|
||||
default: '30'
|
||||
type: number
|
||||
dry_run:
|
||||
description: 'Dry run – analyze but do not post comments or apply labels'
|
||||
required: false
|
||||
default: 'false'
|
||||
type: choice
|
||||
options:
|
||||
- 'false'
|
||||
- 'true'
|
||||
|
||||
permissions:
|
||||
issues: write
|
||||
contents: read
|
||||
issues: write
|
||||
actions: write
|
||||
|
||||
jobs:
|
||||
backfill:
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 10
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v4
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Ensure required labels exist
|
||||
uses: actions/github-script@v7
|
||||
with:
|
||||
script: |
|
||||
const labels = [
|
||||
{ name: 'duplicate', color: 'cfd3d7', description: 'This issue or pull request already exists' },
|
||||
{ name: 'autoclose', color: 'e4e669', description: 'Will be auto-closed after a period of inactivity' },
|
||||
];
|
||||
for (const label of labels) {
|
||||
try {
|
||||
await github.rest.issues.getLabel({
|
||||
owner: context.repo.owner,
|
||||
repo: context.repo.repo,
|
||||
name: label.name,
|
||||
});
|
||||
} catch (err) {
|
||||
if (err.status === 404) {
|
||||
await github.rest.issues.createLabel({
|
||||
owner: context.repo.owner, repo: context.repo.repo,
|
||||
name: label.name, color: label.color, description: label.description,
|
||||
});
|
||||
core.info(`Created label: ${label.name}`);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
- name: Run backfill script
|
||||
- name: Fetch issues and run dedupe
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
ANTHROPIC_API_KEY: ${{ secrets.AUTHROPIC_API_KEY }}
|
||||
REPO_OWNER: ${{ github.repository_owner }}
|
||||
REPO_NAME: ${{ github.event.repository.name }}
|
||||
DAYS_BACK: ${{ inputs.days_back }}
|
||||
DRY_RUN: ${{ inputs.dry_run }}
|
||||
run: node scripts/backfill-dedupe.js
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
REPO: ${{ github.repository }}
|
||||
DAYS_BACK: ${{ inputs.days_back || '30' }}
|
||||
run: |
|
||||
if ! [[ "$DAYS_BACK" =~ ^[0-9]+$ ]]; then
|
||||
echo "Error: days_back must be a number"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
SINCE=$(date -u -d "$DAYS_BACK days ago" +%Y-%m-%dT%H:%M:%SZ 2>/dev/null || date -u -v-${DAYS_BACK}d +%Y-%m-%dT%H:%M:%SZ)
|
||||
echo "Fetching open issues since $SINCE"
|
||||
|
||||
# Get open issues created since $SINCE (gh issue list returns issues only, not PRs), skipping ones already labeled "duplicate"
|
||||
ISSUES=$(gh issue list --repo "$REPO" --state open --limit 200 --json number,title,labels,createdAt \
|
||||
--jq "[.[] | select(.createdAt >= \"$SINCE\") | select([.labels[].name] | index(\"duplicate\") | not)] | .[].number")
|
||||
|
||||
if [ -z "$ISSUES" ]; then
|
||||
echo "No issues to process"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
BATCH_SIZE=10
|
||||
COUNT=0
|
||||
echo "Issues to process: $ISSUES"
|
||||
for NUMBER in $ISSUES; do
|
||||
echo "Triggering dedupe for issue #$NUMBER"
|
||||
gh workflow run issue-dedupe.yml --repo "$REPO" -f issue_number="$NUMBER"
|
||||
COUNT=$((COUNT + 1))
|
||||
if [ $((COUNT % BATCH_SIZE)) -eq 0 ]; then
|
||||
echo "Pausing 60s after $COUNT issues..."
|
||||
sleep 60
|
||||
else
|
||||
sleep 5
|
||||
fi
|
||||
done
|
||||
|
||||
echo "Backfill triggered for $COUNT issues"
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue