mirror of
https://github.com/VectifyAI/PageIndex.git
synced 2026-04-24 23:56:21 +02:00
Simplify scripts: unify bot detection, remove redundant API calls and TOCTOU checks
This commit is contained in:
parent
fd9330c434
commit
7df8510bde
4 changed files with 11 additions and 99 deletions
|
|
@ -41,7 +41,7 @@ Launch 5 parallel searches using different keyword strategies to maximize covera
|
|||
|
||||
For each search, use:
|
||||
```
|
||||
gh search issues "<keywords>" --repo $REPOSITORY --limit 20
|
||||
gh search issues "<keywords> state:open" --repo $REPOSITORY --limit 20
|
||||
```
|
||||
|
||||
### 4. Analyze candidates
|
||||
|
|
|
|||
65
README.md
65
README.md
|
|
@ -267,69 +267,4 @@ Leave us a star 🌟 if you like our project. Thank you!
|
|||
|
||||
---
|
||||
|
||||
## 🤖 GitHub Automation
|
||||
|
||||
This repository uses automated GitHub Actions workflows to keep the issue tracker tidy.
|
||||
|
||||
### Overview
|
||||
|
||||
| Workflow | Trigger | Purpose |
|
||||
|---|---|---|
|
||||
| `issue-dedupe.yml` | Issue opened · `workflow_dispatch` | Detects duplicate issues using Claude and labels them |
|
||||
| `backfill-dedupe.yml` | `workflow_dispatch` | Runs duplicate detection over historical issues |
|
||||
| `autoclose-labeled-issues.yml` | Daily schedule · `workflow_dispatch` | Closes issues labelled `autoclose` after N days of inactivity |
|
||||
| `remove-autoclose-label.yml` | Issue comment created | Removes the `autoclose` label when a human posts a new comment |
|
||||
|
||||
### Required Secrets
|
||||
|
||||
Add the following secret to the repository (**Settings → Secrets and variables → Actions**):
|
||||
|
||||
| Secret | Description |
|
||||
|---|---|
|
||||
| `ANTHROPIC_API_KEY` | Your Anthropic API key (used by `anthropics/claude-code-action`) |
|
||||
|
||||
`GITHUB_TOKEN` is provided automatically by GitHub Actions and does not need to be added manually.
|
||||
|
||||
### Labels
|
||||
|
||||
The workflows create the following labels automatically if they do not exist:
|
||||
|
||||
| Label | Description |
|
||||
|---|---|
|
||||
| `duplicate` | Marks issues identified as duplicates |
|
||||
| `autoclose` | Marks issues that will be automatically closed after inactivity |
|
||||
|
||||
### Running the Backfill
|
||||
|
||||
To scan historical issues for duplicates, trigger the **Backfill Duplicate Detection** workflow manually from the **Actions** tab:
|
||||
|
||||
- **`days_back`** (default `30`) — how many days into the past to scan
|
||||
- **`dry_run`** (default `false`) — set to `true` to preview results without modifying issues
|
||||
|
||||
```
|
||||
Actions → Backfill Duplicate Detection → Run workflow
|
||||
```
|
||||
|
||||
### Changing the Inactivity Threshold
|
||||
|
||||
The default inactivity period before an `autoclose`-labelled issue is closed is **7 days**.
|
||||
|
||||
To change it for a one-off run, trigger **Auto-close Inactive Labeled Issues** with the `inactivity_days` input.
|
||||
|
||||
To change the default permanently, edit the `INACTIVITY_DAYS` env variable default in `.github/workflows/autoclose-labeled-issues.yml`:
|
||||
|
||||
```yaml
|
||||
INACTIVITY_DAYS: ${{ inputs.inactivity_days || '7' }} # ← change '7' here
|
||||
```
|
||||
|
||||
### How Duplicate Detection Works
|
||||
|
||||
1. When a new issue is opened, keywords from the title are used to search for the top 10 most relevant existing open issues via the GitHub Search API.
|
||||
2. The issue title, body, and candidate list are passed to **Claude** (`anthropics/claude-code-action`) with a structured prompt.
|
||||
3. If Claude is highly confident that the issue is a duplicate, it posts a comment on the issue with links to the original issue(s) and a brief explanation.
|
||||
4. A follow-up step reads the comment, extracts the machine-readable result, and applies the `duplicate` and `autoclose` labels.
|
||||
5. If Claude is not confident, no comment is posted and no labels are applied.
|
||||
|
||||
---
|
||||
|
||||
© 2025 [Vectify AI](https://vectify.ai)
|
||||
|
|
|
|||
|
|
@ -92,13 +92,16 @@ async function fetchDuplicateIssues() {
|
|||
return issues.filter(i => new Date(i.created_at) < cutoff);
|
||||
}
|
||||
|
||||
/**
 * Reports whether a GitHub user object represents an automated account.
 *
 * An account counts as a bot when its `type` is 'Bot', its login ends with
 * the '[bot]' suffix, or its login is exactly 'github-actions'.
 *
 * @param {{type?: string, login?: string}|null|undefined} user - The `user`
 *   field taken from an issue or comment payload.
 * @returns {boolean} True for automated accounts; false otherwise, including
 *   when `user` is missing.
 */
function isBot(user) {
  // The GitHub REST schema marks `user` as nullable; report "not a bot"
  // instead of throwing so one such comment cannot abort a whole scan.
  if (user == null) return false;
  if (user.type === 'Bot') return true;

  // Guard against a missing or non-string login before the string checks.
  const login = typeof user.login === 'string' ? user.login : '';
  return login.endsWith('[bot]') || login === 'github-actions';
}
|
||||
|
||||
/**
|
||||
* Finds the bot's duplicate comment on an issue (contains "possible duplicate").
|
||||
*/
|
||||
function findDuplicateComment(comments) {
|
||||
return comments.find(c =>
|
||||
(c.user.type === 'Bot' || c.user.login === 'github-actions[bot]') &&
|
||||
c.body.includes('possible duplicate')
|
||||
isBot(c.user) && c.body.includes('possible duplicate')
|
||||
);
|
||||
}
|
||||
|
||||
|
|
@ -107,9 +110,7 @@ function findDuplicateComment(comments) {
|
|||
*/
|
||||
function hasHumanCommentAfter(comments, afterDate) {
|
||||
return comments.some(c => {
|
||||
if (c.user.type === 'Bot' || c.user.login.endsWith('[bot]') || c.user.login === 'github-actions') {
|
||||
return false;
|
||||
}
|
||||
if (isBot(c.user)) return false;
|
||||
return new Date(c.created_at) > afterDate;
|
||||
});
|
||||
}
|
||||
|
|
@ -145,12 +146,6 @@ async function closeAsDuplicate(issueNumber) {
|
|||
`/repos/${REPO_OWNER}/${REPO_NAME}/issues/${issueNumber}`,
|
||||
{ state: 'closed', state_reason: 'completed' }
|
||||
);
|
||||
|
||||
await githubRequest(
|
||||
'POST',
|
||||
`/repos/${REPO_OWNER}/${REPO_NAME}/issues/${issueNumber}/labels`,
|
||||
{ labels: ['duplicate'] }
|
||||
);
|
||||
}
|
||||
|
||||
async function processIssue(issue) {
|
||||
|
|
|
|||
|
|
@ -66,32 +66,16 @@ if [ ${#DUPLICATES[@]} -gt 3 ]; then
|
|||
DUPLICATES=("${DUPLICATES[@]:0:3}")
|
||||
fi
|
||||
|
||||
# Validate that the base issue exists and is open
|
||||
if ! gh issue view "$BASE_ISSUE" --repo "$REPO" --json state -q '.state' | grep -qi 'open'; then
|
||||
echo "Error: Issue #$BASE_ISSUE is not open or does not exist" >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Build the duplicate links list
|
||||
LINKS=""
|
||||
COUNT=0
|
||||
LINKS=""
|
||||
for dup in "${DUPLICATES[@]}"; do
|
||||
# Validate duplicate issue exists
|
||||
if gh issue view "$dup" --repo "$REPO" --json number -q '.number' > /dev/null 2>&1; then
|
||||
COUNT=$((COUNT + 1))
|
||||
LINKS="${LINKS}${COUNT}. https://github.com/${REPO}/issues/${dup}
|
||||
"
|
||||
else
|
||||
echo "Warning: Issue #$dup does not exist, skipping" >&2
|
||||
fi
|
||||
done
|
||||
|
||||
if [ "$COUNT" -eq 0 ]; then
|
||||
echo "Error: None of the specified duplicate issues exist" >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Build and post the comment
|
||||
# Build and post the comment — if the issue is closed or doesn't exist, gh will error out
|
||||
COMMENT="Found ${COUNT} possible duplicate issue(s):
|
||||
|
||||
${LINKS}
|
||||
|
|
@ -99,8 +83,6 @@ This issue will be automatically closed as a duplicate in 3 days.
|
|||
- To prevent auto-closure, add a comment or react with :thumbsdown: on this comment."
|
||||
|
||||
gh issue comment "$BASE_ISSUE" --repo "$REPO" --body "$COMMENT"
|
||||
|
||||
# Add the duplicate label
|
||||
gh issue edit "$BASE_ISSUE" --repo "$REPO" --add-label "duplicate"
|
||||
|
||||
echo "Posted duplicate comment on issue #$BASE_ISSUE with $COUNT potential duplicate(s)"
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue