mirror of
https://github.com/VectifyAI/PageIndex.git
synced 2026-05-16 18:25:14 +02:00
Fix issues from Copilot review: 403 retry, comments pagination, backfill pagination
- Only retry 403 when rate-limit headers indicate throttling, not permission errors
- Add fetchAllComments() with pagination for issues with 100+ comments
- Add pagination loop in backfill workflow to handle repos with 200+ open issues
This commit is contained in:
parent
7df8510bde
commit
5fa180744d
2 changed files with 50 additions and 9 deletions
16
.github/workflows/backfill-dedupe.yml
vendored
16
.github/workflows/backfill-dedupe.yml
vendored
|
|
@ -37,9 +37,19 @@ jobs:
|
||||||
SINCE=$(date -u -d "$DAYS_BACK days ago" +%Y-%m-%dT%H:%M:%SZ 2>/dev/null || date -u -v-${DAYS_BACK}d +%Y-%m-%dT%H:%M:%SZ)
|
SINCE=$(date -u -d "$DAYS_BACK days ago" +%Y-%m-%dT%H:%M:%SZ 2>/dev/null || date -u -v-${DAYS_BACK}d +%Y-%m-%dT%H:%M:%SZ)
|
||||||
echo "Fetching open issues since $SINCE"
|
echo "Fetching open issues since $SINCE"
|
||||||
|
|
||||||
# Get open issues, filter out PRs and already-labeled ones
|
# Get open issues with pagination, filter out PRs and already-labeled ones
|
||||||
ISSUES=$(gh issue list --repo "$REPO" --state open --limit 200 --json number,title,labels,createdAt \
|
ISSUES=""
|
||||||
--jq "[.[] | select(.createdAt >= \"$SINCE\") | select([.labels[].name] | index(\"duplicate\") | not)] | .[].number")
|
PAGE=1
|
||||||
|
while true; do
|
||||||
|
BATCH=$(gh issue list --repo "$REPO" --state open --limit 100 --page "$PAGE" --json number,labels,createdAt \
|
||||||
|
--jq "[.[] | select(.createdAt >= \"$SINCE\") | select([.labels[].name] | index(\"duplicate\") | not)] | .[].number")
|
||||||
|
|
||||||
|
[ -z "$BATCH" ] && break
|
||||||
|
ISSUES="$ISSUES $BATCH"
|
||||||
|
[ $(echo "$BATCH" | wc -w) -lt 100 ] && break
|
||||||
|
PAGE=$((PAGE + 1))
|
||||||
|
done
|
||||||
|
ISSUES=$(echo "$ISSUES" | xargs)
|
||||||
|
|
||||||
if [ -z "$ISSUES" ]; then
|
if [ -z "$ISSUES" ]; then
|
||||||
echo "No issues to process"
|
echo "No issues to process"
|
||||||
|
|
|
||||||
|
|
@ -46,7 +46,8 @@ function githubRequest(method, path, body = null, retried = false) {
|
||||||
let data = '';
|
let data = '';
|
||||||
res.on('data', chunk => (data += chunk));
|
res.on('data', chunk => (data += chunk));
|
||||||
res.on('end', async () => {
|
res.on('end', async () => {
|
||||||
if ((res.statusCode === 403 || res.statusCode === 429) && !retried) {
|
// 429: always retry (rate limit)
|
||||||
|
if (res.statusCode === 429 && !retried) {
|
||||||
const retryAfter = parseInt(res.headers['retry-after'] || '60', 10);
|
const retryAfter = parseInt(res.headers['retry-after'] || '60', 10);
|
||||||
console.log(` Rate limited on ${method} ${path}, retrying after ${retryAfter}s...`);
|
console.log(` Rate limited on ${method} ${path}, retrying after ${retryAfter}s...`);
|
||||||
await sleep(retryAfter * 1000);
|
await sleep(retryAfter * 1000);
|
||||||
|
|
@ -54,6 +55,19 @@ function githubRequest(method, path, body = null, retried = false) {
|
||||||
catch (err) { reject(err); }
|
catch (err) { reject(err); }
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
// 403: retry only when the response is rate-limit related
|
||||||
|
if (res.statusCode === 403 && !retried) {
|
||||||
|
const rateLimitRemaining = res.headers['x-ratelimit-remaining'];
|
||||||
|
const hasRetryAfter = res.headers['retry-after'];
|
||||||
|
if (rateLimitRemaining === '0' || hasRetryAfter) {
|
||||||
|
const retryAfter = parseInt(hasRetryAfter || '60', 10);
|
||||||
|
console.log(` Rate limited (403) on ${method} ${path}, retrying after ${retryAfter}s...`);
|
||||||
|
await sleep(retryAfter * 1000);
|
||||||
|
try { resolve(await githubRequest(method, path, body, true)); }
|
||||||
|
catch (err) { reject(err); }
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
if (res.statusCode >= 400) {
|
if (res.statusCode >= 400) {
|
||||||
reject(new Error(`GitHub API ${method} ${path} -> ${res.statusCode}: ${data}`));
|
reject(new Error(`GitHub API ${method} ${path} -> ${res.statusCode}: ${data}`));
|
||||||
return;
|
return;
|
||||||
|
|
@ -115,6 +129,26 @@ function hasHumanCommentAfter(comments, afterDate) {
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Collects every comment on an issue by walking the paginated comments endpoint.
 *
 * Pages are requested 100 comments at a time, starting at page 1. Collection
 * stops as soon as a response is not an array, is an empty array, or holds
 * fewer than 100 entries (i.e. it was the last page).
 *
 * @param {number|string} issueNumber - Issue number interpolated into the request path.
 * @returns {Promise<Array>} All comments gathered so far, in the order the pages
 *   returned them; an empty array when the first response is unusable or empty.
 */
async function fetchAllComments(issueNumber) {
  const PAGE_SIZE = 100;
  const collected = [];

  for (let pageNo = 1; ; pageNo += 1) {
    const batch = await githubRequest(
      'GET',
      `/repos/${REPO_OWNER}/${REPO_NAME}/issues/${issueNumber}/comments?per_page=100&page=${pageNo}`
    );

    // A non-array or empty response ends pagination with whatever was gathered.
    const hasComments = Array.isArray(batch) && batch.length > 0;
    if (!hasComments) {
      return collected;
    }

    for (const comment of batch) {
      collected.push(comment);
    }

    // A short page means there is nothing left to fetch.
    if (batch.length < PAGE_SIZE) {
      return collected;
    }
  }
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Checks if the duplicate comment has a thumbs-down reaction.
|
* Checks if the duplicate comment has a thumbs-down reaction.
|
||||||
*/
|
*/
|
||||||
|
|
@ -152,12 +186,9 @@ async function processIssue(issue) {
|
||||||
const num = issue.number;
|
const num = issue.number;
|
||||||
console.log(`\nChecking issue #${num}: ${issue.title}`);
|
console.log(`\nChecking issue #${num}: ${issue.title}`);
|
||||||
|
|
||||||
const comments = await githubRequest(
|
const comments = await fetchAllComments(num);
|
||||||
'GET',
|
|
||||||
`/repos/${REPO_OWNER}/${REPO_NAME}/issues/${num}/comments?per_page=100`
|
|
||||||
);
|
|
||||||
|
|
||||||
if (!Array.isArray(comments)) {
|
if (!Array.isArray(comments) || comments.length === 0) {
|
||||||
console.log(` -> Could not fetch comments, skipping.`);
|
console.log(` -> Could not fetch comments, skipping.`);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue