mirror of
https://github.com/willchen96/mike.git
synced 2026-06-30 21:59:37 +02:00
refactor: enhance error handling and streamline API key management in LLM modules
This commit is contained in:
parent
f39f175273
commit
a2368a7479
4 changed files with 135 additions and 114 deletions
|
|
@ -20,9 +20,19 @@ type NativeMessage = {
|
|||
|
||||
const MAX_TOKENS = 16384;
|
||||
|
||||
/**
 * Resolves the Anthropic API key to use for a request.
 *
 * A non-blank `override` takes precedence; otherwise the `ANTHROPIC_API_KEY`
 * environment variable is used. Both candidates are trimmed before use.
 *
 * @param override - Optional caller-supplied key; `null`, `undefined`, and
 *   whitespace-only values are ignored.
 * @returns The trimmed, non-empty API key.
 * @throws Error when neither the override nor the environment yields a key.
 */
function apiKey(override?: string | null): string {
  // `||` (not `??`) is deliberate: an empty string after trimming must fall
  // through to the next candidate.
  const key = override?.trim() || process.env.ANTHROPIC_API_KEY?.trim() || "";
  if (!key) {
    throw new Error(
      "Anthropic API key is not configured. Set ANTHROPIC_API_KEY or add a user Anthropic key.",
    );
  }
  return key;
}
|
||||
|
||||
/**
 * Builds an Anthropic SDK client for the resolved API key.
 *
 * @param override - Optional caller-supplied key, passed through to `apiKey`.
 * @returns A new `Anthropic` client instance.
 * @throws Error (from `apiKey`) when no key is configured.
 */
function client(override?: string | null): Anthropic {
  // Resolve through apiKey() so a missing key fails here with a clear message
  // rather than constructing a client around an empty string.
  return new Anthropic({ apiKey: apiKey(override) });
}
|
||||
|
||||
function toNativeMessages(
|
||||
|
|
|
|||
|
|
@ -28,9 +28,6 @@ type GeminiContent = {
|
|||
parts: GeminiPart[];
|
||||
};
|
||||
|
||||
const RETRYABLE_STATUSES = new Set([429, 500, 502, 503, 504]);
|
||||
const MAX_GEMINI_ATTEMPTS = 3;
|
||||
|
||||
function apiKey(override?: string | null): string {
|
||||
const key = override?.trim() || process.env.GEMINI_API_KEY?.trim() || "";
|
||||
if (!key) {
|
||||
|
|
@ -45,49 +42,6 @@ function client(override?: string | null): GoogleGenAI {
|
|||
return new GoogleGenAI({ apiKey: apiKey(override) });
|
||||
}
|
||||
|
||||
/**
 * Extracts a numeric `status` property from a thrown value.
 *
 * @param err - Any caught value.
 * @returns The `status` property when it is a number, otherwise `null`
 *   (including when `err` is `null`/`undefined` or has no such property).
 */
function geminiStatus(err: unknown): number | null {
  // Optional chaining keeps this safe for null/undefined throwables.
  const status = (err as { status?: unknown })?.status;
  return typeof status === "number" ? status : null;
}
|
||||
|
||||
/**
 * Decides whether a failed Gemini call should be retried.
 *
 * An error is retryable when its numeric `status` is in `RETRYABLE_STATUSES`,
 * or when its message (for `Error` instances and plain strings) matches one of
 * the known transient-failure phrases, case-insensitively.
 *
 * @param err - Any caught value.
 * @returns `true` when the failure looks transient.
 */
function isRetryableGeminiError(err: unknown): boolean {
  const status = geminiStatus(err);
  if (status != null && RETRYABLE_STATUSES.has(status)) return true;

  // Fall back to message matching for errors that carry no numeric status.
  const message =
    err instanceof Error ? err.message : typeof err === "string" ? err : "";
  return /UNAVAILABLE|Service Unavailable|high demand|try again later/i.test(
    message,
  );
}
|
||||
|
||||
/**
 * Computes the exponential backoff delay for a retry.
 *
 * @param attempt - Zero-based attempt index.
 * @returns `400 * 2^attempt` milliseconds (400, 800, 1600, ...).
 */
function retryDelayMs(attempt: number): number {
  return 400 * 2 ** attempt;
}
|
||||
|
||||
/**
 * Resolves after a delay scheduled with `setTimeout`.
 *
 * @param ms - Delay in milliseconds.
 * @returns A promise that resolves with no value once the timer fires.
 */
async function sleep(ms: number): Promise<void> {
  await new Promise<void>((resolve) => setTimeout(resolve, ms));
}
|
||||
|
||||
/**
 * Runs `operation`, retrying transient failures with exponential backoff.
 *
 * Up to `MAX_GEMINI_ATTEMPTS` attempts are made. An error is rethrown
 * immediately when it is not retryable (per `isRetryableGeminiError`) or when
 * it occurs on the final attempt. Between attempts a warning is logged and
 * the function waits `retryDelayMs(attempt)` milliseconds.
 *
 * @param operation - Factory invoked once per attempt.
 * @returns The value resolved by the first successful attempt.
 * @throws The error from the last attempt, or the first non-retryable error.
 */
async function withGeminiRetries<T>(operation: () => Promise<T>): Promise<T> {
  let lastError: unknown;
  for (let attempt = 0; attempt < MAX_GEMINI_ATTEMPTS; attempt++) {
    try {
      return await operation();
    } catch (err) {
      lastError = err;
      const isLastAttempt = attempt === MAX_GEMINI_ATTEMPTS - 1;
      if (isLastAttempt || !isRetryableGeminiError(err)) throw err;
      console.warn("[gemini] transient error; retrying", {
        attempt: attempt + 1,
        status: geminiStatus(err),
      });
      await sleep(retryDelayMs(attempt));
    }
  }
  // Only reached if the loop body never ran (MAX_GEMINI_ATTEMPTS <= 0).
  throw lastError;
}
|
||||
|
||||
function toNativeContents(messages: StreamChatParams["messages"]): GeminiContent[] {
|
||||
return messages.map((m) => ({
|
||||
role: m.role === "assistant" ? "model" : "user",
|
||||
|
|
@ -107,25 +61,23 @@ export async function streamGemini(
|
|||
let fullText = "";
|
||||
|
||||
for (let iter = 0; iter < maxIter; iter++) {
|
||||
const stream = await withGeminiRetries(() =>
|
||||
ai.models.generateContentStream({
|
||||
model,
|
||||
contents: contents as never,
|
||||
config: {
|
||||
systemInstruction: systemPrompt,
|
||||
tools: functionDeclarations.length
|
||||
? [{ functionDeclarations } as never]
|
||||
: undefined,
|
||||
// When enabled, ask Gemini to surface thought summaries.
|
||||
// When disabled, explicitly zero the thinking budget so the
|
||||
// model skips thinking entirely (saves tokens and latency
|
||||
// for bulk extraction jobs).
|
||||
thinkingConfig: enableThinking
|
||||
? { includeThoughts: true }
|
||||
: { thinkingBudget: 0 },
|
||||
},
|
||||
}),
|
||||
);
|
||||
const stream = await ai.models.generateContentStream({
|
||||
model,
|
||||
contents: contents as never,
|
||||
config: {
|
||||
systemInstruction: systemPrompt,
|
||||
tools: functionDeclarations.length
|
||||
? [{ functionDeclarations } as never]
|
||||
: undefined,
|
||||
// When enabled, ask Gemini to surface thought summaries.
|
||||
// When disabled, explicitly zero the thinking budget so the
|
||||
// model skips thinking entirely (saves tokens and latency
|
||||
// for bulk extraction jobs).
|
||||
thinkingConfig: enableThinking
|
||||
? { includeThoughts: true }
|
||||
: { thinkingBudget: 0 },
|
||||
},
|
||||
});
|
||||
|
||||
// Per-iteration accumulators.
|
||||
const textParts: string[] = [];
|
||||
|
|
@ -207,14 +159,12 @@ export async function completeGeminiText(params: {
|
|||
apiKeys?: { gemini?: string | null };
|
||||
}): Promise<string> {
|
||||
const ai = client(params.apiKeys?.gemini);
|
||||
const resp = await withGeminiRetries(() =>
|
||||
ai.models.generateContent({
|
||||
model: params.model,
|
||||
contents: [{ role: "user", parts: [{ text: params.user }] }],
|
||||
config: params.systemPrompt
|
||||
? { systemInstruction: params.systemPrompt }
|
||||
: undefined,
|
||||
}),
|
||||
);
|
||||
const resp = await ai.models.generateContent({
|
||||
model: params.model,
|
||||
contents: [{ role: "user", parts: [{ text: params.user }] }],
|
||||
config: params.systemPrompt
|
||||
? { systemInstruction: params.systemPrompt }
|
||||
: undefined,
|
||||
});
|
||||
return resp.text ?? "";
|
||||
}
|
||||
|
|
|
|||
|
|
@ -36,7 +36,13 @@ type ResponseStreamEvent = {
|
|||
};
|
||||
|
||||
/**
 * Resolves the OpenAI API key to use for a request.
 *
 * A non-blank `override` takes precedence; otherwise the `OPENAI_API_KEY`
 * environment variable is used. Both candidates are trimmed before use.
 *
 * @param override - Optional caller-supplied key; `null`, `undefined`, and
 *   whitespace-only values are ignored.
 * @returns The trimmed, non-empty API key.
 * @throws Error when neither the override nor the environment yields a key.
 */
function apiKey(override?: string | null): string {
  // `||` (not `??`) is deliberate: an empty string after trimming must fall
  // through to the next candidate.
  const key = override?.trim() || process.env.OPENAI_API_KEY?.trim() || "";
  if (!key) {
    throw new Error(
      "OpenAI API key is not configured. Set OPENAI_API_KEY or add a user OpenAI key.",
    );
  }
  return key;
}
|
||||
|
||||
function toResponseTools(tools: OpenAIToolSchema[]): ResponseFunctionTool[] {
|
||||
|
|
@ -131,9 +137,11 @@ async function createResponse(params: {
|
|||
|
||||
if (!response.ok) {
|
||||
const text = await response.text().catch(() => "");
|
||||
throw new Error(
|
||||
const err = new Error(
|
||||
`OpenAI request failed (${response.status}): ${text || response.statusText}`,
|
||||
);
|
||||
(err as { status?: number }).status = response.status;
|
||||
throw err;
|
||||
}
|
||||
|
||||
return response;
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue