diff --git a/backend/src/routes/settings.ts b/backend/src/routes/settings.ts index a339edd..b61c17a 100644 --- a/backend/src/routes/settings.ts +++ b/backend/src/routes/settings.ts @@ -199,16 +199,21 @@ router.post('/ai/test', async (req: AuthRequest, res: Response) => { return; } + console.log(`[AI Test] Testing URL: ${url} with provider: ${settings.ai_provider}`); + const { extractWithAI } = await import('../services/ai-extractor'); const result = await extractWithAI(url, settings); + console.log(`[AI Test] Result:`, JSON.stringify(result, null, 2)); + res.json({ success: !!result.price, ...result, }); } catch (error) { console.error('Error testing AI extraction:', error); - res.status(500).json({ error: 'Failed to test AI extraction' }); + const errorMessage = error instanceof Error ? error.message : 'Unknown error'; + res.status(500).json({ error: `Failed to test AI extraction: ${errorMessage}` }); } }); diff --git a/backend/src/services/ai-extractor.ts b/backend/src/services/ai-extractor.ts index 27dba90..99cf814 100644 --- a/backend/src/services/ai-extractor.ts +++ b/backend/src/services/ai-extractor.ts @@ -36,7 +36,21 @@ HTML Content: function prepareHtmlForAI(html: string): string { const $ = load(html); - // Remove script, style, and other non-content elements + // Extract JSON-LD data BEFORE removing scripts (it often contains product info) + const jsonLdScripts: string[] = []; + $('script[type="application/ld+json"]').each((_, el) => { + const scriptContent = $(el).html(); + if (scriptContent) { + // Include any JSON-LD that might be product-related + if (scriptContent.includes('price') || + scriptContent.includes('Product') || + scriptContent.includes('Offer')) { + jsonLdScripts.push(scriptContent); + } + } + }); + + // Now remove script, style, and other non-content elements $('script, style, noscript, iframe, svg, path, meta, link, comment').remove(); // Get the body content @@ -60,19 +74,11 @@ function prepareHtmlForAI(html: string): string { } } - // Also extract JSON-LD data which often contains product info - const jsonLdScripts: string[] = []; - $('script[type="application/ld+json"]').each((_, el) => { - const scriptContent = $(el).html(); - if (scriptContent && scriptContent.includes('price')) { - jsonLdScripts.push(scriptContent); - } - }); - - // Combine content with JSON-LD data + // Combine JSON-LD data with HTML content let finalContent = content; if (jsonLdScripts.length > 0) { - finalContent = `JSON-LD Data:\n${jsonLdScripts.join('\n')}\n\nHTML Content:\n${content}`; + finalContent = `JSON-LD Structured Data:\n${jsonLdScripts.join('\n')}\n\nHTML Content:\n${content}`; + console.log(`[AI] Found ${jsonLdScripts.length} JSON-LD scripts with product data`); } // Truncate to ~15000 characters to stay within token limits @@ -80,6 +86,7 @@ function prepareHtmlForAI(html: string): string { finalContent = finalContent.substring(0, 15000) + '\n... [truncated]'; } + console.log(`[AI] Prepared HTML content: ${finalContent.length} characters`); return finalContent; } @@ -138,6 +145,8 @@ async function extractWithOpenAI( } function parseAIResponse(responseText: string): AIExtractionResult { + console.log(`[AI] Raw response: ${responseText.substring(0, 500)}...`); + // Try to extract JSON from the response let jsonStr = responseText.trim(); @@ -155,6 +164,7 @@ function parseAIResponse(responseText: string): AIExtractionResult { try { const data = JSON.parse(jsonStr); + console.log(`[AI] Parsed data:`, JSON.stringify(data, null, 2)); let price: ParsedPrice | null = null; if (data.price !== null && data.price !== undefined) {