mirror of
https://github.com/clucraft/PriceGhost.git
synced 2026-05-12 17:22:39 +02:00
Fix AI extraction JSON-LD parsing and add debug logging
- Extract JSON-LD scripts BEFORE removing script tags
- Add logging for prepared HTML, AI responses, and parsed data
- Include more detailed error messages in test endpoint

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
parent
d98138fe7c
commit
906212e6ae
2 changed files with 28 additions and 13 deletions
|
|
@ -199,16 +199,21 @@ router.post('/ai/test', async (req: AuthRequest, res: Response) => {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
console.log(`[AI Test] Testing URL: ${url} with provider: ${settings.ai_provider}`);
|
||||||
|
|
||||||
const { extractWithAI } = await import('../services/ai-extractor');
|
const { extractWithAI } = await import('../services/ai-extractor');
|
||||||
const result = await extractWithAI(url, settings);
|
const result = await extractWithAI(url, settings);
|
||||||
|
|
||||||
|
console.log(`[AI Test] Result:`, JSON.stringify(result, null, 2));
|
||||||
|
|
||||||
res.json({
|
res.json({
|
||||||
success: !!result.price,
|
success: !!result.price,
|
||||||
...result,
|
...result,
|
||||||
});
|
});
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.error('Error testing AI extraction:', error);
|
console.error('Error testing AI extraction:', error);
|
||||||
res.status(500).json({ error: 'Failed to test AI extraction' });
|
const errorMessage = error instanceof Error ? error.message : 'Unknown error';
|
||||||
|
res.status(500).json({ error: `Failed to test AI extraction: ${errorMessage}` });
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -36,7 +36,21 @@ HTML Content:
|
||||||
function prepareHtmlForAI(html: string): string {
|
function prepareHtmlForAI(html: string): string {
|
||||||
const $ = load(html);
|
const $ = load(html);
|
||||||
|
|
||||||
// Remove script, style, and other non-content elements
|
// Extract JSON-LD data BEFORE removing scripts (it often contains product info)
|
||||||
|
const jsonLdScripts: string[] = [];
|
||||||
|
$('script[type="application/ld+json"]').each((_, el) => {
|
||||||
|
const scriptContent = $(el).html();
|
||||||
|
if (scriptContent) {
|
||||||
|
// Include any JSON-LD that might be product-related
|
||||||
|
if (scriptContent.includes('price') ||
|
||||||
|
scriptContent.includes('Product') ||
|
||||||
|
scriptContent.includes('Offer')) {
|
||||||
|
jsonLdScripts.push(scriptContent);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
// Now remove script, style, and other non-content elements
|
||||||
$('script, style, noscript, iframe, svg, path, meta, link, comment').remove();
|
$('script, style, noscript, iframe, svg, path, meta, link, comment').remove();
|
||||||
|
|
||||||
// Get the body content
|
// Get the body content
|
||||||
|
|
@ -60,19 +74,11 @@ function prepareHtmlForAI(html: string): string {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Also extract JSON-LD data which often contains product info
|
// Combine JSON-LD data with HTML content
|
||||||
const jsonLdScripts: string[] = [];
|
|
||||||
$('script[type="application/ld+json"]').each((_, el) => {
|
|
||||||
const scriptContent = $(el).html();
|
|
||||||
if (scriptContent && scriptContent.includes('price')) {
|
|
||||||
jsonLdScripts.push(scriptContent);
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
// Combine content with JSON-LD data
|
|
||||||
let finalContent = content;
|
let finalContent = content;
|
||||||
if (jsonLdScripts.length > 0) {
|
if (jsonLdScripts.length > 0) {
|
||||||
finalContent = `JSON-LD Data:\n${jsonLdScripts.join('\n')}\n\nHTML Content:\n${content}`;
|
finalContent = `JSON-LD Structured Data:\n${jsonLdScripts.join('\n')}\n\nHTML Content:\n${content}`;
|
||||||
|
console.log(`[AI] Found ${jsonLdScripts.length} JSON-LD scripts with product data`);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Truncate to ~15000 characters to stay within token limits
|
// Truncate to ~15000 characters to stay within token limits
|
||||||
|
|
@ -80,6 +86,7 @@ function prepareHtmlForAI(html: string): string {
|
||||||
finalContent = finalContent.substring(0, 15000) + '\n... [truncated]';
|
finalContent = finalContent.substring(0, 15000) + '\n... [truncated]';
|
||||||
}
|
}
|
||||||
|
|
||||||
|
console.log(`[AI] Prepared HTML content: ${finalContent.length} characters`);
|
||||||
return finalContent;
|
return finalContent;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -138,6 +145,8 @@ async function extractWithOpenAI(
|
||||||
}
|
}
|
||||||
|
|
||||||
function parseAIResponse(responseText: string): AIExtractionResult {
|
function parseAIResponse(responseText: string): AIExtractionResult {
|
||||||
|
console.log(`[AI] Raw response: ${responseText.substring(0, 500)}...`);
|
||||||
|
|
||||||
// Try to extract JSON from the response
|
// Try to extract JSON from the response
|
||||||
let jsonStr = responseText.trim();
|
let jsonStr = responseText.trim();
|
||||||
|
|
||||||
|
|
@ -155,6 +164,7 @@ function parseAIResponse(responseText: string): AIExtractionResult {
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const data = JSON.parse(jsonStr);
|
const data = JSON.parse(jsonStr);
|
||||||
|
console.log(`[AI] Parsed data:`, JSON.stringify(data, null, 2));
|
||||||
|
|
||||||
let price: ParsedPrice | null = null;
|
let price: ParsedPrice | null = null;
|
||||||
if (data.price !== null && data.price !== undefined) {
|
if (data.price !== null && data.price !== undefined) {
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue