mirror of
https://github.com/flakestorm/flakestorm.git
synced 2026-04-25 00:36:54 +02:00
302 lines
9.9 KiB
Python
302 lines
9.9 KiB
Python
"""
|
|
Generate Search Queries AI Agent
|
|
|
|
An AI-powered agent that generates customer discovery search queries using Google's Gemini AI.
|
|
This agent analyzes product descriptions and generates natural, conversational search queries
|
|
that help identify potential customers who are actively seeking solutions.
|
|
|
|
Based on the TypeScript implementation in GENERATE_SEARCH_QUERIES_PLUGIN.md
|
|
"""
|
|
|
|
import json
|
|
import os
|
|
import re
|
|
from typing import List
|
|
|
|
import google.generativeai as genai
|
|
from fastapi import FastAPI, HTTPException
|
|
from pydantic import BaseModel
|
|
|
|
# ASGI application object; the route decorators further down register handlers on it.
app = FastAPI(title="Generate Search Queries Agent")
|
|
|
|
|
|
class GenerateQueriesRequest(BaseModel):
    """Payload accepted by the query-generation endpoint."""

    # Free-text description of the product or service; interpolated into the
    # Gemini prompt and reused for the fallback queries.
    productDescription: str
|
|
|
|
|
|
class GenerateQueriesResponse(BaseModel):
    """Payload returned by the query-generation endpoint.

    Only ``success`` is mandatory; every other field defaults to ``None``.
    """

    # Outcome flag; the endpoint sets it to True when it returns queries.
    success: bool
    # Generated (or fallback) search queries.
    queries: List[str] | None = None
    # Optional short error label.
    error: str | None = None
    # Optional human-readable detail accompanying ``error``.
    message: str | None = None
|
|
|
|
|
|
# Initialize Gemini API
|
|
def get_gemini_model():
    """Configure the Gemini SDK and return a ``gemini-2.5-flash`` model handle.

    The API key is read from ``GOOGLE_AI_API_KEY`` and, if that is unset or
    empty, from ``VITE_GOOGLE_AI_API_KEY``.

    Returns:
        A ``genai.GenerativeModel`` bound to ``gemini-2.5-flash``.

    Raises:
        ValueError: If neither environment variable holds a non-empty value.
    """
    api_key = os.getenv("GOOGLE_AI_API_KEY") or os.getenv("VITE_GOOGLE_AI_API_KEY")
    if not api_key:
        raise ValueError("GOOGLE_AI_API_KEY or VITE_GOOGLE_AI_API_KEY environment variable is not set")

    genai.configure(api_key=api_key)
    # The constructor's first parameter is ``model_name``; the previous
    # ``model=...`` keyword is not accepted and raised TypeError. Passing the
    # name positionally is correct regardless of the keyword's spelling.
    return genai.GenerativeModel("gemini-2.5-flash")
|
|
|
|
|
|
def escape_control_characters_in_strings(json_string: str) -> str:
    """Escape raw control characters that appear inside JSON string literals.

    Only text between double quotes is rewritten; whitespace that is part of
    the JSON structure (for example newlines between array items) is left
    alone. Existing escape sequences are copied through unchanged.

    Args:
        json_string: JSON-like text that may contain unescaped control
            characters (code points below 32) inside its string values.

    Returns:
        The text with those characters replaced by ``\\n``, ``\\r``, ``\\t``,
        ``\\f``, ``\\b`` or a ``\\uXXXX`` escape.
    """
    short_escapes = {"\n": "\\n", "\r": "\\r", "\t": "\\t", "\f": "\\f", "\b": "\\b"}

    def escape_match(match):
        content = match.group(1)
        length = len(content)
        pieces = []
        i = 0
        while i < length:
            char = content[i]

            # A backslash starts an escape pair: copy both characters verbatim
            # and skip past them. Consuming the pair (instead of peeking at
            # the previous character) means the character after an escaped
            # backslash ("\\\\") is correctly treated as unescaped.
            if char == "\\" and i + 1 < length:
                pieces.append(content[i:i + 2])
                i += 2
                continue

            if ord(char) < 32:
                pieces.append(short_escapes.get(char) or f"\\u{ord(char):04x}")
            else:
                pieces.append(char)
            i += 1

        return '"' + "".join(pieces) + '"'

    return re.sub(r'"((?:[^"\\]|\\.)*)"', escape_match, json_string)
|
|
|
|
|
|
def _find_array_end(text: str, start: int) -> int:
    """Return the index of the ``]`` closing the array opened at *start*, or -1.

    Brackets inside double-quoted strings are ignored, and backslash escapes
    inside strings are honoured so an escaped quote does not end the string.
    """
    depth = 0
    in_string = False
    escaped = False
    for index in range(start, len(text)):
        char = text[index]
        if in_string:
            if escaped:
                escaped = False
            elif char == "\\":
                escaped = True
            elif char == '"':
                in_string = False
        elif char == '"':
            in_string = True
        elif char == "[":
            depth += 1
        elif char == "]":
            depth -= 1
            if depth == 0:
                return index
    return -1


def extract_json_from_response(response_text: str) -> str:
    """Pull the JSON array text out of a model response.

    Lookup order:
      1. An array inside a markdown code fence (with or without a ``json`` tag).
      2. The first bracket-balanced array anywhere in the text. Brackets that
         occur inside quoted strings do not count, so a query such as
         ``"fix [solved] bug"`` no longer truncates the array.
      3. If an opening ``[`` exists but is never closed (for example a
         truncated response), everything from that ``[`` onwards.
      4. Otherwise the whole response.

    Args:
        response_text: Raw text returned by the model.

    Returns:
        The extracted text with surrounding whitespace removed. It is not
        guaranteed to be valid JSON.
    """
    fenced = re.search(r"```(?:json)?\s*(\[[\s\S]*?\])\s*```", response_text)
    if fenced:
        return fenced.group(1).strip()

    start = response_text.find("[")
    if start != -1:
        end = _find_array_end(response_text, start)
        if end != -1:
            return response_text[start:end + 1]
        # Unterminated array: keep what is there so a caller can still
        # salvage individual quoted strings from it.
        return response_text[start:].strip()

    return response_text.strip()
|
|
|
|
|
|
def _unescape_json_fragment(fragment: str) -> str:
    """Decode ``\\n``, ``\\r``, ``\\t``, ``\\"`` and ``\\\\`` in a single pass.

    A single left-to-right pass is required for correctness: chained
    ``str.replace`` calls would turn an escaped backslash followed by ``n``
    into a backslash plus a newline. Unrecognised escapes are left untouched.
    """
    simple = {"n": "\n", "r": "\r", "t": "\t", '"': '"', "\\": "\\"}
    return re.sub(r"\\(.)", lambda m: simple.get(m.group(1), m.group(0)), fragment)


def parse_queries_from_response(response_text: str) -> List[str]:
    """Turn a Gemini response into a list of at most five query strings.

    The response is first reduced to its JSON array text and repaired for raw
    control characters, then parsed with ``json.loads``. If that fails, every
    double-quoted fragment in the extracted text is salvaged instead.

    Args:
        response_text: Raw text returned by the model.

    Returns:
        Up to five stripped, non-empty query strings. The list may be empty
        when the parsed array contains no usable strings.

    Raises:
        json.JSONDecodeError: If parsing fails and nothing can be salvaged.
        ValueError: If the parsed JSON value is not an array.
    """
    max_queries = 5

    try:
        json_string = extract_json_from_response(response_text)
        json_string = escape_control_characters_in_strings(json_string)

        try:
            parsed = json.loads(json_string)
        except json.JSONDecodeError as parse_error:
            print(f"JSON parse error. Raw response: {response_text}")
            print(f"Extracted JSON string: {json_string}")
            print(f"Parse error details: {parse_error}")

            # Fallback: salvage every quoted fragment from the broken JSON.
            fragments = re.findall(r'"([^"\\]*(?:\\.[^"\\]*)*)"', json_string)
            candidates = (_unescape_json_fragment(fragment).strip() for fragment in fragments)
            queries = [text for text in candidates if text]
            if not queries:
                raise

            # Apply the same cap as the regular parsing path.
            queries = queries[:max_queries]
            print(f"Using manually extracted queries: {queries}")
            return queries

        if not isinstance(parsed, list):
            raise ValueError("Response is not an array")

        # Keep only non-blank strings, capped at the maximum.
        return [q.strip() for q in parsed if isinstance(q, str) and q.strip()][:max_queries]

    except Exception as e:
        print(f"Error parsing queries: {e}")
        raise
|
|
|
|
|
|
def generate_fallback_queries(product_description: str) -> List[str]:
    """Build three generic queries from the start of the product description.

    Used when AI generation or parsing fails. Whitespace in the description
    (newlines, tabs, repeated spaces) is collapsed to single spaces before the
    first 50 characters are taken, so each query is a single clean line with
    no trailing blank.

    Args:
        product_description: Free-text product or service description.

    Returns:
        Three queries of the form ``"<prefix> <snippet>"``.
    """
    # Normalise first, then cut, then drop a space left dangling by the cut.
    desc_snippet = " ".join(product_description.split())[:50].rstrip()
    prefixes = ("looking for", "need help with", "struggling with")
    return [f"{prefix} {desc_snippet}" for prefix in prefixes]
|
|
|
|
|
|
def create_prompt(product_description: str) -> str:
    """Assemble the Gemini prompt that asks for 3-5 customer-discovery queries.

    The description is inserted verbatim under the
    "Product/Service Description" heading; the rest of the prompt is fixed
    instruction text that ends by requesting a bare JSON array.
    """
    prompt_lines = (
        "Analyze the following product/service description and generate 3-5 search queries that would help find potential customers who are actively seeking this solution or experiencing related pain points.",
        "",
        "**Product/Service Description:**",
        product_description,
        "",
        "**Instructions:**",
        "1. Identify the core problem this product/service solves",
        "2. Think about how potential customers might express their pain points, frustrations, or needs",
        "3. Generate search queries that capture:",
        "- People asking questions about the problem domain",
        "- People expressing frustration with existing solutions",
        "- People seeking recommendations or alternatives",
        "- People discussing challenges related to this domain",
        "- People showing buying intent or solution-seeking behavior",
        "",
        "4. Each query should be:",
        "- Natural and conversational (as someone might type on Reddit/X)",
        "- Focused on pain points or solution-seeking",
        "- Specific to the product's domain/industry",
        "- Not too generic or too narrow",
        "",
        "5. Avoid:",
        "- Brand names or specific product names",
        "- Overly technical jargon",
        '- Queries that are too broad (e.g., just "help" or "problem")',
        "",
        "**Example:**",
        'If product is "AI-powered lead generation tool for SaaS founders":',
        '- Good queries: "finding first customers", "struggling to find leads", "looking for lead generation tools", "how to find customers on reddit"',
        '- Bad queries: "lead generation" (too generic), "ralix.ai" (brand name), "SaaS" (too broad)',
        "",
        "Return ONLY a JSON array of query strings, like this:",
        '["query 1", "query 2", "query 3", "query 4", "query 5"]',
        "",
        "Do not include any explanation or additional text, only the JSON array.",
    )
    return "\n".join(prompt_lines)
|
|
|
|
|
|
@app.post("/GenerateSearchQueries", response_model=GenerateQueriesResponse)
async def generate_search_queries(request: GenerateQueriesRequest) -> GenerateQueriesResponse:
    """Generate search queries from a product description using Google Gemini AI.

    Steps:
      1. Reject an empty or whitespace-only description.
      2. Build the prompt and call Gemini in a worker thread.
      3. Parse the response; if parsing fails or yields nothing, substitute
         generic fallback queries.

    Args:
        request: Request body carrying ``productDescription``.

    Returns:
        A response with ``success=True`` and the generated or fallback queries.

    Raises:
        HTTPException: 400 when the description is missing or blank; 500 when
            the API key is not configured or the Gemini call fails.
    """
    # Function-scope import: only this handler needs asyncio.
    import asyncio

    description = request.productDescription
    # A whitespace-only description carries no information for the model.
    if not description or not description.strip():
        raise HTTPException(
            status_code=400,
            detail={
                "error": "Missing required parameters",
                "message": "productDescription is required",
            },
        )

    try:
        try:
            model = get_gemini_model()
        except ValueError as e:
            raise HTTPException(
                status_code=500,
                detail={
                    "error": "API key not configured",
                    "message": str(e),
                },
            ) from e

        prompt = create_prompt(description)
        # generate_content is a blocking call; running it in a worker thread
        # keeps the event loop free to serve other requests meanwhile.
        response = await asyncio.to_thread(model.generate_content, prompt)
        response_text = response.text.strip()

        print(f"Gemini API Response for query generation: {response_text}")

        try:
            queries = parse_queries_from_response(response_text)
        except Exception as parse_error:
            # Deliberate best-effort: a parse failure degrades to fallbacks.
            print(f"Failed to parse queries: {parse_error}")
            queries = generate_fallback_queries(description)
            print(f"Using fallback queries: {queries}")

        if not queries:
            # Parsing succeeded but produced no usable strings.
            queries = generate_fallback_queries(description)
            print(f"No valid queries generated, using fallback queries: {queries}")

        return GenerateQueriesResponse(success=True, queries=queries)

    except HTTPException:
        # Already a well-formed HTTP error; pass it through untouched.
        raise
    except Exception as e:
        print(f"Error generating search queries: {e}")
        raise HTTPException(
            status_code=500,
            detail={
                "error": "Failed to generate search queries",
                "message": str(e),
            },
        ) from e
|
|
|
|
|
|
@app.get("/health")
async def health():
    """Report service liveness with a fixed status payload."""
    status_payload = {"status": "healthy"}
    return status_payload
|
|
|
|
|
|
if __name__ == "__main__":
    # Imported here so uvicorn is only needed when the file is run as a script.
    import uvicorn

    # Listen on every interface, port 8080.
    uvicorn.run(app, port=8080, host="0.0.0.0")
|
|
|