refactor: updated chonkie & removed junk code

- Removed all references to the Serper API from the codebase, including related components, hooks, and schemas.
- Updated the `pyproject.toml` to reflect the new version of `chonkie` and other dependencies.
- Cleaned up the configuration and connector management to streamline the application.
This commit is contained in:
DESKTOP-RTLN3BA\$punk 2025-12-19 23:26:45 -08:00
parent 70ca585379
commit 086048a4db
18 changed files with 344 additions and 701 deletions

View file

@ -1,69 +1,12 @@
import os import os
import shutil import shutil
from pathlib import Path from pathlib import Path
from typing import Any
import yaml import yaml
from chonkie import AutoEmbeddings, CodeChunker, RecursiveChunker from chonkie import AutoEmbeddings, CodeChunker, RecursiveChunker
from chonkie.embeddings.azure_openai import AzureOpenAIEmbeddings
from chonkie.embeddings.registry import EmbeddingsRegistry
from dotenv import load_dotenv from dotenv import load_dotenv
from rerankers import Reranker from rerankers import Reranker
# Subclass AzureOpenAIEmbeddings to work around a parameter-order issue.
# This is a temporary workaround until the upstream chonkie library is fixed.
class FixedAzureOpenAIEmbeddings(AzureOpenAIEmbeddings):
    """AzureOpenAIEmbeddings variant that accepts ``model`` as its first parameter.

    Every argument is forwarded to the parent constructor by keyword, so the
    positional order of the parent's own parameters does not matter here.
    """

    def __init__(
        self,
        model: str = "text-embedding-3-small",
        azure_endpoint: str | None = None,
        tokenizer: Any | None = None,
        dimension: int | None = None,
        azure_api_key: str | None = None,
        api_version: str = "2024-10-21",
        deployment: str | None = None,
        max_retries: int = 3,
        timeout: float = 60.0,
        batch_size: int = 128,
        **kwargs: Any,
    ):
        """Initialize the embeddings with ``model`` as the first parameter.

        Args:
            model: Model name, forwarded unchanged to the parent.
            azure_endpoint: Azure endpoint. When falsy, the value of the
                ``AZURE_OPENAI_ENDPOINT`` environment variable is used, and
                an empty string if that variable is unset.
            tokenizer: Forwarded unchanged to the parent.
            dimension: Forwarded unchanged to the parent.
            azure_api_key: Forwarded unchanged to the parent.
            api_version: Forwarded unchanged to the parent.
            deployment: Forwarded unchanged to the parent.
            max_retries: Forwarded unchanged to the parent.
            timeout: Forwarded unchanged to the parent.
            batch_size: Forwarded unchanged to the parent.
            **kwargs: Any additional keyword arguments, forwarded unchanged
                to the parent.
        """
        # Resolve the endpoint here so the parent always receives a string.
        resolved_endpoint = azure_endpoint or os.getenv("AZURE_OPENAI_ENDPOINT", "")

        # Keyword-only forwarding keeps this call independent of the parent's
        # positional parameter order.
        super().__init__(
            azure_endpoint=resolved_endpoint,
            model=model,
            tokenizer=tokenizer,
            dimension=dimension,
            azure_api_key=azure_api_key,
            api_version=api_version,
            deployment=deployment,
            max_retries=max_retries,
            timeout=timeout,
            batch_size=batch_size,
            **kwargs,
        )
# TODO: Fix this in chonkie upstream
# Register the fixed Azure OpenAI embeddings class with the registry: as the
# "azure_openai" provider, for the `^text-embedding-` pattern, and for each
# model name listed below.
# According to the original note here, the following arguments are inferred
# automatically from their corresponding environment variables if they are
# not provided:
# - `api_key` from `AZURE_OPENAI_API_KEY`
# - `organization` from `OPENAI_ORG_ID`
# - `project` from `OPENAI_PROJECT_ID`
# - `azure_ad_token` from `AZURE_OPENAI_AD_TOKEN`
# - `api_version` from `OPENAI_API_VERSION`
# - `azure_endpoint` from `AZURE_OPENAI_ENDPOINT`
EmbeddingsRegistry.register_provider("azure_openai", FixedAzureOpenAIEmbeddings)
EmbeddingsRegistry.register_pattern(r"^text-embedding-", FixedAzureOpenAIEmbeddings)
for _model_name in (
    "text-embedding-ada-002",
    "text-embedding-3-small",
    "text-embedding-3-large",
):
    EmbeddingsRegistry.register_model(_model_name, FixedAzureOpenAIEmbeddings)
# Get the base directory of the project # Get the base directory of the project
BASE_DIR = Path(__file__).resolve().parent.parent.parent BASE_DIR = Path(__file__).resolve().parent.parent.parent

View file

@ -1,18 +0,0 @@
from datetime import UTC, datetime
from pydantic import BaseModel
class GoogleAuthCredentialsBase(BaseModel):
    """Pydantic model holding the fields of a Google auth credential set."""

    token: str
    refresh_token: str
    token_uri: str
    client_id: str
    # Moment at which the credentials stop being valid (see ``expired``).
    expiry: datetime
    scopes: list[str]
    client_secret: str

    @property
    def expired(self) -> bool:
        """Check if the credentials have expired.

        Returns:
            True when ``expiry`` is at or before the current UTC time.

        A timezone-naive ``expiry`` is interpreted as UTC. Comparing a naive
        datetime directly with the timezone-aware "now" would raise
        ``TypeError``.
        """
        expiry = self.expiry
        if expiry.utcoffset() is None:
            # NOTE(review): assumes naive expiry values are expressed in UTC —
            # confirm against whatever produces these credentials.
            expiry = expiry.replace(tzinfo=UTC)
        return expiry <= datetime.now(UTC)

View file

@ -8,13 +8,6 @@ from app.db import Chat, Podcast
from app.services.task_logging_service import TaskLoggingService from app.services.task_logging_service import TaskLoggingService
async def generate_document_podcast(
    session: AsyncSession, document_id: int, search_space_id: int, user_id: int
):
    """Placeholder for document-based podcast generation.

    Not implemented yet: every argument is accepted but unused, and the
    coroutine resolves to ``None``.
    """
    # TODO: Need to fetch the document chunks, then concatenate them and pass them to the podcast generation model
    return None
async def generate_chat_podcast( async def generate_chat_podcast(
session: AsyncSession, session: AsyncSession,
chat_id: int, chat_id: int,

View file

@ -41,7 +41,6 @@ dependencies = [
"celery[redis]>=5.5.3", "celery[redis]>=5.5.3",
"flower>=2.0.1", "flower>=2.0.1",
"redis>=5.2.1", "redis>=5.2.1",
"chonkie[all]>=1.4.0",
"firecrawl-py>=4.9.0", "firecrawl-py>=4.9.0",
"boto3>=1.35.0", "boto3>=1.35.0",
"langchain-community>=0.3.31", "langchain-community>=0.3.31",
@ -54,6 +53,7 @@ dependencies = [
"deepagents>=0.3.0", "deepagents>=0.3.0",
"trafilatura>=2.0.0", "trafilatura>=2.0.0",
"fastapi-users[oauth,sqlalchemy]>=15.0.3", "fastapi-users[oauth,sqlalchemy]>=15.0.3",
"chonkie[all]>=1.5.0",
] ]
[dependency-groups] [dependency-groups]

File diff suppressed because it is too large Load diff

View file

@ -1,34 +0,0 @@
# name: "Submit to Web Store"
# on:
# workflow_dispatch:
# jobs:
# build:
# runs-on: ubuntu-latest
# steps:
# - uses: actions/checkout@v3
# - name: Cache pnpm modules
# uses: actions/cache@v3
# with:
# path: ~/.pnpm-store
# key: ${{ runner.os }}-${{ hashFiles('**/pnpm-lock.yaml') }}
# restore-keys: |
# ${{ runner.os }}-
# - uses: pnpm/action-setup@v2.2.4
# with:
# version: latest
# run_install: true
# - name: Use Node.js 16.x
# uses: actions/setup-node@v3.4.1
# with:
# node-version: 16.x
# cache: "pnpm"
# - name: Build the extension
# run: pnpm build
# - name: Package the extension into a zip artifact
# run: pnpm package
# - name: Browser Platform Publish
# uses: PlasmoHQ/bpp@v3
# with:
# keys: ${{ secrets.SUBMIT_KEYS }}
# artifact: build/chrome-mv3-prod.zip

View file

@ -153,15 +153,6 @@ export default function EditConnectorPage() {
placeholder="Begins with secret_..." placeholder="Begins with secret_..."
/> />
)} )}
{/* == Serper == */}
{connector.connector_type === "SERPER_API" && (
<EditSimpleTokenForm
control={editForm.control}
fieldName="SERPER_API_KEY"
fieldLabel="Serper API Key"
fieldDescription="Update the Serper API Key if needed."
/>
)}
{/* == Tavily == */} {/* == Tavily == */}
{connector.connector_type === "TAVILY_API" && ( {connector.connector_type === "TAVILY_API" && (
<EditSimpleTokenForm <EditSimpleTokenForm

View file

@ -39,7 +39,6 @@ const apiConnectorFormSchema = z.object({
// Helper function to get connector type display name // Helper function to get connector type display name
const getConnectorTypeDisplay = (type: string): string => { const getConnectorTypeDisplay = (type: string): string => {
const typeMap: Record<string, string> = { const typeMap: Record<string, string> = {
SERPER_API: "Serper API",
TAVILY_API: "Tavily API", TAVILY_API: "Tavily API",
SLACK_CONNECTOR: "Slack Connector", SLACK_CONNECTOR: "Slack Connector",
NOTION_CONNECTOR: "Notion Connector", NOTION_CONNECTOR: "Notion Connector",
@ -67,7 +66,6 @@ type ApiConnectorFormValues = z.infer<typeof apiConnectorFormSchema>;
// Get API key field name based on connector type // Get API key field name based on connector type
const getApiKeyFieldName = (connectorType: string): string => { const getApiKeyFieldName = (connectorType: string): string => {
const fieldMap: Record<string, string> = { const fieldMap: Record<string, string> = {
SERPER_API: "SERPER_API_KEY",
TAVILY_API: "TAVILY_API_KEY", TAVILY_API: "TAVILY_API_KEY",
SLACK_CONNECTOR: "SLACK_BOT_TOKEN", SLACK_CONNECTOR: "SLACK_BOT_TOKEN",
NOTION_CONNECTOR: "NOTION_INTEGRATION_TOKEN", NOTION_CONNECTOR: "NOTION_INTEGRATION_TOKEN",

View file

@ -1,216 +0,0 @@
"use client";
import { zodResolver } from "@hookform/resolvers/zod";
import { ArrowLeft, Check, Info, Loader2 } from "lucide-react";
import { motion } from "motion/react";
import { useParams, useRouter } from "next/navigation";
import { useState } from "react";
import { useForm } from "react-hook-form";
import { toast } from "sonner";
import * as z from "zod";
import { Alert, AlertDescription, AlertTitle } from "@/components/ui/alert";
import { Button } from "@/components/ui/button";
import {
Card,
CardContent,
CardDescription,
CardFooter,
CardHeader,
CardTitle,
} from "@/components/ui/card";
import {
Form,
FormControl,
FormDescription,
FormField,
FormItem,
FormLabel,
FormMessage,
} from "@/components/ui/form";
import { Input } from "@/components/ui/input";
import { EnumConnectorName } from "@/contracts/enums/connector";
import { getConnectorIcon } from "@/contracts/enums/connectorIcons";
import { useSearchSourceConnectors } from "@/hooks/use-search-source-connectors";
// Zod validation rules for the Serper API connector form.
const serperApiFormSchema = z.object({
  // Connector display name: at least 3 characters.
  name: z.string().min(3, { message: "Connector name must be at least 3 characters." }),
  // API key: at least 10 characters.
  api_key: z.string().min(10, { message: "API key is required and must be valid." }),
});

// Form value type, inferred from the schema so the two cannot drift apart.
type SerperApiFormValues = z.infer<typeof serperApiFormSchema>;
/**
 * Page for creating a Serper API connector in the current search space.
 *
 * Renders a form with a connector name and an API key, submits it through
 * `createConnector`, shows a toast with the outcome, and on success
 * navigates back to the connectors list of the search space.
 */
export default function SerperApiPage() {
  const router = useRouter();
  const params = useParams();
  // Search space id comes from the route parameters.
  const searchSpaceId = params.search_space_id as string;
  // True while a submission is in flight; disables the submit button.
  const [isSubmitting, setIsSubmitting] = useState(false);
  const { createConnector } = useSearchSourceConnectors();

  // Initialize the form
  const form = useForm<SerperApiFormValues>({
    resolver: zodResolver(serperApiFormSchema),
    defaultValues: {
      name: "Serper API Connector",
      api_key: "",
    },
  });

  // Handle form submission
  const onSubmit = async (values: SerperApiFormValues) => {
    setIsSubmitting(true);
    try {
      await createConnector(
        {
          name: values.name,
          connector_type: EnumConnectorName.SERPER_API,
          config: {
            SERPER_API_KEY: values.api_key,
          },
          is_indexable: false,
          last_indexed_at: null,
          periodic_indexing_enabled: false,
          indexing_frequency_minutes: null,
          next_scheduled_at: null,
        },
        // Explicit radix so the route parameter is always parsed as decimal.
        parseInt(searchSpaceId, 10)
      );

      toast.success("Serper API connector created successfully!");

      // Navigate back to connectors page
      router.push(`/dashboard/${searchSpaceId}/connectors`);
    } catch (error) {
      console.error("Error creating connector:", error);
      toast.error(error instanceof Error ? error.message : "Failed to create connector");
    } finally {
      // Re-enable the submit button whether the request succeeded or failed.
      setIsSubmitting(false);
    }
  };

  return (
    <div className="container mx-auto py-8 max-w-3xl">
      <Button
        variant="ghost"
        className="mb-6"
        onClick={() => router.push(`/dashboard/${searchSpaceId}/connectors/add`)}
      >
        <ArrowLeft className="mr-2 h-4 w-4" />
        Back to Connectors
      </Button>

      {/* Header */}
      <div className="mb-8">
        <div className="flex items-center gap-4">
          <div className="flex h-12 w-12 items-center justify-center rounded-lg">
            {getConnectorIcon(EnumConnectorName.SERPER_API, "h-6 w-6")}
          </div>
          <div>
            <h1 className="text-3xl font-bold tracking-tight">Connect Serper API</h1>
            <p className="text-muted-foreground">
              Connect Serper API for Google search capabilities.
            </p>
          </div>
        </div>
      </div>

      <motion.div
        initial={{ opacity: 0, y: 20 }}
        animate={{ opacity: 1, y: 0 }}
        transition={{ duration: 0.5 }}
      >
        <Card className="border-2 border-border">
          <CardHeader>
            <CardTitle className="text-2xl font-bold">Connect Serper API</CardTitle>
            <CardDescription>
              Integrate with Serper API to enhance your search capabilities with Google search
              results.
            </CardDescription>
          </CardHeader>
          <CardContent>
            <Alert className="mb-6 bg-muted">
              <Info className="h-4 w-4" />
              <AlertTitle>API Key Required</AlertTitle>
              <AlertDescription>
                You'll need a Serper API key to use this connector. You can get one by signing up at{" "}
                <a
                  href="https://serper.dev"
                  target="_blank"
                  rel="noopener noreferrer"
                  className="font-medium underline underline-offset-4"
                >
                  serper.dev
                </a>
              </AlertDescription>
            </Alert>

            <Form {...form}>
              <form onSubmit={form.handleSubmit(onSubmit)} className="space-y-6">
                <FormField
                  control={form.control}
                  name="name"
                  render={({ field }) => (
                    <FormItem>
                      <FormLabel>Connector Name</FormLabel>
                      <FormControl>
                        <Input placeholder="My Serper API Connector" {...field} />
                      </FormControl>
                      <FormDescription>A friendly name to identify this connector.</FormDescription>
                      <FormMessage />
                    </FormItem>
                  )}
                />

                <FormField
                  control={form.control}
                  name="api_key"
                  render={({ field }) => (
                    <FormItem>
                      <FormLabel>Serper API Key</FormLabel>
                      <FormControl>
                        <Input type="password" placeholder="Enter your Serper API key" {...field} />
                      </FormControl>
                      <FormDescription>
                        Your API key will be encrypted and stored securely.
                      </FormDescription>
                      <FormMessage />
                    </FormItem>
                  )}
                />

                <div className="flex justify-end">
                  <Button type="submit" disabled={isSubmitting} className="w-full sm:w-auto">
                    {isSubmitting ? (
                      <>
                        <Loader2 className="mr-2 h-4 w-4 animate-spin" />
                        Connecting...
                      </>
                    ) : (
                      <>
                        <Check className="mr-2 h-4 w-4" />
                        Connect Serper API
                      </>
                    )}
                  </Button>
                </div>
              </form>
            </Form>
          </CardContent>
          <CardFooter className="flex flex-col items-start border-t bg-muted/50 px-6 py-4">
            <h4 className="text-sm font-medium">What you get with Serper API:</h4>
            <ul className="mt-2 list-disc pl-5 text-sm text-muted-foreground">
              <li>Access to Google search results directly in your research</li>
              <li>Real-time information from the web</li>
              <li>Enhanced search capabilities for your projects</li>
            </ul>
          </CardFooter>
        </Card>
      </motion.div>
    </div>
  );
}

View file

@ -197,7 +197,6 @@ export function DashboardBreadcrumb() {
"slack-connector": "Slack", "slack-connector": "Slack",
"notion-connector": "Notion", "notion-connector": "Notion",
"tavily-api": "Tavily API", "tavily-api": "Tavily API",
"serper-api": "Serper API",
"linkup-api": "LinkUp API", "linkup-api": "LinkUp API",
"luma-connector": "Luma", "luma-connector": "Luma",
"elasticsearch-connector": "Elasticsearch", "elasticsearch-connector": "Elasticsearch",

View file

@ -28,7 +28,6 @@ export const editConnectorSchema = z.object({
name: z.string().min(3, { message: "Connector name must be at least 3 characters." }), name: z.string().min(3, { message: "Connector name must be at least 3 characters." }),
SLACK_BOT_TOKEN: z.string().optional(), SLACK_BOT_TOKEN: z.string().optional(),
NOTION_INTEGRATION_TOKEN: z.string().optional(), NOTION_INTEGRATION_TOKEN: z.string().optional(),
SERPER_API_KEY: z.string().optional(),
TAVILY_API_KEY: z.string().optional(), TAVILY_API_KEY: z.string().optional(),
SEARXNG_HOST: z.string().optional(), SEARXNG_HOST: z.string().optional(),
SEARXNG_API_KEY: z.string().optional(), SEARXNG_API_KEY: z.string().optional(),

View file

@ -1,70 +0,0 @@
"use client";
import { Sparkles } from "lucide-react";
import { cn } from "@/lib/utils";
// Props accepted by a single DisplayCard. All are optional; DisplayCard
// supplies defaults for every visual field except className.
interface DisplayCardProps {
  // Extra classes merged into the card container.
  className?: string;
  // Node rendered inside the round badge next to the title.
  icon?: React.ReactNode;
  // Title text shown next to the icon.
  title?: string;
  // Single-line description shown under the title row.
  description?: string;
  // Text shown on the bottom row of the card.
  date?: string;
  // NOTE(review): not referenced in DisplayCard's rendered markup — confirm whether it should be applied.
  iconClassName?: string;
  // Extra classes merged into the title paragraph.
  titleClassName?: string;
}
/**
 * Renders one skewed card with an icon badge, a title, a description and a
 * date line. Every visual prop has a default, so the card can be rendered
 * with no props at all.
 *
 * The `iconClassName` prop is still part of DisplayCardProps but is not
 * destructured here: the markup never used it, so binding it only created an
 * unused local.
 */
function DisplayCard({
  className,
  icon = <Sparkles className="size-4 text-blue-300" />,
  title = "Featured",
  description = "Discover amazing content",
  date = "Just now",
  titleClassName = "text-blue-500",
}: DisplayCardProps) {
  return (
    <div
      className={cn(
        "relative flex h-36 w-[22rem] -skew-y-[8deg] select-none flex-col justify-between rounded-xl border-2 bg-muted/70 backdrop-blur-sm px-4 py-3 transition-all duration-700 after:absolute after:-right-1 after:top-[-5%] after:h-[110%] after:w-[20rem] after:bg-gradient-to-l after:from-background after:to-transparent after:content-[''] hover:border-white/20 hover:bg-muted [&>*]:flex [&>*]:items-center [&>*]:gap-2",
        className
      )}
    >
      {/* Title row: icon badge followed by the title text */}
      <div>
        <span className="relative inline-block rounded-full bg-blue-800 p-1">{icon}</span>
        <p className={cn("text-lg font-medium", titleClassName)}>{title}</p>
      </div>
      <p className="whitespace-nowrap text-lg">{description}</p>
      <p className="text-muted-foreground">{date}</p>
    </div>
  );
}
// Props for the DisplayCards stack.
interface DisplayCardsProps {
  // Per-card props, one entry per rendered card; when omitted (or otherwise
  // falsy) DisplayCards falls back to its built-in default cards.
  cards?: DisplayCardProps[];
}
/**
 * Renders a stack of DisplayCard components that share a single grid area.
 *
 * When `cards` is not provided, three default cards are rendered; they differ
 * only in their positioning and hover classes.
 */
export default function DisplayCards({ cards }: DisplayCardsProps) {
  // Fix: the first two entries previously used `before:duration:700`, which
  // is not a valid Tailwind utility; `before:duration-700` sets the
  // transition duration on the `before` pseudo-element overlay.
  const defaultCards = [
    {
      className:
        "[grid-area:stack] hover:-translate-y-10 before:absolute before:w-[100%] before:outline-1 before:rounded-xl before:outline-border before:h-[100%] before:content-[''] before:bg-blend-overlay before:bg-background/50 grayscale-[100%] hover:before:opacity-0 before:transition-opacity before:duration-700 hover:grayscale-0 before:left-0 before:top-0",
    },
    {
      className:
        "[grid-area:stack] translate-x-16 translate-y-10 hover:-translate-y-1 before:absolute before:w-[100%] before:outline-1 before:rounded-xl before:outline-border before:h-[100%] before:content-[''] before:bg-blend-overlay before:bg-background/50 grayscale-[100%] hover:before:opacity-0 before:transition-opacity before:duration-700 hover:grayscale-0 before:left-0 before:top-0",
    },
    {
      className: "[grid-area:stack] translate-x-32 translate-y-20 hover:translate-y-10",
    },
  ];

  // Caller-supplied cards replace the defaults entirely.
  const displayCards = cards || defaultCards;

  return (
    <div className="grid [grid-template-areas:'stack'] place-items-center opacity-100 animate-in fade-in-0 duration-700">
      {displayCards.map((cardProps, index) => (
        <DisplayCard key={index} {...cardProps} />
      ))}
    </div>
  );
}

View file

@ -1,5 +1,4 @@
export enum EnumConnectorName { export enum EnumConnectorName {
SERPER_API = "SERPER_API",
TAVILY_API = "TAVILY_API", TAVILY_API = "TAVILY_API",
SEARXNG_API = "SEARXNG_API", SEARXNG_API = "SEARXNG_API",
LINKUP_API = "LINKUP_API", LINKUP_API = "LINKUP_API",

View file

@ -21,7 +21,6 @@ import {
File, File,
FileText, FileText,
Globe, Globe,
Link,
Microscope, Microscope,
Search, Search,
Sparkles, Sparkles,
@ -40,8 +39,6 @@ export const getConnectorIcon = (connectorType: EnumConnectorName | string, clas
return <IconLayoutKanban {...iconProps} />; return <IconLayoutKanban {...iconProps} />;
case EnumConnectorName.GITHUB_CONNECTOR: case EnumConnectorName.GITHUB_CONNECTOR:
return <IconBrandGithub {...iconProps} />; return <IconBrandGithub {...iconProps} />;
case EnumConnectorName.SERPER_API:
return <Link {...iconProps} />;
case EnumConnectorName.TAVILY_API: case EnumConnectorName.TAVILY_API:
return <IconWorldWww {...iconProps} />; return <IconWorldWww {...iconProps} />;
case EnumConnectorName.SEARXNG_API: case EnumConnectorName.SEARXNG_API:

View file

@ -29,11 +29,10 @@ export const getChatsRequest = z.object({
}); });
export const searchChatsRequest = z.object({ export const searchChatsRequest = z.object({
queryParams: paginationQueryParams queryParams: paginationQueryParams.extend({
.extend({ title: z.string(),
title: z.string(), search_space_id: z.number().or(z.string()).optional(),
search_space_id: z.number().or(z.string()).optional(), }),
}),
}); });
export const deleteChatResponse = z.object({ export const deleteChatResponse = z.object({

View file

@ -79,7 +79,6 @@ export function useConnectorEditPage(connectorId: number, searchSpaceId: string)
name: "", name: "",
SLACK_BOT_TOKEN: "", SLACK_BOT_TOKEN: "",
NOTION_INTEGRATION_TOKEN: "", NOTION_INTEGRATION_TOKEN: "",
SERPER_API_KEY: "",
TAVILY_API_KEY: "", TAVILY_API_KEY: "",
SEARXNG_HOST: "", SEARXNG_HOST: "",
SEARXNG_API_KEY: "", SEARXNG_API_KEY: "",
@ -118,7 +117,6 @@ export function useConnectorEditPage(connectorId: number, searchSpaceId: string)
name: currentConnector.name, name: currentConnector.name,
SLACK_BOT_TOKEN: config.SLACK_BOT_TOKEN || "", SLACK_BOT_TOKEN: config.SLACK_BOT_TOKEN || "",
NOTION_INTEGRATION_TOKEN: config.NOTION_INTEGRATION_TOKEN || "", NOTION_INTEGRATION_TOKEN: config.NOTION_INTEGRATION_TOKEN || "",
SERPER_API_KEY: config.SERPER_API_KEY || "",
TAVILY_API_KEY: config.TAVILY_API_KEY || "", TAVILY_API_KEY: config.TAVILY_API_KEY || "",
SEARXNG_HOST: config.SEARXNG_HOST || "", SEARXNG_HOST: config.SEARXNG_HOST || "",
SEARXNG_API_KEY: config.SEARXNG_API_KEY || "", SEARXNG_API_KEY: config.SEARXNG_API_KEY || "",
@ -282,16 +280,6 @@ export function useConnectorEditPage(connectorId: number, searchSpaceId: string)
}; };
} }
break; break;
case "SERPER_API":
if (formData.SERPER_API_KEY !== originalConfig.SERPER_API_KEY) {
if (!formData.SERPER_API_KEY) {
toast.error("Serper Key empty.");
setIsSaving(false);
return;
}
newConfig = { SERPER_API_KEY: formData.SERPER_API_KEY };
}
break;
case "TAVILY_API": case "TAVILY_API":
if (formData.TAVILY_API_KEY !== originalConfig.TAVILY_API_KEY) { if (formData.TAVILY_API_KEY !== originalConfig.TAVILY_API_KEY) {
if (!formData.TAVILY_API_KEY) { if (!formData.TAVILY_API_KEY) {
@ -570,8 +558,6 @@ export function useConnectorEditPage(connectorId: number, searchSpaceId: string)
"NOTION_INTEGRATION_TOKEN", "NOTION_INTEGRATION_TOKEN",
newlySavedConfig.NOTION_INTEGRATION_TOKEN || "" newlySavedConfig.NOTION_INTEGRATION_TOKEN || ""
); );
} else if (connector.connector_type === "SERPER_API") {
editForm.setValue("SERPER_API_KEY", newlySavedConfig.SERPER_API_KEY || "");
} else if (connector.connector_type === "TAVILY_API") { } else if (connector.connector_type === "TAVILY_API") {
editForm.setValue("TAVILY_API_KEY", newlySavedConfig.TAVILY_API_KEY || ""); editForm.setValue("TAVILY_API_KEY", newlySavedConfig.TAVILY_API_KEY || "");
} else if (connector.connector_type === "SEARXNG_API") { } else if (connector.connector_type === "SEARXNG_API") {

View file

@ -23,7 +23,6 @@ export interface CreateConnectorRequest {
// Get connector type display name // Get connector type display name
export const getConnectorTypeDisplay = (type: string): string => { export const getConnectorTypeDisplay = (type: string): string => {
const typeMap: Record<string, string> = { const typeMap: Record<string, string> = {
SERPER_API: "Serper API",
TAVILY_API: "Tavily API", TAVILY_API: "Tavily API",
SEARXNG_API: "SearxNG", SEARXNG_API: "SearxNG",
}; };

View file

@ -1,7 +1,6 @@
// Helper function to get connector type display name // Helper function to get connector type display name
export const getConnectorTypeDisplay = (type: string): string => { export const getConnectorTypeDisplay = (type: string): string => {
const typeMap: Record<string, string> = { const typeMap: Record<string, string> = {
SERPER_API: "Serper API",
TAVILY_API: "Tavily API", TAVILY_API: "Tavily API",
SEARXNG_API: "SearxNG", SEARXNG_API: "SearxNG",
SLACK_CONNECTOR: "Slack", SLACK_CONNECTOR: "Slack",