refactor: updated chonkie & removed junk code

- Removed all references to the Serper API from the codebase, including related components, hooks, and schemas.
- Updated the `pyproject.toml` to reflect the new version of `chonkie` and other dependencies.
- Cleaned up the configuration and connector management to streamline the application.
This commit is contained in:
DESKTOP-RTLN3BA\$punk 2025-12-19 23:26:45 -08:00
parent 70ca585379
commit 086048a4db
18 changed files with 344 additions and 701 deletions

View file

@ -1,69 +1,12 @@
import os
import shutil
from pathlib import Path
from typing import Any
import yaml
from chonkie import AutoEmbeddings, CodeChunker, RecursiveChunker
from chonkie.embeddings.azure_openai import AzureOpenAIEmbeddings
from chonkie.embeddings.registry import EmbeddingsRegistry
from dotenv import load_dotenv
from rerankers import Reranker
# Monkey patch AzureOpenAIEmbeddings to fix parameter order issue
# This is a temporary workaround until the upstream chonkie library is fixed
class FixedAzureOpenAIEmbeddings(AzureOpenAIEmbeddings):
    """Wrapper around AzureOpenAIEmbeddings that accepts ``model`` as its first parameter.

    Every argument is forwarded to the parent constructor by keyword, so the
    parent's own positional order does not matter.

    Args:
        model: Embedding model name.
        azure_endpoint: Azure OpenAI endpoint URL. When falsy, the
            ``AZURE_OPENAI_ENDPOINT`` environment variable is used, falling
            back to an empty string if that is unset.
        tokenizer: Forwarded unchanged to the parent class.
        dimension: Forwarded unchanged to the parent class.
        azure_api_key: Forwarded unchanged to the parent class.
        api_version: Forwarded unchanged to the parent class.
        deployment: Forwarded unchanged to the parent class.
        max_retries: Forwarded unchanged to the parent class.
        timeout: Forwarded unchanged to the parent class.
        batch_size: Forwarded unchanged to the parent class.
        **kwargs: Any additional keyword arguments, forwarded unchanged.
    """

    def __init__(
        self,
        model: str = "text-embedding-3-small",
        azure_endpoint: str | None = None,
        tokenizer: Any | None = None,
        dimension: int | None = None,
        azure_api_key: str | None = None,
        api_version: str = "2024-10-21",
        deployment: str | None = None,
        max_retries: int = 3,
        timeout: float = 60.0,
        batch_size: int = 128,
        **kwargs: Any,
    ) -> None:
        """Initialize with model as first parameter to avoid conflicts."""
        # Resolve the endpoint here so the parent always receives a string.
        resolved_endpoint = azure_endpoint or os.getenv("AZURE_OPENAI_ENDPOINT", "")

        # Pass everything by keyword: this is what makes the wrapper immune to
        # the parent's positional parameter order.
        super().__init__(
            azure_endpoint=resolved_endpoint,
            model=model,
            tokenizer=tokenizer,
            dimension=dimension,
            azure_api_key=azure_api_key,
            api_version=api_version,
            deployment=deployment,
            max_retries=max_retries,
            timeout=timeout,
            batch_size=batch_size,
            **kwargs,
        )
# TODO: Fix this in chonkie upstream
# Register the patched Azure OpenAI embeddings class with chonkie's registry:
# as the "azure_openai" provider, for every model name starting with
# "text-embedding-", and explicitly for the three model names listed below.
# NOTE(review): per the original comment, the following arguments are inferred from
# their corresponding environment variables when they are not provided — confirm
# against the upstream client documentation:
# - `api_key` from `AZURE_OPENAI_API_KEY`
# - `organization` from `OPENAI_ORG_ID`
# - `project` from `OPENAI_PROJECT_ID`
# - `azure_ad_token` from `AZURE_OPENAI_AD_TOKEN`
# - `api_version` from `OPENAI_API_VERSION`
# - `azure_endpoint` from `AZURE_OPENAI_ENDPOINT`
EmbeddingsRegistry.register_provider("azure_openai", FixedAzureOpenAIEmbeddings)
EmbeddingsRegistry.register_pattern(r"^text-embedding-", FixedAzureOpenAIEmbeddings)
EmbeddingsRegistry.register_model("text-embedding-ada-002", FixedAzureOpenAIEmbeddings)
EmbeddingsRegistry.register_model("text-embedding-3-small", FixedAzureOpenAIEmbeddings)
EmbeddingsRegistry.register_model("text-embedding-3-large", FixedAzureOpenAIEmbeddings)
# Get the base directory of the project: the resolved (absolute) path of this
# file, walked up to the directory two levels above the one containing it.
BASE_DIR = Path(__file__).resolve().parent.parent.parent

View file

@ -1,18 +0,0 @@
from datetime import UTC, datetime
from pydantic import BaseModel
class GoogleAuthCredentialsBase(BaseModel):
    """Pydantic model holding the fields of a Google OAuth credential."""

    token: str
    refresh_token: str
    token_uri: str
    client_id: str
    # Moment at which `token` stops being valid; compared against UTC "now".
    expiry: datetime
    scopes: list[str]
    client_secret: str

    @property
    def expired(self) -> bool:
        """Check if the credentials have expired.

        Returns:
            True when ``expiry`` is at or before the current UTC time.
        """
        expiry = self.expiry
        # Comparing a naive datetime with an aware one raises TypeError.
        # NOTE(review): a naive expiry is assumed to already be in UTC —
        # confirm this matches how the credentials are stored.
        if expiry.tzinfo is None or expiry.utcoffset() is None:
            expiry = expiry.replace(tzinfo=UTC)
        return expiry <= datetime.now(UTC)

View file

@ -8,13 +8,6 @@ from app.db import Chat, Podcast
from app.services.task_logging_service import TaskLoggingService
async def generate_document_podcast(
    session: AsyncSession,
    document_id: int,
    search_space_id: int,
    user_id: int,
):
    """Placeholder for document-based podcast generation; currently does nothing.

    None of the arguments are used yet and the coroutine always resolves to
    ``None``.
    """
    # TODO: Need to fetch the document chunks, then concatenate them and pass them to the podcast generation model
    return None
async def generate_chat_podcast(
session: AsyncSession,
chat_id: int,

View file

@ -41,7 +41,6 @@ dependencies = [
"celery[redis]>=5.5.3",
"flower>=2.0.1",
"redis>=5.2.1",
"chonkie[all]>=1.4.0",
"firecrawl-py>=4.9.0",
"boto3>=1.35.0",
"langchain-community>=0.3.31",
@ -54,6 +53,7 @@ dependencies = [
"deepagents>=0.3.0",
"trafilatura>=2.0.0",
"fastapi-users[oauth,sqlalchemy]>=15.0.3",
"chonkie[all]>=1.5.0",
]
[dependency-groups]

File diff suppressed because it is too large Load diff

View file

@ -1,34 +0,0 @@
# name: "Submit to Web Store"
# on:
# workflow_dispatch:
# jobs:
# build:
# runs-on: ubuntu-latest
# steps:
# - uses: actions/checkout@v3
# - name: Cache pnpm modules
# uses: actions/cache@v3
# with:
# path: ~/.pnpm-store
# key: ${{ runner.os }}-${{ hashFiles('**/pnpm-lock.yaml') }}
# restore-keys: |
# ${{ runner.os }}-
# - uses: pnpm/action-setup@v2.2.4
# with:
# version: latest
# run_install: true
# - name: Use Node.js 16.x
# uses: actions/setup-node@v3.4.1
# with:
# node-version: 16.x
# cache: "pnpm"
# - name: Build the extension
# run: pnpm build
# - name: Package the extension into a zip artifact
# run: pnpm package
# - name: Browser Platform Publish
# uses: PlasmoHQ/bpp@v3
# with:
# keys: ${{ secrets.SUBMIT_KEYS }}
# artifact: build/chrome-mv3-prod.zip

View file

@ -153,15 +153,6 @@ export default function EditConnectorPage() {
placeholder="Begins with secret_..."
/>
)}
{/* == Serper == */}
{connector.connector_type === "SERPER_API" && (
<EditSimpleTokenForm
control={editForm.control}
fieldName="SERPER_API_KEY"
fieldLabel="Serper API Key"
fieldDescription="Update the Serper API Key if needed."
/>
)}
{/* == Tavily == */}
{connector.connector_type === "TAVILY_API" && (
<EditSimpleTokenForm

View file

@ -39,7 +39,6 @@ const apiConnectorFormSchema = z.object({
// Helper function to get connector type display name
const getConnectorTypeDisplay = (type: string): string => {
const typeMap: Record<string, string> = {
SERPER_API: "Serper API",
TAVILY_API: "Tavily API",
SLACK_CONNECTOR: "Slack Connector",
NOTION_CONNECTOR: "Notion Connector",
@ -67,7 +66,6 @@ type ApiConnectorFormValues = z.infer<typeof apiConnectorFormSchema>;
// Get API key field name based on connector type
const getApiKeyFieldName = (connectorType: string): string => {
const fieldMap: Record<string, string> = {
SERPER_API: "SERPER_API_KEY",
TAVILY_API: "TAVILY_API_KEY",
SLACK_CONNECTOR: "SLACK_BOT_TOKEN",
NOTION_CONNECTOR: "NOTION_INTEGRATION_TOKEN",

View file

@ -1,216 +0,0 @@
"use client";
import { zodResolver } from "@hookform/resolvers/zod";
import { ArrowLeft, Check, Info, Loader2 } from "lucide-react";
import { motion } from "motion/react";
import { useParams, useRouter } from "next/navigation";
import { useState } from "react";
import { useForm } from "react-hook-form";
import { toast } from "sonner";
import * as z from "zod";
import { Alert, AlertDescription, AlertTitle } from "@/components/ui/alert";
import { Button } from "@/components/ui/button";
import {
Card,
CardContent,
CardDescription,
CardFooter,
CardHeader,
CardTitle,
} from "@/components/ui/card";
import {
Form,
FormControl,
FormDescription,
FormField,
FormItem,
FormLabel,
FormMessage,
} from "@/components/ui/form";
import { Input } from "@/components/ui/input";
import { EnumConnectorName } from "@/contracts/enums/connector";
import { getConnectorIcon } from "@/contracts/enums/connectorIcons";
import { useSearchSourceConnectors } from "@/hooks/use-search-source-connectors";
// Field validators for the "add Serper connector" form.
const connectorNameField = z.string().min(3, {
  message: "Connector name must be at least 3 characters.",
});
const apiKeyField = z.string().min(10, {
  message: "API key is required and must be valid.",
});

// Zod schema for the whole form, assembled from the validators above.
const serperApiFormSchema = z.object({
  name: connectorNameField,
  api_key: apiKeyField,
});

// Shape of the values produced by a successfully validated form.
type SerperApiFormValues = z.infer<typeof serperApiFormSchema>;
/**
 * Page that lets the user create a Serper API connector for the search space
 * identified by the `search_space_id` route parameter.
 *
 * On submit the form values are sent through `createConnector`; on success a
 * toast is shown and the user is sent back to the connectors list, on failure
 * the error is logged and surfaced as an error toast.
 */
export default function SerperApiPage() {
  const router = useRouter();
  const params = useParams();
  const searchSpaceId = params.search_space_id as string;
  const [isSubmitting, setIsSubmitting] = useState(false);
  const { createConnector } = useSearchSourceConnectors();

  // Initialize the form
  const form = useForm<SerperApiFormValues>({
    resolver: zodResolver(serperApiFormSchema),
    defaultValues: {
      name: "Serper API Connector",
      api_key: "",
    },
  });

  // Handle form submission
  const onSubmit = async (values: SerperApiFormValues) => {
    setIsSubmitting(true);
    try {
      await createConnector(
        {
          name: values.name,
          connector_type: EnumConnectorName.SERPER_API,
          config: {
            SERPER_API_KEY: values.api_key,
          },
          is_indexable: false,
          last_indexed_at: null,
          periodic_indexing_enabled: false,
          indexing_frequency_minutes: null,
          next_scheduled_at: null,
        },
        // Always parse the route segment as base 10; without an explicit
        // radix a "0x"-prefixed segment would be read as hexadecimal.
        Number.parseInt(searchSpaceId, 10)
      );

      toast.success("Serper API connector created successfully!");

      // Navigate back to connectors page
      router.push(`/dashboard/${searchSpaceId}/connectors`);
    } catch (error) {
      console.error("Error creating connector:", error);
      toast.error(error instanceof Error ? error.message : "Failed to create connector");
    } finally {
      // Re-enable the submit button whether the request succeeded or failed.
      setIsSubmitting(false);
    }
  };

  return (
    <div className="container mx-auto py-8 max-w-3xl">
      <Button
        variant="ghost"
        className="mb-6"
        onClick={() => router.push(`/dashboard/${searchSpaceId}/connectors/add`)}
      >
        <ArrowLeft className="mr-2 h-4 w-4" />
        Back to Connectors
      </Button>

      {/* Header */}
      <div className="mb-8">
        <div className="flex items-center gap-4">
          <div className="flex h-12 w-12 items-center justify-center rounded-lg">
            {getConnectorIcon(EnumConnectorName.SERPER_API, "h-6 w-6")}
          </div>
          <div>
            <h1 className="text-3xl font-bold tracking-tight">Connect Serper API</h1>
            <p className="text-muted-foreground">
              Connect Serper API for Google search capabilities.
            </p>
          </div>
        </div>
      </div>

      <motion.div
        initial={{ opacity: 0, y: 20 }}
        animate={{ opacity: 1, y: 0 }}
        transition={{ duration: 0.5 }}
      >
        <Card className="border-2 border-border">
          <CardHeader>
            <CardTitle className="text-2xl font-bold">Connect Serper API</CardTitle>
            <CardDescription>
              Integrate with Serper API to enhance your search capabilities with Google search
              results.
            </CardDescription>
          </CardHeader>
          <CardContent>
            <Alert className="mb-6 bg-muted">
              <Info className="h-4 w-4" />
              <AlertTitle>API Key Required</AlertTitle>
              <AlertDescription>
                You'll need a Serper API key to use this connector. You can get one by signing up at{" "}
                <a
                  href="https://serper.dev"
                  target="_blank"
                  rel="noopener noreferrer"
                  className="font-medium underline underline-offset-4"
                >
                  serper.dev
                </a>
              </AlertDescription>
            </Alert>

            <Form {...form}>
              <form onSubmit={form.handleSubmit(onSubmit)} className="space-y-6">
                <FormField
                  control={form.control}
                  name="name"
                  render={({ field }) => (
                    <FormItem>
                      <FormLabel>Connector Name</FormLabel>
                      <FormControl>
                        <Input placeholder="My Serper API Connector" {...field} />
                      </FormControl>
                      <FormDescription>A friendly name to identify this connector.</FormDescription>
                      <FormMessage />
                    </FormItem>
                  )}
                />

                <FormField
                  control={form.control}
                  name="api_key"
                  render={({ field }) => (
                    <FormItem>
                      <FormLabel>Serper API Key</FormLabel>
                      <FormControl>
                        <Input type="password" placeholder="Enter your Serper API key" {...field} />
                      </FormControl>
                      <FormDescription>
                        Your API key will be encrypted and stored securely.
                      </FormDescription>
                      <FormMessage />
                    </FormItem>
                  )}
                />

                <div className="flex justify-end">
                  <Button type="submit" disabled={isSubmitting} className="w-full sm:w-auto">
                    {isSubmitting ? (
                      <>
                        <Loader2 className="mr-2 h-4 w-4 animate-spin" />
                        Connecting...
                      </>
                    ) : (
                      <>
                        <Check className="mr-2 h-4 w-4" />
                        Connect Serper API
                      </>
                    )}
                  </Button>
                </div>
              </form>
            </Form>
          </CardContent>
          <CardFooter className="flex flex-col items-start border-t bg-muted/50 px-6 py-4">
            <h4 className="text-sm font-medium">What you get with Serper API:</h4>
            <ul className="mt-2 list-disc pl-5 text-sm text-muted-foreground">
              <li>Access to Google search results directly in your research</li>
              <li>Real-time information from the web</li>
              <li>Enhanced search capabilities for your projects</li>
            </ul>
          </CardFooter>
        </Card>
      </motion.div>
    </div>
  );
}

View file

@ -197,7 +197,6 @@ export function DashboardBreadcrumb() {
"slack-connector": "Slack",
"notion-connector": "Notion",
"tavily-api": "Tavily API",
"serper-api": "Serper API",
"linkup-api": "LinkUp API",
"luma-connector": "Luma",
"elasticsearch-connector": "Elasticsearch",

View file

@ -28,7 +28,6 @@ export const editConnectorSchema = z.object({
name: z.string().min(3, { message: "Connector name must be at least 3 characters." }),
SLACK_BOT_TOKEN: z.string().optional(),
NOTION_INTEGRATION_TOKEN: z.string().optional(),
SERPER_API_KEY: z.string().optional(),
TAVILY_API_KEY: z.string().optional(),
SEARXNG_HOST: z.string().optional(),
SEARXNG_API_KEY: z.string().optional(),

View file

@ -1,70 +0,0 @@
"use client";
import { Sparkles } from "lucide-react";
import { cn } from "@/lib/utils";
/**
 * Props accepted by a single `DisplayCard`.
 *
 * Every prop is optional; `DisplayCard` supplies a default for each one
 * except `className`.
 */
interface DisplayCardProps {
/** Extra class names merged into the card's root element. */
className?: string;
/** Node rendered inside the round badge shown before the title. */
icon?: React.ReactNode;
/** Heading text of the card. */
title?: string;
/** Single-line body text of the card. */
description?: string;
/** Footer text of the card (defaults to "Just now"). */
date?: string;
/** Class names for the icon (defaults to "text-blue-500"). */
iconClassName?: string;
/** Class names merged into the title paragraph. */
titleClassName?: string;
}
/**
 * A single skewed, semi-transparent card showing an icon badge, a title, a
 * one-line description and a date line.
 *
 * `className` is merged into the root element, `titleClassName` into the
 * title paragraph, and `iconClassName` into the icon badge. The badge merge
 * is the fix: previously `iconClassName` was accepted but never applied.
 */
function DisplayCard({
  className,
  icon = <Sparkles className="size-4 text-blue-300" />,
  title = "Featured",
  description = "Discover amazing content",
  date = "Just now",
  iconClassName = "text-blue-500",
  titleClassName = "text-blue-500",
}: DisplayCardProps) {
  return (
    <div
      className={cn(
        "relative flex h-36 w-[22rem] -skew-y-[8deg] select-none flex-col justify-between rounded-xl border-2 bg-muted/70 backdrop-blur-sm px-4 py-3 transition-all duration-700 after:absolute after:-right-1 after:top-[-5%] after:h-[110%] after:w-[20rem] after:bg-gradient-to-l after:from-background after:to-transparent after:content-[''] hover:border-white/20 hover:bg-muted [&>*]:flex [&>*]:items-center [&>*]:gap-2",
        className
      )}
    >
      <div>
        {/* The default icon carries its own text color, so the default
            iconClassName only affects custom icons that inherit color. */}
        <span className={cn("relative inline-block rounded-full bg-blue-800 p-1", iconClassName)}>
          {icon}
        </span>
        <p className={cn("text-lg font-medium", titleClassName)}>{title}</p>
      </div>
      <p className="whitespace-nowrap text-lg">{description}</p>
      <p className="text-muted-foreground">{date}</p>
    </div>
  );
}
/** Props for `DisplayCards`. */
interface DisplayCardsProps {
  /** Cards to render; when omitted, three default stacked cards are shown. */
  cards?: DisplayCardProps[];
}

/**
 * Renders a stack of `DisplayCard`s that all occupy the same grid area and are
 * offset from one another by translate classes.
 *
 * When `cards` is not provided, three default cards are rendered that carry
 * only positioning and hover classes, so each falls back to `DisplayCard`'s
 * default content.
 */
export default function DisplayCards({ cards }: DisplayCardsProps) {
  // Fix: the overlay transition duration was written as `before:duration:700`,
  // which is not a valid Tailwind class; the utility is `before:duration-700`.
  const defaultCards = [
    {
      className:
        "[grid-area:stack] hover:-translate-y-10 before:absolute before:w-[100%] before:outline-1 before:rounded-xl before:outline-border before:h-[100%] before:content-[''] before:bg-blend-overlay before:bg-background/50 grayscale-[100%] hover:before:opacity-0 before:transition-opacity before:duration-700 hover:grayscale-0 before:left-0 before:top-0",
    },
    {
      className:
        "[grid-area:stack] translate-x-16 translate-y-10 hover:-translate-y-1 before:absolute before:w-[100%] before:outline-1 before:rounded-xl before:outline-border before:h-[100%] before:content-[''] before:bg-blend-overlay before:bg-background/50 grayscale-[100%] hover:before:opacity-0 before:transition-opacity before:duration-700 hover:grayscale-0 before:left-0 before:top-0",
    },
    {
      className: "[grid-area:stack] translate-x-32 translate-y-20 hover:translate-y-10",
    },
  ];

  // An array is never falsy, so `??` selects exactly what `||` did here.
  const displayCards = cards ?? defaultCards;

  return (
    <div className="grid [grid-template-areas:'stack'] place-items-center opacity-100 animate-in fade-in-0 duration-700">
      {displayCards.map((cardProps, index) => (
        <DisplayCard key={index} {...cardProps} />
      ))}
    </div>
  );
}

View file

@ -1,5 +1,4 @@
export enum EnumConnectorName {
SERPER_API = "SERPER_API",
TAVILY_API = "TAVILY_API",
SEARXNG_API = "SEARXNG_API",
LINKUP_API = "LINKUP_API",

View file

@ -21,7 +21,6 @@ import {
File,
FileText,
Globe,
Link,
Microscope,
Search,
Sparkles,
@ -40,8 +39,6 @@ export const getConnectorIcon = (connectorType: EnumConnectorName | string, clas
return <IconLayoutKanban {...iconProps} />;
case EnumConnectorName.GITHUB_CONNECTOR:
return <IconBrandGithub {...iconProps} />;
case EnumConnectorName.SERPER_API:
return <Link {...iconProps} />;
case EnumConnectorName.TAVILY_API:
return <IconWorldWww {...iconProps} />;
case EnumConnectorName.SEARXNG_API:

View file

@ -29,11 +29,10 @@ export const getChatsRequest = z.object({
});
/**
 * Request schema for searching chats: the shared pagination query parameters
 * extended with a required `title` and an optional `search_space_id` that may
 * be either a number or a string.
 */
export const searchChatsRequest = z.object({
  // `queryParams` is defined once; the earlier duplicate of this property
  // (identical apart from line wrapping) has been removed.
  queryParams: paginationQueryParams.extend({
    title: z.string(),
    search_space_id: z.number().or(z.string()).optional(),
  }),
});
export const deleteChatResponse = z.object({

View file

@ -79,7 +79,6 @@ export function useConnectorEditPage(connectorId: number, searchSpaceId: string)
name: "",
SLACK_BOT_TOKEN: "",
NOTION_INTEGRATION_TOKEN: "",
SERPER_API_KEY: "",
TAVILY_API_KEY: "",
SEARXNG_HOST: "",
SEARXNG_API_KEY: "",
@ -118,7 +117,6 @@ export function useConnectorEditPage(connectorId: number, searchSpaceId: string)
name: currentConnector.name,
SLACK_BOT_TOKEN: config.SLACK_BOT_TOKEN || "",
NOTION_INTEGRATION_TOKEN: config.NOTION_INTEGRATION_TOKEN || "",
SERPER_API_KEY: config.SERPER_API_KEY || "",
TAVILY_API_KEY: config.TAVILY_API_KEY || "",
SEARXNG_HOST: config.SEARXNG_HOST || "",
SEARXNG_API_KEY: config.SEARXNG_API_KEY || "",
@ -282,16 +280,6 @@ export function useConnectorEditPage(connectorId: number, searchSpaceId: string)
};
}
break;
case "SERPER_API":
if (formData.SERPER_API_KEY !== originalConfig.SERPER_API_KEY) {
if (!formData.SERPER_API_KEY) {
toast.error("Serper Key empty.");
setIsSaving(false);
return;
}
newConfig = { SERPER_API_KEY: formData.SERPER_API_KEY };
}
break;
case "TAVILY_API":
if (formData.TAVILY_API_KEY !== originalConfig.TAVILY_API_KEY) {
if (!formData.TAVILY_API_KEY) {
@ -570,8 +558,6 @@ export function useConnectorEditPage(connectorId: number, searchSpaceId: string)
"NOTION_INTEGRATION_TOKEN",
newlySavedConfig.NOTION_INTEGRATION_TOKEN || ""
);
} else if (connector.connector_type === "SERPER_API") {
editForm.setValue("SERPER_API_KEY", newlySavedConfig.SERPER_API_KEY || "");
} else if (connector.connector_type === "TAVILY_API") {
editForm.setValue("TAVILY_API_KEY", newlySavedConfig.TAVILY_API_KEY || "");
} else if (connector.connector_type === "SEARXNG_API") {

View file

@ -23,7 +23,6 @@ export interface CreateConnectorRequest {
// Get connector type display name
export const getConnectorTypeDisplay = (type: string): string => {
const typeMap: Record<string, string> = {
SERPER_API: "Serper API",
TAVILY_API: "Tavily API",
SEARXNG_API: "SearxNG",
};

View file

@ -1,7 +1,6 @@
// Helper function to get connector type display name
export const getConnectorTypeDisplay = (type: string): string => {
const typeMap: Record<string, string> = {
SERPER_API: "Serper API",
TAVILY_API: "Tavily API",
SEARXNG_API: "SearxNG",
SLACK_CONNECTOR: "Slack",