Renaming resources

2026-07-12 22:42:13 +02:00 · 2025-11-22 19:19:00 -08:00 · 2025-11-22 19:19:00 -08:00 · 121e2f0c0e
commit 121e2f0c0e
parent 6d19e0fad8
24 changed files with 117 additions and 273 deletions
--- a/surfsense_backend/alembic/versions/38_add_webcrawler_connector_enum.py
+++ b/surfsense_backend/alembic/versions/38_add_webcrawler_connector_enum.py
@ -44,9 +44,9 @@ def upgrade() -> None:
        IF NOT EXISTS (
            SELECT 1 FROM pg_type t
            JOIN pg_enum e ON t.oid = e.enumtypid
-            WHERE t.typname = 'documenttype' AND e.enumlabel = 'WEBCRAWLER_CONNECTOR'
+            WHERE t.typname = 'documenttype' AND e.enumlabel = 'CRAWLED_URL'
        ) THEN
-            ALTER TYPE documenttype ADD VALUE 'WEBCRAWLER_CONNECTOR';
+            ALTER TYPE documenttype ADD VALUE 'CRAWLED_URL';
        END IF;
    END
    $$;
--- a/surfsense_backend/app/agents/researcher/nodes.py
+++ b/surfsense_backend/app/agents/researcher/nodes.py
@ -671,7 +671,7 @@ async def fetch_relevant_documents(
                    (
                        source_object,
                        crawled_urls_chunks,
-                    ) = await connector_service.search_crawled_urls(
+                    ) = await connector_service.search_webcrawler(
                        user_query=reformulated_query,
                        user_id=user_id,
                        search_space_id=search_space_id,
--- a/surfsense_backend/app/agents/researcher/qna_agent/default_prompts.py
+++ b/surfsense_backend/app/agents/researcher/qna_agent/default_prompts.py
@ -34,7 +34,7 @@ You are SurfSense, an advanced AI research assistant that provides detailed, wel
 - TAVILY_API: "Tavily search API results" (personalized search results)
 - LINKUP_API: "Linkup search API results" (personalized search results)
 - LUMA_CONNECTOR: "Luma events"
- WEBCRAWLER_CONNECTOR: "Webpages indexed by SurfSense web crawler" (personally selected websites)
+- WEBCRAWLER_CONNECTOR: "Webpages indexed by SurfSense" (personally selected websites)
 </knowledge_sources>

 <instructions>
--- a/surfsense_backend/app/config/init.py
+++ b/surfsense_backend/app/config/init.py
@ -208,9 +208,6 @@ class Config:
        # LlamaCloud API Key
        LLAMA_CLOUD_API_KEY = os.getenv("LLAMA_CLOUD_API_KEY")

-    # Firecrawl API Key
-    FIRECRAWL_API_KEY = os.getenv("FIRECRAWL_API_KEY", None)
-
    # Litellm TTS Configuration
    TTS_SERVICE = os.getenv("TTS_SERVICE")
    TTS_SERVICE_API_BASE = os.getenv("TTS_SERVICE_API_BASE")
--- a/surfsense_backend/app/routes/search_source_connectors_routes.py
+++ b/surfsense_backend/app/routes/search_source_connectors_routes.py
@ -49,7 +49,7 @@ from app.tasks.connector_indexers import (
    index_luma_events,
    index_notion_pages,
    index_slack_messages,
-    index_webcrawler_urls,
+    index_crawled_urls,
 )
 from app.users import current_active_user
 from app.utils.check_ownership import check_ownership
@ -691,12 +691,12 @@ async def index_connector_content(
            response_message = "Elasticsearch indexing started in the background."

        elif connector.connector_type == SearchSourceConnectorType.WEBCRAWLER_CONNECTOR:
-            from app.tasks.celery_tasks.connector_tasks import index_webcrawler_urls_task
+            from app.tasks.celery_tasks.connector_tasks import index_crawled_urls_task

            logger.info(
                f"Triggering web pages indexing for connector {connector_id} into search space {search_space_id} from {indexing_from} to {indexing_to}"
            )
-            index_webcrawler_urls_task.delay(
+            index_crawled_urls_task.delay(
                connector_id, search_space_id, str(user.id), indexing_from, indexing_to
            )
            response_message = "Web page indexing started in the background."
@ -1537,8 +1537,8 @@ async def run_elasticsearch_indexing(
            exc_info=True,
        )

-# Add new helper functions for webcrawler indexing
-async def run_webcrawler_indexing_with_new_session(
+# Add new helper functions for crawled web page indexing
+async def run_web_page_indexing_with_new_session(
    connector_id: int,
    search_space_id: int,
    user_id: str,
@ -1546,16 +1546,16 @@ async def run_webcrawler_indexing_with_new_session(
    end_date: str,
 ):
    """
-    Create a new session and run the Webcrawler indexing task.
+    Create a new session and run the Web page indexing task.
    This prevents session leaks by creating a dedicated session for the background task.
    """
    async with async_session_maker() as session:
-        await run_webcrawler_indexing(
+        await run_web_page_indexing(
            session, connector_id, search_space_id, user_id, start_date, end_date
        )


-async def run_webcrawler_indexing(
+async def run_web_page_indexing(
    session: AsyncSession,
    connector_id: int,
    search_space_id: int,
@ -1564,7 +1564,7 @@ async def run_webcrawler_indexing(
    end_date: str,
 ):
    """
-    Background task to run Webcrawler indexing.
+    Background task to run Web page indexing.
    Args:
        session: Database session
        connector_id: ID of the webcrawler connector
@ -1574,7 +1574,7 @@ async def run_webcrawler_indexing(
        end_date: End date for indexing
    """
    try:
-        documents_processed, error_or_warning = await index_webcrawler_urls(
+        documents_processed, error_or_warning = await index_crawled_urls(
            session=session,
            connector_id=connector_id,
            search_space_id=search_space_id,
@ -1588,11 +1588,11 @@ async def run_webcrawler_indexing(
        if documents_processed > 0:
            await update_connector_last_indexed(session, connector_id)
            logger.info(
-                f"Webcrawler indexing completed successfully: {documents_processed} documents processed"
+                f"Web page indexing completed successfully: {documents_processed} documents processed"
            )
        else:
            logger.error(
-                f"Webcrawler indexing failed or no documents processed: {error_or_warning}"
+                f"Web page indexing failed or no documents processed: {error_or_warning}"
            )
    except Exception as e:
-        logger.error(f"Error in background Webcrawler indexing task: {e!s}")
+        logger.error(f"Error in background Web page indexing task: {e!s}")
--- a/surfsense_backend/app/services/connector_service.py
+++ b/surfsense_backend/app/services/connector_service.py
@ -2573,4 +2573,4 @@ class ConnectorService:
            "sources": sources_list,
        }

-        return result_object, elasticsearch_chunks
+        return result_object, elasticsearch_chunks
--- a/surfsense_backend/app/tasks/celery_tasks/connector_tasks.py
+++ b/surfsense_backend/app/tasks/celery_tasks/connector_tasks.py
@ -602,8 +602,8 @@ async def _index_elasticsearch_documents(
        )


-@celery_app.task(name="index_webcrawler_urls", bind=True)
-def index_webcrawler_urls_task(
+@celery_app.task(name="index_crawled_urls", bind=True)
+def index_crawled_urls_task(
    self,
    connector_id: int,
    search_space_id: int,
@ -611,7 +611,7 @@ def index_webcrawler_urls_task(
    start_date: str,
    end_date: str,
 ):
-    """Celery task to index Webcrawler Urls."""
+    """Celery task to index Web page Urls."""
    import asyncio

    loop = asyncio.new_event_loop()
@ -619,7 +619,7 @@ def index_webcrawler_urls_task(

    try:
        loop.run_until_complete(
-            _index_webcrawler_urls(
+            _index_crawled_urls(
                connector_id, search_space_id, user_id, start_date, end_date
            )
        )
@ -627,19 +627,19 @@ def index_webcrawler_urls_task(
        loop.close()


-async def _index_webcrawler_urls(
+async def _index_crawled_urls(
    connector_id: int,
    search_space_id: int,
    user_id: str,
    start_date: str,
    end_date: str,
 ):
-    """Index Webcrawler Urls with new session."""
+    """Index Web page Urls with new session."""
    from app.routes.search_source_connectors_routes import (
-        run_webcrawler_indexing,
+        run_web_page_indexing,
    )

    async with get_celery_session_maker()() as session:
-        await run_webcrawler_indexing(
+        await run_web_page_indexing(
            session, connector_id, search_space_id, user_id, start_date, end_date
        )
--- a/surfsense_backend/app/tasks/celery_tasks/schedule_checker_task.py
+++ b/surfsense_backend/app/tasks/celery_tasks/schedule_checker_task.py
@ -77,7 +77,7 @@ async def _check_and_trigger_schedules():
                index_luma_events_task,
                index_notion_pages_task,
                index_slack_messages_task,
-                index_webcrawler_urls_task
+                index_crawled_urls_task
            )

            # Map connector types to their tasks
@ -95,7 +95,7 @@ async def _check_and_trigger_schedules():
                SearchSourceConnectorType.DISCORD_CONNECTOR: index_discord_messages_task,
                SearchSourceConnectorType.LUMA_CONNECTOR: index_luma_events_task,
                SearchSourceConnectorType.ELASTICSEARCH_CONNECTOR: index_elasticsearch_documents_task,
-                SearchSourceConnectorType.WEBCRAWLER_CONNECTOR: index_webcrawler_urls_task,
+                SearchSourceConnectorType.WEBCRAWLER_CONNECTOR: index_crawled_urls_task,
            }

            # Trigger indexing for each due connector
--- a/surfsense_backend/app/tasks/connector_indexers/init.py
+++ b/surfsense_backend/app/tasks/connector_indexers/init.py
@ -42,7 +42,7 @@ from .luma_indexer import index_luma_events
 # Documentation and knowledge management
 from .notion_indexer import index_notion_pages
 from .slack_indexer import index_slack_messages
-from .webcrawler_indexer import index_webcrawler_urls
+from .webcrawler_indexer import index_crawled_urls

 __all__ = [  # noqa: RUF022
    "index_airtable_records",
@ -60,7 +60,7 @@ __all__ = [  # noqa: RUF022
    "index_linear_issues",
    # Documentation and knowledge management
    "index_notion_pages",
-    "index_webcrawler_urls",
+    "index_crawled_urls",
    # Communication platforms
    "index_slack_messages",
    "index_google_gmail_messages",
--- a/surfsense_backend/app/tasks/connector_indexers/webcrawler_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/webcrawler_indexer.py
@ -27,7 +27,7 @@ from .base import (
 )


-async def index_webcrawler_urls(
+async def index_crawled_urls(
    session: AsyncSession,
    connector_id: int,
    search_space_id: int,
@ -37,7 +37,7 @@ async def index_webcrawler_urls(
    update_last_indexed: bool = True,
 ) -> tuple[int, str | None]:
    """
-    Index webcrawler URLs.
+    Index web page URLs.

    Args:
        session: Database session
@ -55,9 +55,9 @@ async def index_webcrawler_urls(

    # Log task start
    log_entry = await task_logger.log_task_start(
-        task_name="webcrawler_url_indexing",
+        task_name="crawled_url_indexing",
        source="connector_indexing_task",
-        message=f"Starting webcrawler URL indexing for connector {connector_id}",
+        message=f"Starting web page URL indexing for connector {connector_id}",
        metadata={
            "connector_id": connector_id,
            "user_id": str(user_id),
@ -104,7 +104,7 @@ async def index_webcrawler_urls(
            urls = []

        logger.info(
-            f"Starting webcrawler indexing for connector {connector_id} with {len(urls)} URLs"
+            f"Starting crawled web page indexing for connector {connector_id} with {len(urls)} URLs"
        )

        # Initialize webcrawler client
@ -367,7 +367,7 @@ async def index_webcrawler_urls(

        await task_logger.log_task_success(
            log_entry,
-            f"Successfully completed webcrawler indexing for connector {connector_id}",
+            f"Successfully completed crawled web page indexing for connector {connector_id}",
            {
                "urls_processed": total_processed,
                "documents_indexed": documents_indexed,
@ -378,7 +378,7 @@ async def index_webcrawler_urls(
        )

        logger.info(
-            f"Webcrawler indexing completed: {documents_indexed} new, "
+            f"Web page indexing completed: {documents_indexed} new, "
            f"{documents_updated} updated, {documents_skipped} skipped, "
            f"{len(failed_urls)} failed"
        )
@ -388,7 +388,7 @@ async def index_webcrawler_urls(
        await session.rollback()
        await task_logger.log_task_failure(
            log_entry,
-            f"Database error during webcrawler indexing for connector {connector_id}",
+            f"Database error during web page indexing for connector {connector_id}",
            str(db_error),
            {"error_type": "SQLAlchemyError"},
        )
@ -398,12 +398,12 @@ async def index_webcrawler_urls(
        await session.rollback()
        await task_logger.log_task_failure(
            log_entry,
-            f"Failed to index webcrawler URLs for connector {connector_id}",
+            f"Failed to index web page URLs for connector {connector_id}",
            str(e),
            {"error_type": type(e).__name__},
        )
-        logger.error(f"Failed to index webcrawler URLs: {e!s}", exc_info=True)
-        return 0, f"Failed to index webcrawler URLs: {e!s}"
+        logger.error(f"Failed to index web page URLs: {e!s}", exc_info=True)
+        return 0, f"Failed to index web page URLs: {e!s}"


 async def get_crawled_url_documents(
--- a/surfsense_backend/app/utils/periodic_scheduler.py
+++ b/surfsense_backend/app/utils/periodic_scheduler.py
@ -31,7 +31,7 @@ CONNECTOR_TASK_MAP = {
    SearchSourceConnectorType.DISCORD_CONNECTOR: "index_discord_messages",
    SearchSourceConnectorType.LUMA_CONNECTOR: "index_luma_events",
    SearchSourceConnectorType.ELASTICSEARCH_CONNECTOR: "index_elasticsearch_documents",
-    SearchSourceConnectorType.WEBCRAWLER_CONNECTOR: "index_webcrawler_urls",
+    SearchSourceConnectorType.WEBCRAWLER_CONNECTOR: "index_crawled_urls",
 }


@ -80,7 +80,7 @@ def create_periodic_schedule(
            index_luma_events_task,
            index_notion_pages_task,
            index_slack_messages_task,
-            index_webcrawler_urls_task,
+            index_crawled_urls_task,
        )

        # Map connector type to task
@ -98,7 +98,7 @@ def create_periodic_schedule(
            SearchSourceConnectorType.DISCORD_CONNECTOR: index_discord_messages_task,
            SearchSourceConnectorType.LUMA_CONNECTOR: index_luma_events_task,
            SearchSourceConnectorType.ELASTICSEARCH_CONNECTOR: index_elasticsearch_documents_task,
-            SearchSourceConnectorType.WEBCRAWLER_CONNECTOR: index_webcrawler_urls_task,
+            SearchSourceConnectorType.WEBCRAWLER_CONNECTOR: index_crawled_urls_task,
        }

        # Trigger the first run immediately
--- a/surfsense_web/app/dashboard/[search_space_id]/connectors/[connector_id]/edit/page.tsx
+++ b/surfsense_web/app/dashboard/[search_space_id]/connectors/[connector_id]/edit/page.tsx
@ -18,7 +18,16 @@ import {
 	CardHeader,
 	CardTitle,
 } from "@/components/ui/card";
-import { Form } from "@/components/ui/form";
+import {
+	Form,
+	FormControl,
+	FormDescription,
+	FormField,
+	FormItem,
+	FormLabel,
+	FormMessage,
+} from "@/components/ui/form";
+import { Textarea } from "@/components/ui/textarea";
 import { getConnectorIcon } from "@/contracts/enums/connectorIcons";
 import { useConnectorEditPage } from "@/hooks/use-connector-edit-page";
 // Import Utils, Types, Hook, and Components
@ -285,13 +294,35 @@ export default function EditConnectorPage() {

 								{/* == Webcrawler == */}
 								{connector.connector_type === "WEBCRAWLER_CONNECTOR" && (
-									<EditSimpleTokenForm
-										control={editForm.control}
-										fieldName="FIRECRAWL_API_KEY"
-										fieldLabel="Firecrawl API Key (Optional)"
-										fieldDescription="Add a Firecrawl API key for enhanced crawling capabilities. If not provided, will use AsyncChromiumLoader as fallback."
-										placeholder="fc-xxxxxxxxxxxxx"
-									/>
+									<div className="space-y-4">
+										<EditSimpleTokenForm
+											control={editForm.control}
+											fieldName="FIRECRAWL_API_KEY"
+											fieldLabel="Firecrawl API Key (Optional)"
+											fieldDescription="Add a Firecrawl API key for enhanced crawling capabilities. If not provided, will use AsyncChromiumLoader as fallback."
+											placeholder="fc-xxxxxxxxxxxxx"
+										/>
+										<FormField
+											control={editForm.control}
+											name="INITIAL_URLS"
+											render={({ field }) => (
+												<FormItem>
+													<FormLabel>URLs to Crawl</FormLabel>
+													<FormControl>
+														<Textarea
+															placeholder="https://example.com&#10;https://docs.example.com&#10;https://blog.example.com"
+															className="min-h-[150px] font-mono text-sm"
+															{...field}
+														/>
+													</FormControl>
+													<FormDescription>
+														Enter URLs to crawl (one per line). These URLs will be indexed when you trigger indexing.
+													</FormDescription>
+													<FormMessage />
+												</FormItem>
+											)}
+										/>
+									</div>
 								)}
 								
 							</CardContent>
--- a/surfsense_web/app/dashboard/[search_space_id]/connectors/[connector_id]/page.tsx
+++ b/surfsense_web/app/dashboard/[search_space_id]/connectors/[connector_id]/page.tsx
@ -55,7 +55,7 @@ const getConnectorTypeDisplay = (type: string): string => {
 		AIRTABLE_CONNECTOR: "Airtable Connector",
 		LUMA_CONNECTOR: "Luma Connector",
 		ELASTICSEARCH_CONNECTOR: "Elasticsearch Connector",
-		WEBCRAWLER_CONNECTOR: "Web Crawler Connector",
+		WEBCRAWLER_CONNECTOR: "Web Page Connector",
 		// Add other connector types here as needed
 	};
 	return typeMap[type] || type;
--- a/surfsense_web/app/dashboard/[search_space_id]/connectors/add/webcrawler-connector/page.tsx
+++ b/surfsense_web/app/dashboard/[search_space_id]/connectors/add/webcrawler-connector/page.tsx
@ -64,7 +64,7 @@ export default function WebcrawlerConnectorPage() {
 	const form = useForm<WebcrawlerConnectorFormValues>({
 		resolver: zodResolver(webcrawlerConnectorFormSchema),
 		defaultValues: {
-			name: "Web Crawler",
+			name: "Web Pages",
 			api_key: "",
 			initial_urls: "",
 		},
@ -150,7 +150,7 @@ export default function WebcrawlerConnectorPage() {
 							{getConnectorIcon(EnumConnectorName.WEBCRAWLER_CONNECTOR, "h-6 w-6")}
 						</div>
 						<div>
-							<h1 className="text-3xl font-bold tracking-tight">Connect Web Crawler</h1>
+							<h1 className="text-3xl font-bold tracking-tight">Connect Web Pages</h1>
 							<p className="text-muted-foreground">Crawl and index web pages for search.</p>
 						</div>
 					</div>
@ -160,9 +160,9 @@ export default function WebcrawlerConnectorPage() {
 				{!doesConnectorExist ? (
 					<Card>
 						<CardHeader>
-							<CardTitle>Set Up Web Crawler</CardTitle>
+							<CardTitle>Set Up Web Page crawler</CardTitle>
 							<CardDescription>
-								Configure your web crawler to index web pages. Optionally add a Firecrawl API key
+								Configure your web page crawler to index web pages. Optionally add a Firecrawl API key
 								for enhanced crawling capabilities.
 							</CardDescription>
 						</CardHeader>
@ -277,7 +277,7 @@ export default function WebcrawlerConnectorPage() {
 					/* Success Card */
 					<Card>
 						<CardHeader>
-							<CardTitle>✅ Your web crawler is successfully set up!</CardTitle>
+							<CardTitle>✅ Your web page crawler is successfully set up!</CardTitle>
 							<CardDescription>
 								You can now add URLs to crawl from the connector management page.
 							</CardDescription>
--- a/surfsense_web/app/dashboard/[search_space_id]/documents/webpage/page.tsx
+++ b/surfsense_web/app/dashboard/[search_space_id]/documents/webpage/page.tsx
@ -1,201 +0,0 @@
-"use client";
-
-import { type Tag, TagInput } from "emblor";
-import { Globe, Loader2 } from "lucide-react";
-import { useParams, useRouter } from "next/navigation";
-import { useTranslations } from "next-intl";
-import { useState } from "react";
-import { toast } from "sonner";
-import { Button } from "@/components/ui/button";
-import {
-	Card,
-	CardContent,
-	CardDescription,
-	CardFooter,
-	CardHeader,
-	CardTitle,
-} from "@/components/ui/card";
-import { Label } from "@/components/ui/label";
-
-// URL validation regex
-const urlRegex = /^(https?:\/\/)?([\da-z.-]+)\.([a-z.]{2,6})([/\w .-]*)*\/?$/;
-
-export default function WebpageCrawler() {
-	const t = useTranslations("add_webpage");
-	const params = useParams();
-	const router = useRouter();
-	const search_space_id = params.search_space_id as string;
-
-	const [urlTags, setUrlTags] = useState<Tag[]>([]);
-	const [activeTagIndex, setActiveTagIndex] = useState<number | null>(null);
-	const [isSubmitting, setIsSubmitting] = useState(false);
-	const [error, setError] = useState<string | null>(null);
-
-	// Function to validate a URL
-	const isValidUrl = (url: string): boolean => {
-		return urlRegex.test(url);
-	};
-
-	// Function to handle URL submission
-	const handleSubmit = async () => {
-		// Validate that we have at least one URL
-		if (urlTags.length === 0) {
-			setError(t("error_no_url"));
-			return;
-		}
-
-		// Validate all URLs
-		const invalidUrls = urlTags.filter((tag) => !isValidUrl(tag.text));
-		if (invalidUrls.length > 0) {
-			setError(t("error_invalid_urls", { urls: invalidUrls.map((tag) => tag.text).join(", ") }));
-			return;
-		}
-
-		setError(null);
-		setIsSubmitting(true);
-
-		try {
-			toast(t("crawling_toast"), {
-				description: t("crawling_toast_desc"),
-			});
-
-			// Extract URLs from tags
-			const urls = urlTags.map((tag) => tag.text);
-
-			// Make API call to backend
-			const response = await fetch(
-				`${process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL}/api/v1/documents`,
-				{
-					method: "POST",
-					headers: {
-						"Content-Type": "application/json",
-						Authorization: `Bearer ${localStorage.getItem("surfsense_bearer_token")}`,
-					},
-					body: JSON.stringify({
-						document_type: "CRAWLED_URL",
-						content: urls,
-						search_space_id: parseInt(search_space_id),
-					}),
-				}
-			);
-
-			if (!response.ok) {
-				throw new Error("Failed to crawl URLs");
-			}
-
-			await response.json();
-
-			toast(t("success_toast"), {
-				description: t("success_toast_desc"),
-			});
-
-			// Redirect to documents page
-			router.push(`/dashboard/${search_space_id}/documents`);
-		} catch (error: any) {
-			setError(error.message || t("error_generic"));
-			toast(t("error_toast"), {
-				description: `${t("error_toast_desc")}: ${error.message}`,
-			});
-		} finally {
-			setIsSubmitting(false);
-		}
-	};
-
-	// Function to add a new URL tag
-	const handleAddTag = (text: string) => {
-		// Basic URL validation
-		if (!isValidUrl(text)) {
-			toast(t("invalid_url_toast"), {
-				description: t("invalid_url_toast_desc"),
-			});
-			return;
-		}
-
-		// Check for duplicates
-		if (urlTags.some((tag) => tag.text === text)) {
-			toast(t("duplicate_url_toast"), {
-				description: t("duplicate_url_toast_desc"),
-			});
-			return;
-		}
-
-		// Add the new tag
-		const newTag: Tag = {
-			id: Date.now().toString(),
-			text: text,
-		};
-
-		setUrlTags([...urlTags, newTag]);
-	};
-
-	return (
-		<div className="container mx-auto py-8">
-			<Card className="max-w-2xl mx-auto">
-				<CardHeader>
-					<CardTitle className="flex items-center gap-2">
-						<Globe className="h-5 w-5" />
-						{t("title")}
-					</CardTitle>
-					<CardDescription>{t("subtitle")}</CardDescription>
-				</CardHeader>
-				<CardContent>
-					<div className="space-y-4">
-						<div className="space-y-2">
-							<Label htmlFor="url-input">{t("label")}</Label>
-							<TagInput
-								id="url-input"
-								tags={urlTags}
-								setTags={setUrlTags}
-								placeholder={t("placeholder")}
-								onAddTag={handleAddTag}
-								styleClasses={{
-									inlineTagsContainer:
-										"border-input rounded-lg bg-background shadow-sm shadow-black/5 transition-shadow focus-within:border-ring focus-within:outline-none focus-within:ring-[3px] focus-within:ring-ring/20 p-1 gap-1",
-									input: "w-full min-w-[80px] focus-visible:outline-none shadow-none px-2 h-7",
-									tag: {
-										body: "h-7 relative bg-background border border-input hover:bg-background rounded-md font-medium text-xs ps-2 pe-7 flex",
-										closeButton:
-											"absolute -inset-y-px -end-px p-0 rounded-e-lg flex size-7 transition-colors outline-0 focus-visible:outline focus-visible:outline-2 focus-visible:outline-ring/70 text-muted-foreground/80 hover:text-foreground",
-									},
-								}}
-								activeTagIndex={activeTagIndex}
-								setActiveTagIndex={setActiveTagIndex}
-							/>
-							<p className="text-xs text-muted-foreground mt-1">{t("hint")}</p>
-						</div>
-
-						{error && <div className="text-sm text-red-500 mt-2">{error}</div>}
-
-						<div className="bg-muted/50 rounded-lg p-4 text-sm">
-							<h4 className="font-medium mb-2">{t("tips_title")}</h4>
-							<ul className="list-disc pl-5 space-y-1 text-muted-foreground">
-								<li>{t("tip_1")}</li>
-								<li>{t("tip_2")}</li>
-								<li>{t("tip_3")}</li>
-								<li>{t("tip_4")}</li>
-							</ul>
-						</div>
-					</div>
-				</CardContent>
-				<CardFooter className="flex justify-between">
-					<Button
-						variant="outline"
-						onClick={() => router.push(`/dashboard/${search_space_id}/documents`)}
-					>
-						{t("cancel")}
-					</Button>
-					<Button onClick={handleSubmit} disabled={isSubmitting || urlTags.length === 0}>
-						{isSubmitting ? (
-							<>
-								<Loader2 className="mr-2 h-4 w-4 animate-spin" />
-								{t("submitting")}
-							</>
-						) : (
-							t("submit")
-						)}
-					</Button>
-				</CardFooter>
-			</Card>
-		</div>
-	);
-}
--- a/surfsense_web/components/dashboard-breadcrumb.tsx
+++ b/surfsense_web/components/dashboard-breadcrumb.tsx
@ -138,7 +138,7 @@ export function DashboardBreadcrumb() {
 								"linkup-api": "LinkUp API",
 								"luma-connector": "Luma",
 								"elasticsearch-connector": "Elasticsearch",
-								"webcrawler-connector": "WebCrawler",
+								"webcrawler-connector": "Web Pages",
 							};

 							const connectorLabel = connectorLabels[connectorType] || connectorType;
--- a/surfsense_web/components/editConnector/types.ts
+++ b/surfsense_web/components/editConnector/types.ts
@ -53,5 +53,6 @@ export const editConnectorSchema = z.object({
 	LUMA_API_KEY: z.string().optional(),
 	ELASTICSEARCH_API_KEY: z.string().optional(),
 	FIRECRAWL_API_KEY: z.string().optional(),
+	INITIAL_URLS: z.string().optional()
 });
 export type EditConnectorFormValues = z.infer<typeof editConnectorSchema>;
--- a/surfsense_web/components/homepage/integrations.tsx
+++ b/surfsense_web/components/homepage/integrations.tsx
@ -29,7 +29,7 @@ const INTEGRATIONS: Integration[] = [
 	// Documentation & Knowledge
 	{ name: "Confluence", icon: "https://cdn.simpleicons.org/confluence/172B4D" },
 	{ name: "Notion", icon: "https://cdn.simpleicons.org/notion/000000/ffffff" },
-	{ name: "Web Crawler", icon: "https://cdn.jsdelivr.net/npm/lucide-static@0.294.0/icons/globe.svg"},
+	{ name: "Web Pages", icon: "https://cdn.jsdelivr.net/npm/lucide-static@0.294.0/icons/globe.svg"},

 	// Cloud Storage
 	{ name: "Google Drive", icon: "https://cdn.simpleicons.org/googledrive/4285F4" },
--- a/surfsense_web/components/sources/connector-data.tsx
+++ b/surfsense_web/components/sources/connector-data.tsx
@ -140,7 +140,7 @@ export const connectorCategories: ConnectorCategory[] = [
 			},
 			{
 				id: "webcrawler-connector",
-				title: "Web Crawler",
+				title: "Web Pages",
 				description: "webcrawler_desc",
 				icon: getConnectorIcon(EnumConnectorName.WEBCRAWLER_CONNECTOR, "h-6 w-6"),
 				status: "available",
--- a/surfsense_web/content/docs/docker-installation.mdx
+++ b/surfsense_web/content/docs/docker-installation.mdx
@ -96,8 +96,7 @@ Before you begin, ensure you have:
 | TTS_SERVICE_API_BASE       | (Optional) Custom API base URL for the Text-to-Speech service                                                                                                                           |
 | STT_SERVICE                | Speech-to-Text API provider for Audio Files (e.g., `local/base`, `openai/whisper-1`). See [supported providers](https://docs.litellm.ai/docs/audio_transcription#supported-providers)                   |
 | STT_SERVICE_API_KEY        | (Optional if local) API key for the Speech-to-Text service                                                                                                                                                    |
-| STT_SERVICE_API_BASE       | (Optional) Custom API base URL for the Speech-to-Text service                                                                                                                      |
-| FIRECRAWL_API_KEY          | API key for Firecrawl service for web crawling                                                                                                                                            |
+| STT_SERVICE_API_BASE       | (Optional) Custom API base URL for the Speech-to-Text service                                                                                                                      |
 | ETL_SERVICE                | Document parsing service: `UNSTRUCTURED` (supports 34+ formats), `LLAMACLOUD` (supports 50+ formats including legacy document types), or `DOCLING` (local processing, supports PDF, Office docs, images, HTML, CSV)                                                  |
 | UNSTRUCTURED_API_KEY       | API key for Unstructured.io service for document parsing (required if ETL_SERVICE=UNSTRUCTURED)                                                                                           |
 | LLAMA_CLOUD_API_KEY        | API key for LlamaCloud service for document parsing (required if ETL_SERVICE=LLAMACLOUD)                                                                                                  |
--- a/surfsense_web/contracts/enums/connectorIcons.tsx
+++ b/surfsense_web/contracts/enums/connectorIcons.tsx
@ -62,6 +62,8 @@ export const getConnectorIcon = (connectorType: EnumConnectorName | string, clas
 		case EnumConnectorName.WEBCRAWLER_CONNECTOR:
 			return <Globe {...iconProps} />;
 		// Additional cases for non-enum connector types
+		case "CRAWLED_URL":
+			return <Globe {...iconProps} />;
 		case "YOUTUBE_VIDEO":
 			return <IconBrandYoutube {...iconProps} />;
 		case "FILE":
--- a/surfsense_web/hooks/use-connector-edit-page.ts
+++ b/surfsense_web/hooks/use-connector-edit-page.ts
@ -98,6 +98,7 @@ export function useConnectorEditPage(connectorId: number, searchSpaceId: string)
 			LUMA_API_KEY: "",
 			ELASTICSEARCH_API_KEY: "",
 			FIRECRAWL_API_KEY: "",
+			INITIAL_URLS: ""
 		},
 	});

@ -144,6 +145,7 @@ export function useConnectorEditPage(connectorId: number, searchSpaceId: string)
 					LUMA_API_KEY: config.LUMA_API_KEY || "",
 					ELASTICSEARCH_API_KEY: config.ELASTICSEARCH_API_KEY || "",
 					FIRECRAWL_API_KEY: config.FIRECRAWL_API_KEY || "",
+					INITIAL_URLS: config.INITIAL_URLS || ""
 				});
 				if (currentConnector.connector_type === "GITHUB_CONNECTOR") {
 					const savedRepos = config.repo_full_names || [];
@ -472,16 +474,28 @@ export function useConnectorEditPage(connectorId: number, searchSpaceId: string)
 					}
 					break;
 				case "WEBCRAWLER_CONNECTOR":
-					if (formData.FIRECRAWL_API_KEY !== originalConfig.FIRECRAWL_API_KEY) {
+					if (
+						formData.FIRECRAWL_API_KEY !== originalConfig.FIRECRAWL_API_KEY ||
+						formData.INITIAL_URLS !== originalConfig.INITIAL_URLS
+					) {
+						newConfig = {};
+						
 						if (formData.FIRECRAWL_API_KEY && formData.FIRECRAWL_API_KEY.trim()) {
 							if (!formData.FIRECRAWL_API_KEY.startsWith("fc-")) {
 								toast.warning("Firecrawl API keys typically start with 'fc-'. Please verify your key.");
 							}
-							newConfig = { FIRECRAWL_API_KEY: formData.FIRECRAWL_API_KEY };
-						} else {
-							newConfig = {};
+							newConfig.FIRECRAWL_API_KEY = formData.FIRECRAWL_API_KEY.trim();
+						} else if (originalConfig.FIRECRAWL_API_KEY) {
 							toast.info("Firecrawl API key removed. Web crawler will use AsyncChromiumLoader as fallback.");
 						}
+
+						if (formData.INITIAL_URLS !== undefined) {
+							if (formData.INITIAL_URLS && formData.INITIAL_URLS.trim()) {
+								newConfig.INITIAL_URLS = formData.INITIAL_URLS.trim();
+							} else if (originalConfig.INITIAL_URLS) {
+								toast.info("URLs removed from crawler configuration.");
+							}
+						}
 					}
 					break;
 			}
@ -579,6 +593,7 @@ export function useConnectorEditPage(connectorId: number, searchSpaceId: string)
 						);
 					} else if (connector.connector_type == "WEBCRAWLER_CONNECTOR") {
 						editForm.setValue("FIRECRAWL_API_KEY",newlySavedConfig.FIRECRAWL_API_KEY || "");
+						editForm.setValue("INITIAL_URLS", newlySavedConfig.INITIAL_URLS || "");
 					}
 				}
 				if (connector.connector_type === "GITHUB_CONNECTOR") {
--- a/surfsense_web/lib/connectors/utils.ts
+++ b/surfsense_web/lib/connectors/utils.ts
@ -18,7 +18,7 @@ export const getConnectorTypeDisplay = (type: string): string => {
 		AIRTABLE_CONNECTOR: "Airtable",
 		LUMA_CONNECTOR: "Luma",
 		ELASTICSEARCH_CONNECTOR: "Elasticsearch",
-		WEBCRAWLER_CONNECTOR: "Web Crawler",
+		WEBCRAWLER_CONNECTOR: "Web Pages",
 	};
 	return typeMap[type] || type;
 };
--- a/surfsense_web/messages/en.json
+++ b/surfsense_web/messages/en.json
@ -332,7 +332,7 @@
 		"calendar_desc": "Connect to Google Calendar to search events, meetings and schedules.",
 		"gmail_desc": "Connect to your Gmail account to search through your emails.",
 		"zoom_desc": "Connect to Zoom to access meeting recordings and transcripts.",
-		"webcrawler_desc": "Scrape web pages using FireCrawl."
+		"webcrawler_desc": "Crawl web pages"
 	},
 	"upload_documents": {
 		"title": "Upload Documents",