refactor: Update GitHub connector to use gitingest CLI

- Refactored GitHubConnector to utilize gitingest CLI via subprocess, improving performance and avoiding async issues with Celery.
- Updated ingestion method to handle repository digests more efficiently, including error handling for subprocess execution.
- Adjusted GitHub indexer to call the new synchronous ingestion method.
- Clarified documentation regarding the optional nature of the Personal Access Token for public repositories.
This commit is contained in:
Anish Sarkar 2026-01-20 23:24:33 +05:30
parent 49b8a46d10
commit 35888144eb
8 changed files with 221 additions and 256 deletions

View file

@ -1,130 +1,21 @@
"""
GitHub connector using gitingest for efficient repository digestion.
GitHub connector using gitingest CLI for efficient repository digestion.
This connector replaces the previous file-by-file approach with a single
digest generation per repository, dramatically reducing LLM API calls.
This connector uses subprocess to call gitingest CLI, completely isolating
it from any Python event loop/async complexity that can cause hangs in Celery.
"""
import logging
import os
import subprocess
import tempfile
from dataclasses import dataclass
from gitingest import ingest_async
logger = logging.getLogger(__name__)
# Maximum file size in bytes (5MB)
MAX_FILE_SIZE = 5 * 1024 * 1024
# Default patterns to exclude (recommended approach for comprehensive analysis)
# Using only exclude_patterns ensures we don't miss any relevant file types
DEFAULT_EXCLUDE_PATTERNS = [
# Dependencies
"node_modules/*",
"vendor/*",
"bower_components/*",
".pnpm/*",
# Build artifacts / Caches
"build/*",
"dist/*",
"target/*",
"out/*",
"__pycache__/*",
"*.pyc",
".cache/*",
".next/*",
".nuxt/*",
# Virtual environments
"venv/*",
".venv/*",
"env/*",
".env/*",
# IDE/Editor config
".vscode/*",
".idea/*",
".project",
".settings/*",
"*.swp",
"*.swo",
# Version control
".git/*",
".svn/*",
".hg/*",
# Temporary / Logs
"tmp/*",
"temp/*",
"logs/*",
"*.log",
# Lock files (usually not needed for understanding code)
"package-lock.json",
"pnpm-lock.yaml",
"yarn.lock",
"uv.lock",
"Gemfile.lock",
"poetry.lock",
"Cargo.lock",
"composer.lock",
# Binary/media files
"*.png",
"*.jpg",
"*.jpeg",
"*.gif",
"*.ico",
"*.svg",
"*.webp",
"*.bmp",
"*.tiff",
"*.woff",
"*.woff2",
"*.ttf",
"*.eot",
"*.otf",
"*.mp3",
"*.mp4",
"*.wav",
"*.ogg",
"*.webm",
"*.avi",
"*.mov",
"*.pdf",
"*.doc",
"*.docx",
"*.xls",
"*.xlsx",
"*.ppt",
"*.pptx",
"*.zip",
"*.tar",
"*.tar.gz",
"*.tgz",
"*.rar",
"*.7z",
"*.exe",
"*.dll",
"*.so",
"*.dylib",
"*.bin",
"*.obj",
"*.o",
"*.a",
"*.lib",
# Minified files
"*.min.js",
"*.min.css",
# Source maps
"*.map",
# Database files
"*.db",
"*.sqlite",
"*.sqlite3",
# Coverage reports
"coverage/*",
".coverage",
"htmlcov/*",
".nyc_output/*",
# Test snapshots (can be large)
"__snapshots__/*",
]
@dataclass
class RepositoryDigest:
@ -149,21 +40,19 @@ class RepositoryDigest:
class GitHubConnector:
"""
Connector for ingesting GitHub repositories using gitingest.
Connector for ingesting GitHub repositories using gitingest CLI.
This connector efficiently processes entire repositories into a single
digest, reducing the number of API calls and LLM invocations compared
to file-by-file processing.
Uses subprocess to run gitingest, which avoids all async/event loop
issues that can occur when mixing gitingest with Celery workers.
"""
def __init__(self, token: str | None = None):
"""
Initializes the GitHub connector.
Initialize the GitHub connector.
Args:
token: Optional GitHub Personal Access Token (PAT).
Only required for private repositories.
Public repositories can be ingested without a token.
"""
self.token = token if token and token.strip() else None
if self.token:
@ -171,72 +60,104 @@ class GitHubConnector:
else:
logger.info("GitHub connector initialized without token (public repos only).")
async def ingest_repository(
def ingest_repository(
self,
repo_full_name: str,
branch: str | None = None,
include_patterns: list[str] | None = None,
exclude_patterns: list[str] | None = None,
max_file_size: int = MAX_FILE_SIZE,
) -> RepositoryDigest | None:
"""
Ingest an entire repository and return a digest.
Ingest a repository using gitingest CLI via subprocess.
This approach completely isolates gitingest from Python's event loop,
avoiding any async/Celery conflicts.
Args:
repo_full_name: The full name of the repository (e.g., 'owner/repo').
branch: Optional specific branch or tag to ingest.
include_patterns: Optional list of glob patterns for files to include.
If None, includes all files (recommended).
exclude_patterns: Optional list of glob patterns for files to exclude.
If None, uses DEFAULT_EXCLUDE_PATTERNS.
max_file_size: Maximum file size in bytes to include (default 5MB).
max_file_size: Maximum file size in bytes to include.
Returns:
RepositoryDigest containing the summary, tree structure, and content,
or None if ingestion fails.
RepositoryDigest or None if ingestion fails.
"""
repo_url = f"https://github.com/{repo_full_name}"
# Use only exclude_patterns by default (recommended for comprehensive analysis)
# This ensures we don't miss any relevant file types
exclude_pats = exclude_patterns if exclude_patterns is not None else DEFAULT_EXCLUDE_PATTERNS
logger.info(f"Starting gitingest for repository: {repo_full_name}")
logger.info(f"Starting gitingest CLI for repository: {repo_full_name}")
try:
# Build kwargs dynamically
ingest_kwargs = {
"max_file_size": max_file_size,
"exclude_patterns": exclude_pats,
"include_gitignored": False,
"include_submodules": False,
}
# Create a temporary file for output
with tempfile.NamedTemporaryFile(
mode="w", suffix=".txt", delete=False
) as tmp_file:
output_path = tmp_file.name
# Only add token if provided (required only for private repos)
if self.token:
ingest_kwargs["token"] = self.token
# Build the gitingest CLI command
cmd = [
"gitingest",
repo_url,
"--output", output_path,
"--max-size", str(max_file_size),
# Common exclude patterns
"-e", "node_modules/*",
"-e", "vendor/*",
"-e", ".git/*",
"-e", "__pycache__/*",
"-e", "dist/*",
"-e", "build/*",
"-e", "*.lock",
"-e", "package-lock.json",
]
# Only add branch if specified
# Add branch if specified
if branch:
ingest_kwargs["branch"] = branch
cmd.extend(["--branch", branch])
# Only add include_patterns if explicitly provided
if include_patterns is not None:
ingest_kwargs["include_patterns"] = include_patterns
# Set up environment with token if provided
env = os.environ.copy()
if self.token:
env["GITHUB_TOKEN"] = self.token
summary, tree, content = await ingest_async(repo_url, **ingest_kwargs)
logger.info(f"Running gitingest CLI: {' '.join(cmd[:5])}...")
if not content or not content.strip():
logger.warning(
f"No content retrieved from repository: {repo_full_name}"
)
# Run gitingest as subprocess with timeout
result = subprocess.run(
cmd,
env=env,
capture_output=True,
text=True,
timeout=900, # 15 minute timeout (900 seconds)
)
if result.returncode != 0:
logger.error(f"gitingest failed: {result.stderr}")
# Clean up temp file
if os.path.exists(output_path):
os.unlink(output_path)
return None
# Read the output file
if not os.path.exists(output_path):
logger.error("gitingest did not create output file")
return None
with open(output_path, encoding="utf-8") as f:
full_content = f.read()
# Clean up temp file
os.unlink(output_path)
if not full_content or not full_content.strip():
logger.warning(f"No content retrieved from repository: {repo_full_name}")
return None
# Parse the gitingest output
# The output format is: summary + tree + content
# We'll extract what we can
digest = RepositoryDigest(
repo_full_name=repo_full_name,
summary=summary,
tree=tree,
content=content,
summary=f"Repository: {repo_full_name}",
tree="", # gitingest CLI combines everything into one file
content=full_content,
branch=branch,
)
@ -246,50 +167,70 @@ class GitHubConnector:
)
return digest
except subprocess.TimeoutExpired:
logger.error(f"gitingest timed out for repository: {repo_full_name}")
return None
except FileNotFoundError:
logger.error(
"gitingest CLI not found. Falling back to Python library."
)
# Fall back to Python library
return self._ingest_with_python_library(repo_full_name, branch, max_file_size)
except Exception as e:
logger.error(f"Failed to ingest repository {repo_full_name}: {e}")
return None
async def ingest_repositories(
def _ingest_with_python_library(
self,
repo_full_names: list[str],
repo_full_name: str,
branch: str | None = None,
include_patterns: list[str] | None = None,
exclude_patterns: list[str] | None = None,
max_file_size: int = MAX_FILE_SIZE,
) -> list[RepositoryDigest]:
) -> RepositoryDigest | None:
"""
Ingest multiple repositories and return their digests.
Args:
repo_full_names: List of repository full names (e.g., ['owner/repo1', 'owner/repo2']).
branch: Optional specific branch or tag to ingest (applied to all repos).
include_patterns: Optional list of glob patterns for files to include.
exclude_patterns: Optional list of glob patterns for files to exclude.
max_file_size: Maximum file size in bytes to include.
Returns:
List of RepositoryDigest objects for successfully ingested repositories.
Fallback: Ingest using the Python library directly.
"""
digests = []
from gitingest import ingest
for repo_full_name in repo_full_names:
if not repo_full_name or not isinstance(repo_full_name, str):
logger.warning(f"Skipping invalid repository entry: {repo_full_name}")
continue
repo_url = f"https://github.com/{repo_full_name}"
digest = await self.ingest_repository(
logger.info(f"Using Python gitingest library for: {repo_full_name}")
try:
kwargs = {
"max_file_size": max_file_size,
"exclude_patterns": [
"node_modules/*",
"vendor/*",
".git/*",
"__pycache__/*",
"dist/*",
"build/*",
"*.lock",
"package-lock.json",
],
"include_gitignored": False,
"include_submodules": False,
}
if self.token:
kwargs["token"] = self.token
if branch:
kwargs["branch"] = branch
summary, tree, content = ingest(repo_url, **kwargs)
if not content or not content.strip():
logger.warning(f"No content from {repo_full_name}")
return None
return RepositoryDigest(
repo_full_name=repo_full_name,
summary=summary,
tree=tree,
content=content,
branch=branch,
include_patterns=include_patterns,
exclude_patterns=exclude_patterns,
max_file_size=max_file_size,
)
if digest:
digests.append(digest)
logger.info(
f"Ingested {len(digests)} out of {len(repo_full_names)} repositories."
)
return digests
except Exception as e:
logger.error(f"Python library failed for {repo_full_name}: {e}")
return None

View file

@ -173,8 +173,13 @@ async def index_github_repos(
logger.info(f"Ingesting repository: {repo_full_name}")
try:
# Ingest the entire repository
digest = await github_client.ingest_repository(repo_full_name)
# Run gitingest via subprocess (isolated from event loop)
# Using to_thread to not block the async database operations
import asyncio
digest = await asyncio.to_thread(
github_client.ingest_repository, repo_full_name
)
if not digest:
logger.warning(

View file

@ -530,7 +530,10 @@ def validate_connector_config(
# "validators": {},
# },
"GITHUB_CONNECTOR": {
"required": ["GITHUB_PAT", "repo_full_names"],
# GITHUB_PAT is optional - only required for private repositories
# Public repositories can be indexed without authentication
"required": ["repo_full_names"],
"optional": ["GITHUB_PAT"], # Optional - only needed for private repos
"validators": {
"repo_full_names": lambda: validate_list_field(
"repo_full_names", "repo_full_names"

View file

@ -24,11 +24,6 @@
"enabled": true,
"status": "warning",
"statusMessage": "Some requests may be blocked if not using Firecrawl."
},
"GITHUB_CONNECTOR": {
"enabled": false,
"status": "maintenance",
"statusMessage": "Rework in progress."
}
},
"globalSettings": {

View file

@ -96,6 +96,7 @@ export const GithubConnectForm: FC<ConnectFormProps> = ({ onSubmit, isSubmitting
repo_full_names: repoList,
},
is_indexable: true,
is_active: true,
last_indexed_at: null,
periodic_indexing_enabled: periodicEnabled,
indexing_frequency_minutes: periodicEnabled ? parseInt(frequencyMinutes, 10) : null,
@ -119,16 +120,16 @@ export const GithubConnectForm: FC<ConnectFormProps> = ({ onSubmit, isSubmitting
<AlertTitle className="text-xs sm:text-sm">Personal Access Token (Optional)</AlertTitle>
<AlertDescription className="text-[10px] sm:text-xs !pl-0">
A GitHub PAT is only required for private repositories. Public repos work without a
token. Create one from{" "}
token. {" "}
<a
href="https://github.com/settings/tokens"
href="https://github.com/settings/tokens/new?description=surfsense&scopes=repo"
target="_blank"
rel="noopener noreferrer"
className="font-medium underline underline-offset-4"
>
GitHub Settings
Get your token
</a>{" "}
if needed.
.
</AlertDescription>
</div>
</Alert>
@ -324,20 +325,21 @@ export const GithubConnectForm: FC<ConnectFormProps> = ({ onSubmit, isSubmitting
<div>
<h3 className="text-sm sm:text-base font-semibold mb-2">How it works</h3>
<p className="text-[10px] sm:text-xs text-muted-foreground">
The GitHub connector uses a Personal Access Token (PAT) to authenticate with the
GitHub API. You provide a comma-separated list of repository full names (e.g.,
"owner/repo1, owner/repo2") that you want to index. The connector indexes relevant
files (code, markdown, text) from the selected repositories.
The GitHub connector ingests entire repositories in one pass using gitingest,
making it highly efficient. Provide a comma-separated list of repository full
names (e.g., "owner/repo1, owner/repo2") to index.
</p>
<ul className="mt-2 list-disc pl-5 text-[10px] sm:text-xs text-muted-foreground space-y-1">
<li>
The connector indexes files based on common code and documentation extensions.
<strong>Public repos:</strong> No authentication required.
</li>
<li>Large files (over 1MB) are skipped during indexing.</li>
<li>Only specified repositories are indexed.</li>
<li>
Indexing runs periodically (check connector settings for frequency) to keep
content up-to-date.
<strong>Private repos:</strong> Requires a GitHub Personal Access Token (PAT).
</li>
<li>Indexes code, documentation, and configuration files.</li>
<li>Large files (over 5MB) and binary files are automatically skipped.</li>
<li>
Periodic sync detects changes and only re-indexes when content has changed.
</li>
</ul>
</div>
@ -348,19 +350,23 @@ export const GithubConnectForm: FC<ConnectFormProps> = ({ onSubmit, isSubmitting
<Alert className="bg-slate-400/5 dark:bg-white/5 border-slate-400/20 mb-4">
<Info className="h-3 w-3 sm:h-4 sm:w-4" />
<AlertTitle className="text-[10px] sm:text-xs">
Personal Access Token Required
Personal Access Token (Optional)
</AlertTitle>
<AlertDescription className="text-[9px] sm:text-[10px]">
You'll need a GitHub PAT with the appropriate scopes (e.g., 'repo') to fetch
repositories. The PAT will be stored securely to enable indexing.
A GitHub PAT is only needed for <strong>private repositories</strong>. Public
repos can be indexed without authentication. If you need to access private
repos, create a PAT with the 'repo' scope.
</AlertDescription>
</Alert>
<div className="space-y-4 sm:space-y-6">
<div>
<h4 className="text-[10px] sm:text-xs font-medium mb-2">
Step 1: Generate GitHub PAT
For Private Repositories Only: Generate GitHub PAT
</h4>
<p className="text-[10px] sm:text-xs text-muted-foreground mb-2">
Skip this step if you're only indexing public repositories.
</p>
<ol className="list-decimal pl-5 space-y-2 text-[10px] sm:text-xs text-muted-foreground">
<li>
Go to your GitHub{" "}
@ -375,46 +381,36 @@ export const GithubConnectForm: FC<ConnectFormProps> = ({ onSubmit, isSubmitting
</li>
<li>
Click on <strong>Personal access tokens</strong>, then choose{" "}
<strong>Tokens (classic)</strong> or <strong>Fine-grained tokens</strong>{" "}
(recommended if available).
<strong>Tokens (classic)</strong> or <strong>Fine-grained tokens</strong>.
</li>
<li>
Click <strong>Generate new token</strong> (and choose the appropriate type).
Click <strong>Generate new token</strong>.
</li>
<li>Give your token a descriptive name (e.g., "SurfSense Connector").</li>
<li>Set an expiration date for the token (recommended for security).</li>
<li>
Under <strong>Select scopes</strong> (for classic tokens) or{" "}
<strong>Repository access</strong> (for fine-grained), grant the necessary
permissions. At minimum, the <strong>`repo`</strong> scope (or equivalent
read access to repositories for fine-grained tokens) is required to read
repository content.
Grant the <strong>`repo`</strong> scope (for classic tokens) or read access
to the specific repositories you want to index (for fine-grained tokens).
</li>
<li>
Click <strong>Generate token</strong>.
</li>
<li>
<strong>Important:</strong> Copy your new PAT immediately. You won't be able
to see it again after leaving the page.
Click <strong>Generate token</strong> and copy it immediately.
</li>
</ol>
</div>
<div>
<h4 className="text-[10px] sm:text-xs font-medium mb-2">
Step 2: Specify repositories
Specify Repositories
</h4>
<p className="text-[10px] sm:text-xs text-muted-foreground mb-3">
Enter a comma-separated list of repository full names in the format
"owner/repo1, owner/repo2". The connector will index files from only the
specified repositories.
"owner/repo1, owner/repo2". For example: "facebook/react, vercel/next.js".
</p>
<Alert className="bg-slate-400/5 dark:bg-white/5 border-slate-400/20">
<Info className="h-3 w-3 sm:h-4 sm:w-4" />
<AlertTitle className="text-[10px] sm:text-xs">Repository Access</AlertTitle>
<AlertTitle className="text-[10px] sm:text-xs">Public vs Private</AlertTitle>
<AlertDescription className="text-[9px] sm:text-[10px]">
Make sure your PAT has access to all repositories you want to index. Private
repositories require appropriate permissions.
Public repositories work without a PAT. For private repositories, ensure
your PAT has access to the repos you want to index.
</AlertDescription>
</Alert>
</div>
@ -424,36 +420,38 @@ export const GithubConnectForm: FC<ConnectFormProps> = ({ onSubmit, isSubmitting
<div className="space-y-4">
<div>
<h3 className="text-sm sm:text-base font-semibold mb-2">Indexing</h3>
<h3 className="text-sm sm:text-base font-semibold mb-2">Quick Start</h3>
<ol className="list-decimal pl-5 space-y-2 text-[10px] sm:text-xs text-muted-foreground mb-4">
<li>
Navigate to the Connector Dashboard and select the <strong>GitHub</strong>{" "}
Connector.
Enter the <strong>Repository Names</strong> you want to index (e.g.,
"facebook/react, vercel/next.js").
</li>
<li>
Enter your <strong>GitHub Personal Access Token</strong> in the form field.
<strong>(Optional)</strong> Add a GitHub PAT if indexing private repositories.
</li>
<li>
Enter a comma-separated list of <strong>Repository Names</strong> (e.g.,
"owner/repo1, owner/repo2").
Click <strong>Connect GitHub</strong> to start indexing.
</li>
<li>
Click <strong>Connect</strong> to establish the connection.
Enable <strong>Periodic Sync</strong> to automatically detect and index
changes.
</li>
<li>Once connected, your GitHub repositories will be indexed automatically.</li>
</ol>
<Alert className="bg-slate-400/5 dark:bg-white/5 border-slate-400/20">
<Info className="h-3 w-3 sm:h-4 sm:w-4" />
<AlertTitle className="text-[10px] sm:text-xs">What Gets Indexed</AlertTitle>
<AlertDescription className="text-[9px] sm:text-[10px]">
<p className="mb-2">The GitHub connector indexes the following data:</p>
<p className="mb-2">The GitHub connector indexes:</p>
<ul className="list-disc pl-5 space-y-1">
<li>Code files from selected repositories</li>
<li>README files and Markdown documentation</li>
<li>Common text-based file formats</li>
<li>Repository metadata and structure</li>
<li>All code files (Python, JavaScript, TypeScript, etc.)</li>
<li>Documentation (README, Markdown, text files)</li>
<li>Configuration files (JSON, YAML, TOML, etc.)</li>
<li>Repository structure and file tree</li>
</ul>
<p className="mt-2">
Binary files, images, and build artifacts are automatically excluded.
</p>
</AlertDescription>
</Alert>
</div>

View file

@ -1,8 +1,9 @@
"use client";
import { KeyRound } from "lucide-react";
import { Info, KeyRound } from "lucide-react";
import type { FC } from "react";
import { useEffect, useState } from "react";
import { Alert, AlertDescription, AlertTitle } from "@/components/ui/alert";
import { Badge } from "@/components/ui/badge";
import { Input } from "@/components/ui/input";
import { Label } from "@/components/ui/label";
@ -79,6 +80,26 @@ export const GithubConfig: FC<GithubConfigProps> = ({
return (
<div className="space-y-6">
<Alert className="bg-slate-400/5 dark:bg-white/5 border-slate-400/20 p-2 sm:p-3 flex items-center [&>svg]:relative [&>svg]:left-0 [&>svg]:top-0 [&>svg+div]:translate-y-0">
<Info className="h-3 w-3 sm:h-4 sm:w-4 shrink-0 ml-1" />
<div className="-ml-1">
<AlertTitle className="text-xs sm:text-sm">Personal Access Token (Optional)</AlertTitle>
<AlertDescription className="text-[10px] sm:text-xs !pl-0">
A GitHub PAT is only required for private repositories. Public repos work without a
token. Create one from{" "}
<a
href="https://github.com/settings/tokens/new?description=surfsense&scopes=repo"
target="_blank"
rel="noopener noreferrer"
className="font-medium underline underline-offset-4"
>
GitHub Settings
</a>{" "}
if needed.
</AlertDescription>
</div>
</Alert>
{/* Connector Name */}
<div className="rounded-xl border border-border bg-slate-400/5 dark:bg-white/5 p-3 sm:p-6 space-y-3 sm:space-y-4">
<div className="space-y-2">
@ -105,7 +126,7 @@ export const GithubConfig: FC<GithubConfigProps> = ({
<div className="space-y-2">
<Label className="flex items-center gap-2 text-xs sm:text-sm">
<KeyRound className="h-4 w-4" />
GitHub Personal Access Token
GitHub Personal Access Token (optional)
</Label>
<Input
type="password"

View file

@ -206,9 +206,10 @@ export const ConnectorEditView: FC<ConnectorEditViewProps> = ({
{/* Date range selector and periodic sync - only shown for indexable connectors */}
{connector.is_indexable && (
<>
{/* Date range selector - not shown for Google Drive (uses folder selection) or Webcrawler (uses config) */}
{/* Date range selector - not shown for Google Drive, Webcrawler, or GitHub (indexes full repo snapshots) */}
{connector.connector_type !== "GOOGLE_DRIVE_CONNECTOR" &&
connector.connector_type !== "WEBCRAWLER_CONNECTOR" && (
connector.connector_type !== "WEBCRAWLER_CONNECTOR" &&
connector.connector_type !== "GITHUB_CONNECTOR" && (
<DateRangeSelector
startDate={startDate}
endDate={endDate}

View file

@ -151,9 +151,10 @@ export const IndexingConfigurationView: FC<IndexingConfigurationViewProps> = ({
{/* Date range selector and periodic sync - only shown for indexable connectors */}
{connector?.is_indexable && (
<>
{/* Date range selector - not shown for Google Drive (uses folder selection) or Webcrawler (uses config) */}
{/* Date range selector - not shown for Google Drive, Webcrawler, or GitHub (indexes full repo snapshots) */}
{config.connectorType !== "GOOGLE_DRIVE_CONNECTOR" &&
config.connectorType !== "WEBCRAWLER_CONNECTOR" && (
config.connectorType !== "WEBCRAWLER_CONNECTOR" &&
config.connectorType !== "GITHUB_CONNECTOR" && (
<DateRangeSelector
startDate={startDate}
endDate={endDate}