select repos when adding gh connector

This commit is contained in:
Adamsmith6300 2025-04-16 19:59:38 -07:00
parent 2b7a1b1082
commit ae8c74a5aa
4 changed files with 330 additions and 138 deletions

View file

@ -9,7 +9,7 @@ POST /search-source-connectors/{connector_id}/index - Index content from a conne
Note: Each user can have only one connector of each type (SERPER_API, TAVILY_API, SLACK_CONNECTOR, NOTION_CONNECTOR).
"""
from fastapi import APIRouter, Depends, HTTPException, Query, BackgroundTasks
from fastapi import APIRouter, Depends, HTTPException, Query, BackgroundTasks, Body
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.future import select
from sqlalchemy.exc import IntegrityError
@ -18,8 +18,9 @@ from app.db import get_async_session, User, SearchSourceConnector, SearchSourceC
from app.schemas import SearchSourceConnectorCreate, SearchSourceConnectorUpdate, SearchSourceConnectorRead
from app.users import current_active_user
from app.utils.check_ownership import check_ownership
from pydantic import ValidationError
from pydantic import ValidationError, BaseModel, Field
from app.tasks.connectors_indexing_tasks import index_slack_messages, index_notion_pages, index_github_repos
from app.connectors.github_connector import GitHubConnector
from datetime import datetime, timezone
import logging
@ -28,6 +29,34 @@ logger = logging.getLogger(__name__)
router = APIRouter()
# --- New Schema for GitHub PAT ---
class GitHubPATRequest(BaseModel):
github_pat: str = Field(..., description="GitHub Personal Access Token")
# --- New Endpoint to list GitHub Repositories ---
@router.post("/github/repositories/", response_model=List[Dict[str, Any]])
async def list_github_repositories(
pat_request: GitHubPATRequest,
user: User = Depends(current_active_user) # Ensure the user is logged in
):
"""
Fetches a list of repositories accessible by the provided GitHub PAT.
The PAT is used for this request only and is not stored.
"""
try:
# Initialize GitHubConnector with the provided PAT
github_client = GitHubConnector(token=pat_request.github_pat)
# Fetch repositories
repositories = github_client.get_user_repositories()
return repositories
except ValueError as e:
# Handle invalid token error specifically
logger.error(f"GitHub PAT validation failed for user {user.id}: {str(e)}")
raise HTTPException(status_code=400, detail=f"Invalid GitHub PAT: {str(e)}")
except Exception as e:
logger.error(f"Failed to fetch GitHub repositories for user {user.id}: {str(e)}")
raise HTTPException(status_code=500, detail="Failed to fetch GitHub repositories.")
@router.post("/search-source-connectors/", response_model=SearchSourceConnectorRead)
async def create_search_source_connector(
connector: SearchSourceConnectorCreate,

View file

@ -4,7 +4,6 @@ from typing import Dict, Any
from pydantic import BaseModel, field_validator
from .base import IDModel, TimestampModel
from app.db import SearchSourceConnectorType
from fastapi import HTTPException
class SearchSourceConnectorBase(BaseModel):
name: str
@ -59,8 +58,8 @@ class SearchSourceConnectorBase(BaseModel):
raise ValueError("NOTION_INTEGRATION_TOKEN cannot be empty")
elif connector_type == SearchSourceConnectorType.GITHUB_CONNECTOR:
# For GITHUB_CONNECTOR, only allow GITHUB_PAT
allowed_keys = ["GITHUB_PAT"]
# For GITHUB_CONNECTOR, only allow GITHUB_PAT and repo_full_names
allowed_keys = ["GITHUB_PAT", "repo_full_names"]
if set(config.keys()) != set(allowed_keys):
raise ValueError(f"For GITHUB_CONNECTOR connector type, config must only contain these keys: {allowed_keys}")
@ -68,6 +67,11 @@ class SearchSourceConnectorBase(BaseModel):
if not config.get("GITHUB_PAT"):
raise ValueError("GITHUB_PAT cannot be empty")
# Ensure the repo_full_names is present and is a non-empty list
repo_full_names = config.get("repo_full_names")
if not isinstance(repo_full_names, list) or not repo_full_names:
raise ValueError("repo_full_names must be a non-empty list of strings")
return config
class SearchSourceConnectorCreate(SearchSourceConnectorBase):

View file

@ -1,4 +1,4 @@
from typing import Optional, List, Dict, Any, Tuple
from typing import Optional, Tuple
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.exc import SQLAlchemyError
from sqlalchemy.future import select
@ -626,24 +626,26 @@ async def index_github_repos(
if not connector:
return 0, f"Connector with ID {connector_id} not found or is not a GitHub connector"
# 2. Get the GitHub PAT from the connector config
# 2. Get the GitHub PAT and selected repositories from the connector config
github_pat = connector.config.get("GITHUB_PAT")
repo_full_names_to_index = connector.config.get("repo_full_names")
if not github_pat:
return 0, "GitHub Personal Access Token (PAT) not found in connector config"
if not repo_full_names_to_index or not isinstance(repo_full_names_to_index, list):
return 0, "'repo_full_names' not found or is not a list in connector config"
# 3. Initialize GitHub connector client
try:
github_client = GitHubConnector(token=github_pat)
except ValueError as e:
return 0, f"Failed to initialize GitHub client: {str(e)}"
# 4. Get list of accessible repositories
repositories = github_client.get_user_repositories()
if not repositories:
logger.info("No accessible GitHub repositories found for the provided token.")
return 0, "No accessible GitHub repositories found."
logger.info(f"Found {len(repositories)} repositories to potentially index.")
# 4. Validate selected repositories
# For simplicity, we'll proceed with the list provided.
# If a repo is inaccessible, get_repository_files will likely fail gracefully later.
logger.info(f"Starting indexing for {len(repo_full_names_to_index)} selected repositories.")
# 5. Get existing documents for this search space and connector type to prevent duplicates
existing_docs_result = await session.execute(
@ -658,11 +660,10 @@ async def index_github_repos(
existing_docs_lookup = {doc.document_metadata.get("full_path"): doc for doc in existing_docs if doc.document_metadata.get("full_path")}
logger.info(f"Found {len(existing_docs_lookup)} existing GitHub documents in database for search space {search_space_id}")
# 6. Iterate through repositories and index files
for repo_info in repositories:
repo_full_name = repo_info.get("full_name")
if not repo_full_name:
logger.warning(f"Skipping repository with missing full_name: {repo_info.get('name')}")
# 6. Iterate through selected repositories and index files
for repo_full_name in repo_full_names_to_index:
if not repo_full_name or not isinstance(repo_full_name, str):
logger.warning(f"Skipping invalid repository entry: {repo_full_name}")
continue
logger.info(f"Processing repository: {repo_full_name}")

View file

@ -7,7 +7,7 @@ import { zodResolver } from "@hookform/resolvers/zod";
import { useForm } from "react-hook-form";
import * as z from "zod";
import { toast } from "sonner";
import { ArrowLeft, Check, Info, Loader2, Github } from "lucide-react";
import { ArrowLeft, Check, Info, Loader2, Github, CircleAlert, ListChecks } from "lucide-react";
// Assuming useSearchSourceConnectors hook exists and works similarly
import { useSearchSourceConnectors } from "@/hooks/useSearchSourceConnectors";
@ -42,9 +42,10 @@ import {
AccordionTrigger,
} from "@/components/ui/accordion";
import { Tabs, TabsContent, TabsList, TabsTrigger } from "@/components/ui/tabs";
import { Checkbox } from "@/components/ui/checkbox";
// Define the form schema with Zod for GitHub
const githubConnectorFormSchema = z.object({
// Define the form schema with Zod for GitHub PAT entry step
const githubPatFormSchema = z.object({
name: z.string().min(3, {
message: "Connector name must be at least 3 characters.",
}),
@ -58,61 +59,144 @@ const githubConnectorFormSchema = z.object({
});
// Define the type for the form values
type GithubConnectorFormValues = z.infer<typeof githubConnectorFormSchema>;
type GithubPatFormValues = z.infer<typeof githubPatFormSchema>;
// Type for fetched GitHub repositories
interface GithubRepo {
id: number;
name: string;
full_name: string;
private: boolean;
url: string;
description: string | null;
last_updated: string | null;
}
export default function GithubConnectorPage() {
const router = useRouter();
const params = useParams();
const searchSpaceId = params.search_space_id as string;
const [isSubmitting, setIsSubmitting] = useState(false);
const { createConnector } = useSearchSourceConnectors(); // Assuming this hook exists
const [step, setStep] = useState<'enter_pat' | 'select_repos'>('enter_pat');
const [isFetchingRepos, setIsFetchingRepos] = useState(false);
const [isCreatingConnector, setIsCreatingConnector] = useState(false);
const [repositories, setRepositories] = useState<GithubRepo[]>([]);
const [selectedRepos, setSelectedRepos] = useState<string[]>([]);
const [connectorName, setConnectorName] = useState<string>("GitHub Connector");
const [validatedPat, setValidatedPat] = useState<string>(""); // Store the validated PAT
// Initialize the form
const form = useForm<GithubConnectorFormValues>({
resolver: zodResolver(githubConnectorFormSchema),
const { createConnector } = useSearchSourceConnectors();
// Initialize the form for PAT entry
const form = useForm<GithubPatFormValues>({
resolver: zodResolver(githubPatFormSchema),
defaultValues: {
name: "GitHub Connector",
name: connectorName,
github_pat: "",
},
});
// Handle form submission
const onSubmit = async (values: GithubConnectorFormValues) => {
setIsSubmitting(true);
// Function to fetch repositories using the new backend endpoint
const fetchRepositories = async (values: GithubPatFormValues) => {
setIsFetchingRepos(true);
setConnectorName(values.name); // Store the name
setValidatedPat(values.github_pat); // Store the PAT temporarily
try {
const token = localStorage.getItem('surfsense_bearer_token');
if (!token) {
throw new Error('No authentication token found');
}
const response = await fetch(
`${process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL}/api/v1/github/repositories/`,
{
method: 'POST',
headers: {
'Content-Type': 'application/json',
'Authorization': `Bearer ${token}`
},
body: JSON.stringify({ github_pat: values.github_pat })
}
);
if (!response.ok) {
const errorData = await response.json();
throw new Error(errorData.detail || `Failed to fetch repositories: ${response.statusText}`);
}
const data: GithubRepo[] = await response.json();
setRepositories(data);
setStep('select_repos'); // Move to the next step
toast.success(`Found ${data.length} repositories.`);
} catch (error) {
console.error("Error fetching GitHub repositories:", error);
const errorMessage = error instanceof Error ? error.message : "Failed to fetch repositories. Please check the PAT and try again.";
toast.error(errorMessage);
} finally {
setIsFetchingRepos(false);
}
};
// Handle final connector creation
const handleCreateConnector = async () => {
if (selectedRepos.length === 0) {
toast.warning("Please select at least one repository to index.");
return;
}
setIsCreatingConnector(true);
try {
await createConnector({
name: values.name,
name: connectorName, // Use the stored name
connector_type: "GITHUB_CONNECTOR",
config: {
GITHUB_PAT: values.github_pat,
GITHUB_PAT: validatedPat, // Use the stored validated PAT
repo_full_names: selectedRepos, // Add the selected repo names
},
is_indexable: true, // GitHub connector is indexable
last_indexed_at: null, // New connector hasn't been indexed
is_indexable: true,
last_indexed_at: null,
});
toast.success("GitHub connector created successfully!");
// Navigate back to connectors management page (or the add page)
router.push(`/dashboard/${searchSpaceId}/connectors`);
} catch (error) { // Added type check for error
} catch (error) {
console.error("Error creating GitHub connector:", error);
// Display specific backend error message if available
const errorMessage = error instanceof Error ? error.message : "Failed to create GitHub connector. Please check the PAT and permissions.";
const errorMessage = error instanceof Error ? error.message : "Failed to create GitHub connector.";
toast.error(errorMessage);
} finally {
setIsSubmitting(false);
setIsCreatingConnector(false);
}
};
// Handle checkbox changes
const handleRepoSelection = (repoFullName: string, checked: boolean) => {
setSelectedRepos(prev =>
checked
? [...prev, repoFullName]
: prev.filter(name => name !== repoFullName)
);
};
return (
<div className="container mx-auto py-8 max-w-3xl">
<Button
variant="ghost"
className="mb-6"
onClick={() => router.push(`/dashboard/${searchSpaceId}/connectors/add`)}
onClick={() => {
if (step === 'select_repos') {
// Go back to PAT entry, clear sensitive/fetched data
setStep('enter_pat');
setRepositories([]);
setSelectedRepos([]);
setValidatedPat("");
// Reset form PAT field, keep name
form.reset({ name: connectorName, github_pat: "" });
} else {
router.push(`/dashboard/${searchSpaceId}/connectors/add`);
}
}}
>
<ArrowLeft className="mr-2 h-4 w-4" />
Back to Add Connectors
{step === 'select_repos' ? "Back to PAT Entry" : "Back to Add Connectors"}
</Button>
<motion.div
@ -129,30 +213,38 @@ export default function GithubConnectorPage() {
<TabsContent value="connect">
<Card className="border-2 border-border">
<CardHeader>
<CardTitle className="text-2xl font-bold flex items-center gap-2"><Github className="h-6 w-6" /> Connect GitHub Account</CardTitle>
<CardTitle className="text-2xl font-bold flex items-center gap-2">
{step === 'enter_pat' ? <Github className="h-6 w-6" /> : <ListChecks className="h-6 w-6" />}
{step === 'enter_pat' ? "Connect GitHub Account" : "Select Repositories to Index"}
</CardTitle>
<CardDescription>
Integrate with GitHub using a Personal Access Token (PAT) to search and retrieve information from accessible repositories. This connector can index your code and documentation.
{step === 'enter_pat'
? "Provide a name and GitHub Personal Access Token (PAT) to fetch accessible repositories."
: `Select which repositories you want SurfSense to index for search. Found ${repositories.length} repositories accessible via your PAT.`
}
</CardDescription>
</CardHeader>
<Form {...form}>
{step === 'enter_pat' && (
<CardContent>
<Alert className="mb-6 bg-muted">
<Info className="h-4 w-4" />
<AlertTitle>GitHub Personal Access Token (PAT) Required</AlertTitle>
<AlertDescription>
You'll need a GitHub PAT with the appropriate scopes (e.g., 'repo') to use this connector. You can create one from your
You'll need a GitHub PAT with the appropriate scopes (e.g., 'repo') to fetch repositories. You can create one from your{' '}
<a
href="https://github.com/settings/personal-access-tokens"
target="_blank"
rel="noopener noreferrer"
className="font-medium underline underline-offset-4 ml-1"
className="font-medium underline underline-offset-4"
>
GitHub Developer Settings
</a>.
</a>. The PAT will be used to fetch repositories and then stored securely to enable indexing.
</AlertDescription>
</Alert>
<Form {...form}>
<form onSubmit={form.handleSubmit(onSubmit)} className="space-y-6">
<form onSubmit={form.handleSubmit(fetchRepositories)} className="space-y-6">
<FormField
control={form.control}
name="name"
@ -184,7 +276,7 @@ export default function GithubConnectorPage() {
/>
</FormControl>
<FormDescription>
Your GitHub PAT will be encrypted and stored securely. Ensure it has the necessary 'repo' scopes.
Enter your GitHub PAT here to fetch your repositories. It will be stored encrypted later.
</FormDescription>
<FormMessage />
</FormItem>
@ -194,32 +286,101 @@ export default function GithubConnectorPage() {
<div className="flex justify-end">
<Button
type="submit"
disabled={isSubmitting}
disabled={isFetchingRepos}
className="w-full sm:w-auto"
>
{isSubmitting ? (
{isFetchingRepos ? (
<>
<Loader2 className="mr-2 h-4 w-4 animate-spin" />
Connecting...
Fetching Repositories...
</>
) : (
<>
<Check className="mr-2 h-4 w-4" />
Connect GitHub
</>
"Fetch Repositories"
)}
</Button>
</div>
</form>
</Form>
</CardContent>
)}
{step === 'select_repos' && (
<CardContent>
{repositories.length === 0 ? (
<Alert variant="destructive">
<CircleAlert className="h-4 w-4" />
<AlertTitle>No Repositories Found</AlertTitle>
<AlertDescription>
No repositories were found or accessible with the provided PAT. Please check the token and its permissions, then go back and try again.
</AlertDescription>
</Alert>
) : (
<div className="space-y-4">
<FormLabel>Repositories ({selectedRepos.length} selected)</FormLabel>
<div className="h-64 w-full rounded-md border p-4 overflow-y-auto">
{repositories.map((repo) => (
<div key={repo.id} className="flex items-center space-x-2 mb-2 py-1">
<Checkbox
id={`repo-${repo.id}`}
checked={selectedRepos.includes(repo.full_name)}
onCheckedChange={(checked) => handleRepoSelection(repo.full_name, !!checked)}
/>
<label
htmlFor={`repo-${repo.id}`}
className="text-sm font-medium leading-none peer-disabled:cursor-not-allowed peer-disabled:opacity-70"
>
{repo.full_name} {repo.private && "(Private)"}
</label>
</div>
))}
</div>
<FormDescription>
Select the repositories you wish to index. Only checked repositories will be processed.
</FormDescription>
<div className="flex justify-between items-center pt-4">
<Button
variant="outline"
onClick={() => {
setStep('enter_pat');
setRepositories([]);
setSelectedRepos([]);
setValidatedPat("");
form.reset({ name: connectorName, github_pat: "" });
}}
>
Back
</Button>
<Button
onClick={handleCreateConnector}
disabled={isCreatingConnector || selectedRepos.length === 0}
className="w-full sm:w-auto"
>
{isCreatingConnector ? (
<>
<Loader2 className="mr-2 h-4 w-4 animate-spin" />
Creating Connector...
</>
) : (
<>
<Check className="mr-2 h-4 w-4" />
Create Connector
</>
)}
</Button>
</div>
</div>
)}
</CardContent>
)}
</Form>
<CardFooter className="flex flex-col items-start border-t bg-muted/50 px-6 py-4">
<h4 className="text-sm font-medium">What you get with GitHub integration:</h4>
<ul className="mt-2 list-disc pl-5 text-sm text-muted-foreground">
<li>Search through code and documentation in your repositories</li>
<li>Search through code and documentation in your selected repositories</li>
<li>Access READMEs, Markdown files, and common code files</li>
<li>Connect your project knowledge directly to your search space</li>
<li>Index your repositories for enhanced search capabilities</li>
<li>Index your selected repositories for enhanced search capabilities</li>
</ul>
</CardFooter>
</Card>
@ -237,27 +398,20 @@ export default function GithubConnectorPage() {
<div>
<h3 className="text-xl font-semibold mb-2">How it works</h3>
<p className="text-muted-foreground">
The GitHub connector uses a Personal Access Token (PAT) to authenticate with the GitHub API. It fetches information about repositories accessible to the token and indexes relevant files (code, markdown, text).
The GitHub connector uses a Personal Access Token (PAT) to authenticate with the GitHub API. First, it fetches a list of repositories accessible to the token. You then select which repositories you want to index. The connector indexes relevant files (code, markdown, text) from only the selected repositories.
</p>
<ul className="mt-2 list-disc pl-5 text-muted-foreground">
<li>The connector indexes files based on common code and documentation extensions.</li>
<li>Large files (over 1MB) are skipped during indexing.</li>
<li>Only selected repositories are indexed.</li>
<li>Indexing runs periodically (check connector settings for frequency) to keep content up-to-date.</li>
</ul>
</div>
<Accordion type="single" collapsible className="w-full">
<AccordionItem value="create_pat">
<AccordionTrigger className="text-lg font-medium">Step 1: Create a GitHub PAT</AccordionTrigger>
<AccordionContent className="space-y-4">
<Alert className="bg-muted">
<Info className="h-4 w-4" />
<AlertTitle>Token Security</AlertTitle>
<AlertDescription>
Treat your PAT like a password. Store it securely and consider using fine-grained tokens if possible.
</AlertDescription>
</Alert>
<AccordionTrigger className="text-lg font-medium">Step 1: Generate GitHub PAT</AccordionTrigger>
<AccordionContent>
<div className="space-y-6">
<div>
<h4 className="font-medium mb-2">Generating a Token:</h4>
@ -280,9 +434,13 @@ export default function GithubConnectorPage() {
<AccordionTrigger className="text-lg font-medium">Step 2: Connect in SurfSense</AccordionTrigger>
<AccordionContent className="space-y-4">
<ol className="list-decimal pl-5 space-y-3">
<li>Paste the copied GitHub PAT into the "GitHub Personal Access Token (PAT)" field on the "Connect GitHub" tab.</li>
<li>Optionally, give the connector a custom name.</li>
<li>Click the <strong>Connect GitHub</strong> button.</li>
<li>Navigate to the "Connect GitHub" tab.</li>
<li>Enter a name for your connector.</li>
<li>Paste the copied GitHub PAT into the "GitHub Personal Access Token (PAT)" field.</li>
<li>Click <strong>Fetch Repositories</strong>.</li>
<li>If the PAT is valid, you'll see a list of your accessible repositories.</li>
<li>Select the repositories you want SurfSense to index using the checkboxes.</li>
<li>Click the <strong>Create Connector</strong> button.</li>
<li>If the connection is successful, you will be redirected and can start indexing from the Connectors page.</li>
</ol>
</AccordionContent>