From 2156c94449cde0aa4105473e06c5d542d52b834e Mon Sep 17 00:00:00 2001 From: akhisud3195 Date: Fri, 9 May 2025 09:33:12 +0530 Subject: [PATCH 01/26] Improve function call rendering style --- .../playground/components/messages.tsx | 27 ++++++++++++------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/apps/rowboat/app/projects/[projectId]/playground/components/messages.tsx b/apps/rowboat/app/projects/[projectId]/playground/components/messages.tsx index 36a9ae80..77d9b63b 100644 --- a/apps/rowboat/app/projects/[projectId]/playground/components/messages.tsx +++ b/apps/rowboat/app/projects/[projectId]/playground/components/messages.tsx @@ -6,7 +6,7 @@ import { Workflow } from "@/app/lib/types/workflow_types"; import { WorkflowTool } from "@/app/lib/types/workflow_types"; import MarkdownContent from "@/app/lib/components/markdown-content"; import { apiV1 } from "rowboat-shared"; -import { MessageSquareIcon, EllipsisIcon, CircleCheckIcon, ChevronRightIcon, ChevronDownIcon, ChevronUpIcon, XIcon, PlusIcon } from "lucide-react"; +import { MessageSquareIcon, EllipsisIcon, CircleCheckIcon, ChevronRightIcon, ChevronDownIcon, ChevronUpIcon, XIcon, PlusIcon, CodeIcon, CheckCircleIcon, FileTextIcon } from "lucide-react"; import { TestProfile } from "@/app/lib/types/testing_types"; import { ProfileContextBox } from "./profile-context-box"; @@ -258,18 +258,19 @@ function ClientToolCall({
{!availableResult && } - {availableResult && } -
- Function Call: + {availableResult && } +
+ Function Call: + {toolCall.function.name} - +
- - {availableResult && } + } /> + {availableResult && } />}
@@ -280,11 +281,13 @@ function ClientToolCall({ function ExpandableContent({ label, content, - expanded = false + expanded = false, + icon }: { label: string, content: string | object | undefined, - expanded?: boolean + expanded?: boolean, + icon?: React.ReactNode }) { const [isExpanded, setIsExpanded] = useState(expanded); @@ -314,6 +317,7 @@ function ExpandableContent({
{!isExpanded && } {isExpanded && } + {icon && {icon}}
{label}
{isExpanded && ( @@ -322,7 +326,10 @@ function ExpandableContent({ ) : ( -
+                
                     {formattedContent}
                 
) From b80eaffbe9318d2a17493505228d742ba4c8d1f6 Mon Sep 17 00:00:00 2001 From: Ramnique Singh <30795890+ramnique@users.noreply.github.com> Date: Wed, 30 Apr 2025 23:36:49 +0530 Subject: [PATCH 02/26] Add support for RAG --- README.md | 4 +- apps/docs/docs/using_rag.md | 92 ++ apps/docs/mkdocs.yml | 1 + .../rowboat/app/actions/datasource_actions.ts | 72 +- .../rowboat/app/api/uploads/[fileId]/route.ts | 87 ++ apps/rowboat/app/lib/embedding.ts | 13 +- apps/rowboat/app/lib/feature_flags.ts | 2 + .../rowboat/app/lib/types/datasource_types.ts | 13 +- .../sources/[sourceId]/source-page.tsx | 11 +- .../sources/components/files-source.tsx | 54 +- .../sources/components/sources-list.tsx | 10 +- .../projects/[projectId]/sources/new/form.tsx | 19 +- .../projects/[projectId]/sources/new/page.tsx | 3 +- apps/rowboat/app/scripts/delete_qdrant.ts | 10 +- apps/rowboat/app/scripts/rag_files_worker.ts | 99 +- apps/rowboat/app/scripts/rag_text_worker.ts | 1 - apps/rowboat/app/scripts/rag_urls_worker.ts | 1 - apps/rowboat/app/scripts/setup_qdrant.ts | 22 +- apps/rowboat/package-lock.json | 843 ++---------------- apps/rowboat/package.json | 4 +- docker-compose.yml | 62 +- start.sh | 29 + 22 files changed, 552 insertions(+), 900 deletions(-) create mode 100644 apps/docs/docs/using_rag.md create mode 100644 apps/rowboat/app/api/uploads/[fileId]/route.ts create mode 100755 start.sh diff --git a/README.md b/README.md index bc806147..5dad330d 100644 --- a/README.md +++ b/README.md @@ -22,11 +22,11 @@ Powered by OpenAI's Agents SDK, Rowboat is the fastest way to build multi-agents export OPENAI_API_KEY=your-openai-api-key ``` -2. Clone the repository and start Rowboat docker +2. Clone the repository and start Rowboat ```bash git clone git@github.com:rowboatlabs/rowboat.git cd rowboat - docker-compose up --build + ./start.sh ``` 3. Access the app at [http://localhost:3000](http://localhost:3000). 
diff --git a/apps/docs/docs/using_rag.md b/apps/docs/docs/using_rag.md new file mode 100644 index 00000000..cf0a2e97 --- /dev/null +++ b/apps/docs/docs/using_rag.md @@ -0,0 +1,92 @@ +# Using RAG in Rowboat + +Rowboat provides multiple ways to enhance your agents with Retrieval-Augmented Generation (RAG). This guide will help you set up and use each RAG feature. + +## Quick Start + +Text RAG and local file uploads are enabled by default - no configuration needed! Just start using them right away. + +## Available RAG Features + +### 1. Text RAG +✅ Enabled by default: + +- Process and reason over text content directly +- No configuration required + +### 2. Local File Uploads +✅ Enabled by default: + +- Upload PDF files directly from your device +- Files are stored locally +- No configuration required +- Files are parsed using OpenAI by default + +### 3. S3 File Uploads +To enable S3 file uploads, set the following variables: + +```bash +# Enable S3 uploads +export USE_RAG_S3_UPLOADS=true + +# S3 Configuration +export AWS_ACCESS_KEY_ID=your_access_key +export AWS_SECRET_ACCESS_KEY=your_secret_key +export RAG_UPLOADS_S3_BUCKET=your_bucket_name +export RAG_UPLOADS_S3_REGION=your_region +``` + +### 4. URL Scraping +To enable URL scraping, set the following variables: + +```bash +# Enable URL scraping +export USE_RAG_SCRAPING=true + +# Firecrawl API key for web scraping +export FIRECRAWL_API_KEY=your_firecrawl_api_key +``` + +## File Parsing Options + +### Default Parsing (OpenAI) +By default, uploaded PDF files are parsed using `gpt-4o`. 
You can customize this by setting the following: + +```bash +# Override the default parsing model +export FILE_PARSING_MODEL=your-preferred-model +``` + +You can also change the model provider like so: +```bash +# Optional: Override the parsing provider settings +export FILE_PARSING_PROVIDER_BASE_URL=your-provider-base-url +export FILE_PARSING_PROVIDER_API_KEY=your-provider-api-key +``` + +### Using Gemini for File Parsing +To use Google's Gemini model for parsing uploaded PDFs, set the following variable: + +```bash +# Enable Gemini for file parsing +export USE_GEMINI_FILE_PARSING=true +export GOOGLE_API_KEY=your_google_api_key +``` + +## Embedding Model options + +By default, Rowboat uses OpenAI's `text-embedding-3-small` model for generating embeddings. You can customize this by setting the following: + +```bash +# Override the default embedding model +export EMBEDDING_MODEL=your-preferred-model +``` + +You can also change the model provider like so: +```bash +# Optional: Override the embedding provider settings +export EMBEDDING_PROVIDER_BASE_URL=your-provider-base-url +export EMBEDDING_PROVIDER_API_KEY=your-provider-api-key +``` + +If you don't specify the provider settings, Rowboat will use OpenAI as the default provider. 
\ No newline at end of file diff --git a/apps/docs/mkdocs.yml b/apps/docs/mkdocs.yml index 39b0c22d..4e5eb44c 100644 --- a/apps/docs/mkdocs.yml +++ b/apps/docs/mkdocs.yml @@ -14,6 +14,7 @@ nav: - Test chats in the playground: playground.md - Add tools: add_tools.md - Update agents: update_agents.md + - Using RAG: using_rag.md - API & SDK: - Using the API: using_the_api.md diff --git a/apps/rowboat/app/actions/datasource_actions.ts b/apps/rowboat/app/actions/datasource_actions.ts index 806e1709..bfe5feed 100644 --- a/apps/rowboat/app/actions/datasource_actions.ts +++ b/apps/rowboat/app/actions/datasource_actions.ts @@ -10,6 +10,7 @@ import { WithStringId } from "../lib/types/types"; import { DataSourceDoc } from "../lib/types/datasource_types"; import { DataSource } from "../lib/types/datasource_types"; import { uploadsS3Client } from "../lib/uploads_s3_client"; +import { USE_RAG_S3_UPLOADS } from "../lib/feature_flags"; export async function getDataSource(projectId: string, sourceId: string): Promise>> { await projectAuthCheck(projectId); @@ -279,26 +280,27 @@ export async function getDownloadUrlForFile( ): Promise { await projectAuthCheck(projectId); await getDataSource(projectId, sourceId); - - // fetch s3 key for file const file = await dataSourceDocsCollection.findOne({ sourceId, _id: new ObjectId(fileId), - 'data.type': 'file', + 'data.type': { $in: ['file_local', 'file_s3'] }, }); if (!file) { throw new Error('File not found'); } - if (file.data.type !== 'file') { - throw new Error('File not found'); + + // if local, return path + if (file.data.type === 'file_local') { + return `/api/uploads/${fileId}`; + } else if (file.data.type === 'file_s3') { + const command = new GetObjectCommand({ + Bucket: process.env.RAG_UPLOADS_S3_BUCKET, + Key: file.data.s3Key, + }); + return await getSignedUrl(uploadsS3Client, command, { expiresIn: 60 }); // URL valid for 1 minute } - const command = new GetObjectCommand({ - Bucket: process.env.RAG_UPLOADS_S3_BUCKET, - Key: 
file.data.s3Key, - }); - - return await getSignedUrl(uploadsS3Client, command, { expiresIn: 60 }); // URL valid for 1 minute + throw new Error('Invalid file type'); } export async function getUploadUrlsForFilesDataSource( @@ -307,37 +309,47 @@ export async function getUploadUrlsForFilesDataSource( files: { name: string; type: string; size: number }[] ): Promise<{ fileId: string, - presignedUrl: string, - s3Key: string, + uploadUrl: string, + path: string, }[]> { await projectAuthCheck(projectId); const source = await getDataSource(projectId, sourceId); - if (source.data.type !== 'files') { + if (source.data.type !== 'files_local' && source.data.type !== 'files_s3') { throw new Error('Invalid files data source'); } const urls: { fileId: string, - presignedUrl: string, - s3Key: string, + uploadUrl: string, + path: string, }[] = []; for (const file of files) { const fileId = new ObjectId().toString(); - const projectIdPrefix = projectId.slice(0, 2); // 2 characters from the start of the projectId - const s3Key = `datasources/files/${projectIdPrefix}/${projectId}/${sourceId}/${fileId}/${file.name}`; - // Generate presigned URL - const command = new PutObjectCommand({ - Bucket: process.env.RAG_UPLOADS_S3_BUCKET, - Key: s3Key, - ContentType: file.type, - }); - const presignedUrl = await getSignedUrl(uploadsS3Client, command, { expiresIn: 10 * 60 }); // valid for 10 minutes - urls.push({ - fileId, - presignedUrl, - s3Key, - }); + + if (source.data.type === 'files_s3') { + // Generate presigned URL + const projectIdPrefix = projectId.slice(0, 2); // 2 characters from the start of the projectId + const path = `datasources/files/${projectIdPrefix}/${projectId}/${sourceId}/${fileId}/${file.name}`; + const command = new PutObjectCommand({ + Bucket: process.env.RAG_UPLOADS_S3_BUCKET, + Key: path, + ContentType: file.type, + }); + const uploadUrl = await getSignedUrl(uploadsS3Client, command, { expiresIn: 10 * 60 }); // valid for 10 minutes + urls.push({ + fileId, + uploadUrl, + 
path, + }); + } else if (source.data.type === 'files_local') { + // Generate local upload URL + urls.push({ + fileId, + uploadUrl: '/api/uploads/' + fileId, + path: '/api/uploads/' + fileId, + }); + } } return urls; diff --git a/apps/rowboat/app/api/uploads/[fileId]/route.ts b/apps/rowboat/app/api/uploads/[fileId]/route.ts new file mode 100644 index 00000000..eef03da7 --- /dev/null +++ b/apps/rowboat/app/api/uploads/[fileId]/route.ts @@ -0,0 +1,87 @@ +import { NextRequest, NextResponse } from 'next/server'; +import path from 'path'; +import fs from 'fs/promises'; +import fsSync from 'fs'; +import { dataSourceDocsCollection } from '@/app/lib/mongodb'; +import { ObjectId } from 'mongodb'; + +const UPLOADS_DIR = process.env.RAG_UPLOADS_DIR || '/uploads'; + +// PUT endpoint to handle file uploads +export async function PUT( + request: NextRequest, + { params }: { params: { fileId: string } } +) { + const fileId = params.fileId; + if (!fileId) { + return NextResponse.json({ error: 'Missing file ID' }, { status: 400 }); + } + + const filePath = path.join(UPLOADS_DIR, fileId); + + try { + const data = await request.arrayBuffer(); + await fs.writeFile(filePath, new Uint8Array(data)); + + return NextResponse.json({ success: true }); + } catch (error) { + console.error('Error saving file:', error); + return NextResponse.json( + { error: 'Failed to save file' }, + { status: 500 } + ); + } +} + +// GET endpoint to handle file downloads +export async function GET( + request: NextRequest, + { params }: { params: { fileId: string } } +) { + const fileId = params.fileId; + if (!fileId) { + return NextResponse.json({ error: 'Missing file ID' }, { status: 400 }); + } + + const filePath = path.join(UPLOADS_DIR, fileId); + + // get mimetype from database + const doc = await dataSourceDocsCollection.findOne({ _id: new ObjectId(fileId) }); + if (!doc) { + return NextResponse.json({ error: 'File not found' }, { status: 404 }); + } + + if (doc.data.type !== 'file_local') { + return 
NextResponse.json({ error: 'File is not local' }, { status: 400 }); + } + const mimeType = 'application/octet-stream'; + const fileName = doc.data.name; + + try { + // Check if file exists + await fs.access(filePath); + // Create a readable stream + const nodeStream = fsSync.createReadStream(filePath); + // Convert Node.js stream to Web stream + const webStream = new ReadableStream({ + start(controller) { + nodeStream.on('data', (chunk) => controller.enqueue(chunk)); + nodeStream.on('end', () => controller.close()); + nodeStream.on('error', (err) => controller.error(err)); + } + }); + return new NextResponse(webStream, { + status: 200, + headers: { + 'Content-Type': mimeType, + 'Content-Disposition': `attachment; filename="${fileName}"`, + }, + }); + } catch (error) { + console.error('Error reading file:', error); + return NextResponse.json( + { error: 'File not found' }, + { status: 404 } + ); + } +} diff --git a/apps/rowboat/app/lib/embedding.ts b/apps/rowboat/app/lib/embedding.ts index 5fc9738a..0cc0c35e 100644 --- a/apps/rowboat/app/lib/embedding.ts +++ b/apps/rowboat/app/lib/embedding.ts @@ -1,3 +1,12 @@ -import { openai } from "@ai-sdk/openai"; +import { createOpenAI } from "@ai-sdk/openai"; -export const embeddingModel = openai.embedding('text-embedding-3-small'); \ No newline at end of file +const EMBEDDING_PROVIDER_API_KEY = process.env.EMBEDDING_PROVIDER_API_KEY || process.env.OPENAI_API_KEY || ''; +const EMBEDDING_PROVIDER_BASE_URL = process.env.EMBEDDING_PROVIDER_BASE_URL || undefined; +const EMBEDDING_MODEL = process.env.EMBEDDING_MODEL || 'text-embedding-3-small'; + +const openai = createOpenAI({ + apiKey: EMBEDDING_PROVIDER_API_KEY, + baseURL: EMBEDDING_PROVIDER_BASE_URL, +}); + +export const embeddingModel = openai.embedding(EMBEDDING_MODEL); \ No newline at end of file diff --git a/apps/rowboat/app/lib/feature_flags.ts b/apps/rowboat/app/lib/feature_flags.ts index 350e8372..b2f82535 100644 --- a/apps/rowboat/app/lib/feature_flags.ts +++ 
b/apps/rowboat/app/lib/feature_flags.ts @@ -3,6 +3,8 @@ export const USE_RAG_UPLOADS = process.env.USE_RAG_UPLOADS === 'true'; export const USE_RAG_SCRAPING = process.env.USE_RAG_SCRAPING === 'true'; export const USE_CHAT_WIDGET = process.env.USE_CHAT_WIDGET === 'true'; export const USE_AUTH = process.env.USE_AUTH === 'true'; +export const USE_RAG_S3_UPLOADS = process.env.USE_RAG_S3_UPLOADS === 'true'; +export const USE_GEMINI_FILE_PARSING = process.env.USE_GEMINI_FILE_PARSING === 'true'; // Hardcoded flags export const USE_MULTIPLE_PROJECTS = true; diff --git a/apps/rowboat/app/lib/types/datasource_types.ts b/apps/rowboat/app/lib/types/datasource_types.ts index f6316c3b..5e4ce7a2 100644 --- a/apps/rowboat/app/lib/types/datasource_types.ts +++ b/apps/rowboat/app/lib/types/datasource_types.ts @@ -22,7 +22,10 @@ export const DataSource = z.object({ type: z.literal('urls'), }), z.object({ - type: z.literal('files'), + type: z.literal('files_local'), + }), + z.object({ + type: z.literal('files_s3'), }), z.object({ type: z.literal('text'), @@ -50,7 +53,13 @@ export const DataSourceDoc = z.object({ url: z.string(), }), z.object({ - type: z.literal('file'), + type: z.literal('file_local'), + name: z.string(), + size: z.number(), + mimeType: z.string(), + }), + z.object({ + type: z.literal('file_s3'), name: z.string(), size: z.number(), mimeType: z.string(), diff --git a/apps/rowboat/app/projects/[projectId]/sources/[sourceId]/source-page.tsx b/apps/rowboat/app/projects/[projectId]/sources/[sourceId]/source-page.tsx index a7c18d8b..bfa15475 100644 --- a/apps/rowboat/app/projects/[projectId]/sources/[sourceId]/source-page.tsx +++ b/apps/rowboat/app/projects/[projectId]/sources/[sourceId]/source-page.tsx @@ -119,9 +119,13 @@ export function SourcePage({
Specify URLs
} - {source.data.type === 'files' && <> + {source.data.type === 'files_local' && <> -
File upload
+
File upload (local)
+ } + {source.data.type === 'files_s3' && <> + +
File upload (S3)
} {source.data.type === 'text' && <> @@ -148,11 +152,12 @@ export function SourcePage({ handleReload={handleReload} /> } - {source.data.type === 'files' && + {(source.data.type === 'files_local' || source.data.type === 'files_s3') && } {source.data.type === 'text' && diff --git a/apps/rowboat/app/projects/[projectId]/sources/components/files-source.tsx b/apps/rowboat/app/projects/[projectId]/sources/components/files-source.tsx index 0fb13e49..b945f27a 100644 --- a/apps/rowboat/app/projects/[projectId]/sources/components/files-source.tsx +++ b/apps/rowboat/app/projects/[projectId]/sources/components/files-source.tsx @@ -46,7 +46,7 @@ function FileListItem({ } }; - if (file.data.type !== 'file') { + if (file.data.type !== 'file_local' && file.data.type !== 'file_s3') { return null; } @@ -180,10 +180,12 @@ export function FilesSource({ projectId, dataSource, handleReload, + type, }: { projectId: string, dataSource: WithStringId>, handleReload: () => void; + type: 'files_local' | 'files_s3'; }) { const [uploading, setUploading] = useState(false); const [fileListKey, setFileListKey] = useState(0); @@ -199,7 +201,7 @@ export function FilesSource({ // Upload files in parallel await Promise.all(acceptedFiles.map(async (file, index) => { - await fetch(urls[index].presignedUrl, { + await fetch(urls[index].uploadUrl, { method: 'PUT', body: file, headers: { @@ -209,20 +211,40 @@ export function FilesSource({ })); // After successful uploads, update the database with file information - await addDocsToDataSource({ - projectId, - sourceId: dataSource._id, - docData: acceptedFiles.map((file, index) => ({ + let docData: { + _id: string, + name: string, + data: z.infer['data'] + }[] = []; + if (type === 'files_s3') { + docData = acceptedFiles.map((file, index) => ({ _id: urls[index].fileId, name: file.name, data: { - type: 'file', + type: 'file_s3' as const, name: file.name, size: file.size, mimeType: file.type, - s3Key: urls[index].s3Key, + s3Key: urls[index].path, }, - })), + })); 
+ } else { + docData = acceptedFiles.map((file, index) => ({ + _id: urls[index].fileId, + name: file.name, + data: { + type: 'file_local' as const, + name: file.name, + size: file.size, + mimeType: file.type, + }, + })); + } + + await addDocsToDataSource({ + projectId, + sourceId: dataSource._id, + docData, }); handleReload(); @@ -233,22 +255,22 @@ export function FilesSource({ } finally { setUploading(false); } - }, [projectId, dataSource._id, handleReload]); + }, [projectId, dataSource._id, handleReload, type]); const { getRootProps, getInputProps, isDragActive } = useDropzone({ onDrop, disabled: uploading, accept: { 'application/pdf': ['.pdf'], - 'text/plain': ['.txt'], - 'application/msword': ['.doc'], - 'application/vnd.openxmlformats-officedocument.wordprocessingml.document': ['.docx'], + // 'text/plain': ['.txt'], + // 'application/msword': ['.doc'], + // 'application/vnd.openxmlformats-officedocument.wordprocessingml.document': ['.docx'], }, }); return ( -
@@ -269,7 +291,7 @@ export function FilesSource({

Drag and drop files here, or click to select files

- Supported file types: PDF, TXT, DOC, DOCX + Only PDF files are supported for now.

)} diff --git a/apps/rowboat/app/projects/[projectId]/sources/components/sources-list.tsx b/apps/rowboat/app/projects/[projectId]/sources/components/sources-list.tsx index d971c51c..2afe58c6 100644 --- a/apps/rowboat/app/projects/[projectId]/sources/components/sources-list.tsx +++ b/apps/rowboat/app/projects/[projectId]/sources/components/sources-list.tsx @@ -118,10 +118,16 @@ export function SourcesList({ projectId }: { projectId: string }) {
Text
)} - {source.data.type == 'files' && ( + {source.data.type == 'files_local' && (
-
Files
+
Files (Local)
+
+ )} + {source.data.type == 'files_s3' && ( +
+ +
Files (S3)
)} diff --git a/apps/rowboat/app/projects/[projectId]/sources/new/form.tsx b/apps/rowboat/app/projects/[projectId]/sources/new/form.tsx index 943d07e5..6a4c8f5f 100644 --- a/apps/rowboat/app/projects/[projectId]/sources/new/form.tsx +++ b/apps/rowboat/app/projects/[projectId]/sources/new/form.tsx @@ -13,10 +13,12 @@ import { Panel } from "@/components/common/panel-common"; export function Form({ projectId, useRagUploads, + useRagS3Uploads, useRagScraping, }: { projectId: string; useRagUploads: boolean; + useRagS3Uploads: boolean; useRagScraping: boolean; }) { const [sourceType, setSourceType] = useState(""); @@ -34,8 +36,13 @@ export function Form({ startContent: }, { - key: "files", - label: "Upload files", + key: "files_local", + label: "Upload files (Local)", + startContent: + }, + { + key: "files_s3", + label: "Upload files (S3)", startContent: } ]; @@ -73,7 +80,7 @@ export function Form({ projectId, name: formData.get('name') as string, data: { - type: 'files', + type: formData.get('type') as 'files_local' | 'files_s3', }, status: 'ready', }); @@ -125,7 +132,8 @@ export function Form({ onChange={setSourceType} options={dropdownOptions} disabledKeys={[ - ...(useRagUploads ? [] : ['files']), + ...(useRagUploads ? [] : ['files_local']), + ...(useRagS3Uploads ? [] : ['files_s3']), ...(useRagScraping ? [] : ['urls']), ]} /> @@ -196,10 +204,11 @@ export function Form({ /> } - {sourceType === "files" &&
+