mirror of
https://github.com/rowboatlabs/rowboat.git
synced 2026-04-25 00:16:29 +02:00
Add support for RAG
This commit is contained in:
parent
2156c94449
commit
b80eaffbe9
22 changed files with 552 additions and 900 deletions
|
|
@ -22,11 +22,11 @@ Powered by OpenAI's Agents SDK, Rowboat is the fastest way to build multi-agents
|
||||||
export OPENAI_API_KEY=your-openai-api-key
|
export OPENAI_API_KEY=your-openai-api-key
|
||||||
```
|
```
|
||||||
|
|
||||||
2. Clone the repository and start Rowboat docker
|
2. Clone the repository and start Rowboat
|
||||||
```bash
|
```bash
|
||||||
git clone git@github.com:rowboatlabs/rowboat.git
|
git clone git@github.com:rowboatlabs/rowboat.git
|
||||||
cd rowboat
|
cd rowboat
|
||||||
docker-compose up --build
|
./start.sh
|
||||||
```
|
```
|
||||||
|
|
||||||
3. Access the app at [http://localhost:3000](http://localhost:3000).
|
3. Access the app at [http://localhost:3000](http://localhost:3000).
|
||||||
|
|
|
||||||
92
apps/docs/docs/using_rag.md
Normal file
92
apps/docs/docs/using_rag.md
Normal file
|
|
@ -0,0 +1,92 @@
|
||||||
|
# Using RAG in Rowboat
|
||||||
|
|
||||||
|
Rowboat provides multiple ways to enhance your agents with Retrieval-Augmented Generation (RAG). This guide will help you set up and use each RAG feature.
|
||||||
|
|
||||||
|
## Quick Start
|
||||||
|
|
||||||
|
Text RAG and local file uploads are enabled by default - no configuration needed! Just start using them right away.
|
||||||
|
|
||||||
|
## Available RAG Features
|
||||||
|
|
||||||
|
### 1. Text RAG
|
||||||
|
✅ Enabled by default:
|
||||||
|
|
||||||
|
- Process and reason over text content directly
|
||||||
|
- No configuration required
|
||||||
|
|
||||||
|
### 2. Local File Uploads
|
||||||
|
✅ Enabled by default:
|
||||||
|
|
||||||
|
- Upload PDF files directly from your device
|
||||||
|
- Files are stored locally
|
||||||
|
- No configuration required
|
||||||
|
- Files are parsed using OpenAI by default
|
||||||
|
|
||||||
|
### 3. S3 File Uploads
|
||||||
|
To enable S3 file uploads, set the following environment variables:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Enable S3 uploads
|
||||||
|
export USE_RAG_S3_UPLOADS=true
|
||||||
|
|
||||||
|
# S3 Configuration
|
||||||
|
export AWS_ACCESS_KEY_ID=your_access_key
|
||||||
|
export AWS_SECRET_ACCESS_KEY=your_secret_key
|
||||||
|
export RAG_UPLOADS_S3_BUCKET=your_bucket_name
|
||||||
|
export RAG_UPLOADS_S3_REGION=your_region
|
||||||
|
```
|
||||||
|
|
||||||
|
### 4. URL Scraping
|
||||||
|
To enable URL scraping, set the following environment variables:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Enable URL scraping
|
||||||
|
export USE_RAG_SCRAPING=true
|
||||||
|
|
||||||
|
# Firecrawl API key for web scraping
|
||||||
|
export FIRECRAWL_API_KEY=your_firecrawl_api_key
|
||||||
|
```
|
||||||
|
|
||||||
|
## File Parsing Options
|
||||||
|
|
||||||
|
### Default Parsing (OpenAI)
|
||||||
|
By default, uploaded PDF files are parsed using `gpt-4o`. You can customize this by setting the following:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Override the default parsing model
|
||||||
|
export FILE_PARSING_MODEL=your-preferred-model
|
||||||
|
```
|
||||||
|
|
||||||
|
You can also change the model provider like so:
|
||||||
|
```bash
|
||||||
|
# Optional: Override the parsing provider settings
|
||||||
|
export FILE_PARSING_PROVIDER_BASE_URL=your-provider-base-url
|
||||||
|
export FILE_PARSING_PROVIDER_API_KEY=your-provider-api-key
|
||||||
|
```
|
||||||
|
|
||||||
|
### Using Gemini for File Parsing
|
||||||
|
To use Google's Gemini model for parsing uploaded PDFs, set the following environment variables:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Enable Gemini for file parsing
|
||||||
|
export USE_GEMINI_FILE_PARSING=true
|
||||||
|
export GOOGLE_API_KEY=your_google_api_key
|
||||||
|
```
|
||||||
|
|
||||||
|
## Embedding Model Options
|
||||||
|
|
||||||
|
By default, Rowboat uses OpenAI's `text-embedding-3-small` model for generating embeddings. You can customize this by setting the following:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Override the default embedding model
|
||||||
|
export EMBEDDING_MODEL=your-preferred-model
|
||||||
|
```
|
||||||
|
|
||||||
|
You can also change the model provider like so:
|
||||||
|
```bash
|
||||||
|
# Optional: Override the embedding provider settings
|
||||||
|
export EMBEDDING_PROVIDER_BASE_URL=your-provider-base-url
|
||||||
|
export EMBEDDING_PROVIDER_API_KEY=your-provider-api-key
|
||||||
|
```
|
||||||
|
|
||||||
|
If you don't specify the provider settings, Rowboat will use OpenAI as the default provider.
|
||||||
|
|
@ -14,6 +14,7 @@ nav:
|
||||||
- Test chats in the playground: playground.md
|
- Test chats in the playground: playground.md
|
||||||
- Add tools: add_tools.md
|
- Add tools: add_tools.md
|
||||||
- Update agents: update_agents.md
|
- Update agents: update_agents.md
|
||||||
|
- Using RAG: using_rag.md
|
||||||
|
|
||||||
- API & SDK:
|
- API & SDK:
|
||||||
- Using the API: using_the_api.md
|
- Using the API: using_the_api.md
|
||||||
|
|
|
||||||
|
|
@ -10,6 +10,7 @@ import { WithStringId } from "../lib/types/types";
|
||||||
import { DataSourceDoc } from "../lib/types/datasource_types";
|
import { DataSourceDoc } from "../lib/types/datasource_types";
|
||||||
import { DataSource } from "../lib/types/datasource_types";
|
import { DataSource } from "../lib/types/datasource_types";
|
||||||
import { uploadsS3Client } from "../lib/uploads_s3_client";
|
import { uploadsS3Client } from "../lib/uploads_s3_client";
|
||||||
|
import { USE_RAG_S3_UPLOADS } from "../lib/feature_flags";
|
||||||
|
|
||||||
export async function getDataSource(projectId: string, sourceId: string): Promise<WithStringId<z.infer<typeof DataSource>>> {
|
export async function getDataSource(projectId: string, sourceId: string): Promise<WithStringId<z.infer<typeof DataSource>>> {
|
||||||
await projectAuthCheck(projectId);
|
await projectAuthCheck(projectId);
|
||||||
|
|
@ -279,26 +280,27 @@ export async function getDownloadUrlForFile(
|
||||||
): Promise<string> {
|
): Promise<string> {
|
||||||
await projectAuthCheck(projectId);
|
await projectAuthCheck(projectId);
|
||||||
await getDataSource(projectId, sourceId);
|
await getDataSource(projectId, sourceId);
|
||||||
|
|
||||||
// fetch s3 key for file
|
|
||||||
const file = await dataSourceDocsCollection.findOne({
|
const file = await dataSourceDocsCollection.findOne({
|
||||||
sourceId,
|
sourceId,
|
||||||
_id: new ObjectId(fileId),
|
_id: new ObjectId(fileId),
|
||||||
'data.type': 'file',
|
'data.type': { $in: ['file_local', 'file_s3'] },
|
||||||
});
|
});
|
||||||
if (!file) {
|
if (!file) {
|
||||||
throw new Error('File not found');
|
throw new Error('File not found');
|
||||||
}
|
}
|
||||||
if (file.data.type !== 'file') {
|
|
||||||
throw new Error('File not found');
|
// if local, return path
|
||||||
|
if (file.data.type === 'file_local') {
|
||||||
|
return `/api/uploads/${fileId}`;
|
||||||
|
} else if (file.data.type === 'file_s3') {
|
||||||
|
const command = new GetObjectCommand({
|
||||||
|
Bucket: process.env.RAG_UPLOADS_S3_BUCKET,
|
||||||
|
Key: file.data.s3Key,
|
||||||
|
});
|
||||||
|
return await getSignedUrl(uploadsS3Client, command, { expiresIn: 60 }); // URL valid for 1 minute
|
||||||
}
|
}
|
||||||
|
|
||||||
const command = new GetObjectCommand({
|
throw new Error('Invalid file type');
|
||||||
Bucket: process.env.RAG_UPLOADS_S3_BUCKET,
|
|
||||||
Key: file.data.s3Key,
|
|
||||||
});
|
|
||||||
|
|
||||||
return await getSignedUrl(uploadsS3Client, command, { expiresIn: 60 }); // URL valid for 1 minute
|
|
||||||
}
|
}
|
||||||
|
|
||||||
export async function getUploadUrlsForFilesDataSource(
|
export async function getUploadUrlsForFilesDataSource(
|
||||||
|
|
@ -307,37 +309,47 @@ export async function getUploadUrlsForFilesDataSource(
|
||||||
files: { name: string; type: string; size: number }[]
|
files: { name: string; type: string; size: number }[]
|
||||||
): Promise<{
|
): Promise<{
|
||||||
fileId: string,
|
fileId: string,
|
||||||
presignedUrl: string,
|
uploadUrl: string,
|
||||||
s3Key: string,
|
path: string,
|
||||||
}[]> {
|
}[]> {
|
||||||
await projectAuthCheck(projectId);
|
await projectAuthCheck(projectId);
|
||||||
const source = await getDataSource(projectId, sourceId);
|
const source = await getDataSource(projectId, sourceId);
|
||||||
if (source.data.type !== 'files') {
|
if (source.data.type !== 'files_local' && source.data.type !== 'files_s3') {
|
||||||
throw new Error('Invalid files data source');
|
throw new Error('Invalid files data source');
|
||||||
}
|
}
|
||||||
|
|
||||||
const urls: {
|
const urls: {
|
||||||
fileId: string,
|
fileId: string,
|
||||||
presignedUrl: string,
|
uploadUrl: string,
|
||||||
s3Key: string,
|
path: string,
|
||||||
}[] = [];
|
}[] = [];
|
||||||
|
|
||||||
for (const file of files) {
|
for (const file of files) {
|
||||||
const fileId = new ObjectId().toString();
|
const fileId = new ObjectId().toString();
|
||||||
const projectIdPrefix = projectId.slice(0, 2); // 2 characters from the start of the projectId
|
|
||||||
const s3Key = `datasources/files/${projectIdPrefix}/${projectId}/${sourceId}/${fileId}/${file.name}`;
|
if (source.data.type === 'files_s3') {
|
||||||
// Generate presigned URL
|
// Generate presigned URL
|
||||||
const command = new PutObjectCommand({
|
const projectIdPrefix = projectId.slice(0, 2); // 2 characters from the start of the projectId
|
||||||
Bucket: process.env.RAG_UPLOADS_S3_BUCKET,
|
const path = `datasources/files/${projectIdPrefix}/${projectId}/${sourceId}/${fileId}/${file.name}`;
|
||||||
Key: s3Key,
|
const command = new PutObjectCommand({
|
||||||
ContentType: file.type,
|
Bucket: process.env.RAG_UPLOADS_S3_BUCKET,
|
||||||
});
|
Key: path,
|
||||||
const presignedUrl = await getSignedUrl(uploadsS3Client, command, { expiresIn: 10 * 60 }); // valid for 10 minutes
|
ContentType: file.type,
|
||||||
urls.push({
|
});
|
||||||
fileId,
|
const uploadUrl = await getSignedUrl(uploadsS3Client, command, { expiresIn: 10 * 60 }); // valid for 10 minutes
|
||||||
presignedUrl,
|
urls.push({
|
||||||
s3Key,
|
fileId,
|
||||||
});
|
uploadUrl,
|
||||||
|
path,
|
||||||
|
});
|
||||||
|
} else if (source.data.type === 'files_local') {
|
||||||
|
// Generate local upload URL
|
||||||
|
urls.push({
|
||||||
|
fileId,
|
||||||
|
uploadUrl: '/api/uploads/' + fileId,
|
||||||
|
path: '/api/uploads/' + fileId,
|
||||||
|
});
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return urls;
|
return urls;
|
||||||
|
|
|
||||||
87
apps/rowboat/app/api/uploads/[fileId]/route.ts
Normal file
87
apps/rowboat/app/api/uploads/[fileId]/route.ts
Normal file
|
|
@ -0,0 +1,87 @@
|
||||||
|
import { NextRequest, NextResponse } from 'next/server';
|
||||||
|
import path from 'path';
|
||||||
|
import fs from 'fs/promises';
|
||||||
|
import fsSync from 'fs';
|
||||||
|
import { dataSourceDocsCollection } from '@/app/lib/mongodb';
|
||||||
|
import { ObjectId } from 'mongodb';
|
||||||
|
|
||||||
|
// Directory that locally uploaded files are written to and read from by the
// handlers in this module. Taken from RAG_UPLOADS_DIR when set (and non-empty),
// otherwise falls back to '/uploads'.
const UPLOADS_DIR = process.env.RAG_UPLOADS_DIR || '/uploads';
|
||||||
|
|
||||||
|
/**
 * PUT endpoint to handle file uploads.
 *
 * Stores the raw request body on disk as `<UPLOADS_DIR>/<fileId>`.
 *
 * @param request - Incoming request; its body is the file content.
 * @param params - Route params; `fileId` is the dynamic `[fileId]` segment.
 * @returns `{ success: true }` on success, a 400 JSON error when the id is
 *   missing or malformed, or a 500 JSON error when the write fails.
 *
 * NOTE(review): no authentication/authorization check is visible in this
 * handler — confirm that access is restricted elsewhere (e.g. middleware).
 */
export async function PUT(
    request: NextRequest,
    { params }: { params: { fileId: string } }
) {
    const fileId = params.fileId;
    if (!fileId) {
        return NextResponse.json({ error: 'Missing file ID' }, { status: 400 });
    }

    // The id is interpolated into a filesystem path below. Only accept the
    // 24-character hex form of an ObjectId so that values such as "../x"
    // (reachable through URL-encoded slashes) cannot escape UPLOADS_DIR.
    if (!/^[0-9a-f]{24}$/i.test(fileId)) {
        return NextResponse.json({ error: 'Invalid file ID' }, { status: 400 });
    }

    const filePath = path.join(UPLOADS_DIR, fileId);

    try {
        const data = await request.arrayBuffer();
        await fs.writeFile(filePath, new Uint8Array(data));

        return NextResponse.json({ success: true });
    } catch (error) {
        console.error('Error saving file:', error);
        return NextResponse.json(
            { error: 'Failed to save file' },
            { status: 500 }
        );
    }
}
|
||||||
|
|
||||||
|
/**
 * GET endpoint to handle file downloads.
 *
 * Looks up the document record for `fileId`, verifies it is a locally stored
 * file, and streams `<UPLOADS_DIR>/<fileId>` back as an attachment named
 * after the stored document name.
 *
 * @param request - Incoming request (unused beyond routing).
 * @param params - Route params; `fileId` is the dynamic `[fileId]` segment.
 * @returns The file stream with download headers; a 400 JSON error when the
 *   id is missing/malformed or the document is not a local file; a 404 JSON
 *   error when the record or the file on disk does not exist.
 *
 * NOTE(review): no authentication/authorization check is visible in this
 * handler — confirm that access is restricted elsewhere (e.g. middleware).
 */
export async function GET(
    request: NextRequest,
    { params }: { params: { fileId: string } }
) {
    const fileId = params.fileId;
    if (!fileId) {
        return NextResponse.json({ error: 'Missing file ID' }, { status: 400 });
    }

    // Reject anything that is not a 24-character hex ObjectId: it would make
    // `new ObjectId(...)` throw, and it must never reach the path join below.
    if (!/^[0-9a-f]{24}$/i.test(fileId)) {
        return NextResponse.json({ error: 'Invalid file ID' }, { status: 400 });
    }

    const filePath = path.join(UPLOADS_DIR, fileId);

    // Fetch the document record (needed for the type check and file name)
    const doc = await dataSourceDocsCollection.findOne({ _id: new ObjectId(fileId) });
    if (!doc) {
        return NextResponse.json({ error: 'File not found' }, { status: 404 });
    }

    if (doc.data.type !== 'file_local') {
        return NextResponse.json({ error: 'File is not local' }, { status: 400 });
    }

    // Always served as a generic binary download; the stored mimeType is
    // intentionally not echoed back to the client.
    const mimeType = 'application/octet-stream';
    const fileName = doc.data.name;

    // Build a header-safe Content-Disposition value (RFC 6266):
    //  - `filename=` gets an ASCII-only fallback with quotes, backslashes and
    //    control characters replaced, so the name cannot break out of the
    //    quoted string or inject header content;
    //  - `filename*=` carries the exact name, percent-encoded as UTF-8.
    const asciiFallback = fileName.replace(/[^\x20-\x7e]|["\\]/g, '_');
    const encodedName = encodeURIComponent(fileName).replace(
        /['()*]/g,
        (ch) => '%' + ch.charCodeAt(0).toString(16).toUpperCase()
    );
    const contentDisposition =
        `attachment; filename="${asciiFallback}"; filename*=UTF-8''${encodedName}`;

    try {
        // Check if file exists
        await fs.access(filePath);
        // Create a readable stream
        const nodeStream = fsSync.createReadStream(filePath);
        // Convert Node.js stream to Web stream
        const webStream = new ReadableStream({
            start(controller) {
                nodeStream.on('data', (chunk) => controller.enqueue(chunk));
                nodeStream.on('end', () => controller.close());
                nodeStream.on('error', (err) => controller.error(err));
            },
            cancel() {
                // Client went away: release the underlying file descriptor.
                nodeStream.destroy();
            },
        });
        return new NextResponse(webStream, {
            status: 200,
            headers: {
                'Content-Type': mimeType,
                'Content-Disposition': contentDisposition,
            },
        });
    } catch (error) {
        console.error('Error reading file:', error);
        return NextResponse.json(
            { error: 'File not found' },
            { status: 404 }
        );
    }
}
|
||||||
|
|
@ -1,3 +1,12 @@
|
||||||
import { openai } from "@ai-sdk/openai";
|
import { createOpenAI } from "@ai-sdk/openai";
|
||||||
|
|
||||||
export const embeddingModel = openai.embedding('text-embedding-3-small');
|
const EMBEDDING_PROVIDER_API_KEY = process.env.EMBEDDING_PROVIDER_API_KEY || process.env.OPENAI_API_KEY || '';
|
||||||
|
const EMBEDDING_PROVIDER_BASE_URL = process.env.EMBEDDING_PROVIDER_BASE_URL || undefined;
|
||||||
|
const EMBEDDING_MODEL = process.env.EMBEDDING_MODEL || 'text-embedding-3-small';
|
||||||
|
|
||||||
|
const openai = createOpenAI({
|
||||||
|
apiKey: EMBEDDING_PROVIDER_API_KEY,
|
||||||
|
baseURL: EMBEDDING_PROVIDER_BASE_URL,
|
||||||
|
});
|
||||||
|
|
||||||
|
export const embeddingModel = openai.embedding(EMBEDDING_MODEL);
|
||||||
|
|
@ -3,6 +3,8 @@ export const USE_RAG_UPLOADS = process.env.USE_RAG_UPLOADS === 'true';
|
||||||
export const USE_RAG_SCRAPING = process.env.USE_RAG_SCRAPING === 'true';
|
export const USE_RAG_SCRAPING = process.env.USE_RAG_SCRAPING === 'true';
|
||||||
export const USE_CHAT_WIDGET = process.env.USE_CHAT_WIDGET === 'true';
|
export const USE_CHAT_WIDGET = process.env.USE_CHAT_WIDGET === 'true';
|
||||||
export const USE_AUTH = process.env.USE_AUTH === 'true';
|
export const USE_AUTH = process.env.USE_AUTH === 'true';
|
||||||
|
export const USE_RAG_S3_UPLOADS = process.env.USE_RAG_S3_UPLOADS === 'true';
|
||||||
|
export const USE_GEMINI_FILE_PARSING = process.env.USE_GEMINI_FILE_PARSING === 'true';
|
||||||
|
|
||||||
// Hardcoded flags
|
// Hardcoded flags
|
||||||
export const USE_MULTIPLE_PROJECTS = true;
|
export const USE_MULTIPLE_PROJECTS = true;
|
||||||
|
|
|
||||||
|
|
@ -22,7 +22,10 @@ export const DataSource = z.object({
|
||||||
type: z.literal('urls'),
|
type: z.literal('urls'),
|
||||||
}),
|
}),
|
||||||
z.object({
|
z.object({
|
||||||
type: z.literal('files'),
|
type: z.literal('files_local'),
|
||||||
|
}),
|
||||||
|
z.object({
|
||||||
|
type: z.literal('files_s3'),
|
||||||
}),
|
}),
|
||||||
z.object({
|
z.object({
|
||||||
type: z.literal('text'),
|
type: z.literal('text'),
|
||||||
|
|
@ -50,7 +53,13 @@ export const DataSourceDoc = z.object({
|
||||||
url: z.string(),
|
url: z.string(),
|
||||||
}),
|
}),
|
||||||
z.object({
|
z.object({
|
||||||
type: z.literal('file'),
|
type: z.literal('file_local'),
|
||||||
|
name: z.string(),
|
||||||
|
size: z.number(),
|
||||||
|
mimeType: z.string(),
|
||||||
|
}),
|
||||||
|
z.object({
|
||||||
|
type: z.literal('file_s3'),
|
||||||
name: z.string(),
|
name: z.string(),
|
||||||
size: z.number(),
|
size: z.number(),
|
||||||
mimeType: z.string(),
|
mimeType: z.string(),
|
||||||
|
|
|
||||||
|
|
@ -119,9 +119,13 @@ export function SourcePage({
|
||||||
<DataSourceIcon type="urls" />
|
<DataSourceIcon type="urls" />
|
||||||
<div>Specify URLs</div>
|
<div>Specify URLs</div>
|
||||||
</>}
|
</>}
|
||||||
{source.data.type === 'files' && <>
|
{source.data.type === 'files_local' && <>
|
||||||
<DataSourceIcon type="files" />
|
<DataSourceIcon type="files" />
|
||||||
<div>File upload</div>
|
<div>File upload (local)</div>
|
||||||
|
</>}
|
||||||
|
{source.data.type === 'files_s3' && <>
|
||||||
|
<DataSourceIcon type="files" />
|
||||||
|
<div>File upload (S3)</div>
|
||||||
</>}
|
</>}
|
||||||
{source.data.type === 'text' && <>
|
{source.data.type === 'text' && <>
|
||||||
<DataSourceIcon type="text" />
|
<DataSourceIcon type="text" />
|
||||||
|
|
@ -148,11 +152,12 @@ export function SourcePage({
|
||||||
handleReload={handleReload}
|
handleReload={handleReload}
|
||||||
/>
|
/>
|
||||||
}
|
}
|
||||||
{source.data.type === 'files' &&
|
{(source.data.type === 'files_local' || source.data.type === 'files_s3') &&
|
||||||
<FilesSource
|
<FilesSource
|
||||||
projectId={projectId}
|
projectId={projectId}
|
||||||
dataSource={source}
|
dataSource={source}
|
||||||
handleReload={handleReload}
|
handleReload={handleReload}
|
||||||
|
type={source.data.type}
|
||||||
/>
|
/>
|
||||||
}
|
}
|
||||||
{source.data.type === 'text' &&
|
{source.data.type === 'text' &&
|
||||||
|
|
|
||||||
|
|
@ -46,7 +46,7 @@ function FileListItem({
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
if (file.data.type !== 'file') {
|
if (file.data.type !== 'file_local' && file.data.type !== 'file_s3') {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -180,10 +180,12 @@ export function FilesSource({
|
||||||
projectId,
|
projectId,
|
||||||
dataSource,
|
dataSource,
|
||||||
handleReload,
|
handleReload,
|
||||||
|
type,
|
||||||
}: {
|
}: {
|
||||||
projectId: string,
|
projectId: string,
|
||||||
dataSource: WithStringId<z.infer<typeof DataSource>>,
|
dataSource: WithStringId<z.infer<typeof DataSource>>,
|
||||||
handleReload: () => void;
|
handleReload: () => void;
|
||||||
|
type: 'files_local' | 'files_s3';
|
||||||
}) {
|
}) {
|
||||||
const [uploading, setUploading] = useState(false);
|
const [uploading, setUploading] = useState(false);
|
||||||
const [fileListKey, setFileListKey] = useState(0);
|
const [fileListKey, setFileListKey] = useState(0);
|
||||||
|
|
@ -199,7 +201,7 @@ export function FilesSource({
|
||||||
|
|
||||||
// Upload files in parallel
|
// Upload files in parallel
|
||||||
await Promise.all(acceptedFiles.map(async (file, index) => {
|
await Promise.all(acceptedFiles.map(async (file, index) => {
|
||||||
await fetch(urls[index].presignedUrl, {
|
await fetch(urls[index].uploadUrl, {
|
||||||
method: 'PUT',
|
method: 'PUT',
|
||||||
body: file,
|
body: file,
|
||||||
headers: {
|
headers: {
|
||||||
|
|
@ -209,20 +211,40 @@ export function FilesSource({
|
||||||
}));
|
}));
|
||||||
|
|
||||||
// After successful uploads, update the database with file information
|
// After successful uploads, update the database with file information
|
||||||
await addDocsToDataSource({
|
let docData: {
|
||||||
projectId,
|
_id: string,
|
||||||
sourceId: dataSource._id,
|
name: string,
|
||||||
docData: acceptedFiles.map((file, index) => ({
|
data: z.infer<typeof DataSourceDoc>['data']
|
||||||
|
}[] = [];
|
||||||
|
if (type === 'files_s3') {
|
||||||
|
docData = acceptedFiles.map((file, index) => ({
|
||||||
_id: urls[index].fileId,
|
_id: urls[index].fileId,
|
||||||
name: file.name,
|
name: file.name,
|
||||||
data: {
|
data: {
|
||||||
type: 'file',
|
type: 'file_s3' as const,
|
||||||
name: file.name,
|
name: file.name,
|
||||||
size: file.size,
|
size: file.size,
|
||||||
mimeType: file.type,
|
mimeType: file.type,
|
||||||
s3Key: urls[index].s3Key,
|
s3Key: urls[index].path,
|
||||||
},
|
},
|
||||||
})),
|
}));
|
||||||
|
} else {
|
||||||
|
docData = acceptedFiles.map((file, index) => ({
|
||||||
|
_id: urls[index].fileId,
|
||||||
|
name: file.name,
|
||||||
|
data: {
|
||||||
|
type: 'file_local' as const,
|
||||||
|
name: file.name,
|
||||||
|
size: file.size,
|
||||||
|
mimeType: file.type,
|
||||||
|
},
|
||||||
|
}));
|
||||||
|
}
|
||||||
|
|
||||||
|
await addDocsToDataSource({
|
||||||
|
projectId,
|
||||||
|
sourceId: dataSource._id,
|
||||||
|
docData,
|
||||||
});
|
});
|
||||||
|
|
||||||
handleReload();
|
handleReload();
|
||||||
|
|
@ -233,22 +255,22 @@ export function FilesSource({
|
||||||
} finally {
|
} finally {
|
||||||
setUploading(false);
|
setUploading(false);
|
||||||
}
|
}
|
||||||
}, [projectId, dataSource._id, handleReload]);
|
}, [projectId, dataSource._id, handleReload, type]);
|
||||||
|
|
||||||
const { getRootProps, getInputProps, isDragActive } = useDropzone({
|
const { getRootProps, getInputProps, isDragActive } = useDropzone({
|
||||||
onDrop,
|
onDrop,
|
||||||
disabled: uploading,
|
disabled: uploading,
|
||||||
accept: {
|
accept: {
|
||||||
'application/pdf': ['.pdf'],
|
'application/pdf': ['.pdf'],
|
||||||
'text/plain': ['.txt'],
|
// 'text/plain': ['.txt'],
|
||||||
'application/msword': ['.doc'],
|
// 'application/msword': ['.doc'],
|
||||||
'application/vnd.openxmlformats-officedocument.wordprocessingml.document': ['.docx'],
|
// 'application/vnd.openxmlformats-officedocument.wordprocessingml.document': ['.docx'],
|
||||||
},
|
},
|
||||||
});
|
});
|
||||||
|
|
||||||
return (
|
return (
|
||||||
<Section
|
<Section
|
||||||
title="File Uploads"
|
title="File Uploads"
|
||||||
description="Upload and manage files for this data source."
|
description="Upload and manage files for this data source."
|
||||||
>
|
>
|
||||||
<div className="space-y-8">
|
<div className="space-y-8">
|
||||||
|
|
@ -269,7 +291,7 @@ export function FilesSource({
|
||||||
<div className="space-y-2">
|
<div className="space-y-2">
|
||||||
<p>Drag and drop files here, or click to select files</p>
|
<p>Drag and drop files here, or click to select files</p>
|
||||||
<p className="text-sm text-gray-500 dark:text-gray-400">
|
<p className="text-sm text-gray-500 dark:text-gray-400">
|
||||||
Supported file types: PDF, TXT, DOC, DOCX
|
Only PDF files are supported for now.
|
||||||
</p>
|
</p>
|
||||||
</div>
|
</div>
|
||||||
)}
|
)}
|
||||||
|
|
|
||||||
|
|
@ -118,10 +118,16 @@ export function SourcesList({ projectId }: { projectId: string }) {
|
||||||
<div>Text</div>
|
<div>Text</div>
|
||||||
</div>
|
</div>
|
||||||
)}
|
)}
|
||||||
{source.data.type == 'files' && (
|
{source.data.type == 'files_local' && (
|
||||||
<div className="flex gap-2 items-center text-sm text-gray-600 dark:text-gray-300">
|
<div className="flex gap-2 items-center text-sm text-gray-600 dark:text-gray-300">
|
||||||
<DataSourceIcon type="files" />
|
<DataSourceIcon type="files" />
|
||||||
<div>Files</div>
|
<div>Files (Local)</div>
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
{source.data.type == 'files_s3' && (
|
||||||
|
<div className="flex gap-2 items-center text-sm text-gray-600 dark:text-gray-300">
|
||||||
|
<DataSourceIcon type="files" />
|
||||||
|
<div>Files (S3)</div>
|
||||||
</div>
|
</div>
|
||||||
)}
|
)}
|
||||||
</td>
|
</td>
|
||||||
|
|
|
||||||
|
|
@ -13,10 +13,12 @@ import { Panel } from "@/components/common/panel-common";
|
||||||
export function Form({
|
export function Form({
|
||||||
projectId,
|
projectId,
|
||||||
useRagUploads,
|
useRagUploads,
|
||||||
|
useRagS3Uploads,
|
||||||
useRagScraping,
|
useRagScraping,
|
||||||
}: {
|
}: {
|
||||||
projectId: string;
|
projectId: string;
|
||||||
useRagUploads: boolean;
|
useRagUploads: boolean;
|
||||||
|
useRagS3Uploads: boolean;
|
||||||
useRagScraping: boolean;
|
useRagScraping: boolean;
|
||||||
}) {
|
}) {
|
||||||
const [sourceType, setSourceType] = useState("");
|
const [sourceType, setSourceType] = useState("");
|
||||||
|
|
@ -34,8 +36,13 @@ export function Form({
|
||||||
startContent: <DataSourceIcon type="urls" />
|
startContent: <DataSourceIcon type="urls" />
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
key: "files",
|
key: "files_local",
|
||||||
label: "Upload files",
|
label: "Upload files (Local)",
|
||||||
|
startContent: <DataSourceIcon type="files" />
|
||||||
|
},
|
||||||
|
{
|
||||||
|
key: "files_s3",
|
||||||
|
label: "Upload files (S3)",
|
||||||
startContent: <DataSourceIcon type="files" />
|
startContent: <DataSourceIcon type="files" />
|
||||||
}
|
}
|
||||||
];
|
];
|
||||||
|
|
@ -73,7 +80,7 @@ export function Form({
|
||||||
projectId,
|
projectId,
|
||||||
name: formData.get('name') as string,
|
name: formData.get('name') as string,
|
||||||
data: {
|
data: {
|
||||||
type: 'files',
|
type: formData.get('type') as 'files_local' | 'files_s3',
|
||||||
},
|
},
|
||||||
status: 'ready',
|
status: 'ready',
|
||||||
});
|
});
|
||||||
|
|
@ -125,7 +132,8 @@ export function Form({
|
||||||
onChange={setSourceType}
|
onChange={setSourceType}
|
||||||
options={dropdownOptions}
|
options={dropdownOptions}
|
||||||
disabledKeys={[
|
disabledKeys={[
|
||||||
...(useRagUploads ? [] : ['files']),
|
...(useRagUploads ? [] : ['files_local']),
|
||||||
|
...(useRagS3Uploads ? [] : ['files_s3']),
|
||||||
...(useRagScraping ? [] : ['urls']),
|
...(useRagScraping ? [] : ['urls']),
|
||||||
]}
|
]}
|
||||||
/>
|
/>
|
||||||
|
|
@ -196,10 +204,11 @@ export function Form({
|
||||||
/>
|
/>
|
||||||
</form>}
|
</form>}
|
||||||
|
|
||||||
{sourceType === "files" && <form
|
{(sourceType === "files_local" || sourceType === "files_s3") && <form
|
||||||
action={createFilesDataSource}
|
action={createFilesDataSource}
|
||||||
className="flex flex-col gap-4"
|
className="flex flex-col gap-4"
|
||||||
>
|
>
|
||||||
|
<input type="hidden" name="type" value={sourceType} />
|
||||||
<div className="space-y-2">
|
<div className="space-y-2">
|
||||||
<label className="text-xs font-medium uppercase tracking-wider text-gray-500 dark:text-gray-400">
|
<label className="text-xs font-medium uppercase tracking-wider text-gray-500 dark:text-gray-400">
|
||||||
Name
|
Name
|
||||||
|
|
|
||||||
|
|
@ -1,7 +1,7 @@
|
||||||
import { Metadata } from "next";
|
import { Metadata } from "next";
|
||||||
import { Form } from "./form";
|
import { Form } from "./form";
|
||||||
import { redirect } from "next/navigation";
|
import { redirect } from "next/navigation";
|
||||||
import { USE_RAG, USE_RAG_UPLOADS, USE_RAG_SCRAPING } from "../../../../lib/feature_flags";
|
import { USE_RAG, USE_RAG_UPLOADS, USE_RAG_S3_UPLOADS, USE_RAG_SCRAPING } from "../../../../lib/feature_flags";
|
||||||
|
|
||||||
export const metadata: Metadata = {
|
export const metadata: Metadata = {
|
||||||
title: "Add data source"
|
title: "Add data source"
|
||||||
|
|
@ -20,6 +20,7 @@ export default async function Page({
|
||||||
<Form
|
<Form
|
||||||
projectId={params.projectId}
|
projectId={params.projectId}
|
||||||
useRagUploads={USE_RAG_UPLOADS}
|
useRagUploads={USE_RAG_UPLOADS}
|
||||||
|
useRagS3Uploads={USE_RAG_S3_UPLOADS}
|
||||||
useRagScraping={USE_RAG_SCRAPING}
|
useRagScraping={USE_RAG_SCRAPING}
|
||||||
/>
|
/>
|
||||||
);
|
);
|
||||||
|
|
|
||||||
|
|
@ -2,8 +2,10 @@ import '../lib/loadenv';
|
||||||
import { qdrantClient } from '../lib/qdrant';
|
import { qdrantClient } from '../lib/qdrant';
|
||||||
|
|
||||||
(async () => {
|
(async () => {
|
||||||
await qdrantClient.deleteCollection('embeddings');
|
try {
|
||||||
|
const result = await qdrantClient.deleteCollection('embeddings');
|
||||||
const { collections } = await qdrantClient.getCollections();
|
console.log(`Delete qdrant collection 'embeddings' completed with result: ${result}`);
|
||||||
console.log(collections);
|
} catch (error) {
|
||||||
|
console.error(`Unable to delete qdrant collection 'embeddings': ${error}`);
|
||||||
|
}
|
||||||
})();
|
})();
|
||||||
|
|
@ -4,14 +4,29 @@ import { z } from 'zod';
|
||||||
import { dataSourceDocsCollection, dataSourcesCollection } from '../lib/mongodb';
|
import { dataSourceDocsCollection, dataSourcesCollection } from '../lib/mongodb';
|
||||||
import { EmbeddingRecord, DataSourceDoc, DataSource } from "../lib/types/datasource_types";
|
import { EmbeddingRecord, DataSourceDoc, DataSource } from "../lib/types/datasource_types";
|
||||||
import { WithId } from 'mongodb';
|
import { WithId } from 'mongodb';
|
||||||
import { embedMany } from 'ai';
|
import { embedMany, generateText } from 'ai';
|
||||||
import { embeddingModel } from '../lib/embedding';
|
import { embeddingModel } from '../lib/embedding';
|
||||||
import { qdrantClient } from '../lib/qdrant';
|
import { qdrantClient } from '../lib/qdrant';
|
||||||
import { PrefixLogger } from "../lib/utils";
|
import { PrefixLogger } from "../lib/utils";
|
||||||
import { GoogleGenerativeAI } from "@google/generative-ai";
|
import { GoogleGenerativeAI } from "@google/generative-ai";
|
||||||
import { GetObjectCommand } from "@aws-sdk/client-s3";
|
import { GetObjectCommand } from "@aws-sdk/client-s3";
|
||||||
import { uploadsS3Client } from '../lib/uploads_s3_client';
|
import { uploadsS3Client } from '../lib/uploads_s3_client';
|
||||||
|
import fs from 'fs/promises';
|
||||||
import crypto from 'crypto';
|
import crypto from 'crypto';
|
||||||
|
import path from 'path';
|
||||||
|
import { createOpenAI } from '@ai-sdk/openai';
|
||||||
|
import { USE_GEMINI_FILE_PARSING } from '../lib/feature_flags';
|
||||||
|
|
||||||
|
const FILE_PARSING_PROVIDER_API_KEY = process.env.FILE_PARSING_PROVIDER_API_KEY || process.env.OPENAI_API_KEY || '';
|
||||||
|
const FILE_PARSING_PROVIDER_BASE_URL = process.env.FILE_PARSING_PROVIDER_BASE_URL || undefined;
|
||||||
|
const FILE_PARSING_MODEL = process.env.FILE_PARSING_MODEL || 'gpt-4o';
|
||||||
|
|
||||||
|
const openai = createOpenAI({
|
||||||
|
apiKey: FILE_PARSING_PROVIDER_API_KEY,
|
||||||
|
baseURL: FILE_PARSING_PROVIDER_BASE_URL,
|
||||||
|
});
|
||||||
|
|
||||||
|
const UPLOADS_DIR = process.env.RAG_UPLOADS_DIR || '/uploads';
|
||||||
|
|
||||||
const splitter = new RecursiveCharacterTextSplitter({
|
const splitter = new RecursiveCharacterTextSplitter({
|
||||||
separators: ['\n\n', '\n', '. ', '.', ''],
|
separators: ['\n\n', '\n', '. ', '.', ''],
|
||||||
|
|
@ -27,7 +42,11 @@ const day = 24 * hour;
|
||||||
// Configure Google Gemini API
|
// Configure Google Gemini API
|
||||||
const genAI = new GoogleGenerativeAI(process.env.GOOGLE_API_KEY || '');
|
const genAI = new GoogleGenerativeAI(process.env.GOOGLE_API_KEY || '');
|
||||||
|
|
||||||
async function getFileContent(s3Key: string): Promise<Buffer> {
|
async function getLocalFileContent(path: string): Promise<Buffer> {
|
||||||
|
return await fs.readFile(path);
|
||||||
|
}
|
||||||
|
|
||||||
|
async function getS3FileContent(s3Key: string): Promise<Buffer> {
|
||||||
const command = new GetObjectCommand({
|
const command = new GetObjectCommand({
|
||||||
Bucket: process.env.RAG_UPLOADS_S3_BUCKET,
|
Bucket: process.env.RAG_UPLOADS_S3_BUCKET,
|
||||||
Key: s3Key,
|
Key: s3Key,
|
||||||
|
|
@ -54,33 +73,59 @@ async function retryable<T>(fn: () => Promise<T>, maxAttempts: number = 3): Prom
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
async function runProcessPipeline(_logger: PrefixLogger, job: WithId<z.infer<typeof DataSource>>, doc: WithId<z.infer<typeof DataSourceDoc>>): Promise<void> {
|
async function runProcessPipeline(_logger: PrefixLogger, job: WithId<z.infer<typeof DataSource>>, doc: WithId<z.infer<typeof DataSourceDoc>> & { data: { type: "file_local" | "file_s3" } }): Promise<void> {
|
||||||
const logger = _logger
|
const logger = _logger
|
||||||
.child(doc._id.toString())
|
.child(doc._id.toString())
|
||||||
.child(doc.name);
|
.child(doc.name);
|
||||||
|
|
||||||
// Get file content from S3
|
// Get file content
|
||||||
logger.log("Fetching file from S3");
|
let fileData: Buffer;
|
||||||
if (doc.data.type !== 'file') {
|
if (doc.data.type === 'file_local') {
|
||||||
throw new Error("Invalid data source type");
|
logger.log("Fetching file from local");
|
||||||
|
fileData = await getLocalFileContent(path.join(UPLOADS_DIR, doc._id.toString()));
|
||||||
|
} else {
|
||||||
|
logger.log("Fetching file from S3");
|
||||||
|
fileData = await getS3FileContent(doc.data.s3Key);
|
||||||
}
|
}
|
||||||
const fileData = await getFileContent(doc.data.s3Key);
|
|
||||||
|
|
||||||
// Use Gemini to extract text content
|
let markdown = "";
|
||||||
logger.log("Extracting content using Gemini");
|
const extractPrompt = "Extract and return only the text content from this document in markdown format. Exclude any formatting instructions or additional commentary.";
|
||||||
const model = genAI.getGenerativeModel({ model: "gemini-2.0-flash-001" });
|
if (!USE_GEMINI_FILE_PARSING) {
|
||||||
const prompt = "Extract and return only the text content from this document in markdown format. Exclude any formatting instructions or additional commentary.";
|
// Use OpenAI to extract text content
|
||||||
|
logger.log("Extracting content using OpenAI");
|
||||||
const result = await model.generateContent([
|
const { text } = await generateText({
|
||||||
{
|
model: openai(FILE_PARSING_MODEL),
|
||||||
inlineData: {
|
system: extractPrompt,
|
||||||
data: fileData.toString('base64'),
|
messages: [
|
||||||
mimeType: doc.data.mimeType
|
{
|
||||||
}
|
role: "user",
|
||||||
},
|
content: [
|
||||||
prompt
|
{
|
||||||
]);
|
type: "file",
|
||||||
const markdown = result.response.text();
|
data: fileData.toString('base64'),
|
||||||
|
mimeType: doc.data.mimeType,
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
});
|
||||||
|
markdown = text;
|
||||||
|
} else {
|
||||||
|
// Use Gemini to extract text content
|
||||||
|
logger.log("Extracting content using Gemini");
|
||||||
|
const model = genAI.getGenerativeModel({ model: "gemini-2.0-flash-001" });
|
||||||
|
|
||||||
|
const result = await model.generateContent([
|
||||||
|
{
|
||||||
|
inlineData: {
|
||||||
|
data: fileData.toString('base64'),
|
||||||
|
mimeType: doc.data.mimeType
|
||||||
|
}
|
||||||
|
},
|
||||||
|
extractPrompt,
|
||||||
|
]);
|
||||||
|
markdown = result.response.text();
|
||||||
|
}
|
||||||
|
|
||||||
// split into chunks
|
// split into chunks
|
||||||
logger.log("Splitting into chunks");
|
logger.log("Splitting into chunks");
|
||||||
|
|
@ -165,7 +210,6 @@ async function runDeletionPipeline(_logger: PrefixLogger, job: WithId<z.infer<ty
|
||||||
// fetch next job from mongodb
|
// fetch next job from mongodb
|
||||||
(async () => {
|
(async () => {
|
||||||
while (true) {
|
while (true) {
|
||||||
console.log("Polling for job...")
|
|
||||||
const now = Date.now();
|
const now = Date.now();
|
||||||
let job: WithId<z.infer<typeof DataSource>> | null = null;
|
let job: WithId<z.infer<typeof DataSource>> | null = null;
|
||||||
|
|
||||||
|
|
@ -183,7 +227,7 @@ async function runDeletionPipeline(_logger: PrefixLogger, job: WithId<z.infer<ty
|
||||||
job = await dataSourcesCollection.findOneAndUpdate(
|
job = await dataSourcesCollection.findOneAndUpdate(
|
||||||
{
|
{
|
||||||
$and: [
|
$and: [
|
||||||
{ 'data.type': { $eq: "files" } },
|
{ 'data.type': { $in: ["files_local", "files_s3"] } },
|
||||||
{
|
{
|
||||||
$or: [
|
$or: [
|
||||||
// if the job has never been attempted
|
// if the job has never been attempted
|
||||||
|
|
@ -234,7 +278,7 @@ async function runDeletionPipeline(_logger: PrefixLogger, job: WithId<z.infer<ty
|
||||||
let errors = false;
|
let errors = false;
|
||||||
|
|
||||||
try {
|
try {
|
||||||
if (job.data.type !== 'files') {
|
if (job.data.type !== 'files_local' && job.data.type !== 'files_s3') {
|
||||||
throw new Error("Invalid data source type");
|
throw new Error("Invalid data source type");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -276,8 +320,9 @@ async function runDeletionPipeline(_logger: PrefixLogger, job: WithId<z.infer<ty
|
||||||
|
|
||||||
// for each doc
|
// for each doc
|
||||||
for (const doc of pendingDocs) {
|
for (const doc of pendingDocs) {
|
||||||
|
const ldoc = doc as WithId<z.infer<typeof DataSourceDoc>> & { data: { type: "file_local" | "file_s3" } };
|
||||||
try {
|
try {
|
||||||
await runProcessPipeline(logger, job, doc);
|
await runProcessPipeline(logger, job, ldoc);
|
||||||
} catch (e: any) {
|
} catch (e: any) {
|
||||||
errors = true;
|
errors = true;
|
||||||
logger.log("Error processing doc:", e);
|
logger.log("Error processing doc:", e);
|
||||||
|
|
|
||||||
|
|
@ -112,7 +112,6 @@ async function runDeletionPipeline(_logger: PrefixLogger, job: WithId<z.infer<ty
|
||||||
// fetch next job from mongodb
|
// fetch next job from mongodb
|
||||||
(async () => {
|
(async () => {
|
||||||
while (true) {
|
while (true) {
|
||||||
console.log("Polling for job...")
|
|
||||||
const now = Date.now();
|
const now = Date.now();
|
||||||
let job: WithId<z.infer<typeof DataSource>> | null = null;
|
let job: WithId<z.infer<typeof DataSource>> | null = null;
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -143,7 +143,6 @@ async function runDeletionPipeline(_logger: PrefixLogger, job: WithId<z.infer<ty
|
||||||
// fetch next job from mongodb
|
// fetch next job from mongodb
|
||||||
(async () => {
|
(async () => {
|
||||||
while (true) {
|
while (true) {
|
||||||
console.log("Polling for job...")
|
|
||||||
const now = Date.now();
|
const now = Date.now();
|
||||||
let job: WithId<z.infer<typeof DataSource>> | null = null;
|
let job: WithId<z.infer<typeof DataSource>> | null = null;
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,14 +1,18 @@
|
||||||
import '../lib/loadenv';
|
import '../lib/loadenv';
|
||||||
import { qdrantClient } from '../lib/qdrant';
|
import { qdrantClient } from '../lib/qdrant';
|
||||||
|
|
||||||
(async () => {
|
const EMBEDDING_VECTOR_SIZE = Number(process.env.EMBEDDING_VECTOR_SIZE) || 1536;
|
||||||
await qdrantClient.createCollection('embeddings', {
|
|
||||||
vectors: {
|
|
||||||
size: 1536,
|
|
||||||
distance: 'Dot',
|
|
||||||
},
|
|
||||||
});
|
|
||||||
|
|
||||||
const { collections } = await qdrantClient.getCollections();
|
(async () => {
|
||||||
console.log(collections);
|
try {
|
||||||
|
const result = await qdrantClient.createCollection('embeddings', {
|
||||||
|
vectors: {
|
||||||
|
size: EMBEDDING_VECTOR_SIZE,
|
||||||
|
distance: 'Dot',
|
||||||
|
},
|
||||||
|
});
|
||||||
|
console.log(`Create qdrant collection 'embeddings' completed with result: ${result}`);
|
||||||
|
} catch (error) {
|
||||||
|
console.error(`Unable to create qdrant collection 'embeddings': ${error}`);
|
||||||
|
}
|
||||||
})();
|
})();
|
||||||
843
apps/rowboat/package-lock.json
generated
843
apps/rowboat/package-lock.json
generated
File diff suppressed because it is too large
Load diff
|
|
@ -15,7 +15,7 @@
|
||||||
"ragTextWorker": "tsx app/scripts/rag_text_worker.ts"
|
"ragTextWorker": "tsx app/scripts/rag_text_worker.ts"
|
||||||
},
|
},
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"@ai-sdk/openai": "^0.0.37",
|
"@ai-sdk/openai": "^1.3.21",
|
||||||
"@auth0/nextjs-auth0": "^3.5.0",
|
"@auth0/nextjs-auth0": "^3.5.0",
|
||||||
"@aws-sdk/client-s3": "^3.743.0",
|
"@aws-sdk/client-s3": "^3.743.0",
|
||||||
"@aws-sdk/s3-request-presigner": "^3.743.0",
|
"@aws-sdk/s3-request-presigner": "^3.743.0",
|
||||||
|
|
@ -31,7 +31,7 @@
|
||||||
"@modelcontextprotocol/sdk": "^1.7.0",
|
"@modelcontextprotocol/sdk": "^1.7.0",
|
||||||
"@primer/react": "^36.27.0",
|
"@primer/react": "^36.27.0",
|
||||||
"@qdrant/js-client-rest": "^1.13.0",
|
"@qdrant/js-client-rest": "^1.13.0",
|
||||||
"ai": "^3.3.28",
|
"ai": "^4.3.13",
|
||||||
"cheerio": "^1.0.0",
|
"cheerio": "^1.0.0",
|
||||||
"class-variance-authority": "^0.7.1",
|
"class-variance-authority": "^0.7.1",
|
||||||
"clsx": "^2.1.1",
|
"clsx": "^2.1.1",
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,13 @@
|
||||||
version: '3.8'
|
version: '3.8'
|
||||||
|
|
||||||
|
volumes:
|
||||||
|
uploads:
|
||||||
|
driver: local
|
||||||
|
driver_opts:
|
||||||
|
type: none
|
||||||
|
o: bind
|
||||||
|
device: ./data/uploads
|
||||||
|
|
||||||
services:
|
services:
|
||||||
rowboat:
|
rowboat:
|
||||||
build:
|
build:
|
||||||
|
|
@ -21,10 +29,11 @@ services:
|
||||||
- COPILOT_API_URL=http://copilot:3002
|
- COPILOT_API_URL=http://copilot:3002
|
||||||
- COPILOT_API_KEY=${COPILOT_API_KEY}
|
- COPILOT_API_KEY=${COPILOT_API_KEY}
|
||||||
- REDIS_URL=redis://redis:6379
|
- REDIS_URL=redis://redis:6379
|
||||||
- USE_RAG=${USE_RAG}
|
- USE_RAG=true
|
||||||
- QDRANT_URL=${QDRANT_URL}
|
- QDRANT_URL=http://qdrant:6333
|
||||||
- QDRANT_API_KEY=${QDRANT_API_KEY}
|
- QDRANT_API_KEY=${QDRANT_API_KEY}
|
||||||
- USE_RAG_UPLOADS=${USE_RAG_UPLOADS}
|
- USE_RAG_UPLOADS=true
|
||||||
|
- USE_RAG_S3_UPLOADS=${USE_RAG_S3_UPLOADS}
|
||||||
- AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID}
|
- AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID}
|
||||||
- AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY}
|
- AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY}
|
||||||
- RAG_UPLOADS_S3_BUCKET=${RAG_UPLOADS_S3_BUCKET}
|
- RAG_UPLOADS_S3_BUCKET=${RAG_UPLOADS_S3_BUCKET}
|
||||||
|
|
@ -38,7 +47,10 @@ services:
|
||||||
- MAX_PROJECTS_PER_USER=${MAX_PROJECTS_PER_USER}
|
- MAX_PROJECTS_PER_USER=${MAX_PROJECTS_PER_USER}
|
||||||
- VOICE_API_URL=${VOICE_API_URL}
|
- VOICE_API_URL=${VOICE_API_URL}
|
||||||
- PROVIDER_DEFAULT_MODEL=${PROVIDER_DEFAULT_MODEL}
|
- PROVIDER_DEFAULT_MODEL=${PROVIDER_DEFAULT_MODEL}
|
||||||
|
- RAG_UPLOADS_DIR=/app/uploads
|
||||||
restart: unless-stopped
|
restart: unless-stopped
|
||||||
|
volumes:
|
||||||
|
- uploads:/app/uploads
|
||||||
|
|
||||||
rowboat_agents:
|
rowboat_agents:
|
||||||
build:
|
build:
|
||||||
|
|
@ -51,7 +63,7 @@ services:
|
||||||
- API_KEY=${AGENTS_API_KEY}
|
- API_KEY=${AGENTS_API_KEY}
|
||||||
- REDIS_URL=redis://redis:6379
|
- REDIS_URL=redis://redis:6379
|
||||||
- MONGODB_URI=mongodb://mongo:27017/rowboat
|
- MONGODB_URI=mongodb://mongo:27017/rowboat
|
||||||
- QDRANT_URL=${QDRANT_URL}
|
- QDRANT_URL=http://qdrant:6333
|
||||||
- QDRANT_API_KEY=${QDRANT_API_KEY}
|
- QDRANT_API_KEY=${QDRANT_API_KEY}
|
||||||
- PROVIDER_BASE_URL=${PROVIDER_BASE_URL}
|
- PROVIDER_BASE_URL=${PROVIDER_BASE_URL}
|
||||||
- PROVIDER_API_KEY=${PROVIDER_API_KEY}
|
- PROVIDER_API_KEY=${PROVIDER_API_KEY}
|
||||||
|
|
@ -99,21 +111,21 @@ services:
|
||||||
build:
|
build:
|
||||||
context: ./apps/rowboat
|
context: ./apps/rowboat
|
||||||
dockerfile: scripts.Dockerfile
|
dockerfile: scripts.Dockerfile
|
||||||
command: ["sh", "-c", "npm run setupQdrant && echo 'index created successfully'"]
|
command: ["sh", "-c", "npm run setupQdrant"]
|
||||||
profiles: [ "setup_qdrant" ]
|
|
||||||
environment:
|
environment:
|
||||||
- QDRANT_URL=${QDRANT_URL}
|
- QDRANT_URL=http://qdrant:6333
|
||||||
- QDRANT_API_KEY=${QDRANT_API_KEY}
|
- QDRANT_API_KEY=${QDRANT_API_KEY}
|
||||||
|
- EMBEDDING_VECTOR_SIZE=${EMBEDDING_VECTOR_SIZE}
|
||||||
restart: no
|
restart: no
|
||||||
|
|
||||||
delete_qdrant:
|
delete_qdrant:
|
||||||
build:
|
build:
|
||||||
context: ./apps/rowboat
|
context: ./apps/rowboat
|
||||||
dockerfile: scripts.Dockerfile
|
dockerfile: scripts.Dockerfile
|
||||||
command: ["sh", "-c", "npm run deleteQdrant && echo 'index deleted successfully'"]
|
command: ["sh", "-c", "npm run deleteQdrant"]
|
||||||
profiles: [ "delete_qdrant" ]
|
profiles: [ "delete_qdrant" ]
|
||||||
environment:
|
environment:
|
||||||
- QDRANT_URL=${QDRANT_URL}
|
- QDRANT_URL=http://qdrant:6333
|
||||||
- QDRANT_API_KEY=${QDRANT_API_KEY}
|
- QDRANT_API_KEY=${QDRANT_API_KEY}
|
||||||
restart: no
|
restart: no
|
||||||
|
|
||||||
|
|
@ -125,15 +137,23 @@ services:
|
||||||
profiles: [ "rag_files_worker" ]
|
profiles: [ "rag_files_worker" ]
|
||||||
environment:
|
environment:
|
||||||
- OPENAI_API_KEY=${OPENAI_API_KEY}
|
- OPENAI_API_KEY=${OPENAI_API_KEY}
|
||||||
|
- EMBEDDING_PROVIDER_BASE_URL=${EMBEDDING_PROVIDER_BASE_URL}
|
||||||
|
- EMBEDDING_PROVIDER_API_KEY=${EMBEDDING_PROVIDER_API_KEY}
|
||||||
|
- EMBEDDING_MODEL=${EMBEDDING_MODEL}
|
||||||
- MONGODB_CONNECTION_STRING=mongodb://mongo:27017/rowboat
|
- MONGODB_CONNECTION_STRING=mongodb://mongo:27017/rowboat
|
||||||
|
- REDIS_URL=redis://redis:6379
|
||||||
- GOOGLE_API_KEY=${GOOGLE_API_KEY}
|
- GOOGLE_API_KEY=${GOOGLE_API_KEY}
|
||||||
- AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID}
|
- AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID}
|
||||||
- AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY}
|
- AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY}
|
||||||
- RAG_UPLOADS_S3_BUCKET=${RAG_UPLOADS_S3_BUCKET}
|
- RAG_UPLOADS_S3_BUCKET=${RAG_UPLOADS_S3_BUCKET}
|
||||||
- RAG_UPLOADS_S3_REGION=${RAG_UPLOADS_S3_REGION}
|
- RAG_UPLOADS_S3_REGION=${RAG_UPLOADS_S3_REGION}
|
||||||
- QDRANT_URL=${QDRANT_URL}
|
- QDRANT_URL=http://qdrant:6333
|
||||||
- QDRANT_API_KEY=${QDRANT_API_KEY}
|
- QDRANT_API_KEY=${QDRANT_API_KEY}
|
||||||
|
- RAG_UPLOADS_DIR=/app/uploads
|
||||||
|
- USE_GEMINI_FILE_PARSING=${USE_GEMINI_FILE_PARSING}
|
||||||
restart: unless-stopped
|
restart: unless-stopped
|
||||||
|
volumes:
|
||||||
|
- uploads:/app/uploads
|
||||||
|
|
||||||
rag_urls_worker:
|
rag_urls_worker:
|
||||||
build:
|
build:
|
||||||
|
|
@ -143,9 +163,13 @@ services:
|
||||||
profiles: [ "rag_urls_worker" ]
|
profiles: [ "rag_urls_worker" ]
|
||||||
environment:
|
environment:
|
||||||
- OPENAI_API_KEY=${OPENAI_API_KEY}
|
- OPENAI_API_KEY=${OPENAI_API_KEY}
|
||||||
|
- EMBEDDING_PROVIDER_BASE_URL=${EMBEDDING_PROVIDER_BASE_URL}
|
||||||
|
- EMBEDDING_PROVIDER_API_KEY=${EMBEDDING_PROVIDER_API_KEY}
|
||||||
|
- EMBEDDING_MODEL=${EMBEDDING_MODEL}
|
||||||
- MONGODB_CONNECTION_STRING=mongodb://mongo:27017/rowboat
|
- MONGODB_CONNECTION_STRING=mongodb://mongo:27017/rowboat
|
||||||
|
- REDIS_URL=redis://redis:6379
|
||||||
- FIRECRAWL_API_KEY=${FIRECRAWL_API_KEY}
|
- FIRECRAWL_API_KEY=${FIRECRAWL_API_KEY}
|
||||||
- QDRANT_URL=${QDRANT_URL}
|
- QDRANT_URL=http://qdrant:6333
|
||||||
- QDRANT_API_KEY=${QDRANT_API_KEY}
|
- QDRANT_API_KEY=${QDRANT_API_KEY}
|
||||||
restart: unless-stopped
|
restart: unless-stopped
|
||||||
|
|
||||||
|
|
@ -157,8 +181,12 @@ services:
|
||||||
profiles: [ "rag_text_worker" ]
|
profiles: [ "rag_text_worker" ]
|
||||||
environment:
|
environment:
|
||||||
- OPENAI_API_KEY=${OPENAI_API_KEY}
|
- OPENAI_API_KEY=${OPENAI_API_KEY}
|
||||||
|
- EMBEDDING_PROVIDER_BASE_URL=${EMBEDDING_PROVIDER_BASE_URL}
|
||||||
|
- EMBEDDING_PROVIDER_API_KEY=${EMBEDDING_PROVIDER_API_KEY}
|
||||||
|
- EMBEDDING_MODEL=${EMBEDDING_MODEL}
|
||||||
- MONGODB_CONNECTION_STRING=mongodb://mongo:27017/rowboat
|
- MONGODB_CONNECTION_STRING=mongodb://mongo:27017/rowboat
|
||||||
- QDRANT_URL=${QDRANT_URL}
|
- REDIS_URL=redis://redis:6379
|
||||||
|
- QDRANT_URL=http://qdrant:6333
|
||||||
- QDRANT_API_KEY=${QDRANT_API_KEY}
|
- QDRANT_API_KEY=${QDRANT_API_KEY}
|
||||||
restart: unless-stopped
|
restart: unless-stopped
|
||||||
|
|
||||||
|
|
@ -209,3 +237,13 @@ services:
|
||||||
# - ROWBOAT_API_HOST=http://rowboat:3000
|
# - ROWBOAT_API_HOST=http://rowboat:3000
|
||||||
# - MONGODB_URI=mongodb://mongo:27017/rowboat
|
# - MONGODB_URI=mongodb://mongo:27017/rowboat
|
||||||
# restart: unless-stopped
|
# restart: unless-stopped
|
||||||
|
|
||||||
|
qdrant:
|
||||||
|
image: qdrant/qdrant
|
||||||
|
ports:
|
||||||
|
- "6333:6333"
|
||||||
|
environment:
|
||||||
|
- QDRANT__STORAGE__STORAGE_PATH=/data/qdrant
|
||||||
|
restart: unless-stopped
|
||||||
|
volumes:
|
||||||
|
- ./data/qdrant:/data/qdrant
|
||||||
|
|
|
||||||
29
start.sh
Executable file
29
start.sh
Executable file
|
|
@ -0,0 +1,29 @@
|
||||||
|
#!/bin/bash
|
||||||
|
|
||||||
|
# ensure data dirs exist
|
||||||
|
mkdir -p data/uploads
|
||||||
|
mkdir -p data/qdrant
|
||||||
|
mkdir -p data/mongo
|
||||||
|
|
||||||
|
# Start with the base command and profile flags
|
||||||
|
CMD="docker-compose"
|
||||||
|
|
||||||
|
# enable rag text and files workers
|
||||||
|
CMD="$CMD --profile rag_text_worker"
|
||||||
|
CMD="$CMD --profile rag_files_worker"
|
||||||
|
|
||||||
|
# enable rag urls worker
|
||||||
|
if [ "$USE_RAG_SCRAPING" = "true" ]; then
|
||||||
|
CMD="$CMD --profile rag_urls_worker"
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Add more mappings as needed
|
||||||
|
# if [ "$SOME_OTHER_ENV" = "true" ]; then
|
||||||
|
# CMD="$CMD --profile some_other_profile"
|
||||||
|
# fi
|
||||||
|
|
||||||
|
# Add the up and build flags at the end
|
||||||
|
CMD="$CMD up --build"
|
||||||
|
|
||||||
|
echo "Running: $CMD"
|
||||||
|
exec $CMD
|
||||||
Loading…
Add table
Add a link
Reference in a new issue