mirror of
https://github.com/rowboatlabs/rowboat.git
synced 2026-06-06 19:35:44 +02:00
commit
7400050aef
4 changed files with 39 additions and 37 deletions
|
|
@ -1,12 +1,12 @@
|
|||
# Using RAG in Rowboat
|
||||
|
||||
Rowboat provides multiple ways to enhance your agents with Retrieval-Augmented Generation (RAG). This guide will help you set up and use each RAG feature.
|
||||
Rowboat provides multiple ways to enhance your agents' context with Retrieval-Augmented Generation (RAG). This guide will help you set up and use each RAG feature.
|
||||
|
||||
## Quick Start
|
||||
|
||||
Text RAG and local file uploads are enabled by default - no configuration needed! Just start using them right away.
|
||||
|
||||
## Available RAG Features
|
||||
## RAG Features
|
||||
|
||||
### 1. Text RAG
|
||||
✅ Enabled by default:
|
||||
|
|
@ -21,8 +21,28 @@ Text RAG and local file uploads are enabled by default - no configuration needed
|
|||
- Files are stored locally
|
||||
- No configuration required
|
||||
- Files are parsed using OpenAI by default
|
||||
- For larger files, we recommend using Gemini models (see section 2.1 below).
|
||||
|
||||
### 3. S3 File Uploads
|
||||
#### 2.1 Using Gemini for File Parsing
|
||||
To use Google's Gemini model for parsing uploaded PDFs, set the following variables:
|
||||
|
||||
```bash
|
||||
# Enable Gemini for file parsing
|
||||
export USE_GEMINI_FILE_PARSING=true
|
||||
export GOOGLE_API_KEY=your_google_api_key
|
||||
```
|
||||
|
||||
### 3. URL Scraping
|
||||
Rowboat uses Firecrawl for URL scraping. To enable URL scraping, set the following variables:
|
||||
|
||||
```bash
|
||||
export USE_RAG_SCRAPING=true
|
||||
export FIRECRAWL_API_KEY=your_firecrawl_api_key
|
||||
```
|
||||
|
||||
## Advanced RAG Features
|
||||
|
||||
### 1. File Uploads Backed by S3
|
||||
To enable S3 file uploads, set the following variables:
|
||||
|
||||
```bash
|
||||
|
|
@ -36,20 +56,8 @@ export RAG_UPLOADS_S3_BUCKET=your_bucket_name
|
|||
export RAG_UPLOADS_S3_REGION=your_region
|
||||
```
|
||||
|
||||
### 4. URL Scraping
|
||||
To enable URL scraping, set the following variables:
|
||||
### 2. Changing Default Parsing Model
|
||||
|
||||
```bash
|
||||
# Enable URL scraping
|
||||
export USE_RAG_SCRAPING=true
|
||||
|
||||
# Firecrawl API key for web scraping
|
||||
export FIRECRAWL_API_KEY=your_firecrawl_api_key
|
||||
```
|
||||
|
||||
## File Parsing Options
|
||||
|
||||
### Default Parsing (OpenAI)
|
||||
By default, uploaded PDF files are parsed using `gpt-4o`. You can customize this by setting the following:
|
||||
|
||||
```bash
|
||||
|
|
@ -64,16 +72,7 @@ export FILE_PARSING_PROVIDER_BASE_URL=your-provider-base-url
|
|||
export FILE_PARSING_PROVIDER_API_KEY=your-provider-api-key
|
||||
```
|
||||
|
||||
### Using Gemini for File Parsing
|
||||
To use Google's Gemini model for parsing uploaded PDFs, set the following variables:
|
||||
|
||||
```bash
|
||||
# Enable Gemini for file parsing
|
||||
export USE_GEMINI_FILE_PARSING=true
|
||||
export GOOGLE_API_KEY=your_google_api_key
|
||||
```
|
||||
|
||||
## Embedding Model options
|
||||
### 3. Embedding Model Options
|
||||
|
||||
By default, Rowboat uses OpenAI's `text-embedding-3-small` model for generating embeddings. You can customize this by setting the following:
|
||||
|
||||
|
|
@ -101,4 +100,4 @@ export EMBEDDING_PROVIDER_BASE_URL=your-provider-base-url
|
|||
export EMBEDDING_PROVIDER_API_KEY=your-provider-api-key
|
||||
```
|
||||
|
||||
If you don't specify the provider settings, Rowboat will use OpenAI as the default provider.
|
||||
If you don't specify the provider settings, Rowboat will use OpenAI as the default provider.
|
||||
|
|
|
|||
|
|
@ -305,7 +305,7 @@ function McpServersSection({
|
|||
/>
|
||||
<Input
|
||||
label="SSE URL"
|
||||
placeholder="https://localhost:8000/sse"
|
||||
placeholder="http://host.docker.internal:8000/sse"
|
||||
value={newServer.url}
|
||||
onChange={(e) => {
|
||||
setNewServer({ ...newServer, url: e.target.value });
|
||||
|
|
|
|||
|
|
@ -210,7 +210,7 @@ function McpServersSection({ projectId }: { projectId: string }) {
|
|||
<div className="space-y-2">
|
||||
<label className="text-sm font-medium">SSE URL</label>
|
||||
<Input
|
||||
placeholder="https://localhost:8000/sse"
|
||||
placeholder="http://host.docker.internal:8000/sse"
|
||||
value={newServer.url}
|
||||
onChange={(e) => {
|
||||
setNewServer({ ...newServer, url: e.target.value });
|
||||
|
|
|
|||
|
|
@ -153,8 +153,8 @@ async def run_turn_streamed(
|
|||
# Initialize agents and get external tools
|
||||
|
||||
new_agents = get_agents(
|
||||
agent_configs=agent_configs,
|
||||
tool_configs=tool_configs,
|
||||
agent_configs=agent_configs,
|
||||
tool_configs=tool_configs,
|
||||
complete_request=complete_request
|
||||
)
|
||||
new_agents = add_child_transfer_related_instructions_to_agents(new_agents)
|
||||
|
|
@ -196,12 +196,15 @@ async def run_turn_streamed(
|
|||
if event.type == "raw_response_event":
|
||||
# Handle token usage counting
|
||||
if hasattr(event.data, 'type') and event.data.type == "response.completed" and hasattr(event.data.response, 'usage'):
|
||||
tokens_used["total"] += event.data.response.usage.total_tokens
|
||||
tokens_used["prompt"] += event.data.response.usage.input_tokens
|
||||
tokens_used["completion"] += event.data.response.usage.output_tokens
|
||||
print('-'*50)
|
||||
print(f"Found usage information. Updated cumulative tokens: {tokens_used}")
|
||||
print('-'*50)
|
||||
try:
|
||||
tokens_used["total"] += event.data.response.usage.total_tokens
|
||||
tokens_used["prompt"] += event.data.response.usage.input_tokens
|
||||
tokens_used["completion"] += event.data.response.usage.output_tokens
|
||||
print('-'*50)
|
||||
print(f"Found usage information. Updated cumulative tokens: {tokens_used}")
|
||||
print('-'*50)
|
||||
except Exception as e:
|
||||
print(f"Warning: Tokens used is likely not available for your chosen model: {e}")
|
||||
|
||||
web_search_messages = handle_web_search_event(event, current_agent)
|
||||
for message in web_search_messages:
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue