DDD refactor: data-sources (#205)

This commit is contained in:
Ramnique Singh 2025-08-17 08:06:17 +05:30 committed by GitHub
parent 912c8be156
commit 4b33b20e76
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
68 changed files with 2589 additions and 1588 deletions

View file

@ -5,13 +5,12 @@ import {
} from "../lib/types/copilot_types";
import {
Workflow} from "../lib/types/workflow_types";
import { DataSource } from "../lib/types/datasource_types";
import { DataSource } from "@/src/entities/models/data-source";
import { z } from 'zod';
import { projectAuthCheck } from "./project.actions";
import { redisClient } from "../lib/redis";
import { authorizeUserAction, logUsage } from "./billing.actions";
import { USE_BILLING } from "../lib/feature_flags";
import { WithStringId } from "../lib/types/types";
import { getEditAgentInstructionsResponse } from "../lib/copilot/copilot";
import { container } from "@/di/container";
import { IUsageQuotaPolicy } from "@/src/application/policies/usage-quota.policy.interface";
@ -24,7 +23,7 @@ export async function getCopilotResponseStream(
messages: z.infer<typeof CopilotMessage>[],
current_workflow_config: z.infer<typeof Workflow>,
context: z.infer<typeof CopilotChatContext> | null,
dataSources?: WithStringId<z.infer<typeof DataSource>>[]
dataSources?: z.infer<typeof DataSource>[]
): Promise<{
streamId: string;
} | { billingError: string }> {

View file

@ -1,41 +1,53 @@
'use server';
import { ObjectId, WithId } from "mongodb";
import { dataSourcesCollection, dataSourceDocsCollection } from "../lib/mongodb";
import { z } from 'zod';
import { GetObjectCommand, PutObjectCommand } from "@aws-sdk/client-s3";
import { getSignedUrl } from "@aws-sdk/s3-request-presigner";
import { projectAuthCheck } from "./project.actions";
import { WithStringId } from "../lib/types/types";
import { DataSourceDoc } from "../lib/types/datasource_types";
import { DataSource } from "../lib/types/datasource_types";
import { uploadsS3Client } from "../lib/uploads_s3_client";
import { DataSourceDoc } from "@/src/entities/models/data-source-doc";
import { DataSource } from "@/src/entities/models/data-source";
import { container } from "@/di/container";
import { IFetchDataSourceController } from "@/src/interface-adapters/controllers/data-sources/fetch-data-source.controller";
import { authCheck } from "./auth.actions";
import { IListDataSourcesController } from "@/src/interface-adapters/controllers/data-sources/list-data-sources.controller";
import { ICreateDataSourceController } from "@/src/interface-adapters/controllers/data-sources/create-data-source.controller";
import { IRecrawlWebDataSourceController } from "@/src/interface-adapters/controllers/data-sources/recrawl-web-data-source.controller";
import { IDeleteDataSourceController } from "@/src/interface-adapters/controllers/data-sources/delete-data-source.controller";
import { IToggleDataSourceController } from "@/src/interface-adapters/controllers/data-sources/toggle-data-source.controller";
import { IAddDocsToDataSourceController } from "@/src/interface-adapters/controllers/data-sources/add-docs-to-data-source.controller";
import { IListDocsInDataSourceController } from "@/src/interface-adapters/controllers/data-sources/list-docs-in-data-source.controller";
import { IDeleteDocFromDataSourceController } from "@/src/interface-adapters/controllers/data-sources/delete-doc-from-data-source.controller";
import { IGetDownloadUrlForFileController } from "@/src/interface-adapters/controllers/data-sources/get-download-url-for-file.controller";
import { IGetUploadUrlsForFilesController } from "@/src/interface-adapters/controllers/data-sources/get-upload-urls-for-files.controller";
import { IUpdateDataSourceController } from "@/src/interface-adapters/controllers/data-sources/update-data-source.controller";
export async function getDataSource(projectId: string, sourceId: string): Promise<WithStringId<z.infer<typeof DataSource>>> {
await projectAuthCheck(projectId);
const source = await dataSourcesCollection.findOne({
_id: new ObjectId(sourceId),
projectId,
const fetchDataSourceController = container.resolve<IFetchDataSourceController>("fetchDataSourceController");
const listDataSourcesController = container.resolve<IListDataSourcesController>("listDataSourcesController");
const createDataSourceController = container.resolve<ICreateDataSourceController>("createDataSourceController");
const recrawlWebDataSourceController = container.resolve<IRecrawlWebDataSourceController>("recrawlWebDataSourceController");
const deleteDataSourceController = container.resolve<IDeleteDataSourceController>("deleteDataSourceController");
const toggleDataSourceController = container.resolve<IToggleDataSourceController>("toggleDataSourceController");
const addDocsToDataSourceController = container.resolve<IAddDocsToDataSourceController>("addDocsToDataSourceController");
const listDocsInDataSourceController = container.resolve<IListDocsInDataSourceController>("listDocsInDataSourceController");
const deleteDocFromDataSourceController = container.resolve<IDeleteDocFromDataSourceController>("deleteDocFromDataSourceController");
const getDownloadUrlForFileController = container.resolve<IGetDownloadUrlForFileController>("getDownloadUrlForFileController");
const getUploadUrlsForFilesController = container.resolve<IGetUploadUrlsForFilesController>("getUploadUrlsForFilesController");
const updateDataSourceController = container.resolve<IUpdateDataSourceController>("updateDataSourceController");
export async function getDataSource(sourceId: string): Promise<z.infer<typeof DataSource>> {
const user = await authCheck();
return await fetchDataSourceController.execute({
caller: 'user',
userId: user._id,
sourceId,
});
if (!source) {
throw new Error('Invalid data source');
}
const { _id, ...rest } = source;
return {
...rest,
_id: _id.toString(),
};
}
export async function listDataSources(projectId: string): Promise<WithStringId<z.infer<typeof DataSource>>[]> {
await projectAuthCheck(projectId);
const sources = await dataSourcesCollection.find({
projectId: projectId,
status: { $ne: 'deleted' },
}).toArray();
return sources.map((s) => ({
...s,
_id: s._id.toString(),
}));
export async function listDataSources(projectId: string): Promise<z.infer<typeof DataSource>[]> {
const user = await authCheck();
return await listDataSourcesController.execute({
caller: 'user',
userId: user._id,
projectId,
});
}
export async function createDataSource({
@ -50,272 +62,124 @@ export async function createDataSource({
description?: string,
data: z.infer<typeof DataSource>['data'],
status?: 'pending' | 'ready',
}): Promise<WithStringId<z.infer<typeof DataSource>>> {
await projectAuthCheck(projectId);
const source: z.infer<typeof DataSource> = {
projectId: projectId,
active: true,
name: name,
description,
createdAt: (new Date()).toISOString(),
attempts: 0,
version: 1,
data,
};
// Only set status for non-file data sources
if (data.type !== 'files_local' && data.type !== 'files_s3') {
source.status = status;
}
await dataSourcesCollection.insertOne(source);
const { _id, ...rest } = source as WithId<z.infer<typeof DataSource>>;
return {
...rest,
_id: _id.toString(),
};
}
export async function recrawlWebDataSource(projectId: string, sourceId: string) {
await projectAuthCheck(projectId);
const source = await getDataSource(projectId, sourceId);
if (source.data.type !== 'urls') {
throw new Error('Invalid data source type');
}
// mark all files as queued
await dataSourceDocsCollection.updateMany({
sourceId: sourceId,
}, {
$set: {
status: 'pending',
lastUpdatedAt: (new Date()).toISOString(),
attempts: 0,
}
});
// mark data source as pending
await dataSourcesCollection.updateOne({
_id: new ObjectId(sourceId),
}, {
$set: {
status: 'pending',
billingError: undefined,
lastUpdatedAt: (new Date()).toISOString(),
attempts: 0,
},
$inc: {
version: 1,
}): Promise<z.infer<typeof DataSource>> {
const user = await authCheck();
return await createDataSourceController.execute({
caller: 'user',
userId: user._id,
data: {
projectId,
name,
description: description || '',
status,
data,
},
});
}
export async function deleteDataSource(projectId: string, sourceId: string) {
await projectAuthCheck(projectId);
await getDataSource(projectId, sourceId);
export async function recrawlWebDataSource(sourceId: string) {
const user = await authCheck();
// mark data source as deleted
await dataSourcesCollection.updateOne({
_id: new ObjectId(sourceId),
}, {
$set: {
status: 'deleted',
billingError: undefined,
lastUpdatedAt: (new Date()).toISOString(),
attempts: 0,
},
$inc: {
version: 1,
},
return await recrawlWebDataSourceController.execute({
caller: 'user',
userId: user._id,
sourceId,
});
}
export async function toggleDataSource(projectId: string, sourceId: string, active: boolean) {
await projectAuthCheck(projectId);
await getDataSource(projectId, sourceId);
export async function deleteDataSource(sourceId: string) {
const user = await authCheck();
await dataSourcesCollection.updateOne({
"_id": new ObjectId(sourceId),
"projectId": projectId,
}, {
$set: {
"active": active,
}
return await deleteDataSourceController.execute({
caller: 'user',
userId: user._id,
sourceId,
});
}
export async function toggleDataSource(sourceId: string, active: boolean) {
const user = await authCheck();
return await toggleDataSourceController.execute({
caller: 'user',
userId: user._id,
sourceId,
active,
});
}
export async function addDocsToDataSource({
projectId,
sourceId,
docData,
}: {
projectId: string,
sourceId: string,
docData: {
_id?: string,
name: string,
data: z.infer<typeof DataSourceDoc>['data']
}[]
}): Promise<void> {
await projectAuthCheck(projectId);
const source = await getDataSource(projectId, sourceId);
const user = await authCheck();
await dataSourceDocsCollection.insertMany(docData.map(doc => {
const record: z.infer<typeof DataSourceDoc> = {
sourceId,
name: doc.name,
status: 'pending',
createdAt: new Date().toISOString(),
data: doc.data,
version: 1,
};
if (!doc._id) {
return record;
}
const recordWithId = record as WithId<z.infer<typeof DataSourceDoc>>;
recordWithId._id = new ObjectId(doc._id);
return recordWithId;
}));
// Only set status to pending when files are added
if (docData.length > 0 && (source.data.type === 'files_local' || source.data.type === 'files_s3')) {
await dataSourcesCollection.updateOne(
{ _id: new ObjectId(sourceId) },
{
$set: {
status: 'pending',
billingError: undefined,
attempts: 0,
lastUpdatedAt: new Date().toISOString(),
},
$inc: {
version: 1,
},
}
);
}
return await addDocsToDataSourceController.execute({
caller: 'user',
userId: user._id,
sourceId,
docs: docData,
});
}
export async function listDocsInDataSource({
projectId,
sourceId,
page = 1,
limit = 10,
}: {
projectId: string,
sourceId: string,
page?: number,
limit?: number,
}): Promise<{
files: WithStringId<z.infer<typeof DataSourceDoc>>[],
files: z.infer<typeof DataSourceDoc>[],
total: number
}> {
await projectAuthCheck(projectId);
await getDataSource(projectId, sourceId);
const user = await authCheck();
// Get total count
const total = await dataSourceDocsCollection.countDocuments({
const docs = await listDocsInDataSourceController.execute({
caller: 'user',
userId: user._id,
sourceId,
status: { $ne: 'deleted' },
});
// Fetch docs with pagination
const docs = await dataSourceDocsCollection.find({
sourceId,
status: { $ne: 'deleted' },
})
.skip((page - 1) * limit)
.limit(limit)
.toArray();
return {
files: docs.map(f => ({ ...f, _id: f._id.toString() })),
total
files: docs,
total: docs.length,
};
}
export async function deleteDocsFromDataSource({
projectId,
sourceId,
docIds,
export async function deleteDocFromDataSource({
docId,
}: {
projectId: string,
sourceId: string,
docIds: string[],
docId: string,
}): Promise<void> {
await projectAuthCheck(projectId);
await getDataSource(projectId, sourceId);
// mark for deletion
await dataSourceDocsCollection.updateMany(
{
sourceId,
_id: {
$in: docIds.map(id => new ObjectId(id))
}
},
{
$set: {
status: "deleted",
lastUpdatedAt: new Date().toISOString(),
},
$inc: {
version: 1,
},
}
);
// mark data source as pending
await dataSourcesCollection.updateOne({
_id: new ObjectId(sourceId),
}, {
$set: {
status: 'pending',
billingError: undefined,
attempts: 0,
lastUpdatedAt: new Date().toISOString(),
},
$inc: {
version: 1,
},
const user = await authCheck();
return await deleteDocFromDataSourceController.execute({
caller: 'user',
userId: user._id,
docId,
});
}
export async function getDownloadUrlForFile(
projectId: string,
sourceId: string,
fileId: string
): Promise<string> {
await projectAuthCheck(projectId);
await getDataSource(projectId, sourceId);
const file = await dataSourceDocsCollection.findOne({
sourceId,
_id: new ObjectId(fileId),
'data.type': { $in: ['file_local', 'file_s3'] },
const user = await authCheck();
return await getDownloadUrlForFileController.execute({
caller: 'user',
userId: user._id,
fileId,
});
if (!file) {
throw new Error('File not found');
}
// if local, return path
if (file.data.type === 'file_local') {
return `/api/uploads/${fileId}`;
} else if (file.data.type === 'file_s3') {
const command = new GetObjectCommand({
Bucket: process.env.RAG_UPLOADS_S3_BUCKET,
Key: file.data.s3Key,
});
return await getSignedUrl(uploadsS3Client, command, { expiresIn: 60 }); // URL valid for 1 minute
}
throw new Error('Invalid file type');
}
export async function getUploadUrlsForFilesDataSource(
projectId: string,
sourceId: string,
files: { name: string; type: string; size: number }[]
): Promise<{
@ -323,70 +187,31 @@ export async function getUploadUrlsForFilesDataSource(
uploadUrl: string,
path: string,
}[]> {
await projectAuthCheck(projectId);
const source = await getDataSource(projectId, sourceId);
if (source.data.type !== 'files_local' && source.data.type !== 'files_s3') {
throw new Error('Invalid files data source');
}
const user = await authCheck();
const urls: {
fileId: string,
uploadUrl: string,
path: string,
}[] = [];
for (const file of files) {
const fileId = new ObjectId().toString();
if (source.data.type === 'files_s3') {
// Generate presigned URL
const projectIdPrefix = projectId.slice(0, 2); // 2 characters from the start of the projectId
const path = `datasources/files/${projectIdPrefix}/${projectId}/${sourceId}/${fileId}/${file.name}`;
const command = new PutObjectCommand({
Bucket: process.env.RAG_UPLOADS_S3_BUCKET,
Key: path,
ContentType: file.type,
});
const uploadUrl = await getSignedUrl(uploadsS3Client, command, { expiresIn: 10 * 60 }); // valid for 10 minutes
urls.push({
fileId,
uploadUrl,
path,
});
} else if (source.data.type === 'files_local') {
// Generate local upload URL
urls.push({
fileId,
uploadUrl: '/api/uploads/' + fileId,
path: '/api/uploads/' + fileId,
});
}
}
return urls;
return await getUploadUrlsForFilesController.execute({
caller: 'user',
userId: user._id,
sourceId,
files,
});
}
export async function updateDataSource({
projectId,
sourceId,
description,
}: {
projectId: string,
sourceId: string,
description: string,
}) {
await projectAuthCheck(projectId);
await getDataSource(projectId, sourceId);
const user = await authCheck();
await dataSourcesCollection.updateOne({
_id: new ObjectId(sourceId),
}, {
$set: {
return await updateDataSourceController.execute({
caller: 'user',
userId: user._id,
sourceId,
data: {
description,
lastUpdatedAt: (new Date()).toISOString(),
},
$inc: {
version: 1,
},
});
}

View file

@ -1,6 +1,6 @@
'use server';
import { redirect } from "next/navigation";
import { db, dataSourcesCollection, projectsCollection } from "../lib/mongodb";
import { db, projectsCollection } from "../lib/mongodb";
import { z } from 'zod';
import crypto from 'crypto';
import { revalidatePath } from "next/cache";
@ -12,13 +12,16 @@ import { Project } from "../lib/types/project_types";
import { USE_AUTH } from "../lib/feature_flags";
import { authorizeUserAction } from "./billing.actions";
import { Workflow } from "../lib/types/workflow_types";
import { container } from "@/di/container";
import { IProjectActionAuthorizationPolicy } from "@/src/application/policies/project-action-authorization.policy";
import { ICreateApiKeyController } from "@/src/interface-adapters/controllers/api-keys/create-api-key.controller";
import { IListApiKeysController } from "@/src/interface-adapters/controllers/api-keys/list-api-keys.controller";
import { IDeleteApiKeyController } from "@/src/interface-adapters/controllers/api-keys/delete-api-key.controller";
import { IApiKeysRepository } from "@/src/application/repositories/api-keys.repository.interface";
import { IProjectMembersRepository } from "@/src/application/repositories/project-members.repository.interface";
import { IDataSourcesRepository } from "@/src/application/repositories/data-sources.repository.interface";
import { IDataSourceDocsRepository } from "@/src/application/repositories/data-source-docs.repository.interface";
import { container } from "@/di/container";
import { qdrantClient } from "../lib/qdrant";
const projectActionAuthorizationPolicy = container.resolve<IProjectActionAuthorizationPolicy>('projectActionAuthorizationPolicy');
const createApiKeyController = container.resolve<ICreateApiKeyController>('createApiKeyController');
@ -26,6 +29,8 @@ const listApiKeysController = container.resolve<IListApiKeysController>('listApi
const deleteApiKeyController = container.resolve<IDeleteApiKeyController>('deleteApiKeyController');
const apiKeysRepository = container.resolve<IApiKeysRepository>('apiKeysRepository');
const projectMembersRepository = container.resolve<IProjectMembersRepository>('projectMembersRepository');
const dataSourcesRepository = container.resolve<IDataSourcesRepository>('dataSourcesRepository');
const dataSourceDocsRepository = container.resolve<IDataSourceDocsRepository>('dataSourceDocsRepository');
export async function listTemplates() {
const templatesArray = Object.entries(templates)
@ -234,22 +239,15 @@ export async function deleteProject(projectId: string) {
// delete api keys
await apiKeysRepository.deleteAll(projectId);
// delete embeddings
const sources = await dataSourcesCollection.find({
projectId,
}, {
projection: {
_id: true,
}
}).toArray();
const ids = sources.map(s => s._id);
// delete data sources
await dataSourcesCollection.deleteMany({
_id: {
$in: ids,
}
// delete data sources data
await dataSourceDocsRepository.deleteByProjectId(projectId);
await dataSourcesRepository.deleteByProjectId(projectId);
await qdrantClient.delete("embeddings", {
filter: {
must: [
{ key: "projectId", match: { value: projectId } },
],
},
});
// delete project members

View file

@ -2,11 +2,13 @@ import { NextRequest, NextResponse } from 'next/server';
import path from 'path';
import fs from 'fs/promises';
import fsSync from 'fs';
import { dataSourceDocsCollection } from '@/app/lib/mongodb';
import { ObjectId } from 'mongodb';
import { container } from '@/di/container';
import { IDataSourceDocsRepository } from '@/src/application/repositories/data-source-docs.repository.interface';
const UPLOADS_DIR = process.env.RAG_UPLOADS_DIR || '/uploads';
const dataSourceDocsRepository = container.resolve<IDataSourceDocsRepository>('dataSourceDocsRepository');
// PUT endpoint to handle file uploads
export async function PUT(request: NextRequest, props: { params: Promise<{ fileId: string }> }) {
const params = await props.params;
@ -39,10 +41,8 @@ export async function GET(request: NextRequest, props: { params: Promise<{ fileI
return NextResponse.json({ error: 'Missing file ID' }, { status: 400 });
}
const filePath = path.join(UPLOADS_DIR, fileId);
// get mimetype from database
const doc = await dataSourceDocsCollection.findOne({ _id: new ObjectId(fileId) });
const doc = await dataSourceDocsRepository.fetch(fileId);
if (!doc) {
return NextResponse.json({ error: 'File not found' }, { status: 404 });
}
@ -54,6 +54,9 @@ export async function GET(request: NextRequest, props: { params: Promise<{ fileI
const fileName = doc.data.name;
try {
// strip uploads dir from path
const filePath = path.join(UPLOADS_DIR, doc.data.path.split('/api/uploads/')[1]);
// Check if file exists
await fs.access(filePath);
// Create a readable stream

View file

@ -2,7 +2,6 @@
import { tool, Tool } from "@openai/agents";
import { createOpenAI } from "@ai-sdk/openai";
import { embed, generateText } from "ai";
import { ObjectId } from "mongodb";
import { z } from "zod";
import { composio } from "./composio/composio";
import { SignJWT } from "jose";
@ -11,12 +10,16 @@ import crypto from "crypto";
// Internal dependencies
import { embeddingModel } from '../lib/embedding';
import { getMcpClient } from "./mcp";
import { dataSourceDocsCollection, dataSourcesCollection, projectsCollection } from "./mongodb";
import { projectsCollection } from "./mongodb";
import { qdrantClient } from '../lib/qdrant';
import { EmbeddingRecord } from "./types/datasource_types";
import { WorkflowAgent, WorkflowTool } from "./types/workflow_types";
import { PrefixLogger } from "./utils";
import { UsageTracker } from "./billing";
import { DataSource } from "@/src/entities/models/data-source";
import { IDataSourcesRepository } from "@/src/application/repositories/data-sources.repository.interface";
import { IDataSourceDocsRepository } from "@/src/application/repositories/data-source-docs.repository.interface";
import { container } from "@/di/container";
// Provider configuration
const PROVIDER_API_KEY = process.env.PROVIDER_API_KEY || process.env.OPENAI_API_KEY || '';
@ -92,6 +95,9 @@ export async function invokeRagTool(
logger.log(`returnType: ${returnType}`);
logger.log(`k: ${k}`);
const dataSourcesRepository = container.resolve<IDataSourcesRepository>('dataSourcesRepository');
const dataSourceDocsRepository = container.resolve<IDataSourceDocsRepository>('dataSourceDocsRepository');
// Create embedding for question
const { embedding, usage } = await embed({
model: embeddingModel,
@ -109,14 +115,19 @@ export async function invokeRagTool(
});
// Fetch all data sources for this project
const sources = await dataSourcesCollection.find({
projectId: projectId,
active: true,
}).toArray();
const sources: z.infer<typeof DataSource>[] = [];
let cursor = undefined;
do {
const resp = await dataSourcesRepository.list(projectId, {
active: true,
}, cursor);
sources.push(...resp.items);
cursor = resp.nextCursor;
} while(cursor);
const validSourceIds = sources
.filter(s => sourceIds.includes(s._id.toString())) // id should be in sourceIds
.filter(s => s.active) // should be active
.map(s => s._id.toString());
.filter(s => sourceIds.includes(s.id)) // id should be in sourceIds
.map(s => s.id);
logger.log(`valid source ids: ${validSourceIds.join(', ')}`);
// if no sources found, return empty response
@ -157,14 +168,12 @@ export async function invokeRagTool(
}
// otherwise, fetch the doc contents from mongodb
const docs = await dataSourceDocsCollection.find({
_id: { $in: results.map(r => new ObjectId(r.docId)) },
}).toArray();
const docs = await dataSourceDocsRepository.bulkFetch(results.map(r => r.docId));
logger.log(`fetched docs: ${docs.length}`);
// map the results to the docs
results = results.map(r => {
const doc = docs.find(d => d._id.toString() === r.docId);
const doc = docs.find(d => d.id === r.docId);
return {
...r,
content: doc?.content || '',

View file

@ -1,10 +1,9 @@
import z from "zod";
import { createOpenAI } from "@ai-sdk/openai";
import { generateObject, streamText, tool } from "ai";
import { WithStringId } from "../types/types";
import { Workflow, WorkflowTool } from "../types/workflow_types";
import { CopilotChatContext, CopilotMessage } from "../types/copilot_types";
import { DataSource } from "../types/datasource_types";
import { DataSource } from "@/src/entities/models/data-source";
import { PrefixLogger } from "../utils";
import zodToJsonSchema from "zod-to-json-schema";
import { COPILOT_INSTRUCTIONS_EDIT_AGENT } from "./copilot_edit_agent";
@ -102,11 +101,11 @@ ${JSON.stringify(workflow)}
`;
}
function getDataSourcesPrompt(dataSources: WithStringId<z.infer<typeof DataSource>>[]): string {
function getDataSourcesPrompt(dataSources: z.infer<typeof DataSource>[]): string {
let prompt = '';
if (dataSources.length > 0) {
const simplifiedDataSources = dataSources.map(ds => ({
id: ds._id,
id: ds.id,
name: ds.name,
description: ds.description,
data: ds.data,
@ -274,7 +273,7 @@ export async function* streamMultiAgentResponse(
context: z.infer<typeof CopilotChatContext> | null,
messages: z.infer<typeof CopilotMessage>[],
workflow: z.infer<typeof Workflow>,
dataSources: WithStringId<z.infer<typeof DataSource>>[]
dataSources: z.infer<typeof DataSource>[]
): AsyncIterable<z.infer<typeof ZEvent>> {
const logger = new PrefixLogger('copilot /stream');
logger.log('context', context);

View file

@ -2,8 +2,6 @@ import { MongoClient } from "mongodb";
import { User } from "./types/types";
import { Workflow } from "./types/workflow_types";
import { Project } from "./types/project_types";
import { DataSourceDoc } from "./types/datasource_types";
import { DataSource } from "./types/datasource_types";
import { TwilioConfig, TwilioInboundCall } from "./types/voice_types";
import { z } from 'zod';
import { apiV1 } from "rowboat-shared";
@ -11,8 +9,6 @@ import { apiV1 } from "rowboat-shared";
const client = new MongoClient(process.env["MONGODB_CONNECTION_STRING"] || "mongodb://localhost:27017");
export const db = client.db("rowboat");
export const dataSourcesCollection = db.collection<z.infer<typeof DataSource>>("sources");
export const dataSourceDocsCollection = db.collection<z.infer<typeof DataSourceDoc>>("source_docs");
export const projectsCollection = db.collection<z.infer<typeof Project>>("projects");
export const agentWorkflowsCollection = db.collection<z.infer<typeof Workflow>>("agent_workflows");
export const chatsCollection = db.collection<z.infer<typeof apiV1.Chat>>("chats");

View file

@ -1,7 +1,7 @@
import { z } from "zod";
import { Workflow } from "./workflow_types";
import { Message } from "./types";
import { DataSource } from "./datasource_types";
import { DataSource } from "@/src/entities/models/data-source";
export const CopilotUserMessage = z.object({
role: z.literal('user'),
@ -52,9 +52,7 @@ export const CopilotAPIRequest = z.object({
messages: z.array(CopilotMessage),
workflow: Workflow,
context: CopilotChatContext.nullable(),
dataSources: z.array(DataSource.extend({
_id: z.string(),
})).optional(),
dataSources: z.array(DataSource).optional(),
});
export const CopilotAPIResponse = z.union([
z.object({

View file

@ -1,90 +1,5 @@
import { z } from "zod";
export const DataSource = z.object({
name: z.string(),
description: z.string().optional(),
projectId: z.string(),
active: z.boolean().default(true),
status: z.union([
z.literal('pending'),
z.literal('ready'),
z.literal('error'),
z.literal('deleted'),
]).optional(),
version: z.number(),
error: z.string().optional(),
billingError: z.string().optional(),
createdAt: z.string().datetime(),
lastUpdatedAt: z.string().datetime().optional(),
attempts: z.number(),
lastAttemptAt: z.string().datetime().optional(),
pendingRefresh: z.boolean().default(false).optional(),
data: z.discriminatedUnion('type', [
z.object({
type: z.literal('urls'),
}),
z.object({
type: z.literal('files_local'),
}),
z.object({
type: z.literal('files_s3'),
}),
z.object({
type: z.literal('text'),
})
]),
});
export const DataSourceDoc = z.object({
sourceId: z.string(),
name: z.string(),
version: z.number(),
status: z.union([
z.literal('pending'),
z.literal('ready'),
z.literal('error'),
z.literal('deleted'),
]),
content: z.string().optional(),
createdAt: z.string().datetime(),
lastUpdatedAt: z.string().datetime().optional(),
error: z.string().optional(),
data: z.discriminatedUnion('type', [
z.object({
type: z.literal('url'),
url: z.string(),
}),
z.object({
type: z.literal('file_local'),
name: z.string(),
size: z.number(),
mimeType: z.string(),
}),
z.object({
type: z.literal('file_s3'),
name: z.string(),
size: z.number(),
mimeType: z.string(),
s3Key: z.string(),
}),
z.object({
type: z.literal('text'),
content: z.string(),
}),
]),
});
export const EmbeddingDoc = z.object({
content: z.string(),
sourceId: z.string(),
embeddings: z.array(z.number()),
metadata: z.object({
sourceURL: z.string(),
title: z.string(),
score: z.number().optional(),
}),
});
export const EmbeddingRecord = z.object({
id: z.string().uuid(),
vector: z.array(z.number()),

View file

@ -5,7 +5,7 @@ import { useRef, useState, createContext, useContext, useCallback, forwardRef, u
import { CopilotChatContext } from "../../../lib/types/copilot_types";
import { CopilotMessage } from "../../../lib/types/copilot_types";
import { Workflow } from "@/app/lib/types/workflow_types";
import { DataSource } from "@/app/lib/types/datasource_types";
import { DataSource } from "@/src/entities/models/data-source";
import { z } from "zod";
import { Action as WorkflowDispatch } from "@/app/projects/[projectId]/workflow/workflow_editor";
import { Panel } from "@/components/common/panel-common";
@ -14,7 +14,6 @@ import { Messages } from "./components/messages";
import { CopyIcon, CheckIcon, PlusIcon, XIcon, InfoIcon, Sparkles } from "lucide-react";
import { useCopilot } from "./use-copilot";
import { BillingUpgradeModal } from "@/components/common/billing-upgrade-modal";
import { WithStringId } from "@/app/lib/types/types";
const CopilotContext = createContext<{
workflow: z.infer<typeof Workflow> | null;
@ -33,7 +32,7 @@ interface AppProps {
onCopyJson?: (data: { messages: any[] }) => void;
onMessagesChange?: (messages: z.infer<typeof CopilotMessage>[]) => void;
isInitialState?: boolean;
dataSources?: WithStringId<z.infer<typeof DataSource>>[];
dataSources?: z.infer<typeof DataSource>[];
}
const App = forwardRef<{ handleCopyChat: () => void; handleUserMessage: (message: string) => void }, AppProps>(function App({
@ -277,7 +276,7 @@ export const Copilot = forwardRef<{ handleUserMessage: (message: string) => void
chatContext?: z.infer<typeof CopilotChatContext>;
dispatch: (action: WorkflowDispatch) => void;
isInitialState?: boolean;
dataSources?: WithStringId<z.infer<typeof DataSource>>[];
dataSources?: z.infer<typeof DataSource>[];
}>(({
projectId,
workflow,

View file

@ -2,7 +2,7 @@ import { useCallback, useRef, useState } from "react";
import { getCopilotResponseStream } from "@/app/actions/copilot.actions";
import { CopilotMessage } from "@/app/lib/types/copilot_types";
import { Workflow } from "@/app/lib/types/workflow_types";
import { DataSource } from "@/app/lib/types/datasource_types";
import { DataSource } from "@/src/entities/models/data-source";
import { z } from "zod";
import { WithStringId } from "@/app/lib/types/types";
@ -10,7 +10,7 @@ interface UseCopilotParams {
projectId: string;
workflow: z.infer<typeof Workflow>;
context: any;
dataSources?: WithStringId<z.infer<typeof DataSource>>[];
dataSources?: z.infer<typeof DataSource>[];
}
interface UseCopilotResult {

View file

@ -1,7 +1,6 @@
"use client";
import { WithStringId } from "../../../lib/types/types";
import { WorkflowPrompt, WorkflowAgent, Workflow, WorkflowTool } from "../../../lib/types/workflow_types";
import { DataSource } from "../../../lib/types/datasource_types";
import { DataSource } from "@/src/entities/models/data-source";
import { z } from "zod";
import { PlusIcon, Sparkles, X as XIcon, ChevronDown, ChevronRight, Trash2, Maximize2, Minimize2, StarIcon, DatabaseIcon, UserIcon, Settings, Info } from "lucide-react";
import { useState, useEffect, useRef } from "react";
@ -59,7 +58,7 @@ export function AgentConfig({
agents: z.infer<typeof WorkflowAgent>[],
tools: z.infer<typeof WorkflowTool>[],
prompts: z.infer<typeof WorkflowPrompt>[],
dataSources: WithStringId<z.infer<typeof DataSource>>[],
dataSources: z.infer<typeof DataSource>[],
handleUpdate: (agent: z.infer<typeof WorkflowAgent>) => void,
handleClose: () => void,
useRag: boolean,
@ -726,12 +725,12 @@ export function AgentConfig({
startContent={<PlusIcon className="w-4 h-4 text-gray-500" />}
>
{dataSources
.filter((ds) => !(agent.ragDataSources || []).includes(ds._id))
.filter((ds) => !(agent.ragDataSources || []).includes(ds.id))
.length > 0 ? (
dataSources
.filter((ds) => !(agent.ragDataSources || []).includes(ds._id))
.filter((ds) => !(agent.ragDataSources || []).includes(ds.id))
.map((ds) => (
<SelectItem key={ds._id}>
<SelectItem key={ds.id}>
{ds.name}
</SelectItem>
))
@ -775,7 +774,7 @@ export function AgentConfig({
{agent.ragDataSources !== undefined && agent.ragDataSources.length > 0 && (
<div className="flex flex-col gap-2 mt-2">
{(agent.ragDataSources || []).map((source) => {
const ds = dataSources.find((ds) => ds._id === source);
const ds = dataSources.find((ds) => ds.id === source);
return (
<div
key={source}

View file

@ -1,6 +1,5 @@
"use client";
import { WithStringId } from "../../../lib/types/types";
import { DataSource } from "../../../lib/types/datasource_types";
import { DataSource } from "@/src/entities/models/data-source";
import { z } from "zod";
import { XIcon, FileIcon, GlobeIcon, AlertTriangle, CheckCircle, Circle, ExternalLinkIcon, Type, PlusIcon, Edit3Icon, DownloadIcon, Trash2 } from "lucide-react";
import { useState, useEffect, useCallback } from "react";
@ -8,9 +7,9 @@ import { Panel } from "@/components/common/panel-common";
import { Button } from "@/components/ui/button";
import { DataSourceIcon } from "@/app/lib/components/datasource-icon";
import { Tooltip } from "@heroui/react";
import { getDataSource, listDocsInDataSource, deleteDocsFromDataSource, getDownloadUrlForFile, addDocsToDataSource, getUploadUrlsForFilesDataSource } from "@/app/actions/data-source.actions";
import { getDataSource, listDocsInDataSource, deleteDocFromDataSource, getDownloadUrlForFile, addDocsToDataSource, getUploadUrlsForFilesDataSource } from "@/app/actions/data-source.actions";
import { InputField } from "@/app/lib/components/input-field";
import { DataSourceDoc } from "../../../lib/types/datasource_types";
import { DataSourceDoc } from "@/src/entities/models/data-source-doc";
import { RelativeTime } from "@primer/react";
import { Pagination, Spinner, Button as HeroButton, Textarea as HeroTextarea } from "@heroui/react";
import { useDropzone } from "react-dropzone";
@ -24,12 +23,12 @@ export function DataSourceConfig({
handleClose: () => void,
onDataSourceUpdate?: () => void
}) {
const [dataSource, setDataSource] = useState<WithStringId<z.infer<typeof DataSource>> | null>(null);
const [dataSource, setDataSource] = useState<z.infer<typeof DataSource> | null>(null);
const [loading, setLoading] = useState(true);
const [error, setError] = useState<string | null>(null);
// Files-related state
const [files, setFiles] = useState<WithStringId<z.infer<typeof DataSourceDoc>>[]>([]);
const [files, setFiles] = useState<z.infer<typeof DataSourceDoc>[]>([]);
const [filesLoading, setFilesLoading] = useState(false);
const [filesPage, setFilesPage] = useState(1);
const [filesTotal, setFilesTotal] = useState(0);
@ -44,22 +43,22 @@ export function DataSourceConfig({
const currentProjectId = pathParts[2]; // /projects/[projectId]/workflow
setProjectId(currentProjectId);
const ds = await getDataSource(currentProjectId, dataSourceId);
const ds = await getDataSource(dataSourceId);
setDataSource(ds);
// Load files if it's a files data source
if (ds.data.type === 'files_local' || ds.data.type === 'files_s3') {
await loadFiles(currentProjectId, dataSourceId, 1);
await loadFiles(dataSourceId, 1);
}
// Load URLs if it's a URLs data source
if (ds.data.type === 'urls') {
await loadUrls(currentProjectId, dataSourceId, 1);
await loadUrls(dataSourceId, 1);
}
// Load text content if it's a text data source
if (ds.data.type === 'text') {
await loadTextContent(currentProjectId, dataSourceId);
await loadTextContent(dataSourceId);
}
} catch (err) {
console.error('Failed to load data source:', err);
@ -91,7 +90,7 @@ export function DataSourceConfig({
}
try {
const updatedSource = await getDataSource(projectId, dataSourceId);
const updatedSource = await getDataSource(dataSourceId);
if (!ignore) {
setDataSource(updatedSource);
onDataSourceUpdate?.(); // Notify parent of status change
@ -124,20 +123,19 @@ export function DataSourceConfig({
// Helper function to update data source and notify parent
const updateDataSourceAndNotify = useCallback(async () => {
try {
const updatedSource = await getDataSource(projectId, dataSourceId);
const updatedSource = await getDataSource(dataSourceId);
setDataSource(updatedSource);
onDataSourceUpdate?.();
} catch (err) {
console.error('Failed to reload data source:', err);
}
}, [projectId, dataSourceId, onDataSourceUpdate]);
}, [dataSourceId, onDataSourceUpdate]);
// Load files function
const loadFiles = async (projectId: string, sourceId: string, page: number) => {
const loadFiles = async (sourceId: string, page: number) => {
try {
setFilesLoading(true);
const { files, total } = await listDocsInDataSource({
projectId,
sourceId,
page,
limit: 10,
@ -153,7 +151,7 @@ export function DataSourceConfig({
};
// URLs-related state
const [urls, setUrls] = useState<WithStringId<z.infer<typeof DataSourceDoc>>[]>([]);
const [urls, setUrls] = useState<z.infer<typeof DataSourceDoc>[]>([]);
const [urlsLoading, setUrlsLoading] = useState(false);
const [urlsPage, setUrlsPage] = useState(1);
const [urlsTotal, setUrlsTotal] = useState(0);
@ -171,11 +169,10 @@ export function DataSourceConfig({
const [uploadingFiles, setUploadingFiles] = useState(false);
// Load URLs function
const loadUrls = async (projectId: string, sourceId: string, page: number) => {
const loadUrls = async (sourceId: string, page: number) => {
try {
setUrlsLoading(true);
const { files, total } = await listDocsInDataSource({
projectId,
sourceId,
page,
limit: 10,
@ -191,11 +188,10 @@ export function DataSourceConfig({
};
// Load text content function
const loadTextContent = async (projectId: string, sourceId: string) => {
const loadTextContent = async (sourceId: string) => {
try {
setTextLoading(true);
const { files } = await listDocsInDataSource({
projectId,
sourceId,
limit: 1,
});
@ -218,13 +214,11 @@ export function DataSourceConfig({
if (!window.confirm('Are you sure you want to delete this file?')) return;
try {
await deleteDocsFromDataSource({
projectId,
sourceId: dataSourceId,
docIds: [fileId],
await deleteDocFromDataSource({
docId: fileId,
});
// Reload files
await loadFiles(projectId, dataSourceId, filesPage);
await loadFiles(dataSourceId, filesPage);
// Reload data source to get updated status
await updateDataSourceAndNotify();
@ -236,7 +230,7 @@ export function DataSourceConfig({
// Handle file download
const handleDownloadFile = async (fileId: string) => {
try {
const url = await getDownloadUrlForFile(projectId, dataSourceId, fileId);
const url = await getDownloadUrlForFile(fileId);
window.open(url, '_blank');
} catch (err) {
console.error('Failed to download file:', err);
@ -245,7 +239,7 @@ export function DataSourceConfig({
// Handle page change
const handlePageChange = (page: number) => {
loadFiles(projectId, dataSourceId, page);
loadFiles(dataSourceId, page);
};
// Handle URL deletion
@ -253,13 +247,11 @@ export function DataSourceConfig({
if (!window.confirm('Are you sure you want to delete this URL?')) return;
try {
await deleteDocsFromDataSource({
projectId,
sourceId: dataSourceId,
docIds: [urlId],
await deleteDocFromDataSource({
docId: urlId,
});
// Reload URLs
await loadUrls(projectId, dataSourceId, urlsPage);
await loadUrls(dataSourceId, urlsPage);
// Reload data source to get updated status
await updateDataSourceAndNotify();
@ -270,7 +262,7 @@ export function DataSourceConfig({
// Handle URL page change
const handleUrlPageChange = (page: number) => {
loadUrls(projectId, dataSourceId, page);
loadUrls(dataSourceId, page);
};
// Handle text content update
@ -279,22 +271,18 @@ export function DataSourceConfig({
try {
// Delete existing text doc if it exists
const { files } = await listDocsInDataSource({
projectId,
sourceId: dataSourceId,
limit: 1,
});
if (files.length > 0) {
await deleteDocsFromDataSource({
projectId,
sourceId: dataSourceId,
docIds: [files[0]._id],
await deleteDocFromDataSource({
docId: files[0].id,
});
}
// Add new text doc
await addDocsToDataSource({
projectId,
sourceId: dataSourceId,
docData: [{
name: 'text',
@ -327,7 +315,6 @@ export function DataSourceConfig({
const first100Urls = urlsArray.slice(0, 100);
await addDocsToDataSource({
projectId,
sourceId: dataSourceId,
docData: first100Urls.map(url => ({
name: url,
@ -339,7 +326,7 @@ export function DataSourceConfig({
});
setShowAddUrlForm(false);
await loadUrls(projectId, dataSourceId, urlsPage);
await loadUrls(dataSourceId, urlsPage);
// Reload data source to get updated status
await updateDataSourceAndNotify();
@ -356,7 +343,7 @@ export function DataSourceConfig({
setUploadingFiles(true);
try {
const urls = await getUploadUrlsForFilesDataSource(projectId, dataSourceId, acceptedFiles.map(file => ({
const urls = await getUploadUrlsForFilesDataSource(dataSourceId, acceptedFiles.map(file => ({
name: file.name,
type: file.type,
size: file.size,
@ -403,17 +390,17 @@ export function DataSourceConfig({
name: file.name,
size: file.size,
mimeType: file.type,
path: urls[index].path,
},
}));
}
await addDocsToDataSource({
projectId,
sourceId: dataSourceId,
docData,
});
await loadFiles(projectId, dataSourceId, filesPage);
await loadFiles(dataSourceId, filesPage);
// Reload data source to get updated status
await updateDataSourceAndNotify();
@ -422,7 +409,7 @@ export function DataSourceConfig({
} finally {
setUploadingFiles(false);
}
}, [projectId, dataSourceId, dataSource, filesPage, updateDataSourceAndNotify]);
}, [dataSourceId, dataSource, filesPage, updateDataSourceAndNotify]);
const { getRootProps, getInputProps, isDragActive } = useDropzone({
onDrop: onFileDrop,
@ -676,7 +663,7 @@ export function DataSourceConfig({
<div className="space-y-2">
{files.map((file) => (
<div
key={file._id}
key={file.id}
className="flex items-center justify-between p-3 bg-gray-50 dark:bg-gray-800/50 rounded-lg border"
>
<div className="flex items-center gap-3 flex-1 min-w-0">
@ -696,7 +683,7 @@ export function DataSourceConfig({
{(file.data.type === 'file_local' || file.data.type === 'file_s3') && (
<Tooltip content="Download file">
<button
onClick={() => handleDownloadFile(file._id)}
onClick={() => handleDownloadFile(file.id)}
className="p-1 hover:bg-gray-200 dark:hover:bg-gray-700 rounded transition-colors"
>
<DownloadIcon className="w-4 h-4 text-gray-500" />
@ -705,7 +692,7 @@ export function DataSourceConfig({
)}
<Tooltip content="Delete file">
<button
onClick={() => handleDeleteFile(file._id)}
onClick={() => handleDeleteFile(file.id)}
className="p-1 hover:bg-red-100 dark:hover:bg-red-900/20 rounded transition-colors"
>
<Trash2 className="w-4 h-4 text-red-500" />
@ -805,7 +792,7 @@ export function DataSourceConfig({
<div className="space-y-2">
{urls.map((url) => (
<div
key={url._id}
key={url.id}
className="flex items-center justify-between p-3 bg-gray-50 dark:bg-gray-800/50 rounded-lg border"
>
<div className="flex items-center gap-3 flex-1 min-w-0">
@ -834,7 +821,7 @@ export function DataSourceConfig({
<div className="flex items-center gap-2">
<Tooltip content="Delete URL">
<button
onClick={() => handleDeleteUrl(url._id)}
onClick={() => handleDeleteUrl(url.id)}
className="p-1 hover:bg-red-100 dark:hover:bg-red-900/20 rounded transition-colors"
>
<Trash2 className="w-4 h-4 text-red-500" />

View file

@ -1,6 +1,5 @@
'use client';
import { WithStringId } from "../../../../lib/types/types";
import { DataSource } from "../../../../lib/types/datasource_types";
import { DataSource } from "@/src/entities/models/data-source";
import { ToggleSource } from "../components/toggle-source";
import { Spinner } from "@heroui/react";
import { SourceStatus } from "../components/source-status";
@ -28,14 +27,14 @@ export function SourcePage({
sourceId: string;
projectId: string;
}) {
const [source, setSource] = useState<WithStringId<z.infer<typeof DataSource>> | null>(null);
const [source, setSource] = useState<z.infer<typeof DataSource> | null>(null);
const [isLoading, setIsLoading] = useState(true);
const [showSaveSuccess, setShowSaveSuccess] = useState(false);
const [billingError, setBillingError] = useState<string | null>(null);
async function handleReload() {
setIsLoading(true);
const updatedSource = await getDataSource(projectId, sourceId);
const updatedSource = await getDataSource(sourceId);
setSource(updatedSource);
if ("billingError" in updatedSource && updatedSource.billingError) {
setBillingError(updatedSource.billingError);
@ -48,7 +47,7 @@ export function SourcePage({
let ignore = false;
async function fetchSource() {
setIsLoading(true);
const source = await getDataSource(projectId, sourceId);
const source = await getDataSource(sourceId);
if (!ignore) {
setSource(source);
if ("billingError" in source && source.billingError) {
@ -61,7 +60,7 @@ export function SourcePage({
return () => {
ignore = true;
};
}, [projectId, sourceId]);
}, [sourceId]);
// refresh source data every 15 seconds
// under certain conditions
@ -80,7 +79,7 @@ export function SourcePage({
if (timeout) {
clearTimeout(timeout);
}
const updatedSource = await getDataSource(projectId, sourceId);
const updatedSource = await getDataSource(sourceId);
if (!ignore) {
setSource(updatedSource);
if ("billingError" in updatedSource && updatedSource.billingError) {
@ -130,7 +129,6 @@ export function SourcePage({
<SectionLabel>Toggle</SectionLabel>
<SectionContent>
<ToggleSource
projectId={projectId}
sourceId={sourceId}
active={source.active}
/>
@ -153,7 +151,6 @@ export function SourcePage({
action={async (formData: FormData) => {
const description = formData.get('description') as string;
await updateDataSource({
projectId,
sourceId,
description,
});
@ -217,7 +214,7 @@ export function SourcePage({
<SectionRow>
<SectionLabel>Status</SectionLabel>
<SectionContent>
<SourceStatus status={source.status} projectId={projectId} />
<SourceStatus status={source.status} />
{("billingError" in source) && source.billingError && <div className="flex flex-col gap-1 items-start mt-4">
<div className="text-sm">{source.billingError}</div>
@ -240,14 +237,12 @@ export function SourcePage({
{/* Source-specific sections */}
{source.data.type === 'urls' &&
<ScrapeSource
projectId={projectId}
dataSource={source}
handleReload={handleReload}
/>
}
{(source.data.type === 'files_local' || source.data.type === 'files_s3') &&
<FilesSource
projectId={projectId}
dataSource={source}
handleReload={handleReload}
type={source.data.type}
@ -255,7 +250,6 @@ export function SourcePage({
}
{source.data.type === 'text' &&
<TextSource
projectId={projectId}
dataSource={source}
handleReload={handleReload}
/>
@ -272,7 +266,7 @@ export function SourcePage({
This action cannot be undone.
</p>
</div>
<DeleteSource projectId={projectId} sourceId={sourceId} />
<DeleteSource sourceId={sourceId} />
</div>
</Section>
</div>

View file

@ -4,15 +4,13 @@ import { deleteDataSource } from "../../../../actions/data-source.actions";
import { FormStatusButton } from "../../../../lib/components/form-status-button";
export function DeleteSource({
projectId,
sourceId,
}: {
projectId: string;
sourceId: string;
}) {
function handleDelete() {
if (window.confirm('Are you sure you want to delete this data source?')) {
deleteDataSource(projectId, sourceId);
deleteDataSource(sourceId);
}
}

View file

@ -1,24 +1,20 @@
"use client";
import { WithStringId } from "../../../../lib/types/types";
import { DataSourceDoc, DataSource } from "../../../../lib/types/datasource_types";
import { DataSourceDoc } from "@/src/entities/models/data-source-doc";
import { DataSource } from "@/src/entities/models/data-source";
import { z } from "zod";
import { useCallback, useEffect, useState } from "react";
import { useDropzone } from "react-dropzone";
import { deleteDocsFromDataSource, getUploadUrlsForFilesDataSource, addDocsToDataSource, getDownloadUrlForFile, listDocsInDataSource } from "../../../../actions/data-source.actions";
import { deleteDocFromDataSource, getUploadUrlsForFilesDataSource, addDocsToDataSource, getDownloadUrlForFile, listDocsInDataSource } from "../../../../actions/data-source.actions";
import { RelativeTime } from "@primer/react";
import { Pagination, Spinner } from "@heroui/react";
import { DownloadIcon } from "lucide-react";
import { Section } from "./section";
function FileListItem({
projectId,
sourceId,
file,
onDelete,
}: {
projectId: string,
sourceId: string,
file: WithStringId<z.infer<typeof DataSourceDoc>>,
file: z.infer<typeof DataSourceDoc>,
onDelete: (fileId: string) => Promise<void>;
}) {
const [isDeleting, setIsDeleting] = useState(false);
@ -27,7 +23,7 @@ function FileListItem({
const handleDeleteClick = async () => {
setIsDeleting(true);
try {
await onDelete(file._id);
await onDelete(file.id);
} finally {
setIsDeleting(false);
}
@ -36,7 +32,7 @@ function FileListItem({
const handleDownloadClick = async () => {
setIsDownloading(true);
try {
const url = await getDownloadUrlForFile(projectId, sourceId, file._id);
const url = await getDownloadUrlForFile(file.id);
window.open(url, '_blank');
} catch (error) {
console.error('Download failed:', error);
@ -90,17 +86,15 @@ function FileListItem({
}
function PaginatedFileList({
projectId,
sourceId,
handleReload,
onDelete,
}: {
projectId: string,
sourceId: string,
handleReload: () => void;
onDelete: (fileId: string) => Promise<void>;
}) {
const [files, setFiles] = useState<WithStringId<z.infer<typeof DataSourceDoc>>[]>([]);
const [files, setFiles] = useState<z.infer<typeof DataSourceDoc>[]>([]);
const [page, setPage] = useState(1);
const [total, setTotal] = useState(0);
const [loading, setLoading] = useState(false);
@ -114,7 +108,6 @@ function PaginatedFileList({
setLoading(true);
try {
const { files, total } = await listDocsInDataSource({
projectId,
sourceId,
page,
limit: 10,
@ -134,7 +127,7 @@ function PaginatedFileList({
return () => {
ignore = true;
}
}, [projectId, sourceId, page]);
}, [sourceId, page]);
return (
<div className="space-y-4">
@ -154,10 +147,8 @@ function PaginatedFileList({
<div className="space-y-3">
{files.map(file => (
<FileListItem
key={file._id}
key={file.id}
file={file}
projectId={projectId}
sourceId={sourceId}
onDelete={onDelete}
/>
))}
@ -177,13 +168,11 @@ function PaginatedFileList({
}
export function FilesSource({
projectId,
dataSource,
handleReload,
type,
}: {
projectId: string,
dataSource: WithStringId<z.infer<typeof DataSource>>,
dataSource: z.infer<typeof DataSource>,
handleReload: () => void;
type: 'files_local' | 'files_s3';
}) {
@ -193,7 +182,7 @@ export function FilesSource({
const onDrop = useCallback(async (acceptedFiles: File[]) => {
setUploading(true);
try {
const urls = await getUploadUrlsForFilesDataSource(projectId, dataSource._id, acceptedFiles.map(file => ({
const urls = await getUploadUrlsForFilesDataSource(dataSource.id, acceptedFiles.map(file => ({
name: file.name,
type: file.type,
size: file.size,
@ -237,13 +226,13 @@ export function FilesSource({
name: file.name,
size: file.size,
mimeType: file.type,
path: urls[index].path,
},
}));
}
await addDocsToDataSource({
projectId,
sourceId: dataSource._id,
sourceId: dataSource.id,
docData,
});
@ -255,7 +244,7 @@ export function FilesSource({
} finally {
setUploading(false);
}
}, [projectId, dataSource._id, handleReload, type]);
}, [dataSource.id, handleReload, type]);
const { getRootProps, getInputProps, isDragActive } = useDropzone({
onDrop,
@ -299,14 +288,11 @@ export function FilesSource({
<PaginatedFileList
key={fileListKey}
projectId={projectId}
sourceId={dataSource._id}
sourceId={dataSource.id}
handleReload={handleReload}
onDelete={async (docId) => {
await deleteDocsFromDataSource({
projectId,
sourceId: dataSource._id,
docIds: [docId],
await deleteDocFromDataSource({
docId: docId,
});
handleReload();
setFileListKey(prev => prev + 1);

View file

@ -1,9 +1,9 @@
"use client";
import { WithStringId } from "../../../../lib/types/types";
import { DataSourceDoc, DataSource } from "../../../../lib/types/datasource_types";
import { DataSourceDoc } from "@/src/entities/models/data-source-doc";
import { DataSource } from "@/src/entities/models/data-source";
import { z } from "zod";
import { Recrawl } from "./web-recrawl";
import { deleteDocsFromDataSource, listDocsInDataSource, recrawlWebDataSource, addDocsToDataSource } from "../../../../actions/data-source.actions";
import { deleteDocFromDataSource, listDocsInDataSource, recrawlWebDataSource, addDocsToDataSource } from "../../../../actions/data-source.actions";
import { useState, useEffect } from "react";
import { Spinner, Pagination } from "@heroui/react";
import { ExternalLinkIcon, PlusIcon } from "lucide-react";
@ -13,7 +13,7 @@ import { Textarea } from "@/components/ui/textarea";
import { Section } from "./section";
function UrlListItem({ file, onDelete }: {
file: WithStringId<z.infer<typeof DataSourceDoc>>,
file: z.infer<typeof DataSourceDoc>,
onDelete: (fileId: string) => Promise<void>;
}) {
const [isDeleting, setIsDeleting] = useState(false);
@ -37,7 +37,7 @@ function UrlListItem({ file, onDelete }: {
onClick={async () => {
setIsDeleting(true);
try {
await onDelete(file._id);
await onDelete(file.id);
} finally {
setIsDeleting(false);
}
@ -51,12 +51,11 @@ function UrlListItem({ file, onDelete }: {
);
}
function UrlList({ projectId, sourceId, onDelete }: {
projectId: string,
function UrlList({ sourceId, onDelete }: {
sourceId: string,
onDelete: (fileId: string) => Promise<void>,
}) {
const [files, setFiles] = useState<WithStringId<z.infer<typeof DataSourceDoc>>[]>([]);
const [files, setFiles] = useState<z.infer<typeof DataSourceDoc>[]>([]);
const [loading, setLoading] = useState(true);
const [page, setPage] = useState(1);
const [total, setTotal] = useState(0);
@ -69,7 +68,7 @@ function UrlList({ projectId, sourceId, onDelete }: {
async function fetchFiles() {
setLoading(true);
try {
const { files, total } = await listDocsInDataSource({ projectId, sourceId, page, limit: 10 });
const { files, total } = await listDocsInDataSource({ sourceId, page, limit: 10 });
if (!ignore) {
setFiles(files);
setTotal(total);
@ -86,7 +85,7 @@ function UrlList({ projectId, sourceId, onDelete }: {
return () => {
ignore = true;
};
}, [projectId, sourceId, page]);
}, [sourceId, page]);
return (
<div className="mt-6 space-y-4">
@ -102,7 +101,7 @@ function UrlList({ projectId, sourceId, onDelete }: {
) : (
<div className="space-y-2">
{files.map(file => (
<UrlListItem key={file._id} file={file} onDelete={onDelete} />
<UrlListItem key={file.id} file={file} onDelete={onDelete} />
))}
{Math.ceil(total / 10) > 1 && (
<div className="mt-4">
@ -120,12 +119,10 @@ function UrlList({ projectId, sourceId, onDelete }: {
}
export function ScrapeSource({
projectId,
dataSource,
handleReload,
}: {
projectId: string,
dataSource: WithStringId<z.infer<typeof DataSource>>,
dataSource: z.infer<typeof DataSource>,
handleReload: () => void;
}) {
const [fileListKey, setFileListKey] = useState(0);
@ -161,8 +158,7 @@ export function ScrapeSource({
const first100Urls = urlsArray.slice(0, 100);
await addDocsToDataSource({
projectId,
sourceId: dataSource._id,
sourceId: dataSource.id,
docData: first100Urls.map(url => ({
name: url,
data: {
@ -209,13 +205,10 @@ export function ScrapeSource({
<UrlList
key={fileListKey}
projectId={projectId}
sourceId={dataSource._id}
sourceId={dataSource.id}
onDelete={async (docId) => {
await deleteDocsFromDataSource({
projectId,
sourceId: dataSource._id,
docIds: [docId],
await deleteDocFromDataSource({
docId: docId,
});
handleReload();
setFileListKey(prev => prev + 1);
@ -230,10 +223,8 @@ export function ScrapeSource({
description="Update the content by scraping the URLs again."
>
<Recrawl
projectId={projectId}
sourceId={dataSource._id}
handleRefresh={async () => {
await recrawlWebDataSource(projectId, dataSource._id);
await recrawlWebDataSource(dataSource.id);
handleReload();
setFileListKey(prev => prev + 1);
}}

View file

@ -1,17 +1,15 @@
'use client';
import { getDataSource } from "../../../../actions/data-source.actions";
import { DataSource } from "../../../../lib/types/datasource_types";
import { DataSource } from "@/src/entities/models/data-source";
import { useEffect, useState } from "react";
import { z } from 'zod';
import { SourceStatus } from "./source-status";
export function SelfUpdatingSourceStatus({
projectId,
sourceId,
initialStatus,
compact = false,
}: {
projectId: string;
sourceId: string,
initialStatus: z.infer<typeof DataSource>['status'],
compact?: boolean;
@ -26,7 +24,7 @@ export function SelfUpdatingSourceStatus({
if (ignore) {
return;
}
const source = await getDataSource(projectId, sourceId);
const source = await getDataSource(sourceId);
setStatus(source.status);
timeoutId = setTimeout(check, 15 * 1000);
}
@ -41,7 +39,7 @@ export function SelfUpdatingSourceStatus({
clearTimeout(timeoutId);
}
};
}, [status, projectId, sourceId]);
}, [status, sourceId]);
return <SourceStatus status={status} compact={compact} projectId={projectId} />;
return <SourceStatus status={status} compact={compact} />;
}

View file

@ -1,15 +1,13 @@
import { DataSource } from "../../../../lib/types/datasource_types";
import { DataSource } from "@/src/entities/models/data-source";
import { Spinner } from "@heroui/react";
import { z } from 'zod';
import { CheckCircleIcon, XCircleIcon, ClockIcon } from "lucide-react";
export function SourceStatus({
status,
projectId,
compact = false,
}: {
status: z.infer<typeof DataSource>['status'],
projectId: string,
compact?: boolean;
}) {
return (

View file

@ -6,15 +6,14 @@ import { ToggleSource } from "./toggle-source";
import { SelfUpdatingSourceStatus } from "./self-updating-source-status";
import { DataSourceIcon } from "../../../../lib/components/datasource-icon";
import { useEffect, useState } from "react";
import { WithStringId } from "../../../../lib/types/types";
import { DataSource } from "../../../../lib/types/datasource_types";
import { DataSource } from "@/src/entities/models/data-source";
import { z } from "zod";
import { listDataSources } from "../../../../actions/data-source.actions";
import { Panel } from "@/components/common/panel-common";
import { PlusIcon } from "lucide-react";
export function SourcesList({ projectId }: { projectId: string }) {
const [sources, setSources] = useState<WithStringId<z.infer<typeof DataSource>>[]>([]);
const [sources, setSources] = useState<z.infer<typeof DataSource>[]>([]);
const [loading, setLoading] = useState(true);
useEffect(() => {
@ -115,12 +114,12 @@ export function SourcesList({ projectId }: { projectId: string }) {
<tbody className="bg-white dark:bg-gray-800 divide-y divide-gray-200 dark:divide-gray-700">
{sources.map((source) => (
<tr
key={source._id}
key={source.id}
className="hover:bg-gray-50 dark:hover:bg-gray-750 transition-colors"
>
<td className="px-6 py-4 text-left">
<Link
href={`/projects/${projectId}/sources/${source._id}`}
href={`/projects/${projectId}/sources/${source.id}`}
size="lg"
isBlock
className="text-sm text-gray-900 dark:text-gray-100 hover:text-blue-600 dark:hover:text-blue-400 truncate block"
@ -158,8 +157,7 @@ export function SourcesList({ projectId }: { projectId: string }) {
<td className="px-6 py-4 text-left">
<div className="text-sm">
<SelfUpdatingSourceStatus
sourceId={source._id}
projectId={projectId}
sourceId={source.id}
initialStatus={source.status}
compact={true}
/>
@ -168,8 +166,7 @@ export function SourcesList({ projectId }: { projectId: string }) {
)}
<td className="px-6 py-4 text-left">
<ToggleSource
projectId={projectId}
sourceId={source._id}
sourceId={source.id}
active={source.active}
compact={true}
className="bg-default-100"

View file

@ -1,21 +1,18 @@
"use client";
import { WithStringId } from "../../../../lib/types/types";
import { DataSource } from "../../../../lib/types/datasource_types";
import { DataSource } from "@/src/entities/models/data-source";
import { z } from "zod";
import { useState, useEffect } from "react";
import { Textarea } from "@/components/ui/textarea";
import { FormStatusButton } from "../../../../lib/components/form-status-button";
import { Spinner } from "@heroui/react";
import { addDocsToDataSource, deleteDocsFromDataSource, listDocsInDataSource } from "../../../../actions/data-source.actions";
import { addDocsToDataSource, deleteDocFromDataSource, listDocsInDataSource } from "../../../../actions/data-source.actions";
import { Section } from "./section";
export function TextSource({
projectId,
dataSource,
handleReload,
}: {
projectId: string,
dataSource: WithStringId<z.infer<typeof DataSource>>,
dataSource: z.infer<typeof DataSource>,
handleReload: () => void;
}) {
const [content, setContent] = useState("");
@ -30,8 +27,7 @@ export function TextSource({
setIsLoading(true);
try {
const { files } = await listDocsInDataSource({
projectId,
sourceId: dataSource._id,
sourceId: dataSource.id,
limit: 1,
});
@ -41,7 +37,7 @@ export function TextSource({
const doc = files[0];
if (doc.data.type === 'text') {
setContent(doc.data.content);
setDocId(doc._id);
setDocId(doc.id);
}
}
} catch (error) {
@ -55,7 +51,7 @@ export function TextSource({
return () => {
ignore = true;
};
}, [projectId, dataSource._id]);
}, [dataSource.id]);
async function handleSubmit(formData: FormData) {
setIsSaving(true);
@ -64,17 +60,14 @@ export function TextSource({
// Delete existing doc if it exists
if (docId) {
await deleteDocsFromDataSource({
projectId,
sourceId: dataSource._id,
docIds: [docId],
await deleteDocFromDataSource({
docId: docId,
});
}
// Add new doc
await addDocsToDataSource({
projectId,
sourceId: dataSource._id,
sourceId: dataSource.id,
docData: [{
name: 'text',
data: {

View file

@ -4,13 +4,11 @@ import { Spinner } from "@heroui/react";
import { useState } from "react";
export function ToggleSource({
projectId,
sourceId,
active,
compact = false,
className
}: {
projectId: string;
sourceId: string;
active: boolean;
compact?: boolean;
@ -22,7 +20,7 @@ export function ToggleSource({
async function handleToggle() {
setLoading(true);
try {
await toggleDataSource(projectId, sourceId, !isActive);
await toggleDataSource(sourceId, !isActive);
setIsActive(!isActive);
} finally {
setLoading(false);

View file

@ -3,12 +3,8 @@ import { FormStatusButton } from "../../../../lib/components/form-status-button"
import { RefreshCwIcon } from "lucide-react";
export function Recrawl({
projectId,
sourceId,
handleRefresh,
}: {
projectId: string;
sourceId: string;
handleRefresh: () => void;
}) {
return <form action={handleRefresh}>

View file

@ -71,8 +71,7 @@ export function Form({
// pick first 100
const first100Urls = urlsArray.slice(0, 100);
await addDocsToDataSource({
projectId,
sourceId: source._id,
sourceId: source.id,
docData: first100Urls.map(url => ({
name: url,
data: {
@ -82,7 +81,7 @@ export function Form({
})),
});
if (onSuccess) {
onSuccess(source._id);
onSuccess(source.id);
}
}
@ -97,7 +96,7 @@ export function Form({
});
if (onSuccess) {
onSuccess(source._id);
onSuccess(source.id);
}
}
@ -114,8 +113,7 @@ export function Form({
const content = formData.get('content') as string;
await addDocsToDataSource({
projectId,
sourceId: source._id,
sourceId: source.id,
docData: [{
name: 'text',
data: {
@ -126,7 +124,7 @@ export function Form({
});
if (onSuccess) {
onSuccess(source._id);
onSuccess(source.id);
}
}

View file

@ -1,6 +1,6 @@
"use client";
import { MCPServer, WithStringId } from "../../../lib/types/types";
import { DataSource } from "../../../lib/types/datasource_types";
import { DataSource } from "@/src/entities/models/data-source";
import { Project } from "../../../lib/types/project_types";
import { z } from "zod";
import { useCallback, useEffect, useState } from "react";
@ -32,7 +32,7 @@ export function App({
}) {
const [mode, setMode] = useState<'draft' | 'live'>('draft');
const [project, setProject] = useState<WithStringId<z.infer<typeof Project>> | null>(null);
const [dataSources, setDataSources] = useState<WithStringId<z.infer<typeof DataSource>>[] | null>(null);
const [dataSources, setDataSources] = useState<z.infer<typeof DataSource>[] | null>(null);
const [projectConfig, setProjectConfig] = useState<z.infer<typeof Project> | null>(null);
const [loading, setLoading] = useState(false);
const [eligibleModels, setEligibleModels] = useState<z.infer<typeof ModelsResponse> | "*">("*");

View file

@ -6,8 +6,7 @@ import { Button } from '@/components/ui/button';
import { Form } from '../../sources/new/form';
import { FilesSource } from '../../sources/components/files-source';
import { getDataSource } from '../../../../actions/data-source.actions';
import { WithStringId } from '../../../../lib/types/types';
import { DataSource } from '../../../../lib/types/datasource_types';
import { DataSource } from "@/src/entities/models/data-source";
import { z } from 'zod';
interface DataSourcesModalProps {
@ -30,11 +29,11 @@ export function DataSourcesModal({
useRagScraping
}: DataSourcesModalProps) {
const [currentView, setCurrentView] = useState<'form' | 'upload'>('form');
const [createdSource, setCreatedSource] = useState<WithStringId<z.infer<typeof DataSource>> | null>(null);
const [createdSource, setCreatedSource] = useState<z.infer<typeof DataSource> | null>(null);
const handleDataSourceCreated = async (sourceId: string) => {
// Get the created data source
const source = await getDataSource(projectId, sourceId);
const source = await getDataSource(sourceId);
// If it's a files data source, show the upload interface
if (source.data.type === 'files_local' || source.data.type === 'files_s3') {
@ -93,7 +92,6 @@ export function DataSourcesModal({
) : (
createdSource && (
<FilesSource
projectId={projectId}
dataSource={createdSource}
handleReload={handleFilesUploaded}
type={createdSource.data.type as 'files_local' | 'files_s3'}

View file

@ -2,7 +2,7 @@ import React, { forwardRef, useImperativeHandle } from "react";
import { z } from "zod";
import { WorkflowPrompt, WorkflowAgent, WorkflowTool, WorkflowPipeline, Workflow } from "../../../lib/types/workflow_types";
import { Project } from "../../../lib/types/project_types";
import { DataSource } from "../../../lib/types/datasource_types";
import { DataSource } from "@/src/entities/models/data-source";
import { WithStringId } from "../../../lib/types/types";
import { Dropdown, DropdownItem, DropdownTrigger, DropdownMenu } from "@heroui/react";
import { useRef, useEffect, useState } from "react";
@ -48,7 +48,7 @@ interface EntityListProps {
tools: z.infer<typeof WorkflowTool>[];
prompts: z.infer<typeof WorkflowPrompt>[];
pipelines: z.infer<typeof WorkflowPipeline>[];
dataSources: WithStringId<z.infer<typeof DataSource>>[];
dataSources: z.infer<typeof DataSource>[];
workflow: z.infer<typeof Workflow>;
selectedEntity: {
type: "agent" | "tool" | "prompt" | "datasource" | "pipeline" | "visualise";
@ -1071,14 +1071,14 @@ export const EntityList = forwardRef<
className={clsx(
"flex items-center gap-2 px-3 py-2 rounded-md min-h-[24px] cursor-pointer",
{
"bg-indigo-50 dark:bg-indigo-950/30": selectedEntity?.type === "datasource" && selectedEntity.name === dataSource._id,
"hover:bg-zinc-50 dark:hover:bg-zinc-800": !(selectedEntity?.type === "datasource" && selectedEntity.name === dataSource._id)
"bg-indigo-50 dark:bg-indigo-950/30": selectedEntity?.type === "datasource" && selectedEntity.name === dataSource.id,
"hover:bg-zinc-50 dark:hover:bg-zinc-800": !(selectedEntity?.type === "datasource" && selectedEntity.name === dataSource.id)
}
)}
onClick={() => handleSelectDataSource(dataSource._id)}
onClick={() => handleSelectDataSource(dataSource.id)}
>
<div
ref={selectedEntity?.type === "datasource" && selectedEntity.name === dataSource._id ? selectedRef : undefined}
ref={selectedEntity?.type === "datasource" && selectedEntity.name === dataSource.id ? selectedRef : undefined}
className="flex-1 flex items-center gap-2 text-sm text-left"
>
<div className="shrink-0 flex items-center justify-center w-3 h-3">
@ -1097,7 +1097,7 @@ export const EntityList = forwardRef<
name={dataSource.name}
onDelete={async () => {
if (window.confirm(`Are you sure you want to delete the data source "${dataSource.name}"?`)) {
await deleteDataSource(projectId, dataSource._id);
await deleteDataSource(dataSource.id);
onDataSourcesUpdated?.();
}
}}

View file

@ -2,7 +2,7 @@
import React, { useReducer, Reducer, useState, useCallback, useEffect, useRef, createContext, useContext } from "react";
import { MCPServer, Message, WithStringId } from "../../../lib/types/types";
import { Workflow, WorkflowTool, WorkflowPrompt, WorkflowAgent, WorkflowPipeline } from "../../../lib/types/workflow_types";
import { DataSource } from "../../../lib/types/datasource_types";
import { DataSource } from "@/src/entities/models/data-source";
import { Project } from "../../../lib/types/project_types";
import { produce, applyPatches, enablePatches, produceWithPatches, Patch } from 'immer';
import { AgentConfig } from "../entities/agent_config";
@ -821,7 +821,7 @@ export function WorkflowEditor({
chatWidgetHost,
}: {
projectId: string;
dataSources: WithStringId<z.infer<typeof DataSource>>[];
dataSources: z.infer<typeof DataSource>[];
workflow: z.infer<typeof Workflow>;
useRag: boolean;
useRagUploads: boolean;

View file

@ -1,66 +1,50 @@
import '../lib/loadenv';
import { RecursiveCharacterTextSplitter } from "@langchain/textsplitters";
import FirecrawlApp from '@mendable/firecrawl-js';
import { z } from 'zod';
import { dataSourceDocsCollection, dataSourcesCollection, projectsCollection, usersCollection } from '../lib/mongodb';
import { EmbeddingRecord, DataSourceDoc, DataSource } from "../lib/types/datasource_types";
import { ObjectId, WithId } from 'mongodb';
import { EmbeddingRecord } from "../lib/types/datasource_types";
import { DataSourceDoc } from "@/src/entities/models/data-source-doc";
import { embedMany, generateText } from 'ai';
import { embeddingModel } from '../lib/embedding';
import { qdrantClient } from '../lib/qdrant';
import { PrefixLogger } from "../lib/utils";
import { GoogleGenerativeAI } from "@google/generative-ai";
import { GetObjectCommand } from "@aws-sdk/client-s3";
import { uploadsS3Client } from '../lib/uploads_s3_client';
import fs from 'fs/promises';
import crypto from 'crypto';
import path from 'path';
import { createOpenAI } from '@ai-sdk/openai';
import { USE_BILLING, USE_GEMINI_FILE_PARSING } from '../lib/feature_flags';
import { authorize, getCustomerIdForProject, logUsage, UsageTracker } from '../lib/billing';
import { BillingError } from '@/src/entities/errors/common';
import { DataSource } from '@/src/entities/models/data-source';
import { IDataSourcesRepository } from '@/src/application/repositories/data-sources.repository.interface';
import { IDataSourceDocsRepository } from '@/src/application/repositories/data-source-docs.repository.interface';
import { IUploadsStorageService } from '@/src/application/services/uploads-storage.service.interface';
import { container } from '@/di/container';
const FILE_PARSING_PROVIDER_API_KEY = process.env.FILE_PARSING_PROVIDER_API_KEY || process.env.OPENAI_API_KEY || '';
const FILE_PARSING_PROVIDER_BASE_URL = process.env.FILE_PARSING_PROVIDER_BASE_URL || undefined;
const FILE_PARSING_MODEL = process.env.FILE_PARSING_MODEL || 'gpt-4o';
const dataSourcesRepository = container.resolve<IDataSourcesRepository>('dataSourcesRepository');
const dataSourceDocsRepository = container.resolve<IDataSourceDocsRepository>('dataSourceDocsRepository');
const localUploadsStorageService = container.resolve<IUploadsStorageService>('localUploadsStorageService');
const s3UploadsStorageService = container.resolve<IUploadsStorageService>('s3UploadsStorageService');
const firecrawl = new FirecrawlApp({ apiKey: process.env.FIRECRAWL_API_KEY || "test" });
const openai = createOpenAI({
apiKey: FILE_PARSING_PROVIDER_API_KEY,
baseURL: FILE_PARSING_PROVIDER_BASE_URL,
});
const UPLOADS_DIR = process.env.RAG_UPLOADS_DIR || '/uploads';
const splitter = new RecursiveCharacterTextSplitter({
separators: ['\n\n', '\n', '. ', '.', ''],
chunkSize: 1024,
chunkOverlap: 20,
});
const second = 1000;
const minute = 60 * second;
const hour = 60 * minute;
const day = 24 * hour;
// Configure Google Gemini API
const genAI = new GoogleGenerativeAI(process.env.GOOGLE_API_KEY || '');
async function getLocalFileContent(path: string): Promise<Buffer> {
return await fs.readFile(path);
}
async function getS3FileContent(s3Key: string): Promise<Buffer> {
const command = new GetObjectCommand({
Bucket: process.env.RAG_UPLOADS_S3_BUCKET,
Key: s3Key,
});
const response = await uploadsS3Client.send(command);
const chunks: Uint8Array[] = [];
for await (const chunk of response.Body as any) {
chunks.push(chunk);
}
return Buffer.concat(chunks);
}
async function retryable<T>(fn: () => Promise<T>, maxAttempts: number = 3): Promise<T> {
let attempts = 0;
while (true) {
@ -75,19 +59,23 @@ async function retryable<T>(fn: () => Promise<T>, maxAttempts: number = 3): Prom
}
}
async function runProcessPipeline(_logger: PrefixLogger, usageTracker: UsageTracker, job: WithId<z.infer<typeof DataSource>>, doc: WithId<z.infer<typeof DataSourceDoc>> & { data: { type: "file_local" | "file_s3" } }) {
async function runProcessFilePipeline(_logger: PrefixLogger, usageTracker: UsageTracker, job: z.infer<typeof DataSource>, doc: z.infer<typeof DataSourceDoc>) {
if (doc.data.type !== 'file_local' && doc.data.type !== 'file_s3') {
throw new Error("Invalid data source type");
}
const logger = _logger
.child(doc._id.toString())
.child(doc.id)
.child(doc.name);
// Get file content
let fileData: Buffer;
if (doc.data.type === 'file_local') {
logger.log("Fetching file from local");
fileData = await getLocalFileContent(path.join(UPLOADS_DIR, doc._id.toString()));
fileData = await localUploadsStorageService.getFileContents(doc.id);
} else {
logger.log("Fetching file from S3");
fileData = await getS3FileContent(doc.data.s3Key);
fileData = await s3UploadsStorageService.getFileContents(doc.id);
}
let markdown = "";
@ -167,8 +155,8 @@ async function runProcessPipeline(_logger: PrefixLogger, usageTracker: UsageTrac
vector: embedding,
payload: {
projectId: job.projectId,
sourceId: job._id.toString(),
docId: doc._id.toString(),
sourceId: job.id,
docId: doc.id,
content: splits[i].pageContent,
title: doc.name,
name: doc.name,
@ -180,21 +168,136 @@ async function runProcessPipeline(_logger: PrefixLogger, usageTracker: UsageTrac
// store content in doc record
logger.log("Storing content in doc record");
await dataSourceDocsCollection.updateOne({
_id: doc._id,
version: doc.version,
}, {
$set: {
content: markdown,
status: "ready",
lastUpdatedAt: new Date().toISOString(),
}
await dataSourceDocsRepository.updateByVersion(doc.id, doc.version, {
content: markdown,
status: "ready",
});
}
async function runDeletionPipeline(_logger: PrefixLogger, job: WithId<z.infer<typeof DataSource>>, doc: WithId<z.infer<typeof DataSourceDoc>>): Promise<void> {
async function runScrapePipeline(_logger: PrefixLogger, usageTracker: UsageTracker, job: z.infer<typeof DataSource>, doc: z.infer<typeof DataSourceDoc>) {
const logger = _logger
.child(doc._id.toString())
.child(doc.id)
.child(doc.name);
// scrape the url using firecrawl
logger.log("Scraping using Firecrawl");
const scrapeResult = await retryable(async () => {
if (doc.data.type !== 'url') {
throw new Error("Invalid data source type");
}
const scrapeResult = await firecrawl.scrapeUrl(doc.data.url, {
formats: ['markdown'],
onlyMainContent: true,
excludeTags: ['script', 'style', 'noscript', 'img',]
});
if (!scrapeResult.success) {
throw new Error("Unable to scrape URL: " + doc.data.url);
}
return scrapeResult;
}, 3); // Retry up to 3 times
usageTracker.track({
type: "FIRECRAWL_SCRAPE_USAGE",
context: "rag.urls.firecrawl_scrape",
});
// split into chunks
logger.log("Splitting into chunks");
const splits = await splitter.createDocuments([scrapeResult.markdown || '']);
// generate embeddings
logger.log("Generating embeddings");
const { embeddings, usage } = await embedMany({
model: embeddingModel,
values: splits.map((split) => split.pageContent)
});
usageTracker.track({
type: "EMBEDDING_MODEL_USAGE",
modelName: embeddingModel.modelId,
tokens: usage.tokens,
context: "rag.urls.embedding_usage",
});
// store embeddings in qdrant
logger.log("Storing embeddings in Qdrant");
const points: z.infer<typeof EmbeddingRecord>[] = embeddings.map((embedding, i) => ({
id: crypto.randomUUID(),
vector: embedding,
payload: {
projectId: job.projectId,
sourceId: job.id,
docId: doc.id,
content: splits[i].pageContent,
title: scrapeResult.metadata?.title || '',
name: doc.name,
},
}));
await qdrantClient.upsert("embeddings", {
points,
});
// store scraped markdown in doc record
logger.log("Storing scraped markdown in doc record");
await dataSourceDocsRepository.updateByVersion(doc.id, doc.version, {
content: scrapeResult.markdown,
status: "ready",
});
}
async function runProcessTextPipeline(_logger: PrefixLogger, usageTracker: UsageTracker, job: z.infer<typeof DataSource>, doc: z.infer<typeof DataSourceDoc>) {
const logger = _logger
.child(doc.id)
.child(doc.name);
if (doc.data.type !== 'text') {
throw new Error("Invalid data source type");
}
// split into chunks
logger.log("Splitting into chunks");
const splits = await splitter.createDocuments([doc.data.content]);
// generate embeddings
logger.log("Generating embeddings");
const { embeddings, usage } = await embedMany({
model: embeddingModel,
values: splits.map((split) => split.pageContent)
});
usageTracker.track({
type: "EMBEDDING_MODEL_USAGE",
modelName: embeddingModel.modelId,
tokens: usage.tokens,
context: "rag.text.embedding_usage",
});
// store embeddings in qdrant
logger.log("Storing embeddings in Qdrant");
const points: z.infer<typeof EmbeddingRecord>[] = embeddings.map((embedding, i) => ({
id: crypto.randomUUID(),
vector: embedding,
payload: {
projectId: job.projectId,
sourceId: job.id,
docId: doc.id,
content: splits[i].pageContent,
title: doc.name,
name: doc.name,
},
}));
await qdrantClient.upsert("embeddings", {
points,
});
// store content in doc record
logger.log("Storing content in doc record");
await dataSourceDocsRepository.updateByVersion(doc.id, doc.version, {
content: doc.data.content,
status: "ready",
});
}
async function runDeletionPipeline(_logger: PrefixLogger, job: z.infer<typeof DataSource>, doc: z.infer<typeof DataSourceDoc>): Promise<void> {
const logger = _logger
.child(doc.id)
.child(doc.name);
// Delete embeddings from qdrant
@ -211,13 +314,13 @@ async function runDeletionPipeline(_logger: PrefixLogger, job: WithId<z.infer<ty
{
key: "sourceId",
match: {
value: job._id.toString(),
value: job.id,
}
},
{
key: "docId",
match: {
value: doc._id.toString(),
value: doc.id,
}
}
],
@ -226,85 +329,33 @@ async function runDeletionPipeline(_logger: PrefixLogger, job: WithId<z.infer<ty
// Delete docs from db
logger.log("Deleting doc from db");
await dataSourceDocsCollection.deleteOne({ _id: doc._id });
await dataSourceDocsRepository.delete(doc.id);
}
// fetch next job from mongodb
(async () => {
while (true) {
const now = Date.now();
let job: WithId<z.infer<typeof DataSource>> | null = null;
let job: z.infer<typeof DataSource> | null = null;
// first try to find a job that needs deleting
job = await dataSourcesCollection.findOneAndUpdate({
status: "deleted",
"data.type": { $in: ["files_local", "files_s3"] },
$or: [
{ attempts: { $exists: false } },
{ attempts: { $lte: 3 } }
]
}, { $set: { lastAttemptAt: new Date().toISOString() }, $inc: { attempts: 1 } }, { returnDocument: "after", sort: { createdAt: 1 } });
job = await dataSourcesRepository.pollDeleteJob();
if (job === null) {
job = await dataSourcesCollection.findOneAndUpdate(
{
$and: [
{ 'data.type': { $in: ["files_local", "files_s3"] } },
{
$or: [
// if the job has never been attempted
{
status: "pending",
attempts: 0,
},
// if the job was attempted but wasn't completed in the last hour
{
status: "pending",
lastAttemptAt: { $lt: new Date(now - 1 * hour).toISOString() },
},
// if the job errored out but hasn't been retried 3 times yet
{
status: "error",
attempts: { $lt: 3 },
},
// if the job errored out but hasn't been retried in the last 5 minutes
{
status: "error",
lastAttemptAt: { $lt: new Date(now - 1 * hour).toISOString() },
},
]
}
]
},
{
$set: {
status: "pending",
lastAttemptAt: new Date().toISOString(),
},
$inc: {
attempts: 1
},
},
{ returnDocument: "after", sort: { createdAt: 1 } }
);
job = await dataSourcesRepository.pollPendingJob();
}
if (job === null) {
// if no doc found, sleep for a bit and start again
await new Promise(resolve => setTimeout(resolve, 5 * second));
await new Promise(resolve => setTimeout(resolve, 5 * 1000));
continue;
}
const logger = new PrefixLogger(`${job._id.toString()}-${job.version}`);
logger.log(`Starting job ${job._id}. Type: ${job.data.type}. Status: ${job.status}`);
const logger = new PrefixLogger(`${job.id}-${job.version}`);
logger.log(`Starting job ${job.id}. Type: ${job.data.type}. Status: ${job.status}`);
let errors = false;
try {
if (job.data.type !== 'files_local' && job.data.type !== 'files_s3') {
throw new Error("Invalid data source type");
}
if (job.status === "deleted") {
// delete all embeddings for this source
logger.log("Deleting embeddings from Qdrant");
@ -312,32 +363,33 @@ async function runDeletionPipeline(_logger: PrefixLogger, job: WithId<z.infer<ty
filter: {
must: [
{ key: "projectId", match: { value: job.projectId } },
{ key: "sourceId", match: { value: job._id.toString() } },
{ key: "sourceId", match: { value: job.id } },
],
},
});
// delete all docs for this source
logger.log("Deleting docs from db");
await dataSourceDocsCollection.deleteMany({
sourceId: job._id.toString(),
});
await dataSourceDocsRepository.deleteBySourceId(job.id);
// delete the source record from db
logger.log("Deleting source record from db");
await dataSourcesCollection.deleteOne({
_id: job._id,
});
await dataSourcesRepository.delete(job.id);
logger.log("Job deleted");
continue;
}
// fetch docs that need updating
const pendingDocs = await dataSourceDocsCollection.find({
sourceId: job._id.toString(),
status: { $in: ["pending", "error"] },
}).toArray();
const pendingDocs = [];
let cursor = undefined;
do {
const result = await dataSourceDocsRepository.list(job.id, {
status: ["pending", "error"],
}, cursor);
pendingDocs.push(...result.items);
cursor = result.nextCursor;
} while (cursor);
logger.log(`Found ${pendingDocs.length} docs to process`);
@ -365,21 +417,21 @@ async function runDeletionPipeline(_logger: PrefixLogger, job: WithId<z.infer<ty
}
}
const ldoc = doc as WithId<z.infer<typeof DataSourceDoc>> & { data: { type: "file_local" | "file_s3" } };
const usageTracker = new UsageTracker();
try {
await runProcessPipeline(logger, usageTracker, job, ldoc);
if (doc.data.type === "file_local" || doc.data.type === "file_s3") {
await runProcessFilePipeline(logger, usageTracker, job, doc);
} else if (doc.data.type === "text") {
await runProcessTextPipeline(logger, usageTracker, job, doc);
} else if (doc.data.type === "url") {
await runScrapePipeline(logger, usageTracker, job, doc);
}
} catch (e: any) {
errors = true;
logger.log("Error processing doc:", e);
await dataSourceDocsCollection.updateOne({
_id: doc._id,
version: doc.version,
}, {
$set: {
status: "error",
error: e.message,
}
await dataSourceDocsRepository.updateByVersion(doc.id, doc.version, {
status: "error",
error: e.message,
});
} finally {
// log usage in billing
@ -392,10 +444,15 @@ async function runDeletionPipeline(_logger: PrefixLogger, job: WithId<z.infer<ty
}
// fetch docs that need to be deleted
const deletedDocs = await dataSourceDocsCollection.find({
sourceId: job._id.toString(),
status: "deleted",
}).toArray();
const deletedDocs = [];
cursor = undefined;
do {
const result = await dataSourceDocsRepository.list(job.id, {
status: ["deleted"],
}, cursor);
deletedDocs.push(...result.items);
cursor = result.nextCursor;
} while (cursor);
logger.log(`Found ${deletedDocs.length} docs to delete`);
@ -405,55 +462,32 @@ async function runDeletionPipeline(_logger: PrefixLogger, job: WithId<z.infer<ty
} catch (e: any) {
errors = true;
logger.log("Error deleting doc:", e);
await dataSourceDocsCollection.updateOne({
_id: doc._id,
version: doc.version,
}, {
$set: {
status: "error",
error: e.message,
}
await dataSourceDocsRepository.updateByVersion(doc.id, doc.version, {
status: "error",
error: e.message,
});
}
}
} catch (e) {
if (e instanceof BillingError) {
logger.log("Billing error:", e.message);
await dataSourcesCollection.updateOne({
_id: job._id,
version: job.version,
}, {
$set: {
status: "error",
billingError: e.message,
lastUpdatedAt: new Date().toISOString(),
}
await dataSourcesRepository.release(job.id, job.version, {
status: "error",
billingError: e.message,
});
}
logger.log("Error processing job; will retry:", e);
await dataSourcesCollection.updateOne({
_id: job._id,
version: job.version,
}, {
$set: {
status: "error",
lastUpdatedAt: new Date().toISOString(),
}
await dataSourcesRepository.release(job.id, job.version, {
status: "error",
});
continue;
}
// mark job as complete
logger.log("Marking job as completed...");
await dataSourcesCollection.updateOne({
_id: job._id,
version: job.version,
}, {
$set: {
status: errors ? "error" : "ready",
...(errors ? { error: "There were some errors processing this job" } : {}),
lastUpdatedAt: new Date().toISOString(),
}
await dataSourcesRepository.release(job.id, job.version, {
status: errors ? "error" : "ready",
...(errors ? { error: "There were some errors processing this job" } : {}),
});
}
})();

View file

@ -1,345 +0,0 @@
import '../lib/loadenv';
import { RecursiveCharacterTextSplitter } from "@langchain/textsplitters";
import { z } from 'zod';
import { dataSourceDocsCollection, dataSourcesCollection } from '../lib/mongodb';
import { EmbeddingRecord, DataSourceDoc, DataSource } from "../lib/types/datasource_types";
import { WithId } from 'mongodb';
import { embedMany } from 'ai';
import { embeddingModel } from '../lib/embedding';
import { qdrantClient } from '../lib/qdrant';
import { PrefixLogger } from "../lib/utils";
import crypto from 'crypto';
import { USE_BILLING } from '../lib/feature_flags';
import { authorize, getCustomerIdForProject, logUsage, UsageTracker } from '../lib/billing';
import { BillingError } from '@/src/entities/errors/common';
const splitter = new RecursiveCharacterTextSplitter({
separators: ['\n\n', '\n', '. ', '.', ''],
chunkSize: 1024,
chunkOverlap: 20,
});
const second = 1000;
const minute = 60 * second;
const hour = 60 * minute;
async function runProcessPipeline(_logger: PrefixLogger, usageTracker: UsageTracker, job: WithId<z.infer<typeof DataSource>>, doc: WithId<z.infer<typeof DataSourceDoc>>) {
const logger = _logger
.child(doc._id.toString())
.child(doc.name);
if (doc.data.type !== 'text') {
throw new Error("Invalid data source type");
}
// split into chunks
logger.log("Splitting into chunks");
const splits = await splitter.createDocuments([doc.data.content]);
// generate embeddings
logger.log("Generating embeddings");
const { embeddings, usage } = await embedMany({
model: embeddingModel,
values: splits.map((split) => split.pageContent)
});
usageTracker.track({
type: "EMBEDDING_MODEL_USAGE",
modelName: embeddingModel.modelId,
tokens: usage.tokens,
context: "rag.text.embedding_usage",
});
// store embeddings in qdrant
logger.log("Storing embeddings in Qdrant");
const points: z.infer<typeof EmbeddingRecord>[] = embeddings.map((embedding, i) => ({
id: crypto.randomUUID(),
vector: embedding,
payload: {
projectId: job.projectId,
sourceId: job._id.toString(),
docId: doc._id.toString(),
content: splits[i].pageContent,
title: doc.name,
name: doc.name,
},
}));
await qdrantClient.upsert("embeddings", {
points,
});
// store content in doc record
logger.log("Storing content in doc record");
await dataSourceDocsCollection.updateOne({
_id: doc._id,
version: doc.version,
}, {
$set: {
content: doc.data.content,
status: "ready",
lastUpdatedAt: new Date().toISOString(),
}
});
}
async function runDeletionPipeline(_logger: PrefixLogger, job: WithId<z.infer<typeof DataSource>>, doc: WithId<z.infer<typeof DataSourceDoc>>): Promise<void> {
const logger = _logger
.child(doc._id.toString())
.child(doc.name);
// Delete embeddings from qdrant
logger.log("Deleting embeddings from Qdrant");
await qdrantClient.delete("embeddings", {
filter: {
must: [
{
key: "projectId",
match: {
value: job.projectId,
}
},
{
key: "sourceId",
match: {
value: job._id.toString(),
}
},
{
key: "docId",
match: {
value: doc._id.toString(),
}
}
],
},
});
// Delete docs from db
logger.log("Deleting doc from db");
await dataSourceDocsCollection.deleteOne({ _id: doc._id });
}
// fetch next job from mongodb
(async () => {
while (true) {
const now = Date.now();
let job: WithId<z.infer<typeof DataSource>> | null = null;
// first try to find a job that needs deleting
job = await dataSourcesCollection.findOneAndUpdate({
status: "deleted",
"data.type": "text",
$or: [
{ attempts: { $exists: false } },
{ attempts: { $lte: 3 } }
]
}, { $set: { lastAttemptAt: new Date().toISOString() }, $inc: { attempts: 1 } }, { returnDocument: "after", sort: { createdAt: 1 } });
if (job === null) {
job = await dataSourcesCollection.findOneAndUpdate(
{
$and: [
{ 'data.type': { $eq: "text" } },
{
$or: [
// if the job has never been attempted
{
status: "pending",
attempts: 0,
},
// if the job was attempted but wasn't completed in the last hour
{
status: "pending",
lastAttemptAt: { $lt: new Date(now - 1 * hour).toISOString() },
},
// if the job errored out but hasn't been retried 3 times yet
{
status: "error",
attempts: { $lt: 3 },
},
// if the job errored out but hasn't been retried in the last 5 minutes
{
status: "error",
lastAttemptAt: { $lt: new Date(now - 1 * hour).toISOString() },
},
]
}
]
},
{
$set: {
status: "pending",
lastAttemptAt: new Date().toISOString(),
},
$inc: {
attempts: 1
},
},
{ returnDocument: "after", sort: { createdAt: 1 } }
);
}
if (job === null) {
// if no doc found, sleep for a bit and start again
await new Promise(resolve => setTimeout(resolve, 5 * second));
continue;
}
const logger = new PrefixLogger(`${job._id.toString()}-${job.version}`);
logger.log(`Starting job ${job._id}. Type: ${job.data.type}. Status: ${job.status}`);
let errors = false;
try {
if (job.data.type !== 'text') {
throw new Error("Invalid data source type");
}
if (job.status === "deleted") {
// delete all embeddings for this source
logger.log("Deleting embeddings from Qdrant");
await qdrantClient.delete("embeddings", {
filter: {
must: [
{ key: "projectId", match: { value: job.projectId } },
{ key: "sourceId", match: { value: job._id.toString() } },
],
},
});
// delete all docs for this source
logger.log("Deleting docs from db");
await dataSourceDocsCollection.deleteMany({
sourceId: job._id.toString(),
});
// delete the source record from db
logger.log("Deleting source record from db");
await dataSourcesCollection.deleteOne({
_id: job._id,
});
logger.log("Job deleted");
continue;
}
// fetch docs that need updating
const pendingDocs = await dataSourceDocsCollection.find({
sourceId: job._id.toString(),
status: { $in: ["pending", "error"] },
}).toArray();
logger.log(`Found ${pendingDocs.length} docs to process`);
// fetch project, user and billing data
let billingCustomerId: string | null = null;
if (USE_BILLING) {
try {
billingCustomerId = await getCustomerIdForProject(job.projectId);
} catch (e) {
logger.log("Unable to fetch billing customer id:", e);
throw new Error("Unable to fetch billing customer id");
}
}
// for each doc
for (const doc of pendingDocs) {
// authorize with billing
if (USE_BILLING && billingCustomerId) {
const authResponse = await authorize(billingCustomerId, {
type: "use_credits",
});
if ('error' in authResponse) {
throw new BillingError(authResponse.error || "Unknown billing error")
}
}
const usageTracker = new UsageTracker();
try {
await runProcessPipeline(logger, usageTracker, job, doc);
} catch (e: any) {
errors = true;
logger.log("Error processing doc:", e);
await dataSourceDocsCollection.updateOne({
_id: doc._id,
version: doc.version,
}, {
$set: {
status: "error",
error: e.message,
}
});
} finally {
// log usage in billing
if (USE_BILLING && billingCustomerId) {
await logUsage(billingCustomerId, {
items: usageTracker.flush(),
});
}
}
}
// fetch docs that need to be deleted
const deletedDocs = await dataSourceDocsCollection.find({
sourceId: job._id.toString(),
status: "deleted",
}).toArray();
logger.log(`Found ${deletedDocs.length} docs to delete`);
for (const doc of deletedDocs) {
try {
await runDeletionPipeline(logger, job, doc);
} catch (e: any) {
errors = true;
logger.log("Error deleting doc:", e);
await dataSourceDocsCollection.updateOne({
_id: doc._id,
version: doc.version,
}, {
$set: {
status: "error",
error: e.message,
}
});
}
}
} catch (e) {
if (e instanceof BillingError) {
logger.log("Billing error:", e.message);
await dataSourcesCollection.updateOne({
_id: job._id,
version: job.version,
}, {
$set: {
status: "error",
billingError: e.message,
lastUpdatedAt: new Date().toISOString(),
}
});
}
logger.log("Error processing job; will retry:", e);
await dataSourcesCollection.updateOne({
_id: job._id,
version: job.version,
}, {
$set: {
status: "error",
lastUpdatedAt: new Date().toISOString(),
}
});
continue;
}
// mark job as complete
logger.log("Marking job as completed...");
await dataSourcesCollection.updateOne({
_id: job._id,
version: job.version,
}, {
$set: {
status: errors ? "error" : "ready",
...(errors ? { error: "There were some errors processing this job" } : {}),
}
});
}
})();

View file

@ -1,381 +0,0 @@
import '../lib/loadenv';
import FirecrawlApp from '@mendable/firecrawl-js';
import { RecursiveCharacterTextSplitter } from "@langchain/textsplitters";
import { z } from 'zod';
import { dataSourceDocsCollection, dataSourcesCollection } from '../lib/mongodb';
import { EmbeddingRecord, DataSourceDoc, DataSource } from "../lib/types/datasource_types";
import { WithId } from 'mongodb';
import { embedMany } from 'ai';
import { embeddingModel } from '../lib/embedding';
import { qdrantClient } from '../lib/qdrant';
import { PrefixLogger } from "../lib/utils";
import crypto from 'crypto';
import { USE_BILLING } from '../lib/feature_flags';
import { authorize, getCustomerIdForProject, logUsage, UsageTracker } from '../lib/billing';
import { BillingError } from '@/src/entities/errors/common';
const firecrawl = new FirecrawlApp({ apiKey: process.env.FIRECRAWL_API_KEY });
const splitter = new RecursiveCharacterTextSplitter({
separators: ['\n\n', '\n', '. ', '.', ''],
chunkSize: 1024,
chunkOverlap: 20,
});
const second = 1000;
const minute = 60 * second;
const hour = 60 * minute;
const day = 24 * hour;
async function retryable<T>(fn: () => Promise<T>, maxAttempts: number = 3): Promise<T> {
let attempts = 0;
while (true) {
try {
return await fn();
} catch (e) {
attempts++;
if (attempts >= maxAttempts) {
throw e;
}
}
}
}
/**
 * Processes one 'url'-type doc end-to-end: scrape -> chunk -> embed -> index.
 *
 * Steps:
 *   1. Scrape the doc's URL to markdown via Firecrawl (up to 3 attempts).
 *   2. Split the markdown into overlapping chunks.
 *   3. Embed the chunks and record token usage for billing.
 *   4. Upsert the vectors (with project/source/doc payload) into Qdrant.
 *   5. Persist the markdown on the doc record and mark it "ready".
 *
 * @param _logger - Parent logger; a per-doc child logger is derived from it.
 * @param usageTracker - Accumulates billable usage items (scrape + embedding tokens).
 * @param job - The data source the doc belongs to.
 * @param doc - The doc to process; its data.type must be 'url'.
 */
async function runScrapePipeline(_logger: PrefixLogger, usageTracker: UsageTracker, job: WithId<z.infer<typeof DataSource>>, doc: WithId<z.infer<typeof DataSourceDoc>>) {
    const logger = _logger
        .child(doc._id.toString())
        .child(doc.name);

    // scrape the url using firecrawl
    logger.log("Scraping using Firecrawl");
    const scrapeResult = await retryable(async () => {
        if (doc.data.type !== 'url') {
            throw new Error("Invalid data source type");
        }
        const scrapeResult = await firecrawl.scrapeUrl(doc.data.url, {
            formats: ['markdown'],
            onlyMainContent: true,
            excludeTags: ['script', 'style', 'noscript', 'img',]
        });
        if (!scrapeResult.success) {
            throw new Error("Unable to scrape URL: " + doc.data.url);
        }
        return scrapeResult;
    }, 3); // Retry up to 3 times
    // NOTE(review): usage is tracked once per doc, even if retries scraped
    // more than once — confirm that matches the billing model.
    usageTracker.track({
        type: "FIRECRAWL_SCRAPE_USAGE",
        context: "rag.urls.firecrawl_scrape",
    });

    // split into chunks
    logger.log("Splitting into chunks");
    const splits = await splitter.createDocuments([scrapeResult.markdown || '']);

    // generate embeddings
    logger.log("Generating embeddings");
    const { embeddings, usage } = await embedMany({
        model: embeddingModel,
        values: splits.map((split) => split.pageContent)
    });
    usageTracker.track({
        type: "EMBEDDING_MODEL_USAGE",
        modelName: embeddingModel.modelId,
        tokens: usage.tokens,
        context: "rag.urls.embedding_usage",
    });

    // store embeddings in qdrant; the payload fields are what the deletion
    // pipeline later filters on (projectId / sourceId / docId)
    logger.log("Storing embeddings in Qdrant");
    const points: z.infer<typeof EmbeddingRecord>[] = embeddings.map((embedding, i) => ({
        id: crypto.randomUUID(),
        vector: embedding,
        payload: {
            projectId: job.projectId,
            sourceId: job._id.toString(),
            docId: doc._id.toString(),
            content: splits[i].pageContent,
            title: scrapeResult.metadata?.title || '',
            name: doc.name,
        },
    }));
    await qdrantClient.upsert("embeddings", {
        points,
    });

    // store scraped markdown in doc record; matching on `version` acts as an
    // optimistic-concurrency guard so a stale worker can't clobber a newer doc
    logger.log("Storing scraped markdown in doc record");
    await dataSourceDocsCollection.updateOne({
        _id: doc._id,
        version: doc.version,
    }, {
        $set: {
            content: scrapeResult.markdown,
            status: "ready",
            lastUpdatedAt: new Date().toISOString(),
        }
    });
}
/**
 * Removes a single doc from a data source: drops its embedding vectors
 * from the Qdrant "embeddings" collection, then deletes the doc record
 * from the database.
 *
 * @param _logger - Parent logger; a per-doc child logger is derived from it.
 * @param job - The data source the doc belongs to.
 * @param doc - The doc to delete.
 */
async function runDeletionPipeline(_logger: PrefixLogger, job: WithId<z.infer<typeof DataSource>>, doc: WithId<z.infer<typeof DataSourceDoc>>): Promise<void> {
    const logger = _logger.child(doc._id.toString()).child(doc.name);

    // Delete embeddings from qdrant: match exactly this project + source + doc.
    logger.log("Deleting embeddings from Qdrant");
    const matchers = [
        { key: "projectId", value: job.projectId },
        { key: "sourceId", value: job._id.toString() },
        { key: "docId", value: doc._id.toString() },
    ];
    await qdrantClient.delete("embeddings", {
        filter: {
            must: matchers.map(({ key, value }) => ({ key, match: { value } })),
        },
    });

    // Remove the doc record itself.
    logger.log("Deleting doc from db");
    await dataSourceDocsCollection.deleteOne({ _id: doc._id });
}
// Worker loop: forever polls mongodb for 'urls'-type data source jobs —
// deletions first, then pending/errored scrapes — and processes them one
// at a time. Claiming a job stamps lastAttemptAt and bumps attempts via
// findOneAndUpdate so concurrent workers don't double-process it.
(async () => {
    while (true) {
        const now = Date.now();
        let job: WithId<z.infer<typeof DataSource>> | null = null;

        // first try to find a job that needs deleting
        // NOTE(review): this uses `$lte: 3` while the retry query below uses
        // `$lt: 3` — confirm whether deletions are meant to get one extra attempt.
        job = await dataSourcesCollection.findOneAndUpdate({
            status: "deleted",
            "data.type": "urls",
            $or: [
                { attempts: { $exists: false } },
                { attempts: { $lte: 3 } }
            ]
        }, { $set: { lastAttemptAt: new Date().toISOString() }, $inc: { attempts: 1 } }, { returnDocument: "after", sort: { createdAt: 1 } });
        if (job === null) {
            // no deletion work: claim the oldest pending/retriable scrape job
            job = await dataSourcesCollection.findOneAndUpdate(
                {
                    $and: [
                        { 'data.type': { $eq: "urls" } },
                        {
                            $or: [
                                // if the job has never been attempted
                                {
                                    status: "pending",
                                    attempts: 0,
                                },
                                // if the job was attempted but wasn't completed in the last hour
                                {
                                    status: "pending",
                                    lastAttemptAt: { $lt: new Date(now - 1 * hour).toISOString() },
                                },
                                // if the job errored out but hasn't been retried 3 times yet
                                {
                                    status: "error",
                                    attempts: { $lt: 3 },
                                },
                                // if the job errored out and hasn't been retried in the last hour
                                // NOTE(review): an older comment said "5 minutes" but the code
                                // uses 1 hour — confirm the intended retry window.
                                {
                                    status: "error",
                                    lastAttemptAt: { $lt: new Date(now - 1 * hour).toISOString() },
                                },
                            ]
                        }
                    ]
                },
                {
                    $set: {
                        status: "pending",
                        lastAttemptAt: new Date().toISOString(),
                    },
                    $inc: {
                        attempts: 1
                    },
                },
                { returnDocument: "after", sort: { createdAt: 1 } }
            );
        }
        if (job === null) {
            // if no doc found, sleep for a bit and start again
            await new Promise(resolve => setTimeout(resolve, 5 * second));
            continue;
        }

        const logger = new PrefixLogger(`${job._id.toString()}-${job.version}`);
        logger.log(`Starting job ${job._id}. Type: ${job.data.type}. Status: ${job.status}`);

        // true once any doc fails; decides the job's terminal status below
        let errors = false;
        try {
            if (job.data.type !== 'urls') {
                throw new Error("Invalid data source type");
            }
            if (job.status === "deleted") {
                // delete all embeddings for this source
                logger.log("Deleting embeddings from Qdrant");
                await qdrantClient.delete("embeddings", {
                    filter: {
                        must: [
                            { key: "projectId", match: { value: job.projectId } },
                            { key: "sourceId", match: { value: job._id.toString() } },
                        ],
                    },
                });
                // delete all docs for this source
                logger.log("Deleting docs from db");
                await dataSourceDocsCollection.deleteMany({
                    sourceId: job._id.toString(),
                });
                // delete the source record from db
                logger.log("Deleting source record from db");
                await dataSourcesCollection.deleteOne({
                    _id: job._id,
                });
                logger.log("Job deleted");
                continue;
            }

            // fetch docs that need updating (new docs and previous failures)
            const pendingDocs = await dataSourceDocsCollection.find({
                sourceId: job._id.toString(),
                status: { $in: ["pending", "error"] },
            }).toArray();
            logger.log(`Found ${pendingDocs.length} docs to process`);

            // fetch project, user and billing data
            let billingCustomerId: string | null = null;
            if (USE_BILLING) {
                try {
                    billingCustomerId = await getCustomerIdForProject(job.projectId);
                } catch (e) {
                    logger.log("Unable to fetch billing customer id:", e);
                    throw new Error("Unable to fetch billing customer id");
                }
            }

            // for each doc
            for (const doc of pendingDocs) {
                // authorize with billing before doing any paid work
                if (USE_BILLING && billingCustomerId) {
                    const authResponse = await authorize(billingCustomerId, {
                        type: "use_credits",
                    });
                    if ('error' in authResponse) {
                        throw new BillingError(authResponse.error || "Unknown billing error")
                    }
                }
                const usageTracker = new UsageTracker();
                try {
                    await runScrapePipeline(logger, usageTracker, job, doc);
                } catch (e: any) {
                    // a single doc failure marks the job errored but does not
                    // stop the remaining docs from being processed
                    errors = true;
                    logger.log("Error processing doc:", e);
                    await dataSourceDocsCollection.updateOne({
                        _id: doc._id,
                        version: doc.version,
                    }, {
                        $set: {
                            status: "error",
                            error: e.message,
                        }
                    });
                } finally {
                    // log usage in billing even if the doc failed part-way
                    if (USE_BILLING && billingCustomerId) {
                        await logUsage(billingCustomerId, {
                            items: usageTracker.flush(),
                        });
                    }
                }
            }

            // fetch docs that need to be deleted
            const deletedDocs = await dataSourceDocsCollection.find({
                sourceId: job._id.toString(),
                status: "deleted",
            }).toArray();
            logger.log(`Found ${deletedDocs.length} docs to delete`);
            for (const doc of deletedDocs) {
                try {
                    await runDeletionPipeline(logger, job, doc);
                } catch (e: any) {
                    errors = true;
                    logger.log("Error deleting doc:", e);
                    await dataSourceDocsCollection.updateOne({
                        _id: doc._id,
                        version: doc.version,
                    }, {
                        $set: {
                            status: "error",
                            error: e.message,
                        }
                    });
                }
            }
        } catch (e) {
            if (e instanceof BillingError) {
                // record the billing failure separately so it can be surfaced
                logger.log("Billing error:", e.message);
                await dataSourcesCollection.updateOne({
                    _id: job._id,
                    version: job.version,
                }, {
                    $set: {
                        status: "error",
                        billingError: e.message,
                        lastUpdatedAt: new Date().toISOString(),
                    }
                });
            }
            // any job-level failure marks the source errored; it will be
            // re-claimed by the retry clauses of the polling query above
            logger.log("Error processing job; will retry:", e);
            await dataSourcesCollection.updateOne({
                _id: job._id,
                version: job.version,
            }, {
                $set: {
                    status: "error",
                    lastUpdatedAt: new Date().toISOString(),
                }
            });
            continue;
        }

        // mark job as complete ("error" if any individual doc failed)
        logger.log("Marking job as completed...");
        await dataSourcesCollection.updateOne({
            _id: job._id,
            version: job.version,
        }, {
            $set: {
                status: errors ? "error" : "ready",
                ...(errors ? { error: "There were some errors processing this job" } : {}),
            }
        });
    }
})();

View file

@ -1,7 +1,13 @@
import { asClass, createContainer, InjectionMode } from "awilix";
// Services
import { RedisPubSubService } from "@/src/infrastructure/services/redis.pub-sub.service";
import { S3UploadsStorageService } from "@/src/infrastructure/services/s3.uploads-storage.service";
import { LocalUploadsStorageService } from "@/src/infrastructure/services/local.uploads-storage.service";
import { RunConversationTurnUseCase } from "@/src/application/use-cases/conversations/run-conversation-turn.use-case";
import { MongoDBConversationsRepository } from "@/src/infrastructure/repositories/mongodb.conversations.repository";
import { RunCachedTurnController } from "@/src/interface-adapters/controllers/conversations/run-cached-turn.controller";
import { asClass, createContainer, InjectionMode } from "awilix";
import { CreatePlaygroundConversationController } from "@/src/interface-adapters/controllers/conversations/create-playground-conversation.controller";
import { CreateConversationUseCase } from "@/src/application/use-cases/conversations/create-conversation.use-case";
import { RedisCacheService } from "@/src/infrastructure/services/redis.cache.service";
@ -28,7 +34,6 @@ import { ListComposioTriggerDeploymentsController } from "@/src/interface-adapte
import { ListComposioTriggerTypesController } from "@/src/interface-adapters/controllers/composio-trigger-deployments/list-composio-trigger-types.controller";
import { DeleteComposioConnectedAccountController } from "@/src/interface-adapters/controllers/composio/delete-composio-connected-account.controller";
import { HandleComposioWebhookRequestController } from "@/src/interface-adapters/controllers/composio/webhook/handle-composio-webhook-request.controller";
import { RedisPubSubService } from "@/src/infrastructure/services/redis.pub-sub.service";
import { JobsWorker } from "@/src/application/workers/jobs.worker";
import { JobRulesWorker } from "@/src/application/workers/job-rules.worker";
import { ListJobsUseCase } from "@/src/application/use-cases/jobs/list-jobs.use-case";
@ -72,6 +77,34 @@ import { CreateApiKeyController } from "@/src/interface-adapters/controllers/api
import { ListApiKeysController } from "@/src/interface-adapters/controllers/api-keys/list-api-keys.controller";
import { DeleteApiKeyController } from "@/src/interface-adapters/controllers/api-keys/delete-api-key.controller";
// Data sources
import { MongoDBDataSourcesRepository } from "@/src/infrastructure/repositories/mongodb.data-sources.repository";
import { MongoDBDataSourceDocsRepository } from "@/src/infrastructure/repositories/mongodb.data-source-docs.repository";
import { CreateDataSourceUseCase } from "@/src/application/use-cases/data-sources/create-data-source.use-case";
import { FetchDataSourceUseCase } from "@/src/application/use-cases/data-sources/fetch-data-source.use-case";
import { ListDataSourcesUseCase } from "@/src/application/use-cases/data-sources/list-data-sources.use-case";
import { UpdateDataSourceUseCase } from "@/src/application/use-cases/data-sources/update-data-source.use-case";
import { DeleteDataSourceUseCase } from "@/src/application/use-cases/data-sources/delete-data-source.use-case";
import { ToggleDataSourceUseCase } from "@/src/application/use-cases/data-sources/toggle-data-source.use-case";
import { CreateDataSourceController } from "@/src/interface-adapters/controllers/data-sources/create-data-source.controller";
import { FetchDataSourceController } from "@/src/interface-adapters/controllers/data-sources/fetch-data-source.controller";
import { ListDataSourcesController } from "@/src/interface-adapters/controllers/data-sources/list-data-sources.controller";
import { UpdateDataSourceController } from "@/src/interface-adapters/controllers/data-sources/update-data-source.controller";
import { DeleteDataSourceController } from "@/src/interface-adapters/controllers/data-sources/delete-data-source.controller";
import { ToggleDataSourceController } from "@/src/interface-adapters/controllers/data-sources/toggle-data-source.controller";
import { AddDocsToDataSourceUseCase } from "@/src/application/use-cases/data-sources/add-docs-to-data-source.use-case";
import { ListDocsInDataSourceUseCase } from "@/src/application/use-cases/data-sources/list-docs-in-data-source.use-case";
import { DeleteDocFromDataSourceUseCase } from "@/src/application/use-cases/data-sources/delete-doc-from-data-source.use-case";
import { RecrawlWebDataSourceUseCase } from "@/src/application/use-cases/data-sources/recrawl-web-data-source.use-case";
import { GetUploadUrlsForFilesUseCase } from "@/src/application/use-cases/data-sources/get-upload-urls-for-files.use-case";
import { GetDownloadUrlForFileUseCase } from "@/src/application/use-cases/data-sources/get-download-url-for-file.use-case";
import { AddDocsToDataSourceController } from "@/src/interface-adapters/controllers/data-sources/add-docs-to-data-source.controller";
import { ListDocsInDataSourceController } from "@/src/interface-adapters/controllers/data-sources/list-docs-in-data-source.controller";
import { DeleteDocFromDataSourceController } from "@/src/interface-adapters/controllers/data-sources/delete-doc-from-data-source.controller";
import { RecrawlWebDataSourceController } from "@/src/interface-adapters/controllers/data-sources/recrawl-web-data-source.controller";
import { GetUploadUrlsForFilesController } from "@/src/interface-adapters/controllers/data-sources/get-upload-urls-for-files.controller";
import { GetDownloadUrlForFileController } from "@/src/interface-adapters/controllers/data-sources/get-download-url-for-file.controller";
export const container = createContainer({
injectionMode: InjectionMode.PROXY,
strict: true,
@ -87,6 +120,8 @@ container.register({
// ---
cacheService: asClass(RedisCacheService).singleton(),
pubSubService: asClass(RedisPubSubService).singleton(),
s3UploadsStorageService: asClass(S3UploadsStorageService).singleton(),
localUploadsStorageService: asClass(LocalUploadsStorageService).singleton(),
// policies
// ---
@ -111,6 +146,35 @@ container.register({
listApiKeysController: asClass(ListApiKeysController).singleton(),
deleteApiKeyController: asClass(DeleteApiKeyController).singleton(),
// data sources
// ---
dataSourcesRepository: asClass(MongoDBDataSourcesRepository).singleton(),
dataSourceDocsRepository: asClass(MongoDBDataSourceDocsRepository).singleton(),
createDataSourceUseCase: asClass(CreateDataSourceUseCase).singleton(),
fetchDataSourceUseCase: asClass(FetchDataSourceUseCase).singleton(),
listDataSourcesUseCase: asClass(ListDataSourcesUseCase).singleton(),
updateDataSourceUseCase: asClass(UpdateDataSourceUseCase).singleton(),
deleteDataSourceUseCase: asClass(DeleteDataSourceUseCase).singleton(),
toggleDataSourceUseCase: asClass(ToggleDataSourceUseCase).singleton(),
createDataSourceController: asClass(CreateDataSourceController).singleton(),
fetchDataSourceController: asClass(FetchDataSourceController).singleton(),
listDataSourcesController: asClass(ListDataSourcesController).singleton(),
updateDataSourceController: asClass(UpdateDataSourceController).singleton(),
deleteDataSourceController: asClass(DeleteDataSourceController).singleton(),
toggleDataSourceController: asClass(ToggleDataSourceController).singleton(),
addDocsToDataSourceUseCase: asClass(AddDocsToDataSourceUseCase).singleton(),
listDocsInDataSourceUseCase: asClass(ListDocsInDataSourceUseCase).singleton(),
deleteDocFromDataSourceUseCase: asClass(DeleteDocFromDataSourceUseCase).singleton(),
recrawlWebDataSourceUseCase: asClass(RecrawlWebDataSourceUseCase).singleton(),
getUploadUrlsForFilesUseCase: asClass(GetUploadUrlsForFilesUseCase).singleton(),
getDownloadUrlForFileUseCase: asClass(GetDownloadUrlForFileUseCase).singleton(),
addDocsToDataSourceController: asClass(AddDocsToDataSourceController).singleton(),
listDocsInDataSourceController: asClass(ListDocsInDataSourceController).singleton(),
deleteDocFromDataSourceController: asClass(DeleteDocFromDataSourceController).singleton(),
recrawlWebDataSourceController: asClass(RecrawlWebDataSourceController).singleton(),
getUploadUrlsForFilesController: asClass(GetUploadUrlsForFilesController).singleton(),
getDownloadUrlForFileController: asClass(GetDownloadUrlForFileController).singleton(),
// jobs
// ---
jobsRepository: asClass(MongoDBJobsRepository).singleton(),

View file

@ -10,9 +10,7 @@
"lint": "next lint",
"setupQdrant": "tsx app/scripts/setup_qdrant.ts",
"deleteQdrant": "tsx app/scripts/delete_qdrant.ts",
"ragUrlsWorker": "tsx app/scripts/rag_urls_worker.ts",
"ragFilesWorker": "tsx app/scripts/rag_files_worker.ts",
"ragTextWorker": "tsx app/scripts/rag_text_worker.ts",
"rag-worker": "tsx app/scripts/rag-worker.ts",
"jobs-worker": "tsx app/scripts/jobs-worker.ts",
"job-rules-worker": "tsx app/scripts/job-rules.worker.ts"
},

View file

@ -0,0 +1,120 @@
import { PaginatedList } from "@/src/entities/common/paginated-list";
import { DataSourceDoc } from "@/src/entities/models/data-source-doc";
import { z } from "zod";
/**
 * Schema for creating a new DataSourceDoc. Only `name` and `data` come from
 * the caller; projectId and sourceId are passed as separate arguments to
 * bulkCreate (see IDataSourceDocsRepository).
 */
export const CreateSchema = DataSourceDoc.pick({
    name: true,
    data: true,
});

/**
 * Schema for updating an existing DataSourceDoc. Allows updating the
 * status, content, and error fields; all fields are optional.
 */
export const UpdateSchema = DataSourceDoc
    .pick({
        status: true,
        content: true,
        error: true,
    })
    .partial();

/**
 * Filters schema for listing DataSourceDocs. Supports optional filtering by
 * one or more statuses; unknown filter keys are rejected (strict).
 */
export const ListFiltersSchema = z.object({
    status: z.array(DataSourceDoc.shape.status).optional(),
}).strict();
/**
 * Repository interface for managing DataSourceDoc entities in the
 * persistence layer. Docs carry a `version` field used for optimistic
 * concurrency control (see updateByVersion).
 */
export interface IDataSourceDocsRepository {
    /**
     * Creates multiple DataSourceDocs with the provided data.
     * @param projectId - The project ID to create the DataSourceDocs for.
     * @param sourceId - The source ID to create the DataSourceDocs for.
     * @param data - The data required to create a DataSourceDoc (see CreateSchema).
     * @returns The IDs of the created DataSourceDocs.
     */
    bulkCreate(
        projectId: string,
        sourceId: string,
        data: z.infer<typeof CreateSchema>[]
    ): Promise<string[]>;

    /**
     * Fetches a DataSourceDoc by its unique identifier.
     * @param id - The unique ID of the DataSourceDoc.
     * @returns The DataSourceDoc object if found, otherwise null.
     */
    fetch(id: string): Promise<z.infer<typeof DataSourceDoc> | null>;

    /**
     * Fetches multiple DataSourceDocs by their unique identifiers.
     * @param ids - The unique IDs of the DataSourceDocs.
     * @returns The DataSourceDocs that were found; IDs with no match are
     *   omitted from the result.
     */
    bulkFetch(ids: string[]): Promise<z.infer<typeof DataSourceDoc>[]>;

    /**
     * Lists DataSourceDocs for a given source, with optional filters, cursor, and limit for pagination.
     * @param sourceId - The source ID to list DataSourceDocs for.
     * @param filters - Optional filters (see ListFiltersSchema).
     * @param cursor - Optional pagination cursor.
     * @param limit - Optional maximum number of results to return.
     * @returns A paginated list of DataSourceDocs.
     */
    list(
        sourceId: string,
        filters?: z.infer<typeof ListFiltersSchema>,
        cursor?: string,
        limit?: number
    ): Promise<z.infer<ReturnType<typeof PaginatedList<typeof DataSourceDoc>>>>;

    /**
     * Marks all docs for a given source as pending (re-queues them for processing).
     * @param sourceId - The source ID to mark docs for.
     */
    markSourceDocsPending(sourceId: string): Promise<void>;

    /**
     * Marks a DataSourceDoc as deleted (soft delete; actual removal is
     * performed by the background worker).
     * @param id - The unique ID of the DataSourceDoc to mark as deleted.
     */
    markAsDeleted(id: string): Promise<void>;

    /**
     * Updates an existing DataSourceDoc by its ID and version with the provided data.
     * @param id - The unique ID of the DataSourceDoc to update.
     * @param version - Version of the DataSourceDoc for optimistic concurrency control.
     * @param data - Fields to update (see UpdateSchema).
     * @returns The updated DataSourceDoc object.
     */
    updateByVersion(
        id: string,
        version: number,
        data: z.infer<typeof UpdateSchema>
    ): Promise<z.infer<typeof DataSourceDoc>>;

    /**
     * Deletes a DataSourceDoc by its unique identifier.
     * @param id - The unique ID of the DataSourceDoc to delete.
     * @returns True if the DataSourceDoc was deleted, false otherwise.
     */
    delete(id: string): Promise<boolean>;

    /**
     * Deletes all DataSourceDocs associated with a given source ID.
     * @param sourceId - The source ID whose documents should be deleted.
     */
    deleteBySourceId(sourceId: string): Promise<void>;

    /**
     * Deletes all DataSourceDocs associated with a given project ID.
     * @param projectId - The project ID whose documents should be deleted.
     */
    deleteByProjectId(projectId: string): Promise<void>;
}

View file

@ -0,0 +1,124 @@
import { PaginatedList } from "@/src/entities/common/paginated-list";
import { DataSource } from "@/src/entities/models/data-source";
import { z } from "zod";
/**
 * Schema for creating a new DataSource. Requires projectId, name,
 * description, data, and status fields.
 */
export const CreateSchema = DataSource.pick({
    projectId: true,
    name: true,
    description: true,
    data: true,
    status: true,
});

/**
 * Schema for updating an existing DataSource. Allows updating billingError,
 * error, description, status, active, and attempts; all fields optional.
 */
export const UpdateSchema = DataSource
    .pick({
        billingError: true,
        error: true,
        description: true,
        status: true,
        active: true,
        attempts: true,
    })
    .partial();

/**
 * Filters schema for listing DataSources. Supports optional filtering by
 * active and deleted status; unknown filter keys are rejected (strict).
 */
export const ListFiltersSchema = z.object({
    active: z.boolean().optional(),
    deleted: z.boolean().optional(),
}).strict();

/**
 * Schema for the payload of a release operation: the fields a worker may
 * set when releasing a claimed datasource (see release()).
 */
export const ReleasePayloadSchema = DataSource
    .pick({
        status: true,
        error: true,
        billingError: true,
    })
    .partial();
/**
 * Repository interface for managing DataSource entities in the persistence
 * layer. Also exposes the job-queue primitives (poll*/release) used by the
 * background rag worker.
 */
export interface IDataSourcesRepository {
    /**
     * Creates a new DataSource with the provided data.
     * @param data - The data required to create a DataSource (see CreateSchema).
     * @returns The created DataSource object.
     */
    create(data: z.infer<typeof CreateSchema>): Promise<z.infer<typeof DataSource>>;

    /**
     * Fetches a DataSource by its unique identifier.
     * @param id - The unique ID of the DataSource.
     * @returns The DataSource object if found, otherwise null.
     */
    fetch(id: string): Promise<z.infer<typeof DataSource> | null>;

    /**
     * Lists DataSources for a given project, with optional filters, cursor, and limit for pagination.
     * @param projectId - The project ID to list DataSources for.
     * @param filters - Optional filters (see ListFiltersSchema).
     * @param cursor - Optional pagination cursor.
     * @param limit - Optional maximum number of results to return.
     * @returns A paginated list of DataSources.
     */
    list(
        projectId: string,
        filters?: z.infer<typeof ListFiltersSchema>,
        cursor?: string,
        limit?: number
    ): Promise<z.infer<ReturnType<typeof PaginatedList<typeof DataSource>>>>;

    /**
     * Updates an existing DataSource by its ID with the provided data.
     * @param id - The unique ID of the DataSource to update.
     * @param data - The fields to update (see UpdateSchema).
     * @param bumpVersion - Optional flag to increment the version.
     * @returns The updated DataSource object.
     */
    update(id: string, data: z.infer<typeof UpdateSchema>, bumpVersion?: boolean): Promise<z.infer<typeof DataSource>>;

    /**
     * Deletes a DataSource by its unique identifier (hard delete).
     * @param id - The unique ID of the DataSource to delete.
     * @returns True if the DataSource was deleted, false otherwise.
     */
    delete(id: string): Promise<boolean>;

    /**
     * Deletes all DataSources associated with a given project ID.
     * @param projectId - The project ID whose DataSources should be deleted.
     * @returns A promise that resolves when the operation is complete.
     */
    deleteByProjectId(projectId: string): Promise<void>;

    /**
     * Polls for a datasource that is pending deletion and returns it.
     * @returns The datasource if found, otherwise null.
     */
    pollDeleteJob(): Promise<z.infer<typeof DataSource> | null>;

    /**
     * Polls for a datasource that is pending processing and returns it.
     * @returns The datasource if found, otherwise null.
     */
    pollPendingJob(): Promise<z.infer<typeof DataSource> | null>;

    /**
     * Releases a datasource claimed via poll*Job, by its ID and version.
     * @param id - The unique ID of the datasource to release.
     * @param version - The version of the datasource to release.
     * @param updates - The updates to apply to the datasource (see ReleasePayloadSchema).
     */
    release(id: string, version: number, updates: z.infer<typeof ReleasePayloadSchema>): Promise<void>;
}

View file

@ -0,0 +1,5 @@
/**
 * Abstraction over the blob store holding uploaded data-source files
 * (backed by S3 or local-disk implementations registered in the DI container).
 */
export interface IUploadsStorageService {
    /** Returns a URL a client can use to upload a file under `key` with the given content type. */
    getUploadUrl(key: string, contentType: string): Promise<string>;
    /** Returns a URL from which the stored file can be downloaded. */
    getDownloadUrl(fileId: string): Promise<string>;
    /** Reads the full contents of the stored file into memory. */
    getFileContents(fileId: string): Promise<Buffer>;
}

View file

@ -0,0 +1,68 @@
import { z } from "zod";
import { IDataSourceDocsRepository, CreateSchema as DocCreateSchema } from "@/src/application/repositories/data-source-docs.repository.interface";
import { IDataSourcesRepository } from "@/src/application/repositories/data-sources.repository.interface";
import { IUsageQuotaPolicy } from "@/src/application/policies/usage-quota.policy.interface";
import { IProjectActionAuthorizationPolicy } from "@/src/application/policies/project-action-authorization.policy";
import { NotFoundError } from "@/src/entities/errors/common";
// Input contract: caller identity (user session or API key), the target
// data source, and the docs to append (validated by DocCreateSchema).
const inputSchema = z.object({
    caller: z.enum(["user", "api"]),
    userId: z.string().optional(),
    apiKey: z.string().optional(),
    sourceId: z.string(),
    docs: z.array(DocCreateSchema),
});

/** Use-case contract for adding docs to an existing data source. */
export interface IAddDocsToDataSourceUseCase {
    execute(request: z.infer<typeof inputSchema>): Promise<void>;
}
/**
 * Use-case: append new docs to an existing data source and re-queue the
 * source so the background worker picks it up for processing.
 *
 * Authorizes the caller against the source's project and consumes usage
 * quota before writing anything. Throws NotFoundError when the source
 * does not exist.
 */
export class AddDocsToDataSourceUseCase implements IAddDocsToDataSourceUseCase {
    private readonly dataSourceDocsRepository: IDataSourceDocsRepository;
    private readonly dataSourcesRepository: IDataSourcesRepository;
    private readonly usageQuotaPolicy: IUsageQuotaPolicy;
    private readonly projectActionAuthorizationPolicy: IProjectActionAuthorizationPolicy;

    constructor({
        dataSourceDocsRepository,
        dataSourcesRepository,
        usageQuotaPolicy,
        projectActionAuthorizationPolicy,
    }: {
        dataSourceDocsRepository: IDataSourceDocsRepository,
        dataSourcesRepository: IDataSourcesRepository,
        usageQuotaPolicy: IUsageQuotaPolicy,
        projectActionAuthorizationPolicy: IProjectActionAuthorizationPolicy,
    }) {
        this.dataSourceDocsRepository = dataSourceDocsRepository;
        this.dataSourcesRepository = dataSourcesRepository;
        this.usageQuotaPolicy = usageQuotaPolicy;
        this.projectActionAuthorizationPolicy = projectActionAuthorizationPolicy;
    }

    async execute(request: z.infer<typeof inputSchema>): Promise<void> {
        const { caller, userId, apiKey, sourceId, docs } = request;

        // The source must exist before docs can be attached to it.
        const source = await this.dataSourcesRepository.fetch(sourceId);
        if (!source) {
            throw new NotFoundError('Data source not found');
        }

        const { projectId } = source;
        await this.projectActionAuthorizationPolicy.authorize({ caller, userId, apiKey, projectId });
        await this.usageQuotaPolicy.assertAndConsume(projectId);

        await this.dataSourceDocsRepository.bulkCreate(projectId, sourceId, docs);

        // Reset the source (fresh attempt budget, cleared billing error)
        // so the worker re-processes it.
        await this.dataSourcesRepository.update(sourceId, {
            status: "pending",
            billingError: null,
            attempts: 0,
        }, true);
    }
}

View file

@ -0,0 +1,60 @@
import { z } from "zod";
import { DataSource } from "@/src/entities/models/data-source";
import { IUsageQuotaPolicy } from "@/src/application/policies/usage-quota.policy.interface";
import { IProjectActionAuthorizationPolicy } from "@/src/application/policies/project-action-authorization.policy";
import { IDataSourcesRepository, CreateSchema } from "@/src/application/repositories/data-sources.repository.interface";
// Input contract: caller identity (user session or API key) plus the
// payload for the new data source (validated by the repository CreateSchema).
const inputSchema = z.object({
    caller: z.enum(["user", "api"]),
    userId: z.string().optional(),
    apiKey: z.string().optional(),
    data: CreateSchema,
});

/** Use-case contract for creating a data source. */
export interface ICreateDataSourceUseCase {
    execute(request: z.infer<typeof inputSchema>): Promise<z.infer<typeof DataSource>>;
}
/**
 * Use-case: create a new data source in a project.
 *
 * Authorizes the caller against the target project and consumes usage
 * quota before creating the record. File-backed sources ('files_local' /
 * 'files_s3') always start as "pending" regardless of the requested
 * status; other types may carry the caller-provided status through.
 */
export class CreateDataSourceUseCase implements ICreateDataSourceUseCase {
    private readonly dataSourcesRepository: IDataSourcesRepository;
    private readonly usageQuotaPolicy: IUsageQuotaPolicy;
    private readonly projectActionAuthorizationPolicy: IProjectActionAuthorizationPolicy;

    constructor({
        dataSourcesRepository,
        usageQuotaPolicy,
        projectActionAuthorizationPolicy,
    }: {
        dataSourcesRepository: IDataSourcesRepository,
        usageQuotaPolicy: IUsageQuotaPolicy,
        projectActionAuthorizationPolicy: IProjectActionAuthorizationPolicy,
    }) {
        this.dataSourcesRepository = dataSourcesRepository;
        this.usageQuotaPolicy = usageQuotaPolicy;
        this.projectActionAuthorizationPolicy = projectActionAuthorizationPolicy;
    }

    async execute(request: z.infer<typeof inputSchema>): Promise<z.infer<typeof DataSource>> {
        const { projectId } = request.data;
        await this.projectActionAuthorizationPolicy.authorize({
            caller: request.caller,
            userId: request.userId,
            apiKey: request.apiKey,
            projectId,
        });
        await this.usageQuotaPolicy.assertAndConsume(projectId);

        // Declare the narrow status type up front instead of widening to a
        // plain string and casting back with 'as' at the call site.
        let status: z.infer<typeof DataSource>['status'] = "pending";
        // Only honor the requested status for non-file data sources.
        if (request.data.status && request.data.data.type !== 'files_local' && request.data.data.type !== 'files_s3') {
            status = request.data.status;
        }
        return await this.dataSourcesRepository.create({
            ...request.data,
            status,
        });
    }
}

View file

@ -0,0 +1,60 @@
import { z } from "zod";
import { IUsageQuotaPolicy } from "@/src/application/policies/usage-quota.policy.interface";
import { IProjectActionAuthorizationPolicy } from "@/src/application/policies/project-action-authorization.policy";
import { IDataSourcesRepository } from "@/src/application/repositories/data-sources.repository.interface";
import { NotFoundError } from "@/src/entities/errors/common";
// Input contract: caller identity (user session or API key) plus the
// ID of the data source to delete.
const inputSchema = z.object({
    caller: z.enum(["user", "api"]),
    userId: z.string().optional(),
    apiKey: z.string().optional(),
    sourceId: z.string(),
});

/** Use-case contract for deleting a data source. */
export interface IDeleteDataSourceUseCase {
    execute(request: z.infer<typeof inputSchema>): Promise<void>;
}
/**
 * Use-case: soft-delete a data source. The record is flipped to the
 * "deleted" status (with retry counters reset) so the background worker
 * can clean up its docs and embeddings asynchronously.
 *
 * Throws NotFoundError when the source does not exist.
 */
export class DeleteDataSourceUseCase implements IDeleteDataSourceUseCase {
    private readonly dataSourcesRepository: IDataSourcesRepository;
    private readonly usageQuotaPolicy: IUsageQuotaPolicy;
    private readonly projectActionAuthorizationPolicy: IProjectActionAuthorizationPolicy;

    constructor({
        dataSourcesRepository,
        usageQuotaPolicy,
        projectActionAuthorizationPolicy,
    }: {
        dataSourcesRepository: IDataSourcesRepository,
        usageQuotaPolicy: IUsageQuotaPolicy,
        projectActionAuthorizationPolicy: IProjectActionAuthorizationPolicy,
    }) {
        this.dataSourcesRepository = dataSourcesRepository;
        this.usageQuotaPolicy = usageQuotaPolicy;
        this.projectActionAuthorizationPolicy = projectActionAuthorizationPolicy;
    }

    async execute(request: z.infer<typeof inputSchema>): Promise<void> {
        const { caller, userId, apiKey, sourceId } = request;

        const source = await this.dataSourcesRepository.fetch(sourceId);
        if (!source) {
            throw new NotFoundError(`Data source ${sourceId} not found`);
        }

        const { projectId } = source;
        await this.projectActionAuthorizationPolicy.authorize({ caller, userId, apiKey, projectId });
        await this.usageQuotaPolicy.assertAndConsume(projectId);

        // Mark for deletion; the rag worker performs the actual cleanup.
        await this.dataSourcesRepository.update(sourceId, {
            status: 'deleted',
            attempts: 0,
            billingError: null,
        }, true);
    }
}

View file

@ -0,0 +1,67 @@
import { z } from "zod";
import { IDataSourceDocsRepository } from "@/src/application/repositories/data-source-docs.repository.interface";
import { IDataSourcesRepository } from "@/src/application/repositories/data-sources.repository.interface";
import { IUsageQuotaPolicy } from "@/src/application/policies/usage-quota.policy.interface";
import { IProjectActionAuthorizationPolicy } from "@/src/application/policies/project-action-authorization.policy";
import { NotFoundError } from "@/src/entities/errors/common";
// Input contract: caller identity (user session or API key) plus the
// ID of the doc to remove from its data source.
const inputSchema = z.object({
    caller: z.enum(["user", "api"]),
    userId: z.string().optional(),
    apiKey: z.string().optional(),
    docId: z.string(),
});

/** Use-case contract for deleting a single doc from a data source. */
export interface IDeleteDocFromDataSourceUseCase {
    execute(request: z.infer<typeof inputSchema>): Promise<void>;
}
/**
 * Use-case: remove a single doc from a data source. The doc is flagged as
 * deleted and the parent source is re-queued so the background worker can
 * drop the doc's embeddings and record.
 *
 * Throws NotFoundError when the doc does not exist.
 */
export class DeleteDocFromDataSourceUseCase implements IDeleteDocFromDataSourceUseCase {
    private readonly dataSourceDocsRepository: IDataSourceDocsRepository;
    private readonly dataSourcesRepository: IDataSourcesRepository;
    private readonly usageQuotaPolicy: IUsageQuotaPolicy;
    private readonly projectActionAuthorizationPolicy: IProjectActionAuthorizationPolicy;

    constructor({
        dataSourceDocsRepository,
        dataSourcesRepository,
        usageQuotaPolicy,
        projectActionAuthorizationPolicy,
    }: {
        dataSourceDocsRepository: IDataSourceDocsRepository,
        dataSourcesRepository: IDataSourcesRepository,
        usageQuotaPolicy: IUsageQuotaPolicy,
        projectActionAuthorizationPolicy: IProjectActionAuthorizationPolicy,
    }) {
        this.dataSourceDocsRepository = dataSourceDocsRepository;
        this.dataSourcesRepository = dataSourcesRepository;
        this.usageQuotaPolicy = usageQuotaPolicy;
        this.projectActionAuthorizationPolicy = projectActionAuthorizationPolicy;
    }

    async execute(request: z.infer<typeof inputSchema>): Promise<void> {
        const { caller, userId, apiKey, docId } = request;

        const doc = await this.dataSourceDocsRepository.fetch(docId);
        if (!doc) {
            throw new NotFoundError(`Doc ${docId} not found`);
        }

        const { projectId, sourceId } = doc;
        await this.projectActionAuthorizationPolicy.authorize({ caller, userId, apiKey, projectId });
        await this.usageQuotaPolicy.assertAndConsume(projectId);

        // Flag the doc; actual cleanup happens in the rag worker.
        await this.dataSourceDocsRepository.markAsDeleted(docId);

        // Re-queue the parent source with a fresh attempt budget.
        await this.dataSourcesRepository.update(sourceId, {
            status: 'pending',
            billingError: null,
            attempts: 0,
        }, true);
    }
}

View file

@ -0,0 +1,57 @@
import { z } from "zod";
import { DataSource } from "@/src/entities/models/data-source";
import { IUsageQuotaPolicy } from "@/src/application/policies/usage-quota.policy.interface";
import { IProjectActionAuthorizationPolicy } from "@/src/application/policies/project-action-authorization.policy";
import { IDataSourcesRepository } from "@/src/application/repositories/data-sources.repository.interface";
import { NotFoundError } from "@/src/entities/errors/common";
// Use-case input: caller identity plus the id of the source to fetch.
const inputSchema = z.object({
    caller: z.enum(["user", "api"]),
    userId: z.string().optional(),
    apiKey: z.string().optional(),
    sourceId: z.string(),
});

/** Fetches a single data source after authorization and quota checks. */
export interface IFetchDataSourceUseCase {
    execute(request: z.infer<typeof inputSchema>): Promise<z.infer<typeof DataSource>>;
}

export class FetchDataSourceUseCase implements IFetchDataSourceUseCase {
    private readonly dataSourcesRepository: IDataSourcesRepository;
    private readonly usageQuotaPolicy: IUsageQuotaPolicy;
    private readonly projectActionAuthorizationPolicy: IProjectActionAuthorizationPolicy;

    constructor(deps: {
        dataSourcesRepository: IDataSourcesRepository,
        usageQuotaPolicy: IUsageQuotaPolicy,
        projectActionAuthorizationPolicy: IProjectActionAuthorizationPolicy,
    }) {
        this.dataSourcesRepository = deps.dataSourcesRepository;
        this.usageQuotaPolicy = deps.usageQuotaPolicy;
        this.projectActionAuthorizationPolicy = deps.projectActionAuthorizationPolicy;
    }

    /**
     * Returns the data source identified by sourceId.
     * @throws NotFoundError when no such source exists.
     */
    async execute(request: z.infer<typeof inputSchema>): Promise<z.infer<typeof DataSource>> {
        const { caller, userId, apiKey, sourceId } = request;
        // Resolve the source first; its projectId drives authorization.
        const found = await this.dataSourcesRepository.fetch(sourceId);
        if (found === null) {
            throw new NotFoundError(`Data source ${sourceId} not found`);
        }
        await this.projectActionAuthorizationPolicy.authorize({
            caller,
            userId,
            apiKey,
            projectId: found.projectId,
        });
        await this.usageQuotaPolicy.assertAndConsume(found.projectId);
        return found;
    }
}

View file

@ -0,0 +1,72 @@
import { z } from "zod";
import { IUploadsStorageService } from "@/src/application/services/uploads-storage.service.interface";
import { IDataSourceDocsRepository } from "@/src/application/repositories/data-source-docs.repository.interface";
import { IUsageQuotaPolicy } from "@/src/application/policies/usage-quota.policy.interface";
import { IProjectActionAuthorizationPolicy } from "@/src/application/policies/project-action-authorization.policy";
import { NotFoundError } from "@/src/entities/errors/common";
// Use-case input: caller identity plus the file (doc) id to download.
const inputSchema = z.object({
    caller: z.enum(["user", "api"]),
    userId: z.string().optional(),
    apiKey: z.string().optional(),
    fileId: z.string(),
});

/** Produces a download URL for an uploaded file, picking the matching storage backend. */
export interface IGetDownloadUrlForFileUseCase {
    execute(request: z.infer<typeof inputSchema>): Promise<string>;
}

export class GetDownloadUrlForFileUseCase implements IGetDownloadUrlForFileUseCase {
    private readonly s3UploadsStorageService: IUploadsStorageService;
    private readonly localUploadsStorageService: IUploadsStorageService;
    private readonly dataSourceDocsRepository: IDataSourceDocsRepository;
    private readonly usageQuotaPolicy: IUsageQuotaPolicy;
    private readonly projectActionAuthorizationPolicy: IProjectActionAuthorizationPolicy;

    constructor(deps: {
        s3UploadsStorageService: IUploadsStorageService,
        localUploadsStorageService: IUploadsStorageService,
        dataSourceDocsRepository: IDataSourceDocsRepository,
        usageQuotaPolicy: IUsageQuotaPolicy,
        projectActionAuthorizationPolicy: IProjectActionAuthorizationPolicy,
    }) {
        this.s3UploadsStorageService = deps.s3UploadsStorageService;
        this.localUploadsStorageService = deps.localUploadsStorageService;
        this.dataSourceDocsRepository = deps.dataSourceDocsRepository;
        this.usageQuotaPolicy = deps.usageQuotaPolicy;
        this.projectActionAuthorizationPolicy = deps.projectActionAuthorizationPolicy;
    }

    /**
     * Resolves the storage backend from the stored file record and returns a
     * download URL for it.
     * @throws NotFoundError when the file is missing or not a downloadable type.
     */
    async execute(request: z.infer<typeof inputSchema>): Promise<string> {
        const { caller, userId, apiKey, fileId } = request;
        const file = await this.dataSourceDocsRepository.fetch(fileId);
        if (!file) {
            throw new NotFoundError('File not found');
        }
        await this.projectActionAuthorizationPolicy.authorize({
            caller,
            userId,
            apiKey,
            projectId: file.projectId,
        });
        await this.usageQuotaPolicy.assertAndConsume(file.projectId);
        // Dispatch on the stored record's type; both backends resolve by file id.
        switch (file.data.type) {
            case 'file_local':
                // use the file id instead of path here
                return this.localUploadsStorageService.getDownloadUrl(file.id);
            case 'file_s3':
                return this.s3UploadsStorageService.getDownloadUrl(file.id);
            default:
                throw new NotFoundError('Invalid file type');
        }
    }
}

View file

@ -0,0 +1,82 @@
import { z } from "zod";
import { IUploadsStorageService } from "@/src/application/services/uploads-storage.service.interface";
import { IDataSourcesRepository } from "@/src/application/repositories/data-sources.repository.interface";
import { IUsageQuotaPolicy } from "@/src/application/policies/usage-quota.policy.interface";
import { IProjectActionAuthorizationPolicy } from "@/src/application/policies/project-action-authorization.policy";
import { ObjectId } from "mongodb";
import { NotFoundError } from "@/src/entities/errors/common";
// Use-case input: caller identity, the target source, and metadata for each
// file the client wants to upload.
const inputSchema = z.object({
    caller: z.enum(["user", "api"]),
    userId: z.string().optional(),
    apiKey: z.string().optional(),
    sourceId: z.string(),
    files: z.array(z.object({ name: z.string(), type: z.string(), size: z.number() })),
});

/**
 * Generates pre-signed/app-relative upload URLs (one per file) for a
 * file-backed data source.
 */
export interface IGetUploadUrlsForFilesUseCase {
    execute(request: z.infer<typeof inputSchema>): Promise<{ fileId: string, uploadUrl: string, path: string }[]>;
}

export class GetUploadUrlsForFilesUseCase implements IGetUploadUrlsForFilesUseCase {
    private readonly s3UploadsStorageService: IUploadsStorageService;
    private readonly localUploadsStorageService: IUploadsStorageService;
    private readonly dataSourcesRepository: IDataSourcesRepository;
    private readonly usageQuotaPolicy: IUsageQuotaPolicy;
    private readonly projectActionAuthorizationPolicy: IProjectActionAuthorizationPolicy;

    constructor({
        s3UploadsStorageService,
        localUploadsStorageService,
        dataSourcesRepository,
        usageQuotaPolicy,
        projectActionAuthorizationPolicy,
    }: {
        s3UploadsStorageService: IUploadsStorageService,
        localUploadsStorageService: IUploadsStorageService,
        dataSourcesRepository: IDataSourcesRepository,
        usageQuotaPolicy: IUsageQuotaPolicy,
        projectActionAuthorizationPolicy: IProjectActionAuthorizationPolicy,
    }) {
        this.s3UploadsStorageService = s3UploadsStorageService;
        this.localUploadsStorageService = localUploadsStorageService;
        this.dataSourcesRepository = dataSourcesRepository;
        this.usageQuotaPolicy = usageQuotaPolicy;
        this.projectActionAuthorizationPolicy = projectActionAuthorizationPolicy;
    }

    /**
     * Returns one { fileId, uploadUrl, path } per input file. For non-file
     * source types the result is empty (preserved legacy behavior).
     * @throws NotFoundError when the data source does not exist.
     */
    async execute(request: z.infer<typeof inputSchema>): Promise<{ fileId: string, uploadUrl: string, path: string }[]> {
        const { sourceId, files } = request;
        const source = await this.dataSourcesRepository.fetch(sourceId);
        if (!source) {
            throw new NotFoundError('Data source not found');
        }
        await this.projectActionAuthorizationPolicy.authorize({
            caller: request.caller,
            userId: request.userId,
            apiKey: request.apiKey,
            projectId: source.projectId,
        });
        await this.usageQuotaPolicy.assertAndConsume(source.projectId);
        const urls: { fileId: string, uploadUrl: string, path: string }[] = [];
        // The storage type is a property of the source, not of each file:
        // check it once instead of per-iteration.
        const storageType = source.data.type;
        if (storageType !== 'files_s3' && storageType !== 'files_local') {
            // NOTE(review): non-file sources silently yield an empty result;
            // consider rejecting explicitly. Preserved for compatibility.
            return urls;
        }
        // Loop-invariant: the two-char prefix shards S3 keys by project.
        const projectIdPrefix = source.projectId.slice(0, 2);
        for (const file of files) {
            const fileId = new ObjectId().toString();
            if (storageType === 'files_s3') {
                // NOTE(review): file.name is embedded unsanitized in the S3 key;
                // a '/' in a name nests the key — confirm names are validated upstream.
                const path = `datasources/files/${projectIdPrefix}/${source.projectId}/${sourceId}/${fileId}/${file.name}`;
                const uploadUrl = await this.s3UploadsStorageService.getUploadUrl(path, file.type);
                urls.push({ fileId, uploadUrl, path });
            } else {
                // Local storage keys purely by fileId; the URL doubles as the path.
                const uploadUrl = await this.localUploadsStorageService.getUploadUrl(fileId, file.type);
                urls.push({ fileId, uploadUrl, path: uploadUrl });
            }
        }
        return urls;
    }
}

View file

@ -0,0 +1,60 @@
import { z } from "zod";
import { IUsageQuotaPolicy } from "@/src/application/policies/usage-quota.policy.interface";
import { IProjectActionAuthorizationPolicy } from "@/src/application/policies/project-action-authorization.policy";
import { IDataSourcesRepository } from "@/src/application/repositories/data-sources.repository.interface";
import { DataSource } from "@/src/entities/models/data-source";
// Use-case input: caller identity plus the project whose sources are listed.
const inputSchema = z.object({
    caller: z.enum(["user", "api"]),
    userId: z.string().optional(),
    apiKey: z.string().optional(),
    projectId: z.string(),
});

/** Lists every data source in a project (all repository pages are drained). */
export interface IListDataSourcesUseCase {
    execute(request: z.infer<typeof inputSchema>): Promise<z.infer<typeof DataSource>[]>;
}

export class ListDataSourcesUseCase implements IListDataSourcesUseCase {
    private readonly dataSourcesRepository: IDataSourcesRepository;
    private readonly usageQuotaPolicy: IUsageQuotaPolicy;
    private readonly projectActionAuthorizationPolicy: IProjectActionAuthorizationPolicy;

    constructor(deps: {
        dataSourcesRepository: IDataSourcesRepository,
        usageQuotaPolicy: IUsageQuotaPolicy,
        projectActionAuthorizationPolicy: IProjectActionAuthorizationPolicy,
    }) {
        this.dataSourcesRepository = deps.dataSourcesRepository;
        this.usageQuotaPolicy = deps.usageQuotaPolicy;
        this.projectActionAuthorizationPolicy = deps.projectActionAuthorizationPolicy;
    }

    /** Returns all (non-deleted) sources of the project, unfiltered. */
    async execute(request: z.infer<typeof inputSchema>): Promise<z.infer<typeof DataSource>[]> {
        const { caller, userId, apiKey, projectId } = request;
        await this.projectActionAuthorizationPolicy.authorize({
            caller,
            userId,
            apiKey,
            projectId,
        });
        await this.usageQuotaPolicy.assertAndConsume(projectId);
        // Drain cursor pagination until the repository reports no next page.
        const collected: z.infer<typeof DataSource>[] = [];
        let pageCursor: string | undefined;
        for (;;) {
            const page = await this.dataSourcesRepository.list(projectId, undefined, pageCursor);
            collected.push(...page.items);
            if (!page.nextCursor) {
                return collected;
            }
            pageCursor = page.nextCursor;
        }
    }
}

View file

@ -0,0 +1,71 @@
import { z } from "zod";
import { IDataSourceDocsRepository } from "@/src/application/repositories/data-source-docs.repository.interface";
import { IDataSourcesRepository } from "@/src/application/repositories/data-sources.repository.interface";
import { IUsageQuotaPolicy } from "@/src/application/policies/usage-quota.policy.interface";
import { IProjectActionAuthorizationPolicy } from "@/src/application/policies/project-action-authorization.policy";
import { DataSourceDoc } from "@/src/entities/models/data-source-doc";
import { NotFoundError } from "@/src/entities/errors/common";
// Use-case input: caller identity plus the source whose docs are listed.
const inputSchema = z.object({
    caller: z.enum(["user", "api"]),
    userId: z.string().optional(),
    apiKey: z.string().optional(),
    sourceId: z.string(),
});

/** Lists every doc belonging to a data source (all repository pages are drained). */
export interface IListDocsInDataSourceUseCase {
    execute(request: z.infer<typeof inputSchema>): Promise<z.infer<typeof DataSourceDoc>[]>;
}

export class ListDocsInDataSourceUseCase implements IListDocsInDataSourceUseCase {
    private readonly dataSourceDocsRepository: IDataSourceDocsRepository;
    private readonly dataSourcesRepository: IDataSourcesRepository;
    private readonly usageQuotaPolicy: IUsageQuotaPolicy;
    private readonly projectActionAuthorizationPolicy: IProjectActionAuthorizationPolicy;

    constructor({
        dataSourceDocsRepository,
        dataSourcesRepository,
        usageQuotaPolicy,
        projectActionAuthorizationPolicy,
    }: {
        dataSourceDocsRepository: IDataSourceDocsRepository,
        dataSourcesRepository: IDataSourcesRepository,
        usageQuotaPolicy: IUsageQuotaPolicy,
        projectActionAuthorizationPolicy: IProjectActionAuthorizationPolicy,
    }) {
        this.dataSourceDocsRepository = dataSourceDocsRepository;
        this.dataSourcesRepository = dataSourcesRepository;
        this.usageQuotaPolicy = usageQuotaPolicy;
        this.projectActionAuthorizationPolicy = projectActionAuthorizationPolicy;
    }

    /**
     * Returns all docs of the source, unfiltered.
     * @throws NotFoundError when the data source does not exist.
     */
    async execute(request: z.infer<typeof inputSchema>): Promise<z.infer<typeof DataSourceDoc>[]> {
        const { sourceId } = request;
        // Fetch the source first to resolve its projectId for authorization.
        const source = await this.dataSourcesRepository.fetch(sourceId);
        if (!source) {
            throw new NotFoundError(`Data source ${sourceId} not found`);
        }
        await this.projectActionAuthorizationPolicy.authorize({
            caller: request.caller,
            userId: request.userId,
            apiKey: request.apiKey,
            projectId: source.projectId,
        });
        await this.usageQuotaPolicy.assertAndConsume(source.projectId);
        // fetch all docs: drain cursor pagination until no next page remains
        const docs = [];
        let cursor = undefined;
        do {
            const result = await this.dataSourceDocsRepository.list(sourceId, undefined, cursor);
            docs.push(...result.items);
            cursor = result.nextCursor;
        } while (cursor);
        return docs;
    }
}

View file

@ -0,0 +1,71 @@
import { z } from "zod";
import { IDataSourceDocsRepository } from "@/src/application/repositories/data-source-docs.repository.interface";
import { IDataSourcesRepository } from "@/src/application/repositories/data-sources.repository.interface";
import { IUsageQuotaPolicy } from "@/src/application/policies/usage-quota.policy.interface";
import { IProjectActionAuthorizationPolicy } from "@/src/application/policies/project-action-authorization.policy";
import { NotFoundError, BadRequestError } from "@/src/entities/errors/common";
// Use-case input: caller identity plus the URL-backed source to recrawl.
const inputSchema = z.object({
    caller: z.enum(["user", "api"]),
    userId: z.string().optional(),
    apiKey: z.string().optional(),
    sourceId: z.string(),
});

/**
 * Re-queues a URL-backed ('urls') data source for crawling: all of its docs
 * are reset to 'pending' and the source itself is re-queued with a version bump.
 */
export interface IRecrawlWebDataSourceUseCase {
    execute(request: z.infer<typeof inputSchema>): Promise<void>;
}

export class RecrawlWebDataSourceUseCase implements IRecrawlWebDataSourceUseCase {
    private readonly dataSourceDocsRepository: IDataSourceDocsRepository;
    private readonly dataSourcesRepository: IDataSourcesRepository;
    private readonly usageQuotaPolicy: IUsageQuotaPolicy;
    private readonly projectActionAuthorizationPolicy: IProjectActionAuthorizationPolicy;

    constructor({
        dataSourceDocsRepository,
        dataSourcesRepository,
        usageQuotaPolicy,
        projectActionAuthorizationPolicy,
    }: {
        dataSourceDocsRepository: IDataSourceDocsRepository,
        dataSourcesRepository: IDataSourcesRepository,
        usageQuotaPolicy: IUsageQuotaPolicy,
        projectActionAuthorizationPolicy: IProjectActionAuthorizationPolicy,
    }) {
        this.dataSourceDocsRepository = dataSourceDocsRepository;
        this.dataSourcesRepository = dataSourcesRepository;
        this.usageQuotaPolicy = usageQuotaPolicy;
        this.projectActionAuthorizationPolicy = projectActionAuthorizationPolicy;
    }

    /**
     * @throws NotFoundError when the source does not exist.
     * @throws BadRequestError when the source is not a 'urls' source.
     */
    async execute(request: z.infer<typeof inputSchema>): Promise<void> {
        const source = await this.dataSourcesRepository.fetch(request.sourceId);
        if (!source) {
            throw new NotFoundError(`Data source ${request.sourceId} not found`);
        }
        // Recrawl only makes sense for web (URL-backed) sources.
        if (source.data.type !== 'urls') {
            throw new BadRequestError('Invalid data source type');
        }
        const { projectId } = source;
        await this.projectActionAuthorizationPolicy.authorize({
            caller: request.caller,
            userId: request.userId,
            apiKey: request.apiKey,
            projectId,
        });
        await this.usageQuotaPolicy.assertAndConsume(projectId);
        // Reset every doc to 'pending' (attempts zeroed), then re-queue the
        // source itself with a version bump (third arg true) for the worker.
        await this.dataSourceDocsRepository.markSourceDocsPending(request.sourceId);
        await this.dataSourcesRepository.update(request.sourceId, {
            status: 'pending',
            billingError: null,
            attempts: 0,
        }, true);
    }
}

View file

@ -0,0 +1,58 @@
import { z } from "zod";
import { DataSource } from "@/src/entities/models/data-source";
import { IUsageQuotaPolicy } from "@/src/application/policies/usage-quota.policy.interface";
import { IProjectActionAuthorizationPolicy } from "@/src/application/policies/project-action-authorization.policy";
import { IDataSourcesRepository } from "@/src/application/repositories/data-sources.repository.interface";
import { NotFoundError } from "@/src/entities/errors/common";
// Use-case input: caller identity, target source, and the desired active flag.
const inputSchema = z.object({
    caller: z.enum(["user", "api"]),
    userId: z.string().optional(),
    apiKey: z.string().optional(),
    sourceId: z.string(),
    active: z.boolean(),
});

/** Enables or disables a data source and returns the updated record. */
export interface IToggleDataSourceUseCase {
    execute(request: z.infer<typeof inputSchema>): Promise<z.infer<typeof DataSource>>;
}

export class ToggleDataSourceUseCase implements IToggleDataSourceUseCase {
    private readonly dataSourcesRepository: IDataSourcesRepository;
    private readonly usageQuotaPolicy: IUsageQuotaPolicy;
    private readonly projectActionAuthorizationPolicy: IProjectActionAuthorizationPolicy;

    constructor(deps: {
        dataSourcesRepository: IDataSourcesRepository,
        usageQuotaPolicy: IUsageQuotaPolicy,
        projectActionAuthorizationPolicy: IProjectActionAuthorizationPolicy,
    }) {
        this.dataSourcesRepository = deps.dataSourcesRepository;
        this.usageQuotaPolicy = deps.usageQuotaPolicy;
        this.projectActionAuthorizationPolicy = deps.projectActionAuthorizationPolicy;
    }

    /**
     * Flips the 'active' flag on the source.
     * @throws NotFoundError when the source does not exist.
     */
    async execute(request: z.infer<typeof inputSchema>): Promise<z.infer<typeof DataSource>> {
        const { caller, userId, apiKey, sourceId, active } = request;
        const current = await this.dataSourcesRepository.fetch(sourceId);
        if (current === null) {
            throw new NotFoundError(`Data source ${sourceId} not found`);
        }
        await this.projectActionAuthorizationPolicy.authorize({
            caller,
            userId,
            apiKey,
            projectId: current.projectId,
        });
        await this.usageQuotaPolicy.assertAndConsume(current.projectId);
        // Toggling does not bump the source version (no re-processing needed).
        return await this.dataSourcesRepository.update(sourceId, { active });
    }
}

View file

@ -0,0 +1,62 @@
import { z } from "zod";
import { DataSource } from "@/src/entities/models/data-source";
import { IUsageQuotaPolicy } from "@/src/application/policies/usage-quota.policy.interface";
import { IProjectActionAuthorizationPolicy } from "@/src/application/policies/project-action-authorization.policy";
import { IDataSourcesRepository } from "@/src/application/repositories/data-sources.repository.interface";
import { NotFoundError } from "@/src/entities/errors/common";
// Use-case input: caller identity, target source, and the editable fields.
// Only 'description' is currently exposed for update.
const inputSchema = z.object({
    caller: z.enum(["user", "api"]),
    userId: z.string().optional(),
    apiKey: z.string().optional(),
    sourceId: z.string(),
    data: DataSource
        .pick({
            description: true,
        })
        .partial(),
});

/** Updates editable fields on a data source and returns the updated record. */
export interface IUpdateDataSourceUseCase {
    execute(request: z.infer<typeof inputSchema>): Promise<z.infer<typeof DataSource>>;
}

export class UpdateDataSourceUseCase implements IUpdateDataSourceUseCase {
    private readonly dataSourcesRepository: IDataSourcesRepository;
    private readonly usageQuotaPolicy: IUsageQuotaPolicy;
    private readonly projectActionAuthorizationPolicy: IProjectActionAuthorizationPolicy;

    constructor({
        dataSourcesRepository,
        usageQuotaPolicy,
        projectActionAuthorizationPolicy,
    }: {
        dataSourcesRepository: IDataSourcesRepository,
        usageQuotaPolicy: IUsageQuotaPolicy,
        projectActionAuthorizationPolicy: IProjectActionAuthorizationPolicy,
    }) {
        this.dataSourcesRepository = dataSourcesRepository;
        this.usageQuotaPolicy = usageQuotaPolicy;
        this.projectActionAuthorizationPolicy = projectActionAuthorizationPolicy;
    }

    /**
     * @throws NotFoundError when the source does not exist.
     */
    async execute(request: z.infer<typeof inputSchema>): Promise<z.infer<typeof DataSource>> {
        // Fetch first to resolve the source's projectId for authorization.
        const source = await this.dataSourcesRepository.fetch(request.sourceId);
        if (!source) {
            throw new NotFoundError(`Data source ${request.sourceId} not found`);
        }
        const { projectId } = source;
        await this.projectActionAuthorizationPolicy.authorize({
            caller: request.caller,
            userId: request.userId,
            apiKey: request.apiKey,
            projectId,
        });
        await this.usageQuotaPolicy.assertAndConsume(projectId);
        // The third arg bumps the source version. NOTE(review): confirm a
        // description-only edit should bump the version (may re-queue work).
        return await this.dataSourcesRepository.update(request.sourceId, request.data, true);
    }
}

View file

@ -0,0 +1,44 @@
import { z } from "zod";
/**
 * A single document belonging to a data source: one crawled URL, one uploaded
 * file (local disk or S3), or one inline text snippet.
 */
export const DataSourceDoc = z.object({
    id: z.string(),
    // Parent data source id.
    sourceId: z.string(),
    projectId: z.string(),
    name: z.string(),
    // Optimistic-concurrency counter (matched by updateByVersion in the repository).
    version: z.number(),
    // Processing lifecycle; 'deleted' is a soft delete.
    status: z.enum([
        'pending',
        'ready',
        'error',
        'deleted',
    ]),
    // Extracted/ingested content; null until processed — TODO confirm semantics.
    content: z.string().nullable(),
    createdAt: z.string().datetime(),
    lastUpdatedAt: z.string().datetime().nullable(),
    // Processing attempts so far (reset to 0 when re-queued).
    attempts: z.number(),
    error: z.string().nullable(),
    // Storage/origin-specific payload, discriminated by 'type'.
    data: z.discriminatedUnion('type', [
        z.object({
            type: z.literal('url'),
            url: z.string(),
        }),
        z.object({
            type: z.literal('file_local'),
            name: z.string(),
            size: z.number(),
            mimeType: z.string(),
            path: z.string(),
        }),
        z.object({
            type: z.literal('file_s3'),
            name: z.string(),
            size: z.number(),
            mimeType: z.string(),
            s3Key: z.string(),
        }),
        z.object({
            type: z.literal('text'),
            content: z.string(),
        }),
    ]),
});

View file

@ -0,0 +1,36 @@
import { z } from "zod";
/**
 * A data source: a named collection of docs for a project, backed by crawled
 * URLs, uploaded files (local or S3), or inline text.
 */
export const DataSource = z.object({
    id: z.string(),
    name: z.string(),
    description: z.string(),
    projectId: z.string(),
    // Whether the source is enabled; defaults to true on parse.
    active: z.boolean().default(true),
    // Processing lifecycle; 'deleted' is a soft delete handled by a cleanup job.
    status: z.enum([
        'pending',
        'ready',
        'error',
        'deleted',
    ]),
    // Bumped on changes that require re-processing.
    version: z.number(),
    error: z.string().nullable(),
    billingError: z.string().nullable(),
    createdAt: z.string().datetime(),
    lastUpdatedAt: z.string().datetime().nullable(),
    // Processing attempts so far (reset to 0 when re-queued).
    attempts: z.number(),
    lastAttemptAt: z.string().datetime().nullable(),
    // Backend kind only; per-item details live on each DataSourceDoc.
    data: z.discriminatedUnion('type', [
        z.object({
            type: z.literal('urls'),
        }),
        z.object({
            type: z.literal('files_local'),
        }),
        z.object({
            type: z.literal('files_s3'),
        }),
        z.object({
            type: z.literal('text'),
        })
    ]),
});

View file

@ -0,0 +1,170 @@
import { z } from "zod";
import { Filter, ObjectId } from "mongodb";
import { db } from "@/app/lib/mongodb";
import { DataSourceDoc } from "@/src/entities/models/data-source-doc";
import {
CreateSchema,
IDataSourceDocsRepository,
ListFiltersSchema,
UpdateSchema,
} from "@/src/application/repositories/data-source-docs.repository.interface";
import { PaginatedList } from "@/src/entities/common/paginated-list";
import { NotFoundError } from "@/src/entities/errors/common";
/**
 * MongoDB document schema for DataSourceDoc.
 * Excludes the 'id' field as it's represented by MongoDB's '_id'.
 */
const DocSchema = DataSourceDoc.omit({ id: true });

/**
 * MongoDB implementation of the DataSourceDocs repository.
 * Docs live in the "source_docs" collection; cursor pagination is on '_id'
 * descending (newest first).
 */
export class MongoDBDataSourceDocsRepository implements IDataSourceDocsRepository {
    private readonly collection = db.collection<z.infer<typeof DocSchema>>("source_docs");

    /**
     * Inserts one doc per entry in `data`, all at version 1 in 'pending'
     * status with no content yet.
     * @returns the inserted ids, in input order.
     */
    async bulkCreate(projectId: string, sourceId: string, data: z.infer<typeof CreateSchema>[]): Promise<string[]> {
        // insertMany rejects an empty batch — short-circuit instead of throwing.
        if (data.length === 0) {
            return [];
        }
        const now = new Date().toISOString();
        const result = await this.collection.insertMany(data.map(doc => {
            return {
                projectId,
                sourceId,
                name: doc.name,
                version: 1,
                createdAt: now,
                lastUpdatedAt: null,
                content: null,
                attempts: 0,
                error: null,
                data: doc.data,
                status: "pending",
            }
        }));
        return Object.values(result.insertedIds).map(id => id.toString());
    }

    /** Fetches a doc by id, or null when absent. Maps '_id' to 'id'. */
    async fetch(id: string): Promise<z.infer<typeof DataSourceDoc> | null> {
        const result = await this.collection.findOne({ _id: new ObjectId(id) });
        if (!result) return null;
        const { _id, ...rest } = result;
        return {
            ...rest,
            id: _id.toString(),
        };
    }

    /** Fetches multiple docs by id; missing ids are silently omitted. */
    async bulkFetch(ids: string[]): Promise<z.infer<typeof DataSourceDoc>[]> {
        const results = await this.collection.find({ _id: { $in: ids.map(id => new ObjectId(id)) } }).toArray();
        return results.map(result => {
            const { _id, ...rest } = result;
            return { ...rest, id: _id.toString() };
        });
    }

    /**
     * Lists docs of a source, newest first, paginated by '_id' cursor.
     * Deleted docs are excluded by default. NOTE(review): an explicit
     * filters.status replaces that exclusion entirely, so passing 'deleted'
     * returns deleted docs — confirm this is intended.
     * Page size is capped at 50.
     */
    async list(
        sourceId: string,
        filters?: z.infer<typeof ListFiltersSchema>,
        cursor?: string,
        limit: number = 50
    ): Promise<z.infer<ReturnType<typeof PaginatedList<typeof DataSourceDoc>>>> {
        const query: Filter<z.infer<typeof DocSchema>> = { sourceId, status: { $ne: "deleted" } };
        if (filters?.status && filters.status.length > 0) {
            query.status = { $in: filters.status };
        }
        if (cursor) {
            query._id = { $lt: new ObjectId(cursor) };
        }
        const _limit = Math.min(limit, 50);
        // Fetch one extra row purely to detect whether another page exists.
        const results = await this.collection
            .find(query)
            .sort({ _id: -1 })
            .limit(_limit + 1)
            .toArray();
        const hasNextPage = results.length > _limit;
        const items = results.slice(0, _limit).map((doc) => {
            const { _id, ...rest } = doc;
            return {
                ...rest,
                id: _id.toString(),
            };
        });
        return {
            items,
            // The cursor is the last *returned* item's id.
            nextCursor: hasNextPage ? results[_limit - 1]._id.toString() : null,
        };
    }

    /** Resets every doc of a source to 'pending' with attempts zeroed. */
    async markSourceDocsPending(sourceId: string): Promise<void> {
        await this.collection.updateMany(
            { sourceId },
            {
                $set: {
                    status: "pending",
                    lastUpdatedAt: new Date().toISOString(),
                    attempts: 0,
                },
            },
        );
    }

    /** Soft-deletes a doc (status 'deleted'); the record itself is kept. */
    async markAsDeleted(id: string): Promise<void> {
        await this.collection.updateOne(
            { _id: new ObjectId(id) },
            {
                $set: {
                    status: "deleted",
                    lastUpdatedAt: new Date().toISOString(),
                },
            },
        );
    }

    /**
     * Optimistic update: applies `data` only when the stored version matches.
     * NOTE(review): the version is matched but not incremented here — confirm
     * callers bump it via the payload when needed.
     * @throws NotFoundError on missing doc or version mismatch.
     */
    async updateByVersion(
        id: string,
        version: number,
        data: z.infer<typeof UpdateSchema>
    ): Promise<z.infer<typeof DataSourceDoc>> {
        const result = await this.collection.findOneAndUpdate(
            { _id: new ObjectId(id), version },
            {
                $set: {
                    ...data,
                    lastUpdatedAt: new Date().toISOString(),
                },
            },
            { returnDocument: "after" }
        );
        if (!result) {
            throw new NotFoundError(`DataSourceDoc ${id} not found or version mismatch`);
        }
        const { _id, ...rest } = result;
        return {
            ...rest,
            id: _id.toString(),
        };
    }

    /** Hard-deletes a doc; returns true when a record was removed. */
    async delete(id: string): Promise<boolean> {
        const result = await this.collection.deleteOne({ _id: new ObjectId(id) });
        return result.deletedCount > 0;
    }

    /** Hard-deletes all docs of a source (used when purging a source). */
    async deleteBySourceId(sourceId: string): Promise<void> {
        await this.collection.deleteMany({ sourceId });
    }

    /** Hard-deletes all docs of a project (used when purging a project). */
    async deleteByProjectId(projectId: string): Promise<void> {
        await this.collection.deleteMany({ projectId });
    }
}

View file

@ -0,0 +1,218 @@
import { z } from "zod";
import { Filter, ObjectId } from "mongodb";
import { db } from "@/app/lib/mongodb";
import { DataSource } from "@/src/entities/models/data-source";
import {
    CreateSchema,
    IDataSourcesRepository,
    ListFiltersSchema,
    ReleasePayloadSchema,
    UpdateSchema,
} from "@/src/application/repositories/data-sources.repository.interface";
import { PaginatedList } from "@/src/entities/common/paginated-list";
import { NotFoundError } from "@/src/entities/errors/common";
/**
* MongoDB document schema for DataSource.
* Excludes the 'id' field as it's represented by MongoDB's '_id'.
*/
const DocSchema = DataSource.omit({ id: true });
/**
* MongoDB implementation of the DataSources repository.
*/
export class MongoDBDataSourcesRepository implements IDataSourcesRepository {
private readonly collection = db.collection<z.infer<typeof DocSchema>>("sources");
async create(data: z.infer<typeof CreateSchema>): Promise<z.infer<typeof DataSource>> {
const now = new Date().toISOString();
const _id = new ObjectId();
const doc: z.infer<typeof DocSchema> = {
...data,
active: true,
attempts: 0,
version: 1,
createdAt: now,
error: null,
billingError: null,
lastAttemptAt: null,
lastUpdatedAt: null,
};
await this.collection.insertOne({
...doc,
_id,
});
return {
...doc,
id: _id.toString(),
};
}
async fetch(id: string): Promise<z.infer<typeof DataSource> | null> {
const result = await this.collection.findOne({ _id: new ObjectId(id) });
if (!result) return null;
const { _id, ...rest } = result;
return {
...rest,
id: _id.toString(),
};
}
async list(
projectId: string,
filters?: z.infer<typeof ListFiltersSchema>,
cursor?: string,
limit: number = 50
): Promise<z.infer<ReturnType<typeof PaginatedList<typeof DataSource>>>> {
const query: any = { projectId, status: { $ne: "deleted" } };
// Default behavior: exclude deleted unless explicitly asked for
if (filters?.deleted === true) {
query.status = "deleted";
}
if (typeof filters?.active === "boolean") {
query.active = filters.active;
}
if (cursor) {
query._id = { $lt: new ObjectId(cursor) };
}
const _limit = Math.min(limit, 50);
const results = await this.collection
.find(query)
.sort({ _id: -1 })
.limit(_limit + 1)
.toArray();
const hasNextPage = results.length > _limit;
const items = results.slice(0, _limit).map((doc: any) => {
const { _id, ...rest } = doc;
return {
...rest,
id: _id.toString(),
};
});
return {
items,
nextCursor: hasNextPage ? results[_limit - 1]._id.toString() : null,
};
}
async update(
id: string,
data: z.infer<typeof UpdateSchema>,
bumpVersion?: boolean
): Promise<z.infer<typeof DataSource>> {
const now = new Date().toISOString();
const result = await this.collection.findOneAndUpdate(
{ _id: new ObjectId(id) },
{
$set: {
...data,
lastUpdatedAt: now,
},
...(bumpVersion ? { $inc: { version: 1 } } : {}),
},
{ returnDocument: "after" }
);
if (!result) {
throw new NotFoundError(`DataSource ${id} not found`);
}
const { _id, ...rest } = result;
return {
...rest,
id: _id.toString(),
};
}
async delete(id: string): Promise<boolean> {
const result = await this.collection.deleteOne({ _id: new ObjectId(id) });
return result.deletedCount > 0;
}
async deleteByProjectId(projectId: string): Promise<void> {
await this.collection.deleteMany({ projectId });
}
async pollDeleteJob(): Promise<z.infer<typeof DataSource> | null> {
const result = await this.collection.findOneAndUpdate({
status: "deleted",
$or: [
{ attempts: { $exists: false } },
{ attempts: { $lte: 3 } }
]
}, { $set: { lastAttemptAt: new Date().toISOString() }, $inc: { attempts: 1 } }, { returnDocument: "after", sort: { createdAt: 1 } });
if (!result) return null;
const { _id, ...rest } = result;
return { ...rest, id: _id.toString() };
}
async pollPendingJob(): Promise<z.infer<typeof DataSource> | null> {
const now = Date.now();
const result = await this.collection.findOneAndUpdate({
$and: [
{
$or: [
// if the job has never been attempted
{
status: "pending",
attempts: 0,
},
// if the job was attempted but wasn't completed in the last hour
{
status: "pending",
lastAttemptAt: { $lt: new Date(now - 60 * 60 * 1000).toISOString() },
},
// if the job errored out but hasn't been retried 3 times yet
{
status: "error",
attempts: { $lt: 3 },
},
// if the job errored out but hasn't been retried in the last hr
{
status: "error",
lastAttemptAt: { $lt: new Date(now - 60 * 60 * 1000).toISOString() },
},
]
}
]
}, {
$set: {
status: "pending",
lastAttemptAt: new Date().toISOString(),
},
$inc: {
attempts: 1
},
}, {
returnDocument: "after", sort: { createdAt: 1 }
});
if (!result) return null;
const { _id, ...rest } = result;
return { ...rest, id: _id.toString() };
}
async release(id: string, version: number, updates: z.infer<typeof ReleasePayloadSchema>): Promise<void> {
await this.collection.updateOne({
_id: new ObjectId(id),
version,
}, { $set: {
...updates,
lastUpdatedAt: new Date().toISOString(),
} });
}
}

View file

@ -0,0 +1,39 @@
import { IDataSourceDocsRepository } from "@/src/application/repositories/data-source-docs.repository.interface";
import { IUploadsStorageService } from "@/src/application/services/uploads-storage.service.interface";
import fs from "fs";
import path from "path";
import { NotFoundError } from "@/src/entities/errors/common";
// Root directory where locally-uploaded files are stored on disk.
const UPLOADS_DIR = process.env.RAG_UPLOADS_DIR || '/uploads';

/**
 * Uploads storage backed by the local filesystem. "URLs" here are
 * app-relative API routes (/api/uploads/...) served by the uploads handler.
 */
export class LocalUploadsStorageService implements IUploadsStorageService {
    private readonly dataSourceDocsRepository: IDataSourceDocsRepository;

    constructor({
        dataSourceDocsRepository,
    }: {
        dataSourceDocsRepository: IDataSourceDocsRepository,
    }) {
        this.dataSourceDocsRepository = dataSourceDocsRepository;
    }

    /** Returns the app route the client uploads to; contentType is unused locally. */
    async getUploadUrl(key: string, contentType: string): Promise<string> {
        return `/api/uploads/${key}`;
    }

    /** Returns the app route the client downloads from. */
    async getDownloadUrl(fileId: string): Promise<string> {
        return `/api/uploads/${fileId}`;
    }

    /**
     * Reads a locally-stored doc's bytes from disk.
     * @throws NotFoundError when the doc is missing, is not a local file, or
     *         its stored path is malformed / escapes the uploads directory.
     */
    async getFileContents(fileId: string): Promise<Buffer> {
        const file = await this.dataSourceDocsRepository.fetch(fileId);
        if (!file) {
            throw new NotFoundError('File not found');
        }
        if (file.data.type !== 'file_local') {
            throw new NotFoundError('File is not a local file');
        }
        // The stored path looks like "/api/uploads/<relative>"; recover the
        // part after the marker. Previously an unexpected path produced
        // `undefined` here and path.join threw a TypeError.
        const relativePath = file.data.path.split('/api/uploads/')[1];
        if (!relativePath) {
            throw new NotFoundError('File not found');
        }
        // Resolve and verify the target stays inside UPLOADS_DIR ("../" guard).
        const root = path.resolve(UPLOADS_DIR);
        const target = path.resolve(root, relativePath);
        if (target !== root && !target.startsWith(root + path.sep)) {
            throw new NotFoundError('File not found');
        }
        // Non-blocking read; the original readFileSync blocked the event loop.
        return fs.promises.readFile(target);
    }
}

View file

@ -0,0 +1,71 @@
import { IDataSourceDocsRepository } from "@/src/application/repositories/data-source-docs.repository.interface";
import { IUploadsStorageService } from "@/src/application/services/uploads-storage.service.interface";
import { NotFoundError } from "@/src/entities/errors/common";
import { S3Client, GetObjectCommand, PutObjectCommand } from "@aws-sdk/client-s3";
import { getSignedUrl } from "@aws-sdk/s3-request-presigner";
/**
 * Uploads storage backed by S3: pre-signed PUT URLs for upload, short-lived
 * pre-signed GET URLs for download, and streamed reads for processing.
 */
export class S3UploadsStorageService implements IUploadsStorageService {
    private readonly s3Client: S3Client;
    private readonly bucket: string;
    private readonly dataSourceDocsRepository: IDataSourceDocsRepository;

    constructor({
        dataSourceDocsRepository,
    }: {
        dataSourceDocsRepository: IDataSourceDocsRepository,
    }) {
        this.dataSourceDocsRepository = dataSourceDocsRepository;
        this.s3Client = new S3Client({
            region: process.env.UPLOADS_AWS_REGION || 'us-east-1',
            credentials: {
                accessKeyId: process.env.AWS_ACCESS_KEY_ID || '',
                secretAccessKey: process.env.AWS_SECRET_ACCESS_KEY || '',
            },
        });
        this.bucket = process.env.RAG_UPLOADS_S3_BUCKET || '';
    }

    /**
     * Resolves a doc and asserts it is S3-backed; shared by the two read paths.
     * @throws NotFoundError when the doc is missing or not an S3 file.
     */
    private async getS3Key(fileId: string): Promise<string> {
        const file = await this.dataSourceDocsRepository.fetch(fileId);
        if (!file) {
            throw new NotFoundError('File not found');
        }
        if (file.data.type !== 'file_s3') {
            throw new NotFoundError('File is not an S3 file');
        }
        return file.data.s3Key;
    }

    /** Pre-signed PUT URL for a new object at `key` (valid 10 minutes). */
    async getUploadUrl(key: string, contentType: string): Promise<string> {
        const command = new PutObjectCommand({
            Bucket: this.bucket,
            Key: key,
            ContentType: contentType,
        });
        return await getSignedUrl(this.s3Client, command, { expiresIn: 600 });
    }

    /** Pre-signed GET URL for the file's object (valid 60 seconds). */
    async getDownloadUrl(fileId: string): Promise<string> {
        const command = new GetObjectCommand({
            Bucket: this.bucket,
            Key: await this.getS3Key(fileId),
        });
        return await getSignedUrl(this.s3Client, command, { expiresIn: 60 });
    }

    /** Downloads the file's object and buffers the full body in memory. */
    async getFileContents(fileId: string): Promise<Buffer> {
        const command = new GetObjectCommand({
            Bucket: this.bucket,
            Key: await this.getS3Key(fileId),
        });
        const response = await this.s3Client.send(command);
        // In Node, the SDK response body is an async-iterable stream; the cast
        // narrows it for iteration (narrower than the previous `as any`).
        const chunks: Uint8Array[] = [];
        for await (const chunk of response.Body as unknown as AsyncIterable<Uint8Array>) {
            chunks.push(chunk);
        }
        return Buffer.concat(chunks);
    }
}

View file

@ -0,0 +1,33 @@
import { BadRequestError } from "@/src/entities/errors/common";
import z from "zod";
import { IAddDocsToDataSourceUseCase } from "@/src/application/use-cases/data-sources/add-docs-to-data-source.use-case";
import { CreateSchema as DocCreateSchema } from "@/src/application/repositories/data-source-docs.repository.interface";
// Validated request shape for adding docs to an existing data source.
const inputSchema = z.object({
    caller: z.enum(["user", "api"]),
    userId: z.string().optional(),
    apiKey: z.string().optional(),
    sourceId: z.string(),
    docs: z.array(DocCreateSchema),
});

export interface IAddDocsToDataSourceController {
    execute(request: z.infer<typeof inputSchema>): Promise<void>;
}

/**
 * Interface adapter for adding docs to a data source: validates the raw
 * request against the schema, then delegates to the use case.
 */
export class AddDocsToDataSourceController implements IAddDocsToDataSourceController {
    private readonly addDocsToDataSourceUseCase: IAddDocsToDataSourceUseCase;

    constructor({ addDocsToDataSourceUseCase }: { addDocsToDataSourceUseCase: IAddDocsToDataSourceUseCase }) {
        this.addDocsToDataSourceUseCase = addDocsToDataSourceUseCase;
    }

    /** @throws BadRequestError when the payload fails schema validation. */
    async execute(request: z.infer<typeof inputSchema>): Promise<void> {
        const parsed = inputSchema.safeParse(request);
        if (!parsed.success) {
            throw new BadRequestError(`Invalid request: ${JSON.stringify(parsed.error)}`);
        }
        // parsed.data holds exactly the schema fields (zod strips unknown keys).
        return this.addDocsToDataSourceUseCase.execute(parsed.data);
    }
}

View file

@ -0,0 +1,33 @@
import { BadRequestError } from "@/src/entities/errors/common";
import z from "zod";
import { DataSource } from "@/src/entities/models/data-source";
import { ICreateDataSourceUseCase } from "@/src/application/use-cases/data-sources/create-data-source.use-case";
import { CreateSchema } from "@/src/application/repositories/data-sources.repository.interface";
// Validated request shape for creating a data source.
const inputSchema = z.object({
    caller: z.enum(["user", "api"]),
    userId: z.string().optional(),
    apiKey: z.string().optional(),
    data: CreateSchema,
});

export interface ICreateDataSourceController {
    execute(request: z.infer<typeof inputSchema>): Promise<z.infer<typeof DataSource>>;
}

/**
 * Interface adapter for creating a data source: validates the raw request
 * against the schema, then delegates to the use case.
 */
export class CreateDataSourceController implements ICreateDataSourceController {
    private readonly createDataSourceUseCase: ICreateDataSourceUseCase;

    constructor({ createDataSourceUseCase }: { createDataSourceUseCase: ICreateDataSourceUseCase }) {
        this.createDataSourceUseCase = createDataSourceUseCase;
    }

    /** @throws BadRequestError when the payload fails schema validation. */
    async execute(request: z.infer<typeof inputSchema>): Promise<z.infer<typeof DataSource>> {
        const parsed = inputSchema.safeParse(request);
        if (!parsed.success) {
            throw new BadRequestError(`Invalid request: ${JSON.stringify(parsed.error)}`);
        }
        // parsed.data holds exactly the schema fields (zod strips unknown keys).
        return this.createDataSourceUseCase.execute(parsed.data);
    }
}

View file

@ -0,0 +1,31 @@
import { BadRequestError } from "@/src/entities/errors/common";
import z from "zod";
import { IDeleteDataSourceUseCase } from "@/src/application/use-cases/data-sources/delete-data-source.use-case";
// Validated request shape for deleting a data source.
const inputSchema = z.object({
    caller: z.enum(["user", "api"]),
    userId: z.string().optional(),
    apiKey: z.string().optional(),
    sourceId: z.string(),
});

export interface IDeleteDataSourceController {
    execute(request: z.infer<typeof inputSchema>): Promise<void>;
}

/**
 * Interface adapter for deleting a data source: validates the raw request
 * against the schema, then delegates to the use case.
 */
export class DeleteDataSourceController implements IDeleteDataSourceController {
    private readonly deleteDataSourceUseCase: IDeleteDataSourceUseCase;

    constructor({ deleteDataSourceUseCase }: { deleteDataSourceUseCase: IDeleteDataSourceUseCase }) {
        this.deleteDataSourceUseCase = deleteDataSourceUseCase;
    }

    /** @throws BadRequestError when the payload fails schema validation. */
    async execute(request: z.infer<typeof inputSchema>): Promise<void> {
        const parsed = inputSchema.safeParse(request);
        if (!parsed.success) {
            throw new BadRequestError(`Invalid request: ${JSON.stringify(parsed.error)}`);
        }
        // parsed.data holds exactly the schema fields (zod strips unknown keys).
        return this.deleteDataSourceUseCase.execute(parsed.data);
    }
}

View file

@ -0,0 +1,31 @@
import { BadRequestError } from "@/src/entities/errors/common";
import z from "zod";
import { IDeleteDocFromDataSourceUseCase } from "@/src/application/use-cases/data-sources/delete-doc-from-data-source.use-case";
// Validated request shape for deleting a single doc from a data source.
const inputSchema = z.object({
    caller: z.enum(["user", "api"]),
    userId: z.string().optional(),
    apiKey: z.string().optional(),
    docId: z.string(),
});

export interface IDeleteDocFromDataSourceController {
    execute(request: z.infer<typeof inputSchema>): Promise<void>;
}

/**
 * Interface adapter for deleting a doc from a data source: validates the raw
 * request against the schema, then delegates to the use case.
 */
export class DeleteDocFromDataSourceController implements IDeleteDocFromDataSourceController {
    private readonly deleteDocFromDataSourceUseCase: IDeleteDocFromDataSourceUseCase;

    constructor({ deleteDocFromDataSourceUseCase }: { deleteDocFromDataSourceUseCase: IDeleteDocFromDataSourceUseCase }) {
        this.deleteDocFromDataSourceUseCase = deleteDocFromDataSourceUseCase;
    }

    /** @throws BadRequestError when the payload fails schema validation. */
    async execute(request: z.infer<typeof inputSchema>): Promise<void> {
        const parsed = inputSchema.safeParse(request);
        if (!parsed.success) {
            throw new BadRequestError(`Invalid request: ${JSON.stringify(parsed.error)}`);
        }
        // parsed.data holds exactly the schema fields (zod strips unknown keys).
        return this.deleteDocFromDataSourceUseCase.execute(parsed.data);
    }
}

View file

@ -0,0 +1,33 @@
import { BadRequestError } from "@/src/entities/errors/common";
import z from "zod";
import { DataSource } from "@/src/entities/models/data-source";
import { IFetchDataSourceUseCase } from "@/src/application/use-cases/data-sources/fetch-data-source.use-case";
// Validated request shape for fetching a single data source.
const inputSchema = z.object({
    caller: z.enum(["user", "api"]),
    userId: z.string().optional(),
    apiKey: z.string().optional(),
    sourceId: z.string(),
});

export interface IFetchDataSourceController {
    execute(request: z.infer<typeof inputSchema>): Promise<z.infer<typeof DataSource>>;
}

/**
 * Interface adapter for fetching a data source: validates the raw request
 * against the schema, then delegates to the use case.
 */
export class FetchDataSourceController implements IFetchDataSourceController {
    private readonly fetchDataSourceUseCase: IFetchDataSourceUseCase;

    constructor({ fetchDataSourceUseCase }: { fetchDataSourceUseCase: IFetchDataSourceUseCase }) {
        this.fetchDataSourceUseCase = fetchDataSourceUseCase;
    }

    /** @throws BadRequestError when the payload fails schema validation. */
    async execute(request: z.infer<typeof inputSchema>): Promise<z.infer<typeof DataSource>> {
        const parsed = inputSchema.safeParse(request);
        if (!parsed.success) {
            throw new BadRequestError(`Invalid request: ${JSON.stringify(parsed.error)}`);
        }
        // parsed.data holds exactly the schema fields (zod strips unknown keys).
        return this.fetchDataSourceUseCase.execute(parsed.data);
    }
}

View file

@ -0,0 +1,31 @@
import { BadRequestError } from "@/src/entities/errors/common";
import z from "zod";
import { IGetDownloadUrlForFileUseCase } from "@/src/application/use-cases/data-sources/get-download-url-for-file.use-case";
// Validated request shape for fetching a file's download URL.
const inputSchema = z.object({
    caller: z.enum(["user", "api"]),
    userId: z.string().optional(),
    apiKey: z.string().optional(),
    fileId: z.string(),
});

export interface IGetDownloadUrlForFileController {
    execute(request: z.infer<typeof inputSchema>): Promise<string>;
}

/**
 * Interface adapter for resolving a file's download URL: validates the raw
 * request against the schema, then delegates to the use case.
 */
export class GetDownloadUrlForFileController implements IGetDownloadUrlForFileController {
    private readonly getDownloadUrlForFileUseCase: IGetDownloadUrlForFileUseCase;

    constructor({ getDownloadUrlForFileUseCase }: { getDownloadUrlForFileUseCase: IGetDownloadUrlForFileUseCase }) {
        this.getDownloadUrlForFileUseCase = getDownloadUrlForFileUseCase;
    }

    /** @throws BadRequestError when the payload fails schema validation. */
    async execute(request: z.infer<typeof inputSchema>): Promise<string> {
        const parsed = inputSchema.safeParse(request);
        if (!parsed.success) {
            throw new BadRequestError(`Invalid request: ${JSON.stringify(parsed.error)}`);
        }
        // parsed.data holds exactly the schema fields (zod strips unknown keys).
        return this.getDownloadUrlForFileUseCase.execute(parsed.data);
    }
}

View file

@ -0,0 +1,32 @@
import { BadRequestError } from "@/src/entities/errors/common";
import z from "zod";
import { IGetUploadUrlsForFilesUseCase } from "@/src/application/use-cases/data-sources/get-upload-urls-for-files.use-case";
// Validated request shape for requesting pre-signed upload URLs.
const inputSchema = z.object({
    caller: z.enum(["user", "api"]),
    userId: z.string().optional(),
    apiKey: z.string().optional(),
    sourceId: z.string(),
    files: z.array(z.object({ name: z.string(), type: z.string(), size: z.number() })),
});

export interface IGetUploadUrlsForFilesController {
    execute(request: z.infer<typeof inputSchema>): Promise<{ fileId: string, uploadUrl: string, path: string }[]>;
}

/**
 * Interface adapter for issuing upload URLs for a batch of files: validates
 * the raw request against the schema, then delegates to the use case.
 */
export class GetUploadUrlsForFilesController implements IGetUploadUrlsForFilesController {
    private readonly getUploadUrlsForFilesUseCase: IGetUploadUrlsForFilesUseCase;

    constructor({ getUploadUrlsForFilesUseCase }: { getUploadUrlsForFilesUseCase: IGetUploadUrlsForFilesUseCase }) {
        this.getUploadUrlsForFilesUseCase = getUploadUrlsForFilesUseCase;
    }

    /** @throws BadRequestError when the payload fails schema validation. */
    async execute(request: z.infer<typeof inputSchema>): Promise<{ fileId: string, uploadUrl: string, path: string }[]> {
        const parsed = inputSchema.safeParse(request);
        if (!parsed.success) {
            throw new BadRequestError(`Invalid request: ${JSON.stringify(parsed.error)}`);
        }
        // parsed.data holds exactly the schema fields (zod strips unknown keys).
        return this.getUploadUrlsForFilesUseCase.execute(parsed.data);
    }
}

View file

@ -0,0 +1,32 @@
import { BadRequestError } from "@/src/entities/errors/common";
import z from "zod";
import { DataSource } from "@/src/entities/models/data-source";
import { IListDataSourcesUseCase } from "@/src/application/use-cases/data-sources/list-data-sources.use-case";
// Validated request shape for listing a project's data sources.
const inputSchema = z.object({
    caller: z.enum(["user", "api"]),
    userId: z.string().optional(),
    apiKey: z.string().optional(),
    projectId: z.string(),
});

export interface IListDataSourcesController {
    execute(request: z.infer<typeof inputSchema>): Promise<z.infer<typeof DataSource>[]>;
}

/**
 * Interface adapter for listing data sources: validates the raw request
 * against the schema, then delegates to the use case.
 */
export class ListDataSourcesController implements IListDataSourcesController {
    private readonly listDataSourcesUseCase: IListDataSourcesUseCase;

    constructor({ listDataSourcesUseCase }: { listDataSourcesUseCase: IListDataSourcesUseCase }) {
        this.listDataSourcesUseCase = listDataSourcesUseCase;
    }

    /** @throws BadRequestError when the payload fails schema validation. */
    async execute(request: z.infer<typeof inputSchema>): Promise<z.infer<typeof DataSource>[]> {
        const parsed = inputSchema.safeParse(request);
        if (!parsed.success) {
            throw new BadRequestError(`Invalid request: ${JSON.stringify(parsed.error)}`);
        }
        // parsed.data holds exactly the schema fields (zod strips unknown keys).
        return this.listDataSourcesUseCase.execute(parsed.data);
    }
}

View file

@ -0,0 +1,32 @@
import { BadRequestError } from "@/src/entities/errors/common";
import z from "zod";
import { IListDocsInDataSourceUseCase } from "@/src/application/use-cases/data-sources/list-docs-in-data-source.use-case";
import { DataSourceDoc } from "@/src/entities/models/data-source-doc";
// Validated request shape for listing docs inside a data source.
const inputSchema = z.object({
    caller: z.enum(["user", "api"]),
    userId: z.string().optional(),
    apiKey: z.string().optional(),
    sourceId: z.string(),
});

export interface IListDocsInDataSourceController {
    execute(request: z.infer<typeof inputSchema>): Promise<z.infer<typeof DataSourceDoc>[]>;
}

/**
 * Interface adapter for listing a data source's docs: validates the raw
 * request against the schema, then delegates to the use case.
 */
export class ListDocsInDataSourceController implements IListDocsInDataSourceController {
    private readonly listDocsInDataSourceUseCase: IListDocsInDataSourceUseCase;

    constructor({ listDocsInDataSourceUseCase }: { listDocsInDataSourceUseCase: IListDocsInDataSourceUseCase }) {
        this.listDocsInDataSourceUseCase = listDocsInDataSourceUseCase;
    }

    /** @throws BadRequestError when the payload fails schema validation. */
    async execute(request: z.infer<typeof inputSchema>): Promise<z.infer<typeof DataSourceDoc>[]> {
        const parsed = inputSchema.safeParse(request);
        if (!parsed.success) {
            throw new BadRequestError(`Invalid request: ${JSON.stringify(parsed.error)}`);
        }
        // parsed.data holds exactly the schema fields (zod strips unknown keys).
        return this.listDocsInDataSourceUseCase.execute(parsed.data);
    }
}

View file

@ -0,0 +1,31 @@
import { BadRequestError } from "@/src/entities/errors/common";
import z from "zod";
import { IRecrawlWebDataSourceUseCase } from "@/src/application/use-cases/data-sources/recrawl-web-data-source.use-case";
// Validated request shape for triggering a recrawl of a web data source.
const inputSchema = z.object({
    caller: z.enum(["user", "api"]),
    userId: z.string().optional(),
    apiKey: z.string().optional(),
    sourceId: z.string(),
});

export interface IRecrawlWebDataSourceController {
    execute(request: z.infer<typeof inputSchema>): Promise<void>;
}

/**
 * Interface adapter for recrawling a web data source: validates the raw
 * request against the schema, then delegates to the use case.
 */
export class RecrawlWebDataSourceController implements IRecrawlWebDataSourceController {
    private readonly recrawlWebDataSourceUseCase: IRecrawlWebDataSourceUseCase;

    constructor({ recrawlWebDataSourceUseCase }: { recrawlWebDataSourceUseCase: IRecrawlWebDataSourceUseCase }) {
        this.recrawlWebDataSourceUseCase = recrawlWebDataSourceUseCase;
    }

    /** @throws BadRequestError when the payload fails schema validation. */
    async execute(request: z.infer<typeof inputSchema>): Promise<void> {
        const parsed = inputSchema.safeParse(request);
        if (!parsed.success) {
            throw new BadRequestError(`Invalid request: ${JSON.stringify(parsed.error)}`);
        }
        // parsed.data holds exactly the schema fields (zod strips unknown keys).
        return this.recrawlWebDataSourceUseCase.execute(parsed.data);
    }
}

View file

@ -0,0 +1,33 @@
import { BadRequestError } from "@/src/entities/errors/common";
import z from "zod";
import { DataSource } from "@/src/entities/models/data-source";
import { IToggleDataSourceUseCase } from "@/src/application/use-cases/data-sources/toggle-data-source.use-case";
// Validated request shape for toggling a data source on or off.
const inputSchema = z.object({
    caller: z.enum(["user", "api"]),
    userId: z.string().optional(),
    apiKey: z.string().optional(),
    sourceId: z.string(),
    active: z.boolean(),
});

export interface IToggleDataSourceController {
    execute(request: z.infer<typeof inputSchema>): Promise<z.infer<typeof DataSource>>;
}

/**
 * Interface adapter for toggling a data source's active flag: validates the
 * raw request against the schema, then delegates to the use case.
 */
export class ToggleDataSourceController implements IToggleDataSourceController {
    private readonly toggleDataSourceUseCase: IToggleDataSourceUseCase;

    constructor({ toggleDataSourceUseCase }: { toggleDataSourceUseCase: IToggleDataSourceUseCase }) {
        this.toggleDataSourceUseCase = toggleDataSourceUseCase;
    }

    /** @throws BadRequestError when the payload fails schema validation. */
    async execute(request: z.infer<typeof inputSchema>): Promise<z.infer<typeof DataSource>> {
        const parsed = inputSchema.safeParse(request);
        if (!parsed.success) {
            throw new BadRequestError(`Invalid request: ${JSON.stringify(parsed.error)}`);
        }
        // parsed.data holds exactly the schema fields (zod strips unknown keys).
        return this.toggleDataSourceUseCase.execute(parsed.data);
    }
}

View file

@ -0,0 +1,38 @@
import { BadRequestError } from "@/src/entities/errors/common";
import z from "zod";
import { DataSource } from "@/src/entities/models/data-source";
import { IUpdateDataSourceUseCase } from "@/src/application/use-cases/data-sources/update-data-source.use-case";
// Validated request shape for updating a data source; only `description`
// is currently updatable, and it is optional.
const inputSchema = z.object({
    caller: z.enum(["user", "api"]),
    userId: z.string().optional(),
    apiKey: z.string().optional(),
    sourceId: z.string(),
    data: DataSource
        .pick({
            description: true,
        })
        .partial(),
});

export interface IUpdateDataSourceController {
    execute(request: z.infer<typeof inputSchema>): Promise<z.infer<typeof DataSource>>;
}

/**
 * Interface adapter for updating a data source: validates the raw request
 * against the schema, then delegates to the use case.
 */
export class UpdateDataSourceController implements IUpdateDataSourceController {
    private readonly updateDataSourceUseCase: IUpdateDataSourceUseCase;

    constructor({ updateDataSourceUseCase }: { updateDataSourceUseCase: IUpdateDataSourceUseCase }) {
        this.updateDataSourceUseCase = updateDataSourceUseCase;
    }

    /** @throws BadRequestError when the payload fails schema validation. */
    async execute(request: z.infer<typeof inputSchema>): Promise<z.infer<typeof DataSource>> {
        const parsed = inputSchema.safeParse(request);
        if (!parsed.success) {
            throw new BadRequestError(`Invalid request: ${JSON.stringify(parsed.error)}`);
        }
        // parsed.data holds exactly the schema fields (zod strips unknown keys).
        return this.updateDataSourceUseCase.execute(parsed.data);
    }
}

View file

@ -149,28 +149,29 @@ services:
- QDRANT_API_KEY=${QDRANT_API_KEY}
restart: no
rag_files_worker:
rag-worker:
build:
context: ./apps/rowboat
dockerfile: scripts.Dockerfile
command: ["npm", "run", "ragFilesWorker"]
profiles: [ "rag_files_worker" ]
command: ["npm", "run", "rag-worker"]
profiles: [ "rag-worker" ]
environment:
- GOOGLE_API_KEY=${GOOGLE_API_KEY}
- AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID}
- AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY}
- RAG_UPLOADS_S3_BUCKET=${RAG_UPLOADS_S3_BUCKET}
- RAG_UPLOADS_S3_REGION=${RAG_UPLOADS_S3_REGION}
- RAG_UPLOADS_DIR=/app/uploads
- USE_GEMINI_FILE_PARSING=${USE_GEMINI_FILE_PARSING}
- FIRECRAWL_API_KEY=${FIRECRAWL_API_KEY}
- OPENAI_API_KEY=${OPENAI_API_KEY}
- EMBEDDING_PROVIDER_BASE_URL=${EMBEDDING_PROVIDER_BASE_URL}
- EMBEDDING_PROVIDER_API_KEY=${EMBEDDING_PROVIDER_API_KEY}
- EMBEDDING_MODEL=${EMBEDDING_MODEL}
- MONGODB_CONNECTION_STRING=mongodb://mongo:27017/rowboat
- REDIS_URL=redis://redis:6379
- GOOGLE_API_KEY=${GOOGLE_API_KEY}
- AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID}
- AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY}
- RAG_UPLOADS_S3_BUCKET=${RAG_UPLOADS_S3_BUCKET}
- RAG_UPLOADS_S3_REGION=${RAG_UPLOADS_S3_REGION}
- QDRANT_URL=http://qdrant:6333
- QDRANT_API_KEY=${QDRANT_API_KEY}
- RAG_UPLOADS_DIR=/app/uploads
- USE_GEMINI_FILE_PARSING=${USE_GEMINI_FILE_PARSING}
- USE_BILLING=${USE_BILLING}
- BILLING_API_URL=${BILLING_API_URL}
- BILLING_API_KEY=${BILLING_API_KEY}
@ -178,47 +179,6 @@ services:
volumes:
- uploads:/app/uploads
rag_urls_worker:
build:
context: ./apps/rowboat
dockerfile: scripts.Dockerfile
command: ["npm", "run", "ragUrlsWorker"]
profiles: [ "rag_urls_worker" ]
environment:
- OPENAI_API_KEY=${OPENAI_API_KEY}
- EMBEDDING_PROVIDER_BASE_URL=${EMBEDDING_PROVIDER_BASE_URL}
- EMBEDDING_PROVIDER_API_KEY=${EMBEDDING_PROVIDER_API_KEY}
- EMBEDDING_MODEL=${EMBEDDING_MODEL}
- MONGODB_CONNECTION_STRING=mongodb://mongo:27017/rowboat
- REDIS_URL=redis://redis:6379
- FIRECRAWL_API_KEY=${FIRECRAWL_API_KEY}
- QDRANT_URL=http://qdrant:6333
- QDRANT_API_KEY=${QDRANT_API_KEY}
- USE_BILLING=${USE_BILLING}
- BILLING_API_URL=${BILLING_API_URL}
- BILLING_API_KEY=${BILLING_API_KEY}
restart: unless-stopped
rag_text_worker:
build:
context: ./apps/rowboat
dockerfile: scripts.Dockerfile
command: ["npm", "run", "ragTextWorker"]
profiles: [ "rag_text_worker" ]
environment:
- OPENAI_API_KEY=${OPENAI_API_KEY}
- EMBEDDING_PROVIDER_BASE_URL=${EMBEDDING_PROVIDER_BASE_URL}
- EMBEDDING_PROVIDER_API_KEY=${EMBEDDING_PROVIDER_API_KEY}
- EMBEDDING_MODEL=${EMBEDDING_MODEL}
- MONGODB_CONNECTION_STRING=mongodb://mongo:27017/rowboat
- REDIS_URL=redis://redis:6379
- QDRANT_URL=http://qdrant:6333
- QDRANT_API_KEY=${QDRANT_API_KEY}
- USE_BILLING=${USE_BILLING}
- BILLING_API_URL=${BILLING_API_URL}
- BILLING_API_KEY=${BILLING_API_KEY}
restart: unless-stopped
jobs-worker:
build:
context: ./apps/rowboat

View file

@ -26,13 +26,7 @@ export USE_KLAVIS_TOOLS=true
CMD="docker compose"
CMD="$CMD --profile setup_qdrant"
CMD="$CMD --profile qdrant"
CMD="$CMD --profile rag_text_worker"
CMD="$CMD --profile rag_files_worker"
# enable rag urls worker
if [ "$USE_RAG_SCRAPING" = "true" ]; then
CMD="$CMD --profile rag_urls_worker"
fi
CMD="$CMD --profile rag-worker"
# Add more mappings as needed
# if [ "$SOME_OTHER_ENV" = "true" ]; then