From 4b33b20e760c14802743c85d16ac88885d34daac Mon Sep 17 00:00:00 2001 From: Ramnique Singh <30795890+ramnique@users.noreply.github.com> Date: Sun, 17 Aug 2025 08:06:17 +0530 Subject: [PATCH] DDD refactor: data-sources (#205) --- apps/rowboat/app/actions/copilot.actions.ts | 5 +- .../app/actions/data-source.actions.ts | 413 +++++------------- apps/rowboat/app/actions/project.actions.ts | 34 +- .../rowboat/app/api/uploads/[fileId]/route.ts | 13 +- apps/rowboat/app/lib/agent-tools.ts | 35 +- apps/rowboat/app/lib/copilot/copilot.ts | 9 +- apps/rowboat/app/lib/mongodb.ts | 4 - apps/rowboat/app/lib/types/copilot_types.ts | 6 +- .../rowboat/app/lib/types/datasource_types.ts | 85 ---- .../app/projects/[projectId]/copilot/app.tsx | 7 +- .../[projectId]/copilot/use-copilot.tsx | 4 +- .../[projectId]/entities/agent_config.tsx | 13 +- .../entities/datasource_config.tsx | 87 ++-- .../sources/[sourceId]/source-page.tsx | 22 +- .../[projectId]/sources/components/delete.tsx | 4 +- .../sources/components/files-source.tsx | 48 +- .../sources/components/scrape-source.tsx | 41 +- .../self-updating-source-status.tsx | 10 +- .../sources/components/source-status.tsx | 4 +- .../sources/components/sources-list.tsx | 15 +- .../sources/components/text-source.tsx | 25 +- .../sources/components/toggle-source.tsx | 4 +- .../sources/components/web-recrawl.tsx | 4 - .../projects/[projectId]/sources/new/form.tsx | 12 +- .../app/projects/[projectId]/workflow/app.tsx | 4 +- .../workflow/components/DataSourcesModal.tsx | 8 +- .../[projectId]/workflow/entity_list.tsx | 14 +- .../[projectId]/workflow/workflow_editor.tsx | 4 +- .../{rag_files_worker.ts => rag-worker.ts} | 370 +++++++++------- apps/rowboat/app/scripts/rag_text_worker.ts | 345 --------------- apps/rowboat/app/scripts/rag_urls_worker.ts | 381 ---------------- apps/rowboat/di/container.ts | 68 ++- apps/rowboat/package.json | 4 +- .../data-source-docs.repository.interface.ts | 120 +++++ .../data-sources.repository.interface.ts | 124 ++++++ 
.../uploads-storage.service.interface.ts | 5 + .../add-docs-to-data-source.use-case.ts | 68 +++ .../create-data-source.use-case.ts | 60 +++ .../delete-data-source.use-case.ts | 60 +++ .../delete-doc-from-data-source.use-case.ts | 67 +++ .../fetch-data-source.use-case.ts | 57 +++ .../get-download-url-for-file.use-case.ts | 72 +++ .../get-upload-urls-for-files.use-case.ts | 82 ++++ .../list-data-sources.use-case.ts | 60 +++ .../list-docs-in-data-source.use-case.ts | 71 +++ .../recrawl-web-data-source.use-case.ts | 71 +++ .../toggle-data-source.use-case.ts | 58 +++ .../update-data-source.use-case.ts | 62 +++ .../src/entities/models/data-source-doc.ts | 44 ++ .../src/entities/models/data-source.ts | 36 ++ .../mongodb.data-source-docs.repository.ts | 170 +++++++ .../mongodb.data-sources.repository.ts | 218 +++++++++ .../services/local.uploads-storage.service.ts | 39 ++ .../services/s3.uploads-storage.service.ts | 71 +++ .../add-docs-to-data-source.controller.ts | 33 ++ .../create-data-source.controller.ts | 33 ++ .../delete-data-source.controller.ts | 31 ++ .../delete-doc-from-data-source.controller.ts | 31 ++ .../fetch-data-source.controller.ts | 33 ++ .../get-download-url-for-file.controller.ts | 31 ++ .../get-upload-urls-for-files.controller.ts | 32 ++ .../list-data-sources.controller.ts | 32 ++ .../list-docs-in-data-source.controller.ts | 32 ++ .../recrawl-web-data-source.controller.ts | 31 ++ .../toggle-data-source.controller.ts | 33 ++ .../update-data-source.controller.ts | 38 ++ docker-compose.yml | 62 +-- start.sh | 8 +- 68 files changed, 2589 insertions(+), 1588 deletions(-) rename apps/rowboat/app/scripts/{rag_files_worker.ts => rag-worker.ts} (53%) delete mode 100644 apps/rowboat/app/scripts/rag_text_worker.ts delete mode 100644 apps/rowboat/app/scripts/rag_urls_worker.ts create mode 100644 apps/rowboat/src/application/repositories/data-source-docs.repository.interface.ts create mode 100644 
apps/rowboat/src/application/repositories/data-sources.repository.interface.ts create mode 100644 apps/rowboat/src/application/services/uploads-storage.service.interface.ts create mode 100644 apps/rowboat/src/application/use-cases/data-sources/add-docs-to-data-source.use-case.ts create mode 100644 apps/rowboat/src/application/use-cases/data-sources/create-data-source.use-case.ts create mode 100644 apps/rowboat/src/application/use-cases/data-sources/delete-data-source.use-case.ts create mode 100644 apps/rowboat/src/application/use-cases/data-sources/delete-doc-from-data-source.use-case.ts create mode 100644 apps/rowboat/src/application/use-cases/data-sources/fetch-data-source.use-case.ts create mode 100644 apps/rowboat/src/application/use-cases/data-sources/get-download-url-for-file.use-case.ts create mode 100644 apps/rowboat/src/application/use-cases/data-sources/get-upload-urls-for-files.use-case.ts create mode 100644 apps/rowboat/src/application/use-cases/data-sources/list-data-sources.use-case.ts create mode 100644 apps/rowboat/src/application/use-cases/data-sources/list-docs-in-data-source.use-case.ts create mode 100644 apps/rowboat/src/application/use-cases/data-sources/recrawl-web-data-source.use-case.ts create mode 100644 apps/rowboat/src/application/use-cases/data-sources/toggle-data-source.use-case.ts create mode 100644 apps/rowboat/src/application/use-cases/data-sources/update-data-source.use-case.ts create mode 100644 apps/rowboat/src/entities/models/data-source-doc.ts create mode 100644 apps/rowboat/src/entities/models/data-source.ts create mode 100644 apps/rowboat/src/infrastructure/repositories/mongodb.data-source-docs.repository.ts create mode 100644 apps/rowboat/src/infrastructure/repositories/mongodb.data-sources.repository.ts create mode 100644 apps/rowboat/src/infrastructure/services/local.uploads-storage.service.ts create mode 100644 apps/rowboat/src/infrastructure/services/s3.uploads-storage.service.ts create mode 100644 
apps/rowboat/src/interface-adapters/controllers/data-sources/add-docs-to-data-source.controller.ts create mode 100644 apps/rowboat/src/interface-adapters/controllers/data-sources/create-data-source.controller.ts create mode 100644 apps/rowboat/src/interface-adapters/controllers/data-sources/delete-data-source.controller.ts create mode 100644 apps/rowboat/src/interface-adapters/controllers/data-sources/delete-doc-from-data-source.controller.ts create mode 100644 apps/rowboat/src/interface-adapters/controllers/data-sources/fetch-data-source.controller.ts create mode 100644 apps/rowboat/src/interface-adapters/controllers/data-sources/get-download-url-for-file.controller.ts create mode 100644 apps/rowboat/src/interface-adapters/controllers/data-sources/get-upload-urls-for-files.controller.ts create mode 100644 apps/rowboat/src/interface-adapters/controllers/data-sources/list-data-sources.controller.ts create mode 100644 apps/rowboat/src/interface-adapters/controllers/data-sources/list-docs-in-data-source.controller.ts create mode 100644 apps/rowboat/src/interface-adapters/controllers/data-sources/recrawl-web-data-source.controller.ts create mode 100644 apps/rowboat/src/interface-adapters/controllers/data-sources/toggle-data-source.controller.ts create mode 100644 apps/rowboat/src/interface-adapters/controllers/data-sources/update-data-source.controller.ts diff --git a/apps/rowboat/app/actions/copilot.actions.ts b/apps/rowboat/app/actions/copilot.actions.ts index 32bf7445..94550b16 100644 --- a/apps/rowboat/app/actions/copilot.actions.ts +++ b/apps/rowboat/app/actions/copilot.actions.ts @@ -5,13 +5,12 @@ import { } from "../lib/types/copilot_types"; import { Workflow} from "../lib/types/workflow_types"; -import { DataSource } from "../lib/types/datasource_types"; +import { DataSource } from "@/src/entities/models/data-source"; import { z } from 'zod'; import { projectAuthCheck } from "./project.actions"; import { redisClient } from "../lib/redis"; import { 
authorizeUserAction, logUsage } from "./billing.actions"; import { USE_BILLING } from "../lib/feature_flags"; -import { WithStringId } from "../lib/types/types"; import { getEditAgentInstructionsResponse } from "../lib/copilot/copilot"; import { container } from "@/di/container"; import { IUsageQuotaPolicy } from "@/src/application/policies/usage-quota.policy.interface"; @@ -24,7 +23,7 @@ export async function getCopilotResponseStream( messages: z.infer[], current_workflow_config: z.infer, context: z.infer | null, - dataSources?: WithStringId>[] + dataSources?: z.infer[] ): Promise<{ streamId: string; } | { billingError: string }> { diff --git a/apps/rowboat/app/actions/data-source.actions.ts b/apps/rowboat/app/actions/data-source.actions.ts index 390c0e59..4c7b75fc 100644 --- a/apps/rowboat/app/actions/data-source.actions.ts +++ b/apps/rowboat/app/actions/data-source.actions.ts @@ -1,41 +1,53 @@ 'use server'; -import { ObjectId, WithId } from "mongodb"; -import { dataSourcesCollection, dataSourceDocsCollection } from "../lib/mongodb"; import { z } from 'zod'; -import { GetObjectCommand, PutObjectCommand } from "@aws-sdk/client-s3"; -import { getSignedUrl } from "@aws-sdk/s3-request-presigner"; -import { projectAuthCheck } from "./project.actions"; -import { WithStringId } from "../lib/types/types"; -import { DataSourceDoc } from "../lib/types/datasource_types"; -import { DataSource } from "../lib/types/datasource_types"; -import { uploadsS3Client } from "../lib/uploads_s3_client"; +import { DataSourceDoc } from "@/src/entities/models/data-source-doc"; +import { DataSource } from "@/src/entities/models/data-source"; +import { container } from "@/di/container"; +import { IFetchDataSourceController } from "@/src/interface-adapters/controllers/data-sources/fetch-data-source.controller"; +import { authCheck } from "./auth.actions"; +import { IListDataSourcesController } from "@/src/interface-adapters/controllers/data-sources/list-data-sources.controller"; +import { 
ICreateDataSourceController } from "@/src/interface-adapters/controllers/data-sources/create-data-source.controller"; +import { IRecrawlWebDataSourceController } from "@/src/interface-adapters/controllers/data-sources/recrawl-web-data-source.controller"; +import { IDeleteDataSourceController } from "@/src/interface-adapters/controllers/data-sources/delete-data-source.controller"; +import { IToggleDataSourceController } from "@/src/interface-adapters/controllers/data-sources/toggle-data-source.controller"; +import { IAddDocsToDataSourceController } from "@/src/interface-adapters/controllers/data-sources/add-docs-to-data-source.controller"; +import { IListDocsInDataSourceController } from "@/src/interface-adapters/controllers/data-sources/list-docs-in-data-source.controller"; +import { IDeleteDocFromDataSourceController } from "@/src/interface-adapters/controllers/data-sources/delete-doc-from-data-source.controller"; +import { IGetDownloadUrlForFileController } from "@/src/interface-adapters/controllers/data-sources/get-download-url-for-file.controller"; +import { IGetUploadUrlsForFilesController } from "@/src/interface-adapters/controllers/data-sources/get-upload-urls-for-files.controller"; +import { IUpdateDataSourceController } from "@/src/interface-adapters/controllers/data-sources/update-data-source.controller"; -export async function getDataSource(projectId: string, sourceId: string): Promise>> { - await projectAuthCheck(projectId); - const source = await dataSourcesCollection.findOne({ - _id: new ObjectId(sourceId), - projectId, +const fetchDataSourceController = container.resolve("fetchDataSourceController"); +const listDataSourcesController = container.resolve("listDataSourcesController"); +const createDataSourceController = container.resolve("createDataSourceController"); +const recrawlWebDataSourceController = container.resolve("recrawlWebDataSourceController"); +const deleteDataSourceController = container.resolve("deleteDataSourceController"); +const 
toggleDataSourceController = container.resolve("toggleDataSourceController"); +const addDocsToDataSourceController = container.resolve("addDocsToDataSourceController"); +const listDocsInDataSourceController = container.resolve("listDocsInDataSourceController"); +const deleteDocFromDataSourceController = container.resolve("deleteDocFromDataSourceController"); +const getDownloadUrlForFileController = container.resolve("getDownloadUrlForFileController"); +const getUploadUrlsForFilesController = container.resolve("getUploadUrlsForFilesController"); +const updateDataSourceController = container.resolve("updateDataSourceController"); + +export async function getDataSource(sourceId: string): Promise> { + const user = await authCheck(); + + return await fetchDataSourceController.execute({ + caller: 'user', + userId: user._id, + sourceId, }); - if (!source) { - throw new Error('Invalid data source'); - } - const { _id, ...rest } = source; - return { - ...rest, - _id: _id.toString(), - }; } -export async function listDataSources(projectId: string): Promise>[]> { - await projectAuthCheck(projectId); - const sources = await dataSourcesCollection.find({ - projectId: projectId, - status: { $ne: 'deleted' }, - }).toArray(); - return sources.map((s) => ({ - ...s, - _id: s._id.toString(), - })); +export async function listDataSources(projectId: string): Promise[]> { + const user = await authCheck(); + + return await listDataSourcesController.execute({ + caller: 'user', + userId: user._id, + projectId, + }); } export async function createDataSource({ @@ -50,272 +62,124 @@ export async function createDataSource({ description?: string, data: z.infer['data'], status?: 'pending' | 'ready', -}): Promise>> { - await projectAuthCheck(projectId); - - const source: z.infer = { - projectId: projectId, - active: true, - name: name, - description, - createdAt: (new Date()).toISOString(), - attempts: 0, - version: 1, - data, - }; - - // Only set status for non-file data sources - if (data.type 
!== 'files_local' && data.type !== 'files_s3') { - source.status = status; - } - - await dataSourcesCollection.insertOne(source); - - const { _id, ...rest } = source as WithId>; - return { - ...rest, - _id: _id.toString(), - }; -} - -export async function recrawlWebDataSource(projectId: string, sourceId: string) { - await projectAuthCheck(projectId); - - const source = await getDataSource(projectId, sourceId); - if (source.data.type !== 'urls') { - throw new Error('Invalid data source type'); - } - - // mark all files as queued - await dataSourceDocsCollection.updateMany({ - sourceId: sourceId, - }, { - $set: { - status: 'pending', - lastUpdatedAt: (new Date()).toISOString(), - attempts: 0, - } - }); - - // mark data source as pending - await dataSourcesCollection.updateOne({ - _id: new ObjectId(sourceId), - }, { - $set: { - status: 'pending', - billingError: undefined, - lastUpdatedAt: (new Date()).toISOString(), - attempts: 0, - }, - $inc: { - version: 1, +}): Promise> { + const user = await authCheck(); + return await createDataSourceController.execute({ + caller: 'user', + userId: user._id, + data: { + projectId, + name, + description: description || '', + status, + data, }, }); } -export async function deleteDataSource(projectId: string, sourceId: string) { - await projectAuthCheck(projectId); - await getDataSource(projectId, sourceId); +export async function recrawlWebDataSource(sourceId: string) { + const user = await authCheck(); - // mark data source as deleted - await dataSourcesCollection.updateOne({ - _id: new ObjectId(sourceId), - }, { - $set: { - status: 'deleted', - billingError: undefined, - lastUpdatedAt: (new Date()).toISOString(), - attempts: 0, - }, - $inc: { - version: 1, - }, + return await recrawlWebDataSourceController.execute({ + caller: 'user', + userId: user._id, + sourceId, }); } -export async function toggleDataSource(projectId: string, sourceId: string, active: boolean) { - await projectAuthCheck(projectId); - await 
getDataSource(projectId, sourceId); +export async function deleteDataSource(sourceId: string) { + const user = await authCheck(); - await dataSourcesCollection.updateOne({ - "_id": new ObjectId(sourceId), - "projectId": projectId, - }, { - $set: { - "active": active, - } + return await deleteDataSourceController.execute({ + caller: 'user', + userId: user._id, + sourceId, + }); +} + +export async function toggleDataSource(sourceId: string, active: boolean) { + const user = await authCheck(); + + return await toggleDataSourceController.execute({ + caller: 'user', + userId: user._id, + sourceId, + active, }); } export async function addDocsToDataSource({ - projectId, sourceId, docData, }: { - projectId: string, sourceId: string, docData: { - _id?: string, name: string, data: z.infer['data'] }[] }): Promise { - await projectAuthCheck(projectId); - const source = await getDataSource(projectId, sourceId); + const user = await authCheck(); - await dataSourceDocsCollection.insertMany(docData.map(doc => { - const record: z.infer = { - sourceId, - name: doc.name, - status: 'pending', - createdAt: new Date().toISOString(), - data: doc.data, - version: 1, - }; - if (!doc._id) { - return record; - } - const recordWithId = record as WithId>; - recordWithId._id = new ObjectId(doc._id); - return recordWithId; - })); - - // Only set status to pending when files are added - if (docData.length > 0 && (source.data.type === 'files_local' || source.data.type === 'files_s3')) { - await dataSourcesCollection.updateOne( - { _id: new ObjectId(sourceId) }, - { - $set: { - status: 'pending', - billingError: undefined, - attempts: 0, - lastUpdatedAt: new Date().toISOString(), - }, - $inc: { - version: 1, - }, - } - ); - } + return await addDocsToDataSourceController.execute({ + caller: 'user', + userId: user._id, + sourceId, + docs: docData, + }); } export async function listDocsInDataSource({ - projectId, sourceId, page = 1, limit = 10, }: { - projectId: string, sourceId: string, page?: 
number, limit?: number, }): Promise<{ - files: WithStringId>[], + files: z.infer[], total: number }> { - await projectAuthCheck(projectId); - await getDataSource(projectId, sourceId); + const user = await authCheck(); - // Get total count - const total = await dataSourceDocsCollection.countDocuments({ + const docs = await listDocsInDataSourceController.execute({ + caller: 'user', + userId: user._id, sourceId, - status: { $ne: 'deleted' }, }); - // Fetch docs with pagination - const docs = await dataSourceDocsCollection.find({ - sourceId, - status: { $ne: 'deleted' }, - }) - .skip((page - 1) * limit) - .limit(limit) - .toArray(); - return { - files: docs.map(f => ({ ...f, _id: f._id.toString() })), - total + files: docs, + total: docs.length, }; } -export async function deleteDocsFromDataSource({ - projectId, - sourceId, - docIds, +export async function deleteDocFromDataSource({ + docId, }: { - projectId: string, - sourceId: string, - docIds: string[], + docId: string, }): Promise { - await projectAuthCheck(projectId); - await getDataSource(projectId, sourceId); - - // mark for deletion - await dataSourceDocsCollection.updateMany( - { - sourceId, - _id: { - $in: docIds.map(id => new ObjectId(id)) - } - }, - { - $set: { - status: "deleted", - lastUpdatedAt: new Date().toISOString(), - }, - $inc: { - version: 1, - }, - } - ); - - // mark data source as pending - await dataSourcesCollection.updateOne({ - _id: new ObjectId(sourceId), - }, { - $set: { - status: 'pending', - billingError: undefined, - attempts: 0, - lastUpdatedAt: new Date().toISOString(), - }, - $inc: { - version: 1, - }, + const user = await authCheck(); + return await deleteDocFromDataSourceController.execute({ + caller: 'user', + userId: user._id, + docId, }); } export async function getDownloadUrlForFile( - projectId: string, - sourceId: string, fileId: string ): Promise { - await projectAuthCheck(projectId); - await getDataSource(projectId, sourceId); - const file = await 
dataSourceDocsCollection.findOne({ - sourceId, - _id: new ObjectId(fileId), - 'data.type': { $in: ['file_local', 'file_s3'] }, + const user = await authCheck(); + + return await getDownloadUrlForFileController.execute({ + caller: 'user', + userId: user._id, + fileId, }); - if (!file) { - throw new Error('File not found'); - } - - // if local, return path - if (file.data.type === 'file_local') { - return `/api/uploads/${fileId}`; - } else if (file.data.type === 'file_s3') { - const command = new GetObjectCommand({ - Bucket: process.env.RAG_UPLOADS_S3_BUCKET, - Key: file.data.s3Key, - }); - return await getSignedUrl(uploadsS3Client, command, { expiresIn: 60 }); // URL valid for 1 minute - } - - throw new Error('Invalid file type'); } export async function getUploadUrlsForFilesDataSource( - projectId: string, sourceId: string, files: { name: string; type: string; size: number }[] ): Promise<{ @@ -323,70 +187,31 @@ export async function getUploadUrlsForFilesDataSource( uploadUrl: string, path: string, }[]> { - await projectAuthCheck(projectId); - const source = await getDataSource(projectId, sourceId); - if (source.data.type !== 'files_local' && source.data.type !== 'files_s3') { - throw new Error('Invalid files data source'); - } + const user = await authCheck(); - const urls: { - fileId: string, - uploadUrl: string, - path: string, - }[] = []; - - for (const file of files) { - const fileId = new ObjectId().toString(); - - if (source.data.type === 'files_s3') { - // Generate presigned URL - const projectIdPrefix = projectId.slice(0, 2); // 2 characters from the start of the projectId - const path = `datasources/files/${projectIdPrefix}/${projectId}/${sourceId}/${fileId}/${file.name}`; - const command = new PutObjectCommand({ - Bucket: process.env.RAG_UPLOADS_S3_BUCKET, - Key: path, - ContentType: file.type, - }); - const uploadUrl = await getSignedUrl(uploadsS3Client, command, { expiresIn: 10 * 60 }); // valid for 10 minutes - urls.push({ - fileId, - uploadUrl, - 
path, - }); - } else if (source.data.type === 'files_local') { - // Generate local upload URL - urls.push({ - fileId, - uploadUrl: '/api/uploads/' + fileId, - path: '/api/uploads/' + fileId, - }); - } - } - - return urls; + return await getUploadUrlsForFilesController.execute({ + caller: 'user', + userId: user._id, + sourceId, + files, + }); } export async function updateDataSource({ - projectId, sourceId, description, }: { - projectId: string, sourceId: string, description: string, }) { - await projectAuthCheck(projectId); - await getDataSource(projectId, sourceId); + const user = await authCheck(); - await dataSourcesCollection.updateOne({ - _id: new ObjectId(sourceId), - }, { - $set: { + return await updateDataSourceController.execute({ + caller: 'user', + userId: user._id, + sourceId, + data: { description, - lastUpdatedAt: (new Date()).toISOString(), - }, - $inc: { - version: 1, }, }); } diff --git a/apps/rowboat/app/actions/project.actions.ts b/apps/rowboat/app/actions/project.actions.ts index ed0c24de..9da94c80 100644 --- a/apps/rowboat/app/actions/project.actions.ts +++ b/apps/rowboat/app/actions/project.actions.ts @@ -1,6 +1,6 @@ 'use server'; import { redirect } from "next/navigation"; -import { db, dataSourcesCollection, projectsCollection } from "../lib/mongodb"; +import { db, projectsCollection } from "../lib/mongodb"; import { z } from 'zod'; import crypto from 'crypto'; import { revalidatePath } from "next/cache"; @@ -12,13 +12,16 @@ import { Project } from "../lib/types/project_types"; import { USE_AUTH } from "../lib/feature_flags"; import { authorizeUserAction } from "./billing.actions"; import { Workflow } from "../lib/types/workflow_types"; -import { container } from "@/di/container"; import { IProjectActionAuthorizationPolicy } from "@/src/application/policies/project-action-authorization.policy"; import { ICreateApiKeyController } from "@/src/interface-adapters/controllers/api-keys/create-api-key.controller"; import { IListApiKeysController } 
from "@/src/interface-adapters/controllers/api-keys/list-api-keys.controller"; import { IDeleteApiKeyController } from "@/src/interface-adapters/controllers/api-keys/delete-api-key.controller"; import { IApiKeysRepository } from "@/src/application/repositories/api-keys.repository.interface"; import { IProjectMembersRepository } from "@/src/application/repositories/project-members.repository.interface"; +import { IDataSourcesRepository } from "@/src/application/repositories/data-sources.repository.interface"; +import { IDataSourceDocsRepository } from "@/src/application/repositories/data-source-docs.repository.interface"; +import { container } from "@/di/container"; +import { qdrantClient } from "../lib/qdrant"; const projectActionAuthorizationPolicy = container.resolve('projectActionAuthorizationPolicy'); const createApiKeyController = container.resolve('createApiKeyController'); @@ -26,6 +29,8 @@ const listApiKeysController = container.resolve('listApi const deleteApiKeyController = container.resolve('deleteApiKeyController'); const apiKeysRepository = container.resolve('apiKeysRepository'); const projectMembersRepository = container.resolve('projectMembersRepository'); +const dataSourcesRepository = container.resolve('dataSourcesRepository'); +const dataSourceDocsRepository = container.resolve('dataSourceDocsRepository'); export async function listTemplates() { const templatesArray = Object.entries(templates) @@ -234,22 +239,15 @@ export async function deleteProject(projectId: string) { // delete api keys await apiKeysRepository.deleteAll(projectId); - // delete embeddings - const sources = await dataSourcesCollection.find({ - projectId, - }, { - projection: { - _id: true, - } - }).toArray(); - - const ids = sources.map(s => s._id); - - // delete data sources - await dataSourcesCollection.deleteMany({ - _id: { - $in: ids, - } + // delete data sources data + await dataSourceDocsRepository.deleteByProjectId(projectId); + await 
dataSourcesRepository.deleteByProjectId(projectId); + await qdrantClient.delete("embeddings", { + filter: { + must: [ + { key: "projectId", match: { value: projectId } }, + ], + }, }); // delete project members diff --git a/apps/rowboat/app/api/uploads/[fileId]/route.ts b/apps/rowboat/app/api/uploads/[fileId]/route.ts index 1e76223e..84aae959 100644 --- a/apps/rowboat/app/api/uploads/[fileId]/route.ts +++ b/apps/rowboat/app/api/uploads/[fileId]/route.ts @@ -2,11 +2,13 @@ import { NextRequest, NextResponse } from 'next/server'; import path from 'path'; import fs from 'fs/promises'; import fsSync from 'fs'; -import { dataSourceDocsCollection } from '@/app/lib/mongodb'; -import { ObjectId } from 'mongodb'; +import { container } from '@/di/container'; +import { IDataSourceDocsRepository } from '@/src/application/repositories/data-source-docs.repository.interface'; const UPLOADS_DIR = process.env.RAG_UPLOADS_DIR || '/uploads'; +const dataSourceDocsRepository = container.resolve('dataSourceDocsRepository'); + // PUT endpoint to handle file uploads export async function PUT(request: NextRequest, props: { params: Promise<{ fileId: string }> }) { const params = await props.params; @@ -39,10 +41,8 @@ export async function GET(request: NextRequest, props: { params: Promise<{ fileI return NextResponse.json({ error: 'Missing file ID' }, { status: 400 }); } - const filePath = path.join(UPLOADS_DIR, fileId); - // get mimetype from database - const doc = await dataSourceDocsCollection.findOne({ _id: new ObjectId(fileId) }); + const doc = await dataSourceDocsRepository.fetch(fileId); if (!doc) { return NextResponse.json({ error: 'File not found' }, { status: 404 }); } @@ -54,6 +54,9 @@ export async function GET(request: NextRequest, props: { params: Promise<{ fileI const fileName = doc.data.name; try { + // strip uploads dir from path + const filePath = path.join(UPLOADS_DIR, doc.data.path.split('/api/uploads/')[1]); + // Check if file exists await fs.access(filePath); // Create a 
readable stream diff --git a/apps/rowboat/app/lib/agent-tools.ts b/apps/rowboat/app/lib/agent-tools.ts index 05f8d31e..9981e846 100644 --- a/apps/rowboat/app/lib/agent-tools.ts +++ b/apps/rowboat/app/lib/agent-tools.ts @@ -2,7 +2,6 @@ import { tool, Tool } from "@openai/agents"; import { createOpenAI } from "@ai-sdk/openai"; import { embed, generateText } from "ai"; -import { ObjectId } from "mongodb"; import { z } from "zod"; import { composio } from "./composio/composio"; import { SignJWT } from "jose"; @@ -11,12 +10,16 @@ import crypto from "crypto"; // Internal dependencies import { embeddingModel } from '../lib/embedding'; import { getMcpClient } from "./mcp"; -import { dataSourceDocsCollection, dataSourcesCollection, projectsCollection } from "./mongodb"; +import { projectsCollection } from "./mongodb"; import { qdrantClient } from '../lib/qdrant'; import { EmbeddingRecord } from "./types/datasource_types"; import { WorkflowAgent, WorkflowTool } from "./types/workflow_types"; import { PrefixLogger } from "./utils"; import { UsageTracker } from "./billing"; +import { DataSource } from "@/src/entities/models/data-source"; +import { IDataSourcesRepository } from "@/src/application/repositories/data-sources.repository.interface"; +import { IDataSourceDocsRepository } from "@/src/application/repositories/data-source-docs.repository.interface"; +import { container } from "@/di/container"; // Provider configuration const PROVIDER_API_KEY = process.env.PROVIDER_API_KEY || process.env.OPENAI_API_KEY || ''; @@ -92,6 +95,9 @@ export async function invokeRagTool( logger.log(`returnType: ${returnType}`); logger.log(`k: ${k}`); + const dataSourcesRepository = container.resolve('dataSourcesRepository'); + const dataSourceDocsRepository = container.resolve('dataSourceDocsRepository'); + // Create embedding for question const { embedding, usage } = await embed({ model: embeddingModel, @@ -109,14 +115,19 @@ export async function invokeRagTool( }); // Fetch all data sources for 
this project - const sources = await dataSourcesCollection.find({ - projectId: projectId, - active: true, - }).toArray(); + const sources: z.infer[] = []; + let cursor = undefined; + do { + const resp = await dataSourcesRepository.list(projectId, { + active: true, + }, cursor); + sources.push(...resp.items); + cursor = resp.nextCursor; + } while(cursor); + const validSourceIds = sources - .filter(s => sourceIds.includes(s._id.toString())) // id should be in sourceIds - .filter(s => s.active) // should be active - .map(s => s._id.toString()); + .filter(s => sourceIds.includes(s.id)) // id should be in sourceIds + .map(s => s.id); logger.log(`valid source ids: ${validSourceIds.join(', ')}`); // if no sources found, return empty response @@ -157,14 +168,12 @@ export async function invokeRagTool( } // otherwise, fetch the doc contents from mongodb - const docs = await dataSourceDocsCollection.find({ - _id: { $in: results.map(r => new ObjectId(r.docId)) }, - }).toArray(); + const docs = await dataSourceDocsRepository.bulkFetch(results.map(r => r.docId)); logger.log(`fetched docs: ${docs.length}`); // map the results to the docs results = results.map(r => { - const doc = docs.find(d => d._id.toString() === r.docId); + const doc = docs.find(d => d.id === r.docId); return { ...r, content: doc?.content || '', diff --git a/apps/rowboat/app/lib/copilot/copilot.ts b/apps/rowboat/app/lib/copilot/copilot.ts index e729caf5..e2313a75 100644 --- a/apps/rowboat/app/lib/copilot/copilot.ts +++ b/apps/rowboat/app/lib/copilot/copilot.ts @@ -1,10 +1,9 @@ import z from "zod"; import { createOpenAI } from "@ai-sdk/openai"; import { generateObject, streamText, tool } from "ai"; -import { WithStringId } from "../types/types"; import { Workflow, WorkflowTool } from "../types/workflow_types"; import { CopilotChatContext, CopilotMessage } from "../types/copilot_types"; -import { DataSource } from "../types/datasource_types"; +import { DataSource } from "@/src/entities/models/data-source"; 
import { PrefixLogger } from "../utils"; import zodToJsonSchema from "zod-to-json-schema"; import { COPILOT_INSTRUCTIONS_EDIT_AGENT } from "./copilot_edit_agent"; @@ -102,11 +101,11 @@ ${JSON.stringify(workflow)} `; } -function getDataSourcesPrompt(dataSources: WithStringId>[]): string { +function getDataSourcesPrompt(dataSources: z.infer[]): string { let prompt = ''; if (dataSources.length > 0) { const simplifiedDataSources = dataSources.map(ds => ({ - id: ds._id, + id: ds.id, name: ds.name, description: ds.description, data: ds.data, @@ -274,7 +273,7 @@ export async function* streamMultiAgentResponse( context: z.infer | null, messages: z.infer[], workflow: z.infer, - dataSources: WithStringId>[] + dataSources: z.infer[] ): AsyncIterable> { const logger = new PrefixLogger('copilot /stream'); logger.log('context', context); diff --git a/apps/rowboat/app/lib/mongodb.ts b/apps/rowboat/app/lib/mongodb.ts index 1d599192..f3555861 100644 --- a/apps/rowboat/app/lib/mongodb.ts +++ b/apps/rowboat/app/lib/mongodb.ts @@ -2,8 +2,6 @@ import { MongoClient } from "mongodb"; import { User } from "./types/types"; import { Workflow } from "./types/workflow_types"; import { Project } from "./types/project_types"; -import { DataSourceDoc } from "./types/datasource_types"; -import { DataSource } from "./types/datasource_types"; import { TwilioConfig, TwilioInboundCall } from "./types/voice_types"; import { z } from 'zod'; import { apiV1 } from "rowboat-shared"; @@ -11,8 +9,6 @@ import { apiV1 } from "rowboat-shared"; const client = new MongoClient(process.env["MONGODB_CONNECTION_STRING"] || "mongodb://localhost:27017"); export const db = client.db("rowboat"); -export const dataSourcesCollection = db.collection>("sources"); -export const dataSourceDocsCollection = db.collection>("source_docs"); export const projectsCollection = db.collection>("projects"); export const agentWorkflowsCollection = db.collection>("agent_workflows"); export const chatsCollection = db.collection>("chats"); 
diff --git a/apps/rowboat/app/lib/types/copilot_types.ts b/apps/rowboat/app/lib/types/copilot_types.ts index 57688dd0..19fdb6d4 100644 --- a/apps/rowboat/app/lib/types/copilot_types.ts +++ b/apps/rowboat/app/lib/types/copilot_types.ts @@ -1,7 +1,7 @@ import { z } from "zod"; import { Workflow } from "./workflow_types"; import { Message } from "./types"; -import { DataSource } from "./datasource_types"; +import { DataSource } from "@/src/entities/models/data-source"; export const CopilotUserMessage = z.object({ role: z.literal('user'), @@ -52,9 +52,7 @@ export const CopilotAPIRequest = z.object({ messages: z.array(CopilotMessage), workflow: Workflow, context: CopilotChatContext.nullable(), - dataSources: z.array(DataSource.extend({ - _id: z.string(), - })).optional(), + dataSources: z.array(DataSource).optional(), }); export const CopilotAPIResponse = z.union([ z.object({ diff --git a/apps/rowboat/app/lib/types/datasource_types.ts b/apps/rowboat/app/lib/types/datasource_types.ts index 9fe72d6f..89323f9b 100644 --- a/apps/rowboat/app/lib/types/datasource_types.ts +++ b/apps/rowboat/app/lib/types/datasource_types.ts @@ -1,90 +1,5 @@ import { z } from "zod"; -export const DataSource = z.object({ - name: z.string(), - description: z.string().optional(), - projectId: z.string(), - active: z.boolean().default(true), - status: z.union([ - z.literal('pending'), - z.literal('ready'), - z.literal('error'), - z.literal('deleted'), - ]).optional(), - version: z.number(), - error: z.string().optional(), - billingError: z.string().optional(), - createdAt: z.string().datetime(), - lastUpdatedAt: z.string().datetime().optional(), - attempts: z.number(), - lastAttemptAt: z.string().datetime().optional(), - pendingRefresh: z.boolean().default(false).optional(), - data: z.discriminatedUnion('type', [ - z.object({ - type: z.literal('urls'), - }), - z.object({ - type: z.literal('files_local'), - }), - z.object({ - type: z.literal('files_s3'), - }), - z.object({ - type: 
z.literal('text'), - }) - ]), -}); - -export const DataSourceDoc = z.object({ - sourceId: z.string(), - name: z.string(), - version: z.number(), - status: z.union([ - z.literal('pending'), - z.literal('ready'), - z.literal('error'), - z.literal('deleted'), - ]), - content: z.string().optional(), - createdAt: z.string().datetime(), - lastUpdatedAt: z.string().datetime().optional(), - error: z.string().optional(), - data: z.discriminatedUnion('type', [ - z.object({ - type: z.literal('url'), - url: z.string(), - }), - z.object({ - type: z.literal('file_local'), - name: z.string(), - size: z.number(), - mimeType: z.string(), - }), - z.object({ - type: z.literal('file_s3'), - name: z.string(), - size: z.number(), - mimeType: z.string(), - s3Key: z.string(), - }), - z.object({ - type: z.literal('text'), - content: z.string(), - }), - ]), -}); - -export const EmbeddingDoc = z.object({ - content: z.string(), - sourceId: z.string(), - embeddings: z.array(z.number()), - metadata: z.object({ - sourceURL: z.string(), - title: z.string(), - score: z.number().optional(), - }), -}); - export const EmbeddingRecord = z.object({ id: z.string().uuid(), vector: z.array(z.number()), diff --git a/apps/rowboat/app/projects/[projectId]/copilot/app.tsx b/apps/rowboat/app/projects/[projectId]/copilot/app.tsx index 743f6a55..ed2b28bf 100644 --- a/apps/rowboat/app/projects/[projectId]/copilot/app.tsx +++ b/apps/rowboat/app/projects/[projectId]/copilot/app.tsx @@ -5,7 +5,7 @@ import { useRef, useState, createContext, useContext, useCallback, forwardRef, u import { CopilotChatContext } from "../../../lib/types/copilot_types"; import { CopilotMessage } from "../../../lib/types/copilot_types"; import { Workflow } from "@/app/lib/types/workflow_types"; -import { DataSource } from "@/app/lib/types/datasource_types"; +import { DataSource } from "@/src/entities/models/data-source"; import { z } from "zod"; import { Action as WorkflowDispatch } from 
"@/app/projects/[projectId]/workflow/workflow_editor"; import { Panel } from "@/components/common/panel-common"; @@ -14,7 +14,6 @@ import { Messages } from "./components/messages"; import { CopyIcon, CheckIcon, PlusIcon, XIcon, InfoIcon, Sparkles } from "lucide-react"; import { useCopilot } from "./use-copilot"; import { BillingUpgradeModal } from "@/components/common/billing-upgrade-modal"; -import { WithStringId } from "@/app/lib/types/types"; const CopilotContext = createContext<{ workflow: z.infer | null; @@ -33,7 +32,7 @@ interface AppProps { onCopyJson?: (data: { messages: any[] }) => void; onMessagesChange?: (messages: z.infer[]) => void; isInitialState?: boolean; - dataSources?: WithStringId>[]; + dataSources?: z.infer[]; } const App = forwardRef<{ handleCopyChat: () => void; handleUserMessage: (message: string) => void }, AppProps>(function App({ @@ -277,7 +276,7 @@ export const Copilot = forwardRef<{ handleUserMessage: (message: string) => void chatContext?: z.infer; dispatch: (action: WorkflowDispatch) => void; isInitialState?: boolean; - dataSources?: WithStringId>[]; + dataSources?: z.infer[]; }>(({ projectId, workflow, diff --git a/apps/rowboat/app/projects/[projectId]/copilot/use-copilot.tsx b/apps/rowboat/app/projects/[projectId]/copilot/use-copilot.tsx index eddc041b..b35d98ec 100644 --- a/apps/rowboat/app/projects/[projectId]/copilot/use-copilot.tsx +++ b/apps/rowboat/app/projects/[projectId]/copilot/use-copilot.tsx @@ -2,7 +2,7 @@ import { useCallback, useRef, useState } from "react"; import { getCopilotResponseStream } from "@/app/actions/copilot.actions"; import { CopilotMessage } from "@/app/lib/types/copilot_types"; import { Workflow } from "@/app/lib/types/workflow_types"; -import { DataSource } from "@/app/lib/types/datasource_types"; +import { DataSource } from "@/src/entities/models/data-source"; import { z } from "zod"; import { WithStringId } from "@/app/lib/types/types"; @@ -10,7 +10,7 @@ interface UseCopilotParams { projectId: string; 
workflow: z.infer; context: any; - dataSources?: WithStringId>[]; + dataSources?: z.infer[]; } interface UseCopilotResult { diff --git a/apps/rowboat/app/projects/[projectId]/entities/agent_config.tsx b/apps/rowboat/app/projects/[projectId]/entities/agent_config.tsx index 4a7ea306..69876b9d 100644 --- a/apps/rowboat/app/projects/[projectId]/entities/agent_config.tsx +++ b/apps/rowboat/app/projects/[projectId]/entities/agent_config.tsx @@ -1,7 +1,6 @@ "use client"; -import { WithStringId } from "../../../lib/types/types"; import { WorkflowPrompt, WorkflowAgent, Workflow, WorkflowTool } from "../../../lib/types/workflow_types"; -import { DataSource } from "../../../lib/types/datasource_types"; +import { DataSource } from "@/src/entities/models/data-source"; import { z } from "zod"; import { PlusIcon, Sparkles, X as XIcon, ChevronDown, ChevronRight, Trash2, Maximize2, Minimize2, StarIcon, DatabaseIcon, UserIcon, Settings, Info } from "lucide-react"; import { useState, useEffect, useRef } from "react"; @@ -59,7 +58,7 @@ export function AgentConfig({ agents: z.infer[], tools: z.infer[], prompts: z.infer[], - dataSources: WithStringId>[], + dataSources: z.infer[], handleUpdate: (agent: z.infer) => void, handleClose: () => void, useRag: boolean, @@ -726,12 +725,12 @@ export function AgentConfig({ startContent={} > {dataSources - .filter((ds) => !(agent.ragDataSources || []).includes(ds._id)) + .filter((ds) => !(agent.ragDataSources || []).includes(ds.id)) .length > 0 ? ( dataSources - .filter((ds) => !(agent.ragDataSources || []).includes(ds._id)) + .filter((ds) => !(agent.ragDataSources || []).includes(ds.id)) .map((ds) => ( - + {ds.name} )) @@ -775,7 +774,7 @@ export function AgentConfig({ {agent.ragDataSources !== undefined && agent.ragDataSources.length > 0 && (
{(agent.ragDataSources || []).map((source) => { - const ds = dataSources.find((ds) => ds._id === source); + const ds = dataSources.find((ds) => ds.id === source); return (
void, onDataSourceUpdate?: () => void }) { - const [dataSource, setDataSource] = useState> | null>(null); + const [dataSource, setDataSource] = useState | null>(null); const [loading, setLoading] = useState(true); const [error, setError] = useState(null); // Files-related state - const [files, setFiles] = useState>[]>([]); + const [files, setFiles] = useState[]>([]); const [filesLoading, setFilesLoading] = useState(false); const [filesPage, setFilesPage] = useState(1); const [filesTotal, setFilesTotal] = useState(0); @@ -44,22 +43,22 @@ export function DataSourceConfig({ const currentProjectId = pathParts[2]; // /projects/[projectId]/workflow setProjectId(currentProjectId); - const ds = await getDataSource(currentProjectId, dataSourceId); + const ds = await getDataSource(dataSourceId); setDataSource(ds); // Load files if it's a files data source if (ds.data.type === 'files_local' || ds.data.type === 'files_s3') { - await loadFiles(currentProjectId, dataSourceId, 1); + await loadFiles(dataSourceId, 1); } // Load URLs if it's a URLs data source if (ds.data.type === 'urls') { - await loadUrls(currentProjectId, dataSourceId, 1); + await loadUrls(dataSourceId, 1); } // Load text content if it's a text data source if (ds.data.type === 'text') { - await loadTextContent(currentProjectId, dataSourceId); + await loadTextContent(dataSourceId); } } catch (err) { console.error('Failed to load data source:', err); @@ -91,7 +90,7 @@ export function DataSourceConfig({ } try { - const updatedSource = await getDataSource(projectId, dataSourceId); + const updatedSource = await getDataSource(dataSourceId); if (!ignore) { setDataSource(updatedSource); onDataSourceUpdate?.(); // Notify parent of status change @@ -124,20 +123,19 @@ export function DataSourceConfig({ // Helper function to update data source and notify parent const updateDataSourceAndNotify = useCallback(async () => { try { - const updatedSource = await getDataSource(projectId, dataSourceId); + const updatedSource = await 
getDataSource(dataSourceId); setDataSource(updatedSource); onDataSourceUpdate?.(); } catch (err) { console.error('Failed to reload data source:', err); } - }, [projectId, dataSourceId, onDataSourceUpdate]); + }, [dataSourceId, onDataSourceUpdate]); // Load files function - const loadFiles = async (projectId: string, sourceId: string, page: number) => { + const loadFiles = async (sourceId: string, page: number) => { try { setFilesLoading(true); const { files, total } = await listDocsInDataSource({ - projectId, sourceId, page, limit: 10, @@ -153,7 +151,7 @@ export function DataSourceConfig({ }; // URLs-related state - const [urls, setUrls] = useState>[]>([]); + const [urls, setUrls] = useState[]>([]); const [urlsLoading, setUrlsLoading] = useState(false); const [urlsPage, setUrlsPage] = useState(1); const [urlsTotal, setUrlsTotal] = useState(0); @@ -171,11 +169,10 @@ export function DataSourceConfig({ const [uploadingFiles, setUploadingFiles] = useState(false); // Load URLs function - const loadUrls = async (projectId: string, sourceId: string, page: number) => { + const loadUrls = async (sourceId: string, page: number) => { try { setUrlsLoading(true); const { files, total } = await listDocsInDataSource({ - projectId, sourceId, page, limit: 10, @@ -191,11 +188,10 @@ export function DataSourceConfig({ }; // Load text content function - const loadTextContent = async (projectId: string, sourceId: string) => { + const loadTextContent = async (sourceId: string) => { try { setTextLoading(true); const { files } = await listDocsInDataSource({ - projectId, sourceId, limit: 1, }); @@ -218,13 +214,11 @@ export function DataSourceConfig({ if (!window.confirm('Are you sure you want to delete this file?')) return; try { - await deleteDocsFromDataSource({ - projectId, - sourceId: dataSourceId, - docIds: [fileId], + await deleteDocFromDataSource({ + docId: fileId, }); // Reload files - await loadFiles(projectId, dataSourceId, filesPage); + await loadFiles(dataSourceId, filesPage); 
// Reload data source to get updated status await updateDataSourceAndNotify(); @@ -236,7 +230,7 @@ export function DataSourceConfig({ // Handle file download const handleDownloadFile = async (fileId: string) => { try { - const url = await getDownloadUrlForFile(projectId, dataSourceId, fileId); + const url = await getDownloadUrlForFile(fileId); window.open(url, '_blank'); } catch (err) { console.error('Failed to download file:', err); @@ -245,7 +239,7 @@ export function DataSourceConfig({ // Handle page change const handlePageChange = (page: number) => { - loadFiles(projectId, dataSourceId, page); + loadFiles(dataSourceId, page); }; // Handle URL deletion @@ -253,13 +247,11 @@ export function DataSourceConfig({ if (!window.confirm('Are you sure you want to delete this URL?')) return; try { - await deleteDocsFromDataSource({ - projectId, - sourceId: dataSourceId, - docIds: [urlId], + await deleteDocFromDataSource({ + docId: urlId, }); // Reload URLs - await loadUrls(projectId, dataSourceId, urlsPage); + await loadUrls(dataSourceId, urlsPage); // Reload data source to get updated status await updateDataSourceAndNotify(); @@ -270,7 +262,7 @@ export function DataSourceConfig({ // Handle URL page change const handleUrlPageChange = (page: number) => { - loadUrls(projectId, dataSourceId, page); + loadUrls(dataSourceId, page); }; // Handle text content update @@ -279,22 +271,18 @@ export function DataSourceConfig({ try { // Delete existing text doc if it exists const { files } = await listDocsInDataSource({ - projectId, sourceId: dataSourceId, limit: 1, }); if (files.length > 0) { - await deleteDocsFromDataSource({ - projectId, - sourceId: dataSourceId, - docIds: [files[0]._id], + await deleteDocFromDataSource({ + docId: files[0].id, }); } // Add new text doc await addDocsToDataSource({ - projectId, sourceId: dataSourceId, docData: [{ name: 'text', @@ -327,7 +315,6 @@ export function DataSourceConfig({ const first100Urls = urlsArray.slice(0, 100); await 
addDocsToDataSource({ - projectId, sourceId: dataSourceId, docData: first100Urls.map(url => ({ name: url, @@ -339,7 +326,7 @@ export function DataSourceConfig({ }); setShowAddUrlForm(false); - await loadUrls(projectId, dataSourceId, urlsPage); + await loadUrls(dataSourceId, urlsPage); // Reload data source to get updated status await updateDataSourceAndNotify(); @@ -356,7 +343,7 @@ export function DataSourceConfig({ setUploadingFiles(true); try { - const urls = await getUploadUrlsForFilesDataSource(projectId, dataSourceId, acceptedFiles.map(file => ({ + const urls = await getUploadUrlsForFilesDataSource(dataSourceId, acceptedFiles.map(file => ({ name: file.name, type: file.type, size: file.size, @@ -403,17 +390,17 @@ export function DataSourceConfig({ name: file.name, size: file.size, mimeType: file.type, + path: urls[index].path, }, })); } await addDocsToDataSource({ - projectId, sourceId: dataSourceId, docData, }); - await loadFiles(projectId, dataSourceId, filesPage); + await loadFiles(dataSourceId, filesPage); // Reload data source to get updated status await updateDataSourceAndNotify(); @@ -422,7 +409,7 @@ export function DataSourceConfig({ } finally { setUploadingFiles(false); } - }, [projectId, dataSourceId, dataSource, filesPage, updateDataSourceAndNotify]); + }, [dataSourceId, dataSource, filesPage, updateDataSourceAndNotify]); const { getRootProps, getInputProps, isDragActive } = useDropzone({ onDrop: onFileDrop, @@ -676,7 +663,7 @@ export function DataSourceConfig({
{files.map((file) => (
@@ -696,7 +683,7 @@ export function DataSourceConfig({ {(file.data.type === 'file_local' || file.data.type === 'file_s3') && (
diff --git a/apps/rowboat/app/projects/[projectId]/sources/components/delete.tsx b/apps/rowboat/app/projects/[projectId]/sources/components/delete.tsx index 2ef8f497..9b5323bd 100644 --- a/apps/rowboat/app/projects/[projectId]/sources/components/delete.tsx +++ b/apps/rowboat/app/projects/[projectId]/sources/components/delete.tsx @@ -4,15 +4,13 @@ import { deleteDataSource } from "../../../../actions/data-source.actions"; import { FormStatusButton } from "../../../../lib/components/form-status-button"; export function DeleteSource({ - projectId, sourceId, }: { - projectId: string; sourceId: string; }) { function handleDelete() { if (window.confirm('Are you sure you want to delete this data source?')) { - deleteDataSource(projectId, sourceId); + deleteDataSource(sourceId); } } diff --git a/apps/rowboat/app/projects/[projectId]/sources/components/files-source.tsx b/apps/rowboat/app/projects/[projectId]/sources/components/files-source.tsx index c394875f..897f84e6 100644 --- a/apps/rowboat/app/projects/[projectId]/sources/components/files-source.tsx +++ b/apps/rowboat/app/projects/[projectId]/sources/components/files-source.tsx @@ -1,24 +1,20 @@ "use client"; -import { WithStringId } from "../../../../lib/types/types"; -import { DataSourceDoc, DataSource } from "../../../../lib/types/datasource_types"; +import { DataSourceDoc } from "@/src/entities/models/data-source-doc"; +import { DataSource } from "@/src/entities/models/data-source"; import { z } from "zod"; import { useCallback, useEffect, useState } from "react"; import { useDropzone } from "react-dropzone"; -import { deleteDocsFromDataSource, getUploadUrlsForFilesDataSource, addDocsToDataSource, getDownloadUrlForFile, listDocsInDataSource } from "../../../../actions/data-source.actions"; +import { deleteDocFromDataSource, getUploadUrlsForFilesDataSource, addDocsToDataSource, getDownloadUrlForFile, listDocsInDataSource } from "../../../../actions/data-source.actions"; import { RelativeTime } from "@primer/react"; 
import { Pagination, Spinner } from "@heroui/react"; import { DownloadIcon } from "lucide-react"; import { Section } from "./section"; function FileListItem({ - projectId, - sourceId, file, onDelete, }: { - projectId: string, - sourceId: string, - file: WithStringId>, + file: z.infer, onDelete: (fileId: string) => Promise; }) { const [isDeleting, setIsDeleting] = useState(false); @@ -27,7 +23,7 @@ function FileListItem({ const handleDeleteClick = async () => { setIsDeleting(true); try { - await onDelete(file._id); + await onDelete(file.id); } finally { setIsDeleting(false); } @@ -36,7 +32,7 @@ function FileListItem({ const handleDownloadClick = async () => { setIsDownloading(true); try { - const url = await getDownloadUrlForFile(projectId, sourceId, file._id); + const url = await getDownloadUrlForFile(file.id); window.open(url, '_blank'); } catch (error) { console.error('Download failed:', error); @@ -90,17 +86,15 @@ function FileListItem({ } function PaginatedFileList({ - projectId, sourceId, handleReload, onDelete, }: { - projectId: string, sourceId: string, handleReload: () => void; onDelete: (fileId: string) => Promise; }) { - const [files, setFiles] = useState>[]>([]); + const [files, setFiles] = useState[]>([]); const [page, setPage] = useState(1); const [total, setTotal] = useState(0); const [loading, setLoading] = useState(false); @@ -114,7 +108,6 @@ function PaginatedFileList({ setLoading(true); try { const { files, total } = await listDocsInDataSource({ - projectId, sourceId, page, limit: 10, @@ -134,7 +127,7 @@ function PaginatedFileList({ return () => { ignore = true; } - }, [projectId, sourceId, page]); + }, [sourceId, page]); return (
@@ -154,10 +147,8 @@ function PaginatedFileList({
{files.map(file => ( ))} @@ -177,13 +168,11 @@ function PaginatedFileList({ } export function FilesSource({ - projectId, dataSource, handleReload, type, }: { - projectId: string, - dataSource: WithStringId>, + dataSource: z.infer, handleReload: () => void; type: 'files_local' | 'files_s3'; }) { @@ -193,7 +182,7 @@ export function FilesSource({ const onDrop = useCallback(async (acceptedFiles: File[]) => { setUploading(true); try { - const urls = await getUploadUrlsForFilesDataSource(projectId, dataSource._id, acceptedFiles.map(file => ({ + const urls = await getUploadUrlsForFilesDataSource(dataSource.id, acceptedFiles.map(file => ({ name: file.name, type: file.type, size: file.size, @@ -237,13 +226,13 @@ export function FilesSource({ name: file.name, size: file.size, mimeType: file.type, + path: urls[index].path, }, })); } await addDocsToDataSource({ - projectId, - sourceId: dataSource._id, + sourceId: dataSource.id, docData, }); @@ -255,7 +244,7 @@ export function FilesSource({ } finally { setUploading(false); } - }, [projectId, dataSource._id, handleReload, type]); + }, [dataSource.id, handleReload, type]); const { getRootProps, getInputProps, isDragActive } = useDropzone({ onDrop, @@ -299,14 +288,11 @@ export function FilesSource({ { - await deleteDocsFromDataSource({ - projectId, - sourceId: dataSource._id, - docIds: [docId], + await deleteDocFromDataSource({ + docId: docId, }); handleReload(); setFileListKey(prev => prev + 1); diff --git a/apps/rowboat/app/projects/[projectId]/sources/components/scrape-source.tsx b/apps/rowboat/app/projects/[projectId]/sources/components/scrape-source.tsx index 1fdcbd5e..84f0694d 100644 --- a/apps/rowboat/app/projects/[projectId]/sources/components/scrape-source.tsx +++ b/apps/rowboat/app/projects/[projectId]/sources/components/scrape-source.tsx @@ -1,9 +1,9 @@ "use client"; -import { WithStringId } from "../../../../lib/types/types"; -import { DataSourceDoc, DataSource } from "../../../../lib/types/datasource_types"; +import { 
DataSourceDoc } from "@/src/entities/models/data-source-doc"; +import { DataSource } from "@/src/entities/models/data-source"; import { z } from "zod"; import { Recrawl } from "./web-recrawl"; -import { deleteDocsFromDataSource, listDocsInDataSource, recrawlWebDataSource, addDocsToDataSource } from "../../../../actions/data-source.actions"; +import { deleteDocFromDataSource, listDocsInDataSource, recrawlWebDataSource, addDocsToDataSource } from "../../../../actions/data-source.actions"; import { useState, useEffect } from "react"; import { Spinner, Pagination } from "@heroui/react"; import { ExternalLinkIcon, PlusIcon } from "lucide-react"; @@ -13,7 +13,7 @@ import { Textarea } from "@/components/ui/textarea"; import { Section } from "./section"; function UrlListItem({ file, onDelete }: { - file: WithStringId>, + file: z.infer, onDelete: (fileId: string) => Promise; }) { const [isDeleting, setIsDeleting] = useState(false); @@ -37,7 +37,7 @@ function UrlListItem({ file, onDelete }: { onClick={async () => { setIsDeleting(true); try { - await onDelete(file._id); + await onDelete(file.id); } finally { setIsDeleting(false); } @@ -51,12 +51,11 @@ function UrlListItem({ file, onDelete }: { ); } -function UrlList({ projectId, sourceId, onDelete }: { - projectId: string, +function UrlList({ sourceId, onDelete }: { sourceId: string, onDelete: (fileId: string) => Promise, }) { - const [files, setFiles] = useState>[]>([]); + const [files, setFiles] = useState[]>([]); const [loading, setLoading] = useState(true); const [page, setPage] = useState(1); const [total, setTotal] = useState(0); @@ -69,7 +68,7 @@ function UrlList({ projectId, sourceId, onDelete }: { async function fetchFiles() { setLoading(true); try { - const { files, total } = await listDocsInDataSource({ projectId, sourceId, page, limit: 10 }); + const { files, total } = await listDocsInDataSource({ sourceId, page, limit: 10 }); if (!ignore) { setFiles(files); setTotal(total); @@ -86,7 +85,7 @@ function UrlList({ 
projectId, sourceId, onDelete }: { return () => { ignore = true; }; - }, [projectId, sourceId, page]); + }, [sourceId, page]); return (
@@ -102,7 +101,7 @@ function UrlList({ projectId, sourceId, onDelete }: { ) : (
{files.map(file => ( - + ))} {Math.ceil(total / 10) > 1 && (
@@ -120,12 +119,10 @@ function UrlList({ projectId, sourceId, onDelete }: { } export function ScrapeSource({ - projectId, dataSource, handleReload, }: { - projectId: string, - dataSource: WithStringId>, + dataSource: z.infer, handleReload: () => void; }) { const [fileListKey, setFileListKey] = useState(0); @@ -161,8 +158,7 @@ export function ScrapeSource({ const first100Urls = urlsArray.slice(0, 100); await addDocsToDataSource({ - projectId, - sourceId: dataSource._id, + sourceId: dataSource.id, docData: first100Urls.map(url => ({ name: url, data: { @@ -209,13 +205,10 @@ export function ScrapeSource({ { - await deleteDocsFromDataSource({ - projectId, - sourceId: dataSource._id, - docIds: [docId], + await deleteDocFromDataSource({ + docId: docId, }); handleReload(); setFileListKey(prev => prev + 1); @@ -230,10 +223,8 @@ export function ScrapeSource({ description="Update the content by scraping the URLs again." > { - await recrawlWebDataSource(projectId, dataSource._id); + await recrawlWebDataSource(dataSource.id); handleReload(); setFileListKey(prev => prev + 1); }} diff --git a/apps/rowboat/app/projects/[projectId]/sources/components/self-updating-source-status.tsx b/apps/rowboat/app/projects/[projectId]/sources/components/self-updating-source-status.tsx index d4d1a602..773d3662 100644 --- a/apps/rowboat/app/projects/[projectId]/sources/components/self-updating-source-status.tsx +++ b/apps/rowboat/app/projects/[projectId]/sources/components/self-updating-source-status.tsx @@ -1,17 +1,15 @@ 'use client'; import { getDataSource } from "../../../../actions/data-source.actions"; -import { DataSource } from "../../../../lib/types/datasource_types"; +import { DataSource } from "@/src/entities/models/data-source"; import { useEffect, useState } from "react"; import { z } from 'zod'; import { SourceStatus } from "./source-status"; export function SelfUpdatingSourceStatus({ - projectId, sourceId, initialStatus, compact = false, }: { - projectId: string; sourceId: string, 
initialStatus: z.infer['status'], compact?: boolean; @@ -26,7 +24,7 @@ export function SelfUpdatingSourceStatus({ if (ignore) { return; } - const source = await getDataSource(projectId, sourceId); + const source = await getDataSource(sourceId); setStatus(source.status); timeoutId = setTimeout(check, 15 * 1000); } @@ -41,7 +39,7 @@ export function SelfUpdatingSourceStatus({ clearTimeout(timeoutId); } }; - }, [status, projectId, sourceId]); + }, [status, sourceId]); - return ; + return ; } \ No newline at end of file diff --git a/apps/rowboat/app/projects/[projectId]/sources/components/source-status.tsx b/apps/rowboat/app/projects/[projectId]/sources/components/source-status.tsx index e2dc7789..eb641bd1 100644 --- a/apps/rowboat/app/projects/[projectId]/sources/components/source-status.tsx +++ b/apps/rowboat/app/projects/[projectId]/sources/components/source-status.tsx @@ -1,15 +1,13 @@ -import { DataSource } from "../../../../lib/types/datasource_types"; +import { DataSource } from "@/src/entities/models/data-source"; import { Spinner } from "@heroui/react"; import { z } from 'zod'; import { CheckCircleIcon, XCircleIcon, ClockIcon } from "lucide-react"; export function SourceStatus({ status, - projectId, compact = false, }: { status: z.infer['status'], - projectId: string, compact?: boolean; }) { return ( diff --git a/apps/rowboat/app/projects/[projectId]/sources/components/sources-list.tsx b/apps/rowboat/app/projects/[projectId]/sources/components/sources-list.tsx index 5e0ee0e6..ca1dce58 100644 --- a/apps/rowboat/app/projects/[projectId]/sources/components/sources-list.tsx +++ b/apps/rowboat/app/projects/[projectId]/sources/components/sources-list.tsx @@ -6,15 +6,14 @@ import { ToggleSource } from "./toggle-source"; import { SelfUpdatingSourceStatus } from "./self-updating-source-status"; import { DataSourceIcon } from "../../../../lib/components/datasource-icon"; import { useEffect, useState } from "react"; -import { WithStringId } from 
"../../../../lib/types/types"; -import { DataSource } from "../../../../lib/types/datasource_types"; +import { DataSource } from "@/src/entities/models/data-source"; import { z } from "zod"; import { listDataSources } from "../../../../actions/data-source.actions"; import { Panel } from "@/components/common/panel-common"; import { PlusIcon } from "lucide-react"; export function SourcesList({ projectId }: { projectId: string }) { - const [sources, setSources] = useState>[]>([]); + const [sources, setSources] = useState[]>([]); const [loading, setLoading] = useState(true); useEffect(() => { @@ -115,12 +114,12 @@ export function SourcesList({ projectId }: { projectId: string }) { {sources.map((source) => (
@@ -168,8 +166,7 @@ export function SourcesList({ projectId }: { projectId: string }) { )} >, + dataSource: z.infer, handleReload: () => void; }) { const [content, setContent] = useState(""); @@ -30,8 +27,7 @@ export function TextSource({ setIsLoading(true); try { const { files } = await listDocsInDataSource({ - projectId, - sourceId: dataSource._id, + sourceId: dataSource.id, limit: 1, }); @@ -41,7 +37,7 @@ export function TextSource({ const doc = files[0]; if (doc.data.type === 'text') { setContent(doc.data.content); - setDocId(doc._id); + setDocId(doc.id); } } } catch (error) { @@ -55,7 +51,7 @@ export function TextSource({ return () => { ignore = true; }; - }, [projectId, dataSource._id]); + }, [dataSource.id]); async function handleSubmit(formData: FormData) { setIsSaving(true); @@ -64,17 +60,14 @@ export function TextSource({ // Delete existing doc if it exists if (docId) { - await deleteDocsFromDataSource({ - projectId, - sourceId: dataSource._id, - docIds: [docId], + await deleteDocFromDataSource({ + docId: docId, }); } // Add new doc await addDocsToDataSource({ - projectId, - sourceId: dataSource._id, + sourceId: dataSource.id, docData: [{ name: 'text', data: { diff --git a/apps/rowboat/app/projects/[projectId]/sources/components/toggle-source.tsx b/apps/rowboat/app/projects/[projectId]/sources/components/toggle-source.tsx index a8b3e88d..08d3614f 100644 --- a/apps/rowboat/app/projects/[projectId]/sources/components/toggle-source.tsx +++ b/apps/rowboat/app/projects/[projectId]/sources/components/toggle-source.tsx @@ -4,13 +4,11 @@ import { Spinner } from "@heroui/react"; import { useState } from "react"; export function ToggleSource({ - projectId, sourceId, active, compact = false, className }: { - projectId: string; sourceId: string; active: boolean; compact?: boolean; @@ -22,7 +20,7 @@ export function ToggleSource({ async function handleToggle() { setLoading(true); try { - await toggleDataSource(projectId, sourceId, !isActive); + await 
toggleDataSource(sourceId, !isActive); setIsActive(!isActive); } finally { setLoading(false); diff --git a/apps/rowboat/app/projects/[projectId]/sources/components/web-recrawl.tsx b/apps/rowboat/app/projects/[projectId]/sources/components/web-recrawl.tsx index fc4c7a43..b583b904 100644 --- a/apps/rowboat/app/projects/[projectId]/sources/components/web-recrawl.tsx +++ b/apps/rowboat/app/projects/[projectId]/sources/components/web-recrawl.tsx @@ -3,12 +3,8 @@ import { FormStatusButton } from "../../../../lib/components/form-status-button" import { RefreshCwIcon } from "lucide-react"; export function Recrawl({ - projectId, - sourceId, handleRefresh, }: { - projectId: string; - sourceId: string; handleRefresh: () => void; }) { return
diff --git a/apps/rowboat/app/projects/[projectId]/sources/new/form.tsx b/apps/rowboat/app/projects/[projectId]/sources/new/form.tsx index 9b67ff44..d9537565 100644 --- a/apps/rowboat/app/projects/[projectId]/sources/new/form.tsx +++ b/apps/rowboat/app/projects/[projectId]/sources/new/form.tsx @@ -71,8 +71,7 @@ export function Form({ // pick first 100 const first100Urls = urlsArray.slice(0, 100); await addDocsToDataSource({ - projectId, - sourceId: source._id, + sourceId: source.id, docData: first100Urls.map(url => ({ name: url, data: { @@ -82,7 +81,7 @@ export function Form({ })), }); if (onSuccess) { - onSuccess(source._id); + onSuccess(source.id); } } @@ -97,7 +96,7 @@ export function Form({ }); if (onSuccess) { - onSuccess(source._id); + onSuccess(source.id); } } @@ -114,8 +113,7 @@ export function Form({ const content = formData.get('content') as string; await addDocsToDataSource({ - projectId, - sourceId: source._id, + sourceId: source.id, docData: [{ name: 'text', data: { @@ -126,7 +124,7 @@ export function Form({ }); if (onSuccess) { - onSuccess(source._id); + onSuccess(source.id); } } diff --git a/apps/rowboat/app/projects/[projectId]/workflow/app.tsx b/apps/rowboat/app/projects/[projectId]/workflow/app.tsx index 0cf6066b..e65bf822 100644 --- a/apps/rowboat/app/projects/[projectId]/workflow/app.tsx +++ b/apps/rowboat/app/projects/[projectId]/workflow/app.tsx @@ -1,6 +1,6 @@ "use client"; import { MCPServer, WithStringId } from "../../../lib/types/types"; -import { DataSource } from "../../../lib/types/datasource_types"; +import { DataSource } from "@/src/entities/models/data-source"; import { Project } from "../../../lib/types/project_types"; import { z } from "zod"; import { useCallback, useEffect, useState } from "react"; @@ -32,7 +32,7 @@ export function App({ }) { const [mode, setMode] = useState<'draft' | 'live'>('draft'); const [project, setProject] = useState> | null>(null); - const [dataSources, setDataSources] = useState>[] | null>(null); + const 
[dataSources, setDataSources] = useState[] | null>(null); const [projectConfig, setProjectConfig] = useState | null>(null); const [loading, setLoading] = useState(false); const [eligibleModels, setEligibleModels] = useState | "*">("*"); diff --git a/apps/rowboat/app/projects/[projectId]/workflow/components/DataSourcesModal.tsx b/apps/rowboat/app/projects/[projectId]/workflow/components/DataSourcesModal.tsx index 6ffeeada..e3760ca5 100644 --- a/apps/rowboat/app/projects/[projectId]/workflow/components/DataSourcesModal.tsx +++ b/apps/rowboat/app/projects/[projectId]/workflow/components/DataSourcesModal.tsx @@ -6,8 +6,7 @@ import { Button } from '@/components/ui/button'; import { Form } from '../../sources/new/form'; import { FilesSource } from '../../sources/components/files-source'; import { getDataSource } from '../../../../actions/data-source.actions'; -import { WithStringId } from '../../../../lib/types/types'; -import { DataSource } from '../../../../lib/types/datasource_types'; +import { DataSource } from "@/src/entities/models/data-source"; import { z } from 'zod'; interface DataSourcesModalProps { @@ -30,11 +29,11 @@ export function DataSourcesModal({ useRagScraping }: DataSourcesModalProps) { const [currentView, setCurrentView] = useState<'form' | 'upload'>('form'); - const [createdSource, setCreatedSource] = useState> | null>(null); + const [createdSource, setCreatedSource] = useState | null>(null); const handleDataSourceCreated = async (sourceId: string) => { // Get the created data source - const source = await getDataSource(projectId, sourceId); + const source = await getDataSource(sourceId); // If it's a files data source, show the upload interface if (source.data.type === 'files_local' || source.data.type === 'files_s3') { @@ -93,7 +92,6 @@ export function DataSourcesModal({ ) : ( createdSource && ( []; prompts: z.infer[]; pipelines: z.infer[]; - dataSources: WithStringId>[]; + dataSources: z.infer[]; workflow: z.infer; selectedEntity: { type: "agent" 
| "tool" | "prompt" | "datasource" | "pipeline" | "visualise"; @@ -1071,14 +1071,14 @@ export const EntityList = forwardRef< className={clsx( "flex items-center gap-2 px-3 py-2 rounded-md min-h-[24px] cursor-pointer", { - "bg-indigo-50 dark:bg-indigo-950/30": selectedEntity?.type === "datasource" && selectedEntity.name === dataSource._id, - "hover:bg-zinc-50 dark:hover:bg-zinc-800": !(selectedEntity?.type === "datasource" && selectedEntity.name === dataSource._id) + "bg-indigo-50 dark:bg-indigo-950/30": selectedEntity?.type === "datasource" && selectedEntity.name === dataSource.id, + "hover:bg-zinc-50 dark:hover:bg-zinc-800": !(selectedEntity?.type === "datasource" && selectedEntity.name === dataSource.id) } )} - onClick={() => handleSelectDataSource(dataSource._id)} + onClick={() => handleSelectDataSource(dataSource.id)} >
@@ -1097,7 +1097,7 @@ export const EntityList = forwardRef< name={dataSource.name} onDelete={async () => { if (window.confirm(`Are you sure you want to delete the data source "${dataSource.name}"?`)) { - await deleteDataSource(projectId, dataSource._id); + await deleteDataSource(dataSource.id); onDataSourcesUpdated?.(); } }} diff --git a/apps/rowboat/app/projects/[projectId]/workflow/workflow_editor.tsx b/apps/rowboat/app/projects/[projectId]/workflow/workflow_editor.tsx index 174b42b3..8f26ec38 100644 --- a/apps/rowboat/app/projects/[projectId]/workflow/workflow_editor.tsx +++ b/apps/rowboat/app/projects/[projectId]/workflow/workflow_editor.tsx @@ -2,7 +2,7 @@ import React, { useReducer, Reducer, useState, useCallback, useEffect, useRef, createContext, useContext } from "react"; import { MCPServer, Message, WithStringId } from "../../../lib/types/types"; import { Workflow, WorkflowTool, WorkflowPrompt, WorkflowAgent, WorkflowPipeline } from "../../../lib/types/workflow_types"; -import { DataSource } from "../../../lib/types/datasource_types"; +import { DataSource } from "@/src/entities/models/data-source"; import { Project } from "../../../lib/types/project_types"; import { produce, applyPatches, enablePatches, produceWithPatches, Patch } from 'immer'; import { AgentConfig } from "../entities/agent_config"; @@ -821,7 +821,7 @@ export function WorkflowEditor({ chatWidgetHost, }: { projectId: string; - dataSources: WithStringId>[]; + dataSources: z.infer[]; workflow: z.infer; useRag: boolean; useRagUploads: boolean; diff --git a/apps/rowboat/app/scripts/rag_files_worker.ts b/apps/rowboat/app/scripts/rag-worker.ts similarity index 53% rename from apps/rowboat/app/scripts/rag_files_worker.ts rename to apps/rowboat/app/scripts/rag-worker.ts index f310663a..c57457fb 100644 --- a/apps/rowboat/app/scripts/rag_files_worker.ts +++ b/apps/rowboat/app/scripts/rag-worker.ts @@ -1,66 +1,50 @@ import '../lib/loadenv'; import { RecursiveCharacterTextSplitter } from 
"@langchain/textsplitters"; +import FirecrawlApp from '@mendable/firecrawl-js'; import { z } from 'zod'; -import { dataSourceDocsCollection, dataSourcesCollection, projectsCollection, usersCollection } from '../lib/mongodb'; -import { EmbeddingRecord, DataSourceDoc, DataSource } from "../lib/types/datasource_types"; -import { ObjectId, WithId } from 'mongodb'; +import { EmbeddingRecord } from "../lib/types/datasource_types"; +import { DataSourceDoc } from "@/src/entities/models/data-source-doc"; import { embedMany, generateText } from 'ai'; import { embeddingModel } from '../lib/embedding'; import { qdrantClient } from '../lib/qdrant'; import { PrefixLogger } from "../lib/utils"; import { GoogleGenerativeAI } from "@google/generative-ai"; -import { GetObjectCommand } from "@aws-sdk/client-s3"; -import { uploadsS3Client } from '../lib/uploads_s3_client'; -import fs from 'fs/promises'; import crypto from 'crypto'; -import path from 'path'; import { createOpenAI } from '@ai-sdk/openai'; import { USE_BILLING, USE_GEMINI_FILE_PARSING } from '../lib/feature_flags'; import { authorize, getCustomerIdForProject, logUsage, UsageTracker } from '../lib/billing'; import { BillingError } from '@/src/entities/errors/common'; +import { DataSource } from '@/src/entities/models/data-source'; +import { IDataSourcesRepository } from '@/src/application/repositories/data-sources.repository.interface'; +import { IDataSourceDocsRepository } from '@/src/application/repositories/data-source-docs.repository.interface'; +import { IUploadsStorageService } from '@/src/application/services/uploads-storage.service.interface'; +import { container } from '@/di/container'; const FILE_PARSING_PROVIDER_API_KEY = process.env.FILE_PARSING_PROVIDER_API_KEY || process.env.OPENAI_API_KEY || ''; const FILE_PARSING_PROVIDER_BASE_URL = process.env.FILE_PARSING_PROVIDER_BASE_URL || undefined; const FILE_PARSING_MODEL = process.env.FILE_PARSING_MODEL || 'gpt-4o'; +const dataSourcesRepository = 
container.resolve('dataSourcesRepository'); +const dataSourceDocsRepository = container.resolve('dataSourceDocsRepository'); +const localUploadsStorageService = container.resolve('localUploadsStorageService'); +const s3UploadsStorageService = container.resolve('s3UploadsStorageService'); + +const firecrawl = new FirecrawlApp({ apiKey: process.env.FIRECRAWL_API_KEY || "test" }); + const openai = createOpenAI({ apiKey: FILE_PARSING_PROVIDER_API_KEY, baseURL: FILE_PARSING_PROVIDER_BASE_URL, }); -const UPLOADS_DIR = process.env.RAG_UPLOADS_DIR || '/uploads'; - const splitter = new RecursiveCharacterTextSplitter({ separators: ['\n\n', '\n', '. ', '.', ''], chunkSize: 1024, chunkOverlap: 20, }); -const second = 1000; -const minute = 60 * second; -const hour = 60 * minute; -const day = 24 * hour; - // Configure Google Gemini API const genAI = new GoogleGenerativeAI(process.env.GOOGLE_API_KEY || ''); -async function getLocalFileContent(path: string): Promise { - return await fs.readFile(path); -} - -async function getS3FileContent(s3Key: string): Promise { - const command = new GetObjectCommand({ - Bucket: process.env.RAG_UPLOADS_S3_BUCKET, - Key: s3Key, - }); - const response = await uploadsS3Client.send(command); - const chunks: Uint8Array[] = []; - for await (const chunk of response.Body as any) { - chunks.push(chunk); - } - return Buffer.concat(chunks); -} - async function retryable(fn: () => Promise, maxAttempts: number = 3): Promise { let attempts = 0; while (true) { @@ -75,19 +59,23 @@ async function retryable(fn: () => Promise, maxAttempts: number = 3): Prom } } -async function runProcessPipeline(_logger: PrefixLogger, usageTracker: UsageTracker, job: WithId>, doc: WithId> & { data: { type: "file_local" | "file_s3" } }) { +async function runProcessFilePipeline(_logger: PrefixLogger, usageTracker: UsageTracker, job: z.infer, doc: z.infer) { + if (doc.data.type !== 'file_local' && doc.data.type !== 'file_s3') { + throw new Error("Invalid data source type"); + } + 
const logger = _logger - .child(doc._id.toString()) + .child(doc.id) .child(doc.name); // Get file content let fileData: Buffer; if (doc.data.type === 'file_local') { logger.log("Fetching file from local"); - fileData = await getLocalFileContent(path.join(UPLOADS_DIR, doc._id.toString())); + fileData = await localUploadsStorageService.getFileContents(doc.id); } else { logger.log("Fetching file from S3"); - fileData = await getS3FileContent(doc.data.s3Key); + fileData = await s3UploadsStorageService.getFileContents(doc.id); } let markdown = ""; @@ -167,8 +155,8 @@ async function runProcessPipeline(_logger: PrefixLogger, usageTracker: UsageTrac vector: embedding, payload: { projectId: job.projectId, - sourceId: job._id.toString(), - docId: doc._id.toString(), + sourceId: job.id, + docId: doc.id, content: splits[i].pageContent, title: doc.name, name: doc.name, @@ -180,21 +168,136 @@ async function runProcessPipeline(_logger: PrefixLogger, usageTracker: UsageTrac // store content in doc record logger.log("Storing content in doc record"); - await dataSourceDocsCollection.updateOne({ - _id: doc._id, - version: doc.version, - }, { - $set: { - content: markdown, - status: "ready", - lastUpdatedAt: new Date().toISOString(), - } + await dataSourceDocsRepository.updateByVersion(doc.id, doc.version, { + content: markdown, + status: "ready", }); } -async function runDeletionPipeline(_logger: PrefixLogger, job: WithId>, doc: WithId>): Promise { +async function runScrapePipeline(_logger: PrefixLogger, usageTracker: UsageTracker, job: z.infer, doc: z.infer) { const logger = _logger - .child(doc._id.toString()) + .child(doc.id) + .child(doc.name); + + // scrape the url using firecrawl + logger.log("Scraping using Firecrawl"); + const scrapeResult = await retryable(async () => { + if (doc.data.type !== 'url') { + throw new Error("Invalid data source type"); + } + const scrapeResult = await firecrawl.scrapeUrl(doc.data.url, { + formats: ['markdown'], + onlyMainContent: true, + 
excludeTags: ['script', 'style', 'noscript', 'img',] + }); + if (!scrapeResult.success) { + throw new Error("Unable to scrape URL: " + doc.data.url); + } + return scrapeResult; + }, 3); // Retry up to 3 times + usageTracker.track({ + type: "FIRECRAWL_SCRAPE_USAGE", + context: "rag.urls.firecrawl_scrape", + }); + + // split into chunks + logger.log("Splitting into chunks"); + const splits = await splitter.createDocuments([scrapeResult.markdown || '']); + + // generate embeddings + logger.log("Generating embeddings"); + const { embeddings, usage } = await embedMany({ + model: embeddingModel, + values: splits.map((split) => split.pageContent) + }); + usageTracker.track({ + type: "EMBEDDING_MODEL_USAGE", + modelName: embeddingModel.modelId, + tokens: usage.tokens, + context: "rag.urls.embedding_usage", + }); + + // store embeddings in qdrant + logger.log("Storing embeddings in Qdrant"); + const points: z.infer[] = embeddings.map((embedding, i) => ({ + id: crypto.randomUUID(), + vector: embedding, + payload: { + projectId: job.projectId, + sourceId: job.id, + docId: doc.id, + content: splits[i].pageContent, + title: scrapeResult.metadata?.title || '', + name: doc.name, + }, + })); + await qdrantClient.upsert("embeddings", { + points, + }); + + // store scraped markdown in doc record + logger.log("Storing scraped markdown in doc record"); + await dataSourceDocsRepository.updateByVersion(doc.id, doc.version, { + content: scrapeResult.markdown, + status: "ready", + }); +} + +async function runProcessTextPipeline(_logger: PrefixLogger, usageTracker: UsageTracker, job: z.infer, doc: z.infer) { + const logger = _logger + .child(doc.id) + .child(doc.name); + + if (doc.data.type !== 'text') { + throw new Error("Invalid data source type"); + } + + // split into chunks + logger.log("Splitting into chunks"); + const splits = await splitter.createDocuments([doc.data.content]); + + // generate embeddings + logger.log("Generating embeddings"); + const { embeddings, usage } = await 
embedMany({ + model: embeddingModel, + values: splits.map((split) => split.pageContent) + }); + usageTracker.track({ + type: "EMBEDDING_MODEL_USAGE", + modelName: embeddingModel.modelId, + tokens: usage.tokens, + context: "rag.text.embedding_usage", + }); + + // store embeddings in qdrant + logger.log("Storing embeddings in Qdrant"); + const points: z.infer[] = embeddings.map((embedding, i) => ({ + id: crypto.randomUUID(), + vector: embedding, + payload: { + projectId: job.projectId, + sourceId: job.id, + docId: doc.id, + content: splits[i].pageContent, + title: doc.name, + name: doc.name, + }, + })); + await qdrantClient.upsert("embeddings", { + points, + }); + + // store content in doc record + logger.log("Storing content in doc record"); + await dataSourceDocsRepository.updateByVersion(doc.id, doc.version, { + content: doc.data.content, + status: "ready", + }); +} + +async function runDeletionPipeline(_logger: PrefixLogger, job: z.infer, doc: z.infer): Promise { + const logger = _logger + .child(doc.id) .child(doc.name); // Delete embeddings from qdrant @@ -211,13 +314,13 @@ async function runDeletionPipeline(_logger: PrefixLogger, job: WithId { while (true) { const now = Date.now(); - let job: WithId> | null = null; + let job: z.infer | null = null; // first try to find a job that needs deleting - job = await dataSourcesCollection.findOneAndUpdate({ - status: "deleted", - "data.type": { $in: ["files_local", "files_s3"] }, - $or: [ - { attempts: { $exists: false } }, - { attempts: { $lte: 3 } } - ] - }, { $set: { lastAttemptAt: new Date().toISOString() }, $inc: { attempts: 1 } }, { returnDocument: "after", sort: { createdAt: 1 } }); + job = await dataSourcesRepository.pollDeleteJob(); if (job === null) { - - job = await dataSourcesCollection.findOneAndUpdate( - { - $and: [ - { 'data.type': { $in: ["files_local", "files_s3"] } }, - { - $or: [ - // if the job has never been attempted - { - status: "pending", - attempts: 0, - }, - // if the job was attempted but 
wasn't completed in the last hour - { - status: "pending", - lastAttemptAt: { $lt: new Date(now - 1 * hour).toISOString() }, - }, - // if the job errored out but hasn't been retried 3 times yet - { - status: "error", - attempts: { $lt: 3 }, - }, - // if the job errored out but hasn't been retried in the last 5 minutes - { - status: "error", - lastAttemptAt: { $lt: new Date(now - 1 * hour).toISOString() }, - }, - ] - } - ] - }, - { - $set: { - status: "pending", - lastAttemptAt: new Date().toISOString(), - }, - $inc: { - attempts: 1 - }, - }, - { returnDocument: "after", sort: { createdAt: 1 } } - ); + job = await dataSourcesRepository.pollPendingJob(); } if (job === null) { // if no doc found, sleep for a bit and start again - await new Promise(resolve => setTimeout(resolve, 5 * second)); + await new Promise(resolve => setTimeout(resolve, 5 * 1000)); continue; } - const logger = new PrefixLogger(`${job._id.toString()}-${job.version}`); - logger.log(`Starting job ${job._id}. Type: ${job.data.type}. Status: ${job.status}`); + const logger = new PrefixLogger(`${job.id}-${job.version}`); + logger.log(`Starting job ${job.id}. Type: ${job.data.type}. 
Status: ${job.status}`); let errors = false; try { - if (job.data.type !== 'files_local' && job.data.type !== 'files_s3') { - throw new Error("Invalid data source type"); - } - if (job.status === "deleted") { // delete all embeddings for this source logger.log("Deleting embeddings from Qdrant"); @@ -312,32 +363,33 @@ async function runDeletionPipeline(_logger: PrefixLogger, job: WithId> & { data: { type: "file_local" | "file_s3" } }; const usageTracker = new UsageTracker(); try { - await runProcessPipeline(logger, usageTracker, job, ldoc); + if (doc.data.type === "file_local" || doc.data.type === "file_s3") { + await runProcessFilePipeline(logger, usageTracker, job, doc); + } else if (doc.data.type === "text") { + await runProcessTextPipeline(logger, usageTracker, job, doc); + } else if (doc.data.type === "url") { + await runScrapePipeline(logger, usageTracker, job, doc); + } } catch (e: any) { errors = true; logger.log("Error processing doc:", e); - await dataSourceDocsCollection.updateOne({ - _id: doc._id, - version: doc.version, - }, { - $set: { - status: "error", - error: e.message, - } + await dataSourceDocsRepository.updateByVersion(doc.id, doc.version, { + status: "error", + error: e.message, }); } finally { // log usage in billing @@ -392,10 +444,15 @@ async function runDeletionPipeline(_logger: PrefixLogger, job: WithId>, doc: WithId>) { - const logger = _logger - .child(doc._id.toString()) - .child(doc.name); - - if (doc.data.type !== 'text') { - throw new Error("Invalid data source type"); - } - - // split into chunks - logger.log("Splitting into chunks"); - const splits = await splitter.createDocuments([doc.data.content]); - - // generate embeddings - logger.log("Generating embeddings"); - const { embeddings, usage } = await embedMany({ - model: embeddingModel, - values: splits.map((split) => split.pageContent) - }); - usageTracker.track({ - type: "EMBEDDING_MODEL_USAGE", - modelName: embeddingModel.modelId, - tokens: usage.tokens, - context: 
"rag.text.embedding_usage", - }); - - // store embeddings in qdrant - logger.log("Storing embeddings in Qdrant"); - const points: z.infer[] = embeddings.map((embedding, i) => ({ - id: crypto.randomUUID(), - vector: embedding, - payload: { - projectId: job.projectId, - sourceId: job._id.toString(), - docId: doc._id.toString(), - content: splits[i].pageContent, - title: doc.name, - name: doc.name, - }, - })); - await qdrantClient.upsert("embeddings", { - points, - }); - - // store content in doc record - logger.log("Storing content in doc record"); - await dataSourceDocsCollection.updateOne({ - _id: doc._id, - version: doc.version, - }, { - $set: { - content: doc.data.content, - status: "ready", - lastUpdatedAt: new Date().toISOString(), - } - }); -} - -async function runDeletionPipeline(_logger: PrefixLogger, job: WithId>, doc: WithId>): Promise { - const logger = _logger - .child(doc._id.toString()) - .child(doc.name); - - // Delete embeddings from qdrant - logger.log("Deleting embeddings from Qdrant"); - await qdrantClient.delete("embeddings", { - filter: { - must: [ - { - key: "projectId", - match: { - value: job.projectId, - } - }, - { - key: "sourceId", - match: { - value: job._id.toString(), - } - }, - { - key: "docId", - match: { - value: doc._id.toString(), - } - } - ], - }, - }); - - // Delete docs from db - logger.log("Deleting doc from db"); - await dataSourceDocsCollection.deleteOne({ _id: doc._id }); -} - -// fetch next job from mongodb -(async () => { - while (true) { - const now = Date.now(); - let job: WithId> | null = null; - - // first try to find a job that needs deleting - job = await dataSourcesCollection.findOneAndUpdate({ - status: "deleted", - "data.type": "text", - $or: [ - { attempts: { $exists: false } }, - { attempts: { $lte: 3 } } - ] - }, { $set: { lastAttemptAt: new Date().toISOString() }, $inc: { attempts: 1 } }, { returnDocument: "after", sort: { createdAt: 1 } }); - - if (job === null) { - job = await 
dataSourcesCollection.findOneAndUpdate( - { - $and: [ - { 'data.type': { $eq: "text" } }, - { - $or: [ - // if the job has never been attempted - { - status: "pending", - attempts: 0, - }, - // if the job was attempted but wasn't completed in the last hour - { - status: "pending", - lastAttemptAt: { $lt: new Date(now - 1 * hour).toISOString() }, - }, - // if the job errored out but hasn't been retried 3 times yet - { - status: "error", - attempts: { $lt: 3 }, - }, - // if the job errored out but hasn't been retried in the last 5 minutes - { - status: "error", - lastAttemptAt: { $lt: new Date(now - 1 * hour).toISOString() }, - }, - ] - } - ] - }, - { - $set: { - status: "pending", - lastAttemptAt: new Date().toISOString(), - }, - $inc: { - attempts: 1 - }, - }, - { returnDocument: "after", sort: { createdAt: 1 } } - ); - } - - if (job === null) { - // if no doc found, sleep for a bit and start again - await new Promise(resolve => setTimeout(resolve, 5 * second)); - continue; - } - - const logger = new PrefixLogger(`${job._id.toString()}-${job.version}`); - logger.log(`Starting job ${job._id}. Type: ${job.data.type}. 
Status: ${job.status}`); - let errors = false; - - try { - if (job.data.type !== 'text') { - throw new Error("Invalid data source type"); - } - - if (job.status === "deleted") { - // delete all embeddings for this source - logger.log("Deleting embeddings from Qdrant"); - await qdrantClient.delete("embeddings", { - filter: { - must: [ - { key: "projectId", match: { value: job.projectId } }, - { key: "sourceId", match: { value: job._id.toString() } }, - ], - }, - }); - - // delete all docs for this source - logger.log("Deleting docs from db"); - await dataSourceDocsCollection.deleteMany({ - sourceId: job._id.toString(), - }); - - // delete the source record from db - logger.log("Deleting source record from db"); - await dataSourcesCollection.deleteOne({ - _id: job._id, - }); - - logger.log("Job deleted"); - continue; - } - - // fetch docs that need updating - const pendingDocs = await dataSourceDocsCollection.find({ - sourceId: job._id.toString(), - status: { $in: ["pending", "error"] }, - }).toArray(); - - logger.log(`Found ${pendingDocs.length} docs to process`); - - // fetch project, user and billing data - let billingCustomerId: string | null = null; - if (USE_BILLING) { - try { - billingCustomerId = await getCustomerIdForProject(job.projectId); - } catch (e) { - logger.log("Unable to fetch billing customer id:", e); - throw new Error("Unable to fetch billing customer id"); - } - } - - // for each doc - for (const doc of pendingDocs) { - // authorize with billing - if (USE_BILLING && billingCustomerId) { - const authResponse = await authorize(billingCustomerId, { - type: "use_credits", - }); - - if ('error' in authResponse) { - throw new BillingError(authResponse.error || "Unknown billing error") - } - } - - const usageTracker = new UsageTracker(); - try { - await runProcessPipeline(logger, usageTracker, job, doc); - } catch (e: any) { - errors = true; - logger.log("Error processing doc:", e); - await dataSourceDocsCollection.updateOne({ - _id: doc._id, - 
version: doc.version, - }, { - $set: { - status: "error", - error: e.message, - } - }); - } finally { - // log usage in billing - if (USE_BILLING && billingCustomerId) { - await logUsage(billingCustomerId, { - items: usageTracker.flush(), - }); - } - } - } - - // fetch docs that need to be deleted - const deletedDocs = await dataSourceDocsCollection.find({ - sourceId: job._id.toString(), - status: "deleted", - }).toArray(); - - logger.log(`Found ${deletedDocs.length} docs to delete`); - - for (const doc of deletedDocs) { - try { - await runDeletionPipeline(logger, job, doc); - } catch (e: any) { - errors = true; - logger.log("Error deleting doc:", e); - await dataSourceDocsCollection.updateOne({ - _id: doc._id, - version: doc.version, - }, { - $set: { - status: "error", - error: e.message, - } - }); - } - } - } catch (e) { - if (e instanceof BillingError) { - logger.log("Billing error:", e.message); - await dataSourcesCollection.updateOne({ - _id: job._id, - version: job.version, - }, { - $set: { - status: "error", - billingError: e.message, - lastUpdatedAt: new Date().toISOString(), - } - }); - } - logger.log("Error processing job; will retry:", e); - await dataSourcesCollection.updateOne({ - _id: job._id, - version: job.version, - }, { - $set: { - status: "error", - lastUpdatedAt: new Date().toISOString(), - } - }); - continue; - } - - // mark job as complete - logger.log("Marking job as completed..."); - await dataSourcesCollection.updateOne({ - _id: job._id, - version: job.version, - }, { - $set: { - status: errors ? "error" : "ready", - ...(errors ? 
{ error: "There were some errors processing this job" } : {}), - } - }); - } -})(); diff --git a/apps/rowboat/app/scripts/rag_urls_worker.ts b/apps/rowboat/app/scripts/rag_urls_worker.ts deleted file mode 100644 index a79da5af..00000000 --- a/apps/rowboat/app/scripts/rag_urls_worker.ts +++ /dev/null @@ -1,381 +0,0 @@ -import '../lib/loadenv'; -import FirecrawlApp from '@mendable/firecrawl-js'; -import { RecursiveCharacterTextSplitter } from "@langchain/textsplitters"; -import { z } from 'zod'; -import { dataSourceDocsCollection, dataSourcesCollection } from '../lib/mongodb'; -import { EmbeddingRecord, DataSourceDoc, DataSource } from "../lib/types/datasource_types"; -import { WithId } from 'mongodb'; -import { embedMany } from 'ai'; -import { embeddingModel } from '../lib/embedding'; -import { qdrantClient } from '../lib/qdrant'; -import { PrefixLogger } from "../lib/utils"; -import crypto from 'crypto'; -import { USE_BILLING } from '../lib/feature_flags'; -import { authorize, getCustomerIdForProject, logUsage, UsageTracker } from '../lib/billing'; -import { BillingError } from '@/src/entities/errors/common'; - -const firecrawl = new FirecrawlApp({ apiKey: process.env.FIRECRAWL_API_KEY }); - -const splitter = new RecursiveCharacterTextSplitter({ - separators: ['\n\n', '\n', '. 
', '.', ''], - chunkSize: 1024, - chunkOverlap: 20, -}); - -const second = 1000; -const minute = 60 * second; -const hour = 60 * minute; -const day = 24 * hour; - -async function retryable(fn: () => Promise, maxAttempts: number = 3): Promise { - let attempts = 0; - while (true) { - try { - return await fn(); - } catch (e) { - attempts++; - if (attempts >= maxAttempts) { - throw e; - } - } - } -} - -async function runScrapePipeline(_logger: PrefixLogger, usageTracker: UsageTracker, job: WithId>, doc: WithId>) { - const logger = _logger - .child(doc._id.toString()) - .child(doc.name); - - // scrape the url using firecrawl - logger.log("Scraping using Firecrawl"); - const scrapeResult = await retryable(async () => { - if (doc.data.type !== 'url') { - throw new Error("Invalid data source type"); - } - const scrapeResult = await firecrawl.scrapeUrl(doc.data.url, { - formats: ['markdown'], - onlyMainContent: true, - excludeTags: ['script', 'style', 'noscript', 'img',] - }); - if (!scrapeResult.success) { - throw new Error("Unable to scrape URL: " + doc.data.url); - } - return scrapeResult; - }, 3); // Retry up to 3 times - usageTracker.track({ - type: "FIRECRAWL_SCRAPE_USAGE", - context: "rag.urls.firecrawl_scrape", - }); - - // split into chunks - logger.log("Splitting into chunks"); - const splits = await splitter.createDocuments([scrapeResult.markdown || '']); - - // generate embeddings - logger.log("Generating embeddings"); - const { embeddings, usage } = await embedMany({ - model: embeddingModel, - values: splits.map((split) => split.pageContent) - }); - usageTracker.track({ - type: "EMBEDDING_MODEL_USAGE", - modelName: embeddingModel.modelId, - tokens: usage.tokens, - context: "rag.urls.embedding_usage", - }); - - // store embeddings in qdrant - logger.log("Storing embeddings in Qdrant"); - const points: z.infer[] = embeddings.map((embedding, i) => ({ - id: crypto.randomUUID(), - vector: embedding, - payload: { - projectId: job.projectId, - sourceId: 
job._id.toString(), - docId: doc._id.toString(), - content: splits[i].pageContent, - title: scrapeResult.metadata?.title || '', - name: doc.name, - }, - })); - await qdrantClient.upsert("embeddings", { - points, - }); - - // store scraped markdown in doc record - logger.log("Storing scraped markdown in doc record"); - await dataSourceDocsCollection.updateOne({ - _id: doc._id, - version: doc.version, - }, { - $set: { - content: scrapeResult.markdown, - status: "ready", - lastUpdatedAt: new Date().toISOString(), - } - }); -} - -async function runDeletionPipeline(_logger: PrefixLogger, job: WithId>, doc: WithId>): Promise { - const logger = _logger - .child(doc._id.toString()) - .child(doc.name); - - // Delete embeddings from qdrant - logger.log("Deleting embeddings from Qdrant"); - await qdrantClient.delete("embeddings", { - filter: { - must: [ - { - key: "projectId", - match: { - value: job.projectId, - } - }, - { - key: "sourceId", - match: { - value: job._id.toString(), - } - }, - { - key: "docId", - match: { - value: doc._id.toString(), - } - } - ], - }, - }); - - // Delete docs from db - logger.log("Deleting doc from db"); - await dataSourceDocsCollection.deleteOne({ _id: doc._id }); -} - -// fetch next job from mongodb -(async () => { - while (true) { - const now = Date.now(); - let job: WithId> | null = null; - - // first try to find a job that needs deleting - job = await dataSourcesCollection.findOneAndUpdate({ - status: "deleted", - "data.type": "urls", - $or: [ - { attempts: { $exists: false } }, - { attempts: { $lte: 3 } } - ] - }, { $set: { lastAttemptAt: new Date().toISOString() }, $inc: { attempts: 1 } }, { returnDocument: "after", sort: { createdAt: 1 } }); - - if (job === null) { - - job = await dataSourcesCollection.findOneAndUpdate( - { - $and: [ - { 'data.type': { $eq: "urls" } }, - { - $or: [ - // if the job has never been attempted - { - status: "pending", - attempts: 0, - }, - // if the job was attempted but wasn't completed in the last hour - 
{ - status: "pending", - lastAttemptAt: { $lt: new Date(now - 1 * hour).toISOString() }, - }, - // if the job errored out but hasn't been retried 3 times yet - { - status: "error", - attempts: { $lt: 3 }, - }, - // if the job errored out but hasn't been retried in the last 5 minutes - { - status: "error", - lastAttemptAt: { $lt: new Date(now - 1 * hour).toISOString() }, - }, - ] - } - ] - }, - { - $set: { - status: "pending", - lastAttemptAt: new Date().toISOString(), - }, - $inc: { - attempts: 1 - }, - }, - { returnDocument: "after", sort: { createdAt: 1 } } - ); - } - - if (job === null) { - // if no doc found, sleep for a bit and start again - await new Promise(resolve => setTimeout(resolve, 5 * second)); - continue; - } - - const logger = new PrefixLogger(`${job._id.toString()}-${job.version}`); - logger.log(`Starting job ${job._id}. Type: ${job.data.type}. Status: ${job.status}`); - let errors = false; - - try { - if (job.data.type !== 'urls') { - throw new Error("Invalid data source type"); - } - - if (job.status === "deleted") { - // delete all embeddings for this source - logger.log("Deleting embeddings from Qdrant"); - await qdrantClient.delete("embeddings", { - filter: { - must: [ - { key: "projectId", match: { value: job.projectId } }, - { key: "sourceId", match: { value: job._id.toString() } }, - ], - }, - }); - - // delete all docs for this source - logger.log("Deleting docs from db"); - await dataSourceDocsCollection.deleteMany({ - sourceId: job._id.toString(), - }); - - // delete the source record from db - logger.log("Deleting source record from db"); - await dataSourcesCollection.deleteOne({ - _id: job._id, - }); - - logger.log("Job deleted"); - continue; - } - - // fetch docs that need updating - const pendingDocs = await dataSourceDocsCollection.find({ - sourceId: job._id.toString(), - status: { $in: ["pending", "error"] }, - }).toArray(); - - logger.log(`Found ${pendingDocs.length} docs to process`); - - // fetch project, user and billing data - 
let billingCustomerId: string | null = null; - if (USE_BILLING) { - try { - billingCustomerId = await getCustomerIdForProject(job.projectId); - } catch (e) { - logger.log("Unable to fetch billing customer id:", e); - throw new Error("Unable to fetch billing customer id"); - } - } - - // for each doc - for (const doc of pendingDocs) { - // authorize with billing - if (USE_BILLING && billingCustomerId) { - const authResponse = await authorize(billingCustomerId, { - type: "use_credits", - }); - - if ('error' in authResponse) { - throw new BillingError(authResponse.error || "Unknown billing error") - } - } - - const usageTracker = new UsageTracker(); - try { - await runScrapePipeline(logger, usageTracker, job, doc); - } catch (e: any) { - errors = true; - logger.log("Error processing doc:", e); - await dataSourceDocsCollection.updateOne({ - _id: doc._id, - version: doc.version, - }, { - $set: { - status: "error", - error: e.message, - } - }); - } finally { - // log usage in billing - if (USE_BILLING && billingCustomerId) { - await logUsage(billingCustomerId, { - items: usageTracker.flush(), - }); - } - } - } - - // fetch docs that need to be deleted - const deletedDocs = await dataSourceDocsCollection.find({ - sourceId: job._id.toString(), - status: "deleted", - }).toArray(); - - logger.log(`Found ${deletedDocs.length} docs to delete`); - - for (const doc of deletedDocs) { - try { - await runDeletionPipeline(logger, job, doc); - } catch (e: any) { - errors = true; - logger.log("Error deleting doc:", e); - await dataSourceDocsCollection.updateOne({ - _id: doc._id, - version: doc.version, - }, { - $set: { - status: "error", - error: e.message, - } - }); - } - } - } catch (e) { - if (e instanceof BillingError) { - logger.log("Billing error:", e.message); - await dataSourcesCollection.updateOne({ - _id: job._id, - version: job.version, - }, { - $set: { - status: "error", - billingError: e.message, - lastUpdatedAt: new Date().toISOString(), - } - }); - } - logger.log("Error 
processing job; will retry:", e); - await dataSourcesCollection.updateOne({ - _id: job._id, - version: job.version, - }, { - $set: { - status: "error", - lastUpdatedAt: new Date().toISOString(), - } - }); - continue; - } - - // mark job as complete - logger.log("Marking job as completed..."); - await dataSourcesCollection.updateOne({ - _id: job._id, - version: job.version, - }, { - $set: { - status: errors ? "error" : "ready", - ...(errors ? { error: "There were some errors processing this job" } : {}), - } - }); - } -})(); \ No newline at end of file diff --git a/apps/rowboat/di/container.ts b/apps/rowboat/di/container.ts index ab40d5db..ae55e52a 100644 --- a/apps/rowboat/di/container.ts +++ b/apps/rowboat/di/container.ts @@ -1,7 +1,13 @@ +import { asClass, createContainer, InjectionMode } from "awilix"; + +// Services +import { RedisPubSubService } from "@/src/infrastructure/services/redis.pub-sub.service"; +import { S3UploadsStorageService } from "@/src/infrastructure/services/s3.uploads-storage.service"; +import { LocalUploadsStorageService } from "@/src/infrastructure/services/local.uploads-storage.service"; + import { RunConversationTurnUseCase } from "@/src/application/use-cases/conversations/run-conversation-turn.use-case"; import { MongoDBConversationsRepository } from "@/src/infrastructure/repositories/mongodb.conversations.repository"; import { RunCachedTurnController } from "@/src/interface-adapters/controllers/conversations/run-cached-turn.controller"; -import { asClass, createContainer, InjectionMode } from "awilix"; import { CreatePlaygroundConversationController } from "@/src/interface-adapters/controllers/conversations/create-playground-conversation.controller"; import { CreateConversationUseCase } from "@/src/application/use-cases/conversations/create-conversation.use-case"; import { RedisCacheService } from "@/src/infrastructure/services/redis.cache.service"; @@ -28,7 +34,6 @@ import { ListComposioTriggerDeploymentsController } from 
"@/src/interface-adapte import { ListComposioTriggerTypesController } from "@/src/interface-adapters/controllers/composio-trigger-deployments/list-composio-trigger-types.controller"; import { DeleteComposioConnectedAccountController } from "@/src/interface-adapters/controllers/composio/delete-composio-connected-account.controller"; import { HandleComposioWebhookRequestController } from "@/src/interface-adapters/controllers/composio/webhook/handle-composio-webhook-request.controller"; -import { RedisPubSubService } from "@/src/infrastructure/services/redis.pub-sub.service"; import { JobsWorker } from "@/src/application/workers/jobs.worker"; import { JobRulesWorker } from "@/src/application/workers/job-rules.worker"; import { ListJobsUseCase } from "@/src/application/use-cases/jobs/list-jobs.use-case"; @@ -72,6 +77,34 @@ import { CreateApiKeyController } from "@/src/interface-adapters/controllers/api import { ListApiKeysController } from "@/src/interface-adapters/controllers/api-keys/list-api-keys.controller"; import { DeleteApiKeyController } from "@/src/interface-adapters/controllers/api-keys/delete-api-key.controller"; +// Data sources +import { MongoDBDataSourcesRepository } from "@/src/infrastructure/repositories/mongodb.data-sources.repository"; +import { MongoDBDataSourceDocsRepository } from "@/src/infrastructure/repositories/mongodb.data-source-docs.repository"; +import { CreateDataSourceUseCase } from "@/src/application/use-cases/data-sources/create-data-source.use-case"; +import { FetchDataSourceUseCase } from "@/src/application/use-cases/data-sources/fetch-data-source.use-case"; +import { ListDataSourcesUseCase } from "@/src/application/use-cases/data-sources/list-data-sources.use-case"; +import { UpdateDataSourceUseCase } from "@/src/application/use-cases/data-sources/update-data-source.use-case"; +import { DeleteDataSourceUseCase } from "@/src/application/use-cases/data-sources/delete-data-source.use-case"; +import { ToggleDataSourceUseCase } from 
"@/src/application/use-cases/data-sources/toggle-data-source.use-case"; +import { CreateDataSourceController } from "@/src/interface-adapters/controllers/data-sources/create-data-source.controller"; +import { FetchDataSourceController } from "@/src/interface-adapters/controllers/data-sources/fetch-data-source.controller"; +import { ListDataSourcesController } from "@/src/interface-adapters/controllers/data-sources/list-data-sources.controller"; +import { UpdateDataSourceController } from "@/src/interface-adapters/controllers/data-sources/update-data-source.controller"; +import { DeleteDataSourceController } from "@/src/interface-adapters/controllers/data-sources/delete-data-source.controller"; +import { ToggleDataSourceController } from "@/src/interface-adapters/controllers/data-sources/toggle-data-source.controller"; +import { AddDocsToDataSourceUseCase } from "@/src/application/use-cases/data-sources/add-docs-to-data-source.use-case"; +import { ListDocsInDataSourceUseCase } from "@/src/application/use-cases/data-sources/list-docs-in-data-source.use-case"; +import { DeleteDocFromDataSourceUseCase } from "@/src/application/use-cases/data-sources/delete-doc-from-data-source.use-case"; +import { RecrawlWebDataSourceUseCase } from "@/src/application/use-cases/data-sources/recrawl-web-data-source.use-case"; +import { GetUploadUrlsForFilesUseCase } from "@/src/application/use-cases/data-sources/get-upload-urls-for-files.use-case"; +import { GetDownloadUrlForFileUseCase } from "@/src/application/use-cases/data-sources/get-download-url-for-file.use-case"; +import { AddDocsToDataSourceController } from "@/src/interface-adapters/controllers/data-sources/add-docs-to-data-source.controller"; +import { ListDocsInDataSourceController } from "@/src/interface-adapters/controllers/data-sources/list-docs-in-data-source.controller"; +import { DeleteDocFromDataSourceController } from "@/src/interface-adapters/controllers/data-sources/delete-doc-from-data-source.controller"; +import { 
RecrawlWebDataSourceController } from "@/src/interface-adapters/controllers/data-sources/recrawl-web-data-source.controller"; +import { GetUploadUrlsForFilesController } from "@/src/interface-adapters/controllers/data-sources/get-upload-urls-for-files.controller"; +import { GetDownloadUrlForFileController } from "@/src/interface-adapters/controllers/data-sources/get-download-url-for-file.controller"; + export const container = createContainer({ injectionMode: InjectionMode.PROXY, strict: true, @@ -87,6 +120,8 @@ container.register({ // --- cacheService: asClass(RedisCacheService).singleton(), pubSubService: asClass(RedisPubSubService).singleton(), + s3UploadsStorageService: asClass(S3UploadsStorageService).singleton(), + localUploadsStorageService: asClass(LocalUploadsStorageService).singleton(), // policies // --- @@ -111,6 +146,35 @@ container.register({ listApiKeysController: asClass(ListApiKeysController).singleton(), deleteApiKeyController: asClass(DeleteApiKeyController).singleton(), + // data sources + // --- + dataSourcesRepository: asClass(MongoDBDataSourcesRepository).singleton(), + dataSourceDocsRepository: asClass(MongoDBDataSourceDocsRepository).singleton(), + createDataSourceUseCase: asClass(CreateDataSourceUseCase).singleton(), + fetchDataSourceUseCase: asClass(FetchDataSourceUseCase).singleton(), + listDataSourcesUseCase: asClass(ListDataSourcesUseCase).singleton(), + updateDataSourceUseCase: asClass(UpdateDataSourceUseCase).singleton(), + deleteDataSourceUseCase: asClass(DeleteDataSourceUseCase).singleton(), + toggleDataSourceUseCase: asClass(ToggleDataSourceUseCase).singleton(), + createDataSourceController: asClass(CreateDataSourceController).singleton(), + fetchDataSourceController: asClass(FetchDataSourceController).singleton(), + listDataSourcesController: asClass(ListDataSourcesController).singleton(), + updateDataSourceController: asClass(UpdateDataSourceController).singleton(), + deleteDataSourceController: 
asClass(DeleteDataSourceController).singleton(), + toggleDataSourceController: asClass(ToggleDataSourceController).singleton(), + addDocsToDataSourceUseCase: asClass(AddDocsToDataSourceUseCase).singleton(), + listDocsInDataSourceUseCase: asClass(ListDocsInDataSourceUseCase).singleton(), + deleteDocFromDataSourceUseCase: asClass(DeleteDocFromDataSourceUseCase).singleton(), + recrawlWebDataSourceUseCase: asClass(RecrawlWebDataSourceUseCase).singleton(), + getUploadUrlsForFilesUseCase: asClass(GetUploadUrlsForFilesUseCase).singleton(), + getDownloadUrlForFileUseCase: asClass(GetDownloadUrlForFileUseCase).singleton(), + addDocsToDataSourceController: asClass(AddDocsToDataSourceController).singleton(), + listDocsInDataSourceController: asClass(ListDocsInDataSourceController).singleton(), + deleteDocFromDataSourceController: asClass(DeleteDocFromDataSourceController).singleton(), + recrawlWebDataSourceController: asClass(RecrawlWebDataSourceController).singleton(), + getUploadUrlsForFilesController: asClass(GetUploadUrlsForFilesController).singleton(), + getDownloadUrlForFileController: asClass(GetDownloadUrlForFileController).singleton(), + // jobs // --- jobsRepository: asClass(MongoDBJobsRepository).singleton(), diff --git a/apps/rowboat/package.json b/apps/rowboat/package.json index 18c0874b..c1f371d8 100644 --- a/apps/rowboat/package.json +++ b/apps/rowboat/package.json @@ -10,9 +10,7 @@ "lint": "next lint", "setupQdrant": "tsx app/scripts/setup_qdrant.ts", "deleteQdrant": "tsx app/scripts/delete_qdrant.ts", - "ragUrlsWorker": "tsx app/scripts/rag_urls_worker.ts", - "ragFilesWorker": "tsx app/scripts/rag_files_worker.ts", - "ragTextWorker": "tsx app/scripts/rag_text_worker.ts", + "rag-worker": "tsx app/scripts/rag-worker.ts", "jobs-worker": "tsx app/scripts/jobs-worker.ts", "job-rules-worker": "tsx app/scripts/job-rules.worker.ts" }, diff --git a/apps/rowboat/src/application/repositories/data-source-docs.repository.interface.ts 
b/apps/rowboat/src/application/repositories/data-source-docs.repository.interface.ts new file mode 100644 index 00000000..dafcdaa1 --- /dev/null +++ b/apps/rowboat/src/application/repositories/data-source-docs.repository.interface.ts @@ -0,0 +1,120 @@ +import { PaginatedList } from "@/src/entities/common/paginated-list"; +import { DataSourceDoc } from "@/src/entities/models/data-source-doc"; +import { z } from "zod"; + +/** + * Schema for creating a new DataSourceDoc. Picks only the name and data fields; projectId and sourceId are passed separately to bulkCreate. + */ +export const CreateSchema = DataSourceDoc.pick({ + name: true, + data: true, +}); + +/** + * Schema for updating an existing DataSourceDoc. Allows updating status, content, and error fields. + */ +export const UpdateSchema = DataSourceDoc + .pick({ + status: true, + content: true, + error: true, + }) + .partial(); + +/** + * Filters schema for listing DataSourceDocs. Supports optional filtering by one or more statuses. + */ +export const ListFiltersSchema = z.object({ + status: z.array(DataSourceDoc.shape.status).optional(), +}).strict(); + +/** + * Repository interface for managing DataSourceDoc entities in the persistence layer. + */ +export interface IDataSourceDocsRepository { + /** + * Creates multiple DataSourceDocs with the provided data. + * @param projectId - The project ID to create the DataSourceDocs for. + * @param sourceId - The source ID to create the DataSourceDocs for. + * @param data - The data required to create a DataSourceDoc (see CreateSchema). + * @returns The IDs of the created DataSourceDocs. + */ + bulkCreate( + projectId: string, + sourceId: string, + data: z.infer[] + ): Promise; + + /** + * Fetches a DataSourceDoc by its unique identifier. + * @param id - The unique ID of the DataSourceDoc. + * @returns The DataSourceDoc object if found, otherwise null. + */ + fetch(id: string): Promise | null>; + + /** + * Fetches multiple DataSourceDocs by their unique identifiers.
+ * @param ids - The unique IDs of the DataSourceDocs. + * @returns The DataSourceDocs objects that were found + */ + bulkFetch(ids: string[]): Promise[]>; + + /** + * Lists DataSourceDocs for a given source, with optional filters, cursor, and limit for pagination. + * @param sourceId - The source ID to list DataSourceDocs for. + * @param filters - Optional filters (see ListFiltersSchema). + * @param cursor - Optional pagination cursor. + * @param limit - Optional maximum number of results to return. + * @returns A paginated list of DataSourceDocs. + */ + list( + sourceId: string, + filters?: z.infer, + cursor?: string, + limit?: number + ): Promise>>>; + + /** + * Marks all docs for a given source as pending. + * @param sourceId - The source ID to mark docs for. + */ + markSourceDocsPending(sourceId: string): Promise; + + /** + * Marks a DataSourceDoc as deleted. + * @param id - The unique ID of the DataSourceDoc to mark as deleted. + */ + markAsDeleted(id: string): Promise; + + /** + * Updates an existing DataSourceDoc by its ID and version with the provided data. + * @param id - The unique ID of the DataSourceDoc to update. + * @param version - Version of the DataSourceDoc for optimistic concurrency control. + * @param data - Fields to update (see UpdateSchema). + * @returns The updated DataSourceDoc object. + */ + updateByVersion( + id: string, + version: number, + data: z.infer + ): Promise>; + + /** + * Deletes a DataSourceDoc by its unique identifier. + * @param id - The unique ID of the DataSourceDoc to delete. + * @returns True if the DataSourceDoc was deleted, false otherwise. + */ + delete(id: string): Promise; + + /** + * Deletes all DataSourceDocs associated with a given source ID. + * @param sourceId - The source ID whose documents should be deleted. + */ + deleteBySourceId(sourceId: string): Promise; + + /** + * Deletes all DataSourceDocs associated with a given project ID. + * @param projectId - The project ID whose documents should be deleted. 
+ */ + deleteByProjectId(projectId: string): Promise; +} \ No newline at end of file diff --git a/apps/rowboat/src/application/repositories/data-sources.repository.interface.ts b/apps/rowboat/src/application/repositories/data-sources.repository.interface.ts new file mode 100644 index 00000000..444c1234 --- /dev/null +++ b/apps/rowboat/src/application/repositories/data-sources.repository.interface.ts @@ -0,0 +1,124 @@ +import { PaginatedList } from "@/src/entities/common/paginated-list"; +import { DataSource } from "@/src/entities/models/data-source"; +import { z } from "zod"; + +/** + * Schema for creating a new DataSource. Picks the projectId, name, description, data, and status fields. + */ +export const CreateSchema = DataSource.pick({ + projectId: true, + name: true, + description: true, + data: true, + status: true, +}); + +/** + * Schema for updating an existing DataSource. Allows updating status, billingError, error, attempts, active, and description fields. + */ +export const UpdateSchema = DataSource + .pick({ + billingError: true, + error: true, + description: true, + status: true, + active: true, + attempts: true, + }) + .partial(); + +/** + * Filters schema for listing DataSources. Supports optional filtering by active and deleted status. + */ +export const ListFiltersSchema = z.object({ + active: z.boolean().optional(), + deleted: z.boolean().optional(), +}).strict(); + +/** + * Schema for the payload of a release operation. + */ +export const ReleasePayloadSchema = DataSource + .pick({ + status: true, + error: true, + billingError: true, + }) + .partial(); + +/** + * Repository interface for managing DataSource entities in the persistence layer. + */ +export interface IDataSourcesRepository { + /** + * Creates a new DataSource with the provided data. + * @param data - The data required to create a DataSource (see CreateSchema). + * @returns The created DataSource object.
+ */ + create(data: z.infer): Promise>; + + /** + * Fetches a DataSource by its unique identifier. + * @param id - The unique ID of the DataSource. + * @returns The DataSource object if found, otherwise null. + */ + fetch(id: string): Promise | null>; + + /** + * Lists DataSources for a given project, with optional filters, cursor, and limit for pagination. + * @param projectId - The project ID to list DataSources for. + * @param filters - Optional filters (see ListFiltersSchema). + * @param cursor - Optional pagination cursor. + * @param limit - Optional maximum number of results to return. + * @returns A paginated list of DataSources. + */ + list( + projectId: string, + filters?: z.infer, + cursor?: string, + limit?: number + ): Promise>>>; + + /** + * Updates an existing DataSource by its ID with the provided data. + * @param id - The unique ID of the DataSource to update. + * @param data - The fields to update (see UpdateSchema). + * @param bumpVersion - Optional flag to increment the version. + * @returns The updated DataSource object. + */ + update(id: string, data: z.infer, bumpVersion?: boolean): Promise>; + + /** + * Deletes a DataSource by its unique identifier. + * @param id - The unique ID of the DataSource to delete. + * @returns True if the DataSource was deleted, false otherwise. + */ + delete(id: string): Promise; + + /** + * Deletes all DataSources associated with a given project ID. + * @param projectId - The project ID whose DataSources should be deleted. + * @returns A promise that resolves when the operation is complete. + */ + deleteByProjectId(projectId: string): Promise; + + /** + * Polls for a datasource that is pending delete and returns it + * @returns The datasource if found, otherwise null. + */ + pollDeleteJob(): Promise | null>; + + /** + * Polls for a datasource that is pending processing and returns it + * @returns The datasource if found, otherwise null. 
+ */ + pollPendingJob(): Promise | null>; + + /** + * Releases a datasource by its ID and version. + * @param id - The unique ID of the datasource to release. + * @param version - The version of the datasource to release. + * @param updates - The updates to apply to the datasource (see ReleasePayloadSchema). + */ + release(id: string, version: number, updates: z.infer): Promise; +} \ No newline at end of file diff --git a/apps/rowboat/src/application/services/uploads-storage.service.interface.ts b/apps/rowboat/src/application/services/uploads-storage.service.interface.ts new file mode 100644 index 00000000..e33193bd --- /dev/null +++ b/apps/rowboat/src/application/services/uploads-storage.service.interface.ts @@ -0,0 +1,5 @@ +export interface IUploadsStorageService { + getUploadUrl(key: string, contentType: string): Promise; + getDownloadUrl(fileId: string): Promise; + getFileContents(fileId: string): Promise; +} \ No newline at end of file diff --git a/apps/rowboat/src/application/use-cases/data-sources/add-docs-to-data-source.use-case.ts b/apps/rowboat/src/application/use-cases/data-sources/add-docs-to-data-source.use-case.ts new file mode 100644 index 00000000..be0cf3f3 --- /dev/null +++ b/apps/rowboat/src/application/use-cases/data-sources/add-docs-to-data-source.use-case.ts @@ -0,0 +1,68 @@ +import { z } from "zod"; +import { IDataSourceDocsRepository, CreateSchema as DocCreateSchema } from "@/src/application/repositories/data-source-docs.repository.interface"; +import { IDataSourcesRepository } from "@/src/application/repositories/data-sources.repository.interface"; +import { IUsageQuotaPolicy } from "@/src/application/policies/usage-quota.policy.interface"; +import { IProjectActionAuthorizationPolicy } from "@/src/application/policies/project-action-authorization.policy"; +import { NotFoundError } from "@/src/entities/errors/common"; + +const inputSchema = z.object({ + caller: z.enum(["user", "api"]), + userId: z.string().optional(), + apiKey: 
z.string().optional(), + sourceId: z.string(), + docs: z.array(DocCreateSchema), +}); + +export interface IAddDocsToDataSourceUseCase { + execute(request: z.infer): Promise; +} + +export class AddDocsToDataSourceUseCase implements IAddDocsToDataSourceUseCase { + private readonly dataSourceDocsRepository: IDataSourceDocsRepository; + private readonly dataSourcesRepository: IDataSourcesRepository; + private readonly usageQuotaPolicy: IUsageQuotaPolicy; + private readonly projectActionAuthorizationPolicy: IProjectActionAuthorizationPolicy; + + constructor({ + dataSourceDocsRepository, + dataSourcesRepository, + usageQuotaPolicy, + projectActionAuthorizationPolicy, + }: { + dataSourceDocsRepository: IDataSourceDocsRepository, + dataSourcesRepository: IDataSourcesRepository, + usageQuotaPolicy: IUsageQuotaPolicy, + projectActionAuthorizationPolicy: IProjectActionAuthorizationPolicy, + }) { + this.dataSourceDocsRepository = dataSourceDocsRepository; + this.dataSourcesRepository = dataSourcesRepository; + this.usageQuotaPolicy = usageQuotaPolicy; + this.projectActionAuthorizationPolicy = projectActionAuthorizationPolicy; + } + + async execute(request: z.infer): Promise { + const { sourceId, docs } = request; + + const source = await this.dataSourcesRepository.fetch(sourceId); + if (!source) { + throw new NotFoundError('Data source not found'); + } + + await this.projectActionAuthorizationPolicy.authorize({ + caller: request.caller, + userId: request.userId, + apiKey: request.apiKey, + projectId: source.projectId, + }); + + await this.usageQuotaPolicy.assertAndConsume(source.projectId); + + await this.dataSourceDocsRepository.bulkCreate(source.projectId, sourceId, docs); + + await this.dataSourcesRepository.update(sourceId, { + status: "pending", + billingError: null, + attempts: 0, + }, true); + } +} \ No newline at end of file diff --git a/apps/rowboat/src/application/use-cases/data-sources/create-data-source.use-case.ts 
// ---- file: apps/rowboat/src/application/use-cases/data-sources/create-data-source.use-case.ts ----
import { z } from "zod";
import { DataSource } from "@/src/entities/models/data-source";
import { IUsageQuotaPolicy } from "@/src/application/policies/usage-quota.policy.interface";
import { IProjectActionAuthorizationPolicy } from "@/src/application/policies/project-action-authorization.policy";
import { IDataSourcesRepository, CreateSchema } from "@/src/application/repositories/data-sources.repository.interface";

/**
 * Request shape for creating a data source. It is only used for static typing
 * in this file; nothing here parses the request against it at runtime.
 */
const inputSchema = z.object({
    caller: z.enum(["user", "api"]),
    userId: z.string().optional(),
    apiKey: z.string().optional(),
    data: CreateSchema,
});

export interface ICreateDataSourceUseCase {
    execute(request: z.infer<typeof inputSchema>): Promise<z.infer<typeof DataSource>>;
}

/**
 * Creates a data source in the project named by `request.data.projectId`.
 *
 * Order of operations: authorize the caller for the project, run the usage
 * quota policy, then persist through the repository.
 */
export class CreateDataSourceUseCase implements ICreateDataSourceUseCase {
    private readonly dataSourcesRepository: IDataSourcesRepository;
    private readonly usageQuotaPolicy: IUsageQuotaPolicy;
    private readonly projectActionAuthorizationPolicy: IProjectActionAuthorizationPolicy;

    constructor({
        dataSourcesRepository,
        usageQuotaPolicy,
        projectActionAuthorizationPolicy,
    }: {
        dataSourcesRepository: IDataSourcesRepository,
        usageQuotaPolicy: IUsageQuotaPolicy,
        projectActionAuthorizationPolicy: IProjectActionAuthorizationPolicy,
    }) {
        this.dataSourcesRepository = dataSourcesRepository;
        this.usageQuotaPolicy = usageQuotaPolicy;
        this.projectActionAuthorizationPolicy = projectActionAuthorizationPolicy;
    }

    /**
     * @param request caller identity plus the creation payload.
     * @returns the data source as returned by the repository's `create`.
     */
    async execute(request: z.infer<typeof inputSchema>): Promise<z.infer<typeof DataSource>> {
        const { projectId } = request.data;

        await this.projectActionAuthorizationPolicy.authorize({
            caller: request.caller,
            userId: request.userId,
            apiKey: request.apiKey,
            projectId,
        });

        await this.usageQuotaPolicy.assertAndConsume(projectId);

        // Default to "pending". A caller-supplied status is honoured only for
        // non-file sources; file sources ('files_local' / 'files_s3') always
        // start out pending.
        let _status = "pending";
        if (request.data.status && request.data.data.type !== 'files_local' && request.data.data.type !== 'files_s3') {
            _status = request.data.status;
        }

        return await this.dataSourcesRepository.create({
            ...request.data,
            status: _status as z.infer<typeof DataSource>['status'],
        });
    }
}

// ---- file: apps/rowboat/src/application/use-cases/data-sources/delete-data-source.use-case.ts ----
import { z } from "zod";
import { IUsageQuotaPolicy } from "@/src/application/policies/usage-quota.policy.interface";
import { IProjectActionAuthorizationPolicy } from "@/src/application/policies/project-action-authorization.policy";
import { IDataSourcesRepository } from "@/src/application/repositories/data-sources.repository.interface";
import { NotFoundError } from "@/src/entities/errors/common";

/** Request shape for deleting a data source (typing only, not parsed here). */
const inputSchema = z.object({
    caller: z.enum(["user", "api"]),
    userId: z.string().optional(),
    apiKey: z.string().optional(),
    sourceId: z.string(),
});

export interface IDeleteDataSourceUseCase {
    execute(request: z.infer<typeof inputSchema>): Promise<void>;
}

/**
 * Soft-deletes a data source: the record is kept and its status is set to
 * 'deleted', with `attempts` and `billingError` reset.
 */
export class DeleteDataSourceUseCase implements IDeleteDataSourceUseCase {
    private readonly dataSourcesRepository: IDataSourcesRepository;
    private readonly usageQuotaPolicy: IUsageQuotaPolicy;
    private readonly projectActionAuthorizationPolicy: IProjectActionAuthorizationPolicy;

    constructor({
        dataSourcesRepository,
        usageQuotaPolicy,
        projectActionAuthorizationPolicy,
    }: {
        dataSourcesRepository: IDataSourcesRepository,
        usageQuotaPolicy: IUsageQuotaPolicy,
        projectActionAuthorizationPolicy: IProjectActionAuthorizationPolicy,
    }) {
        this.dataSourcesRepository = dataSourcesRepository;
        this.usageQuotaPolicy = usageQuotaPolicy;
        this.projectActionAuthorizationPolicy = projectActionAuthorizationPolicy;
    }

    /**
     * @throws NotFoundError when no data source exists for `request.sourceId`.
     */
    async execute(request: z.infer<typeof inputSchema>): Promise<void> {
        // The source is loaded first because the project to authorize against
        // is read from the stored record, not from the request.
        const existing = await this.dataSourcesRepository.fetch(request.sourceId);
        if (!existing) {
            throw new NotFoundError(`Data source ${request.sourceId} not found`);
        }

        const { projectId } = existing;

        await this.projectActionAuthorizationPolicy.authorize({
            caller: request.caller,
            userId: request.userId,
            apiKey: request.apiKey,
            projectId,
        });

        await this.usageQuotaPolicy.assertAndConsume(projectId);

        // NOTE(review): the trailing `true` is forwarded to the repository's
        // `update`; its meaning is defined by the repository interface, which
        // is not visible from this file — confirm.
        await this.dataSourcesRepository.update(request.sourceId, {
            status: 'deleted',
            attempts: 0,
            billingError: null,
        }, true);
    }
}

// ---- file: apps/rowboat/src/application/use-cases/data-sources/delete-doc-from-data-source.use-case.ts ----
import { z } from "zod";
import { IDataSourceDocsRepository } from "@/src/application/repositories/data-source-docs.repository.interface";
import { IDataSourcesRepository } from "@/src/application/repositories/data-sources.repository.interface";
import { IUsageQuotaPolicy } from "@/src/application/policies/usage-quota.policy.interface";
import { IProjectActionAuthorizationPolicy } from "@/src/application/policies/project-action-authorization.policy";
import { NotFoundError } from "@/src/entities/errors/common";

/** Request shape for deleting one doc (typing only, not parsed here). */
const inputSchema = z.object({
    caller: z.enum(["user", "api"]),
    userId: z.string().optional(),
    apiKey: z.string().optional(),
    docId: z.string(),
});

export interface IDeleteDocFromDataSourceUseCase {
    execute(request: z.infer<typeof inputSchema>): Promise<void>;
}

/**
 * Marks a single doc as deleted and puts its parent data source back into
 * 'pending' with `attempts` and `billingError` reset.
 */
export class DeleteDocFromDataSourceUseCase implements IDeleteDocFromDataSourceUseCase {
    private readonly dataSourceDocsRepository: IDataSourceDocsRepository;
    private readonly dataSourcesRepository: IDataSourcesRepository;
    private readonly usageQuotaPolicy: IUsageQuotaPolicy;
    private readonly projectActionAuthorizationPolicy: IProjectActionAuthorizationPolicy;

    constructor({
        dataSourceDocsRepository,
        dataSourcesRepository,
        usageQuotaPolicy,
        projectActionAuthorizationPolicy,
    }: {
        dataSourceDocsRepository: IDataSourceDocsRepository,
        dataSourcesRepository: IDataSourcesRepository,
        usageQuotaPolicy: IUsageQuotaPolicy,
        projectActionAuthorizationPolicy: IProjectActionAuthorizationPolicy,
    }) {
        this.dataSourceDocsRepository = dataSourceDocsRepository;
        this.dataSourcesRepository = dataSourcesRepository;
        this.usageQuotaPolicy = usageQuotaPolicy;
        this.projectActionAuthorizationPolicy = projectActionAuthorizationPolicy;
    }

    /**
     * @throws NotFoundError when no doc exists for `request.docId`.
     */
    async execute(request: z.infer<typeof inputSchema>): Promise<void> {
        const { docId } = request;

        // The project to authorize against comes from the stored doc.
        const doc = await this.dataSourceDocsRepository.fetch(docId);
        if (!doc) {
            throw new NotFoundError(`Doc ${docId} not found`);
        }

        await this.projectActionAuthorizationPolicy.authorize({
            caller: request.caller,
            userId: request.userId,
            apiKey: request.apiKey,
            projectId: doc.projectId,
        });

        await this.usageQuotaPolicy.assertAndConsume(doc.projectId);

        await this.dataSourceDocsRepository.markAsDeleted(docId);

        // Reset the parent source to 'pending'.
        // NOTE(review): presumably so a worker picks the source up again and
        // handles the deleted doc — confirm against the worker.
        await this.dataSourcesRepository.update(doc.sourceId, {
            status: 'pending',
            billingError: null,
            attempts: 0,
        }, true);
    }
}

// ---- file: apps/rowboat/src/application/use-cases/data-sources/fetch-data-source.use-case.ts ----
import { z } from "zod";
import { DataSource } from "@/src/entities/models/data-source";
import {
IUsageQuotaPolicy } from "@/src/application/policies/usage-quota.policy.interface"; +import { IProjectActionAuthorizationPolicy } from "@/src/application/policies/project-action-authorization.policy"; +import { IDataSourcesRepository } from "@/src/application/repositories/data-sources.repository.interface"; +import { NotFoundError } from "@/src/entities/errors/common"; + +const inputSchema = z.object({ + caller: z.enum(["user", "api"]), + userId: z.string().optional(), + apiKey: z.string().optional(), + sourceId: z.string(), +}); + +export interface IFetchDataSourceUseCase { + execute(request: z.infer): Promise>; +} + +export class FetchDataSourceUseCase implements IFetchDataSourceUseCase { + private readonly dataSourcesRepository: IDataSourcesRepository; + private readonly usageQuotaPolicy: IUsageQuotaPolicy; + private readonly projectActionAuthorizationPolicy: IProjectActionAuthorizationPolicy; + + constructor({ + dataSourcesRepository, + usageQuotaPolicy, + projectActionAuthorizationPolicy, + }: { + dataSourcesRepository: IDataSourcesRepository, + usageQuotaPolicy: IUsageQuotaPolicy, + projectActionAuthorizationPolicy: IProjectActionAuthorizationPolicy, + }) { + this.dataSourcesRepository = dataSourcesRepository; + this.usageQuotaPolicy = usageQuotaPolicy; + this.projectActionAuthorizationPolicy = projectActionAuthorizationPolicy; + } + + async execute(request: z.infer): Promise> { + const source = await this.dataSourcesRepository.fetch(request.sourceId); + if (!source) { + throw new NotFoundError(`Data source ${request.sourceId} not found`); + } + + const { projectId } = source; + + await this.projectActionAuthorizationPolicy.authorize({ + caller: request.caller, + userId: request.userId, + apiKey: request.apiKey, + projectId, + }); + + await this.usageQuotaPolicy.assertAndConsume(projectId); + + return source; + } +} \ No newline at end of file diff --git a/apps/rowboat/src/application/use-cases/data-sources/get-download-url-for-file.use-case.ts 
// ---- file: apps/rowboat/src/application/use-cases/data-sources/get-download-url-for-file.use-case.ts ----
import { z } from "zod";
import { IUploadsStorageService } from "@/src/application/services/uploads-storage.service.interface";
import { IDataSourceDocsRepository } from "@/src/application/repositories/data-source-docs.repository.interface";
import { IUsageQuotaPolicy } from "@/src/application/policies/usage-quota.policy.interface";
import { IProjectActionAuthorizationPolicy } from "@/src/application/policies/project-action-authorization.policy";
import { NotFoundError } from "@/src/entities/errors/common";

/** Request shape for a download-URL lookup (typing only, not parsed here). */
const inputSchema = z.object({
    caller: z.enum(["user", "api"]),
    userId: z.string().optional(),
    apiKey: z.string().optional(),
    fileId: z.string(),
});

export interface IGetDownloadUrlForFileUseCase {
    execute(request: z.infer<typeof inputSchema>): Promise<string>;
}

/**
 * Resolves a download URL for an uploaded file doc, delegating to the local
 * or S3 storage service depending on the doc's `data.type`.
 */
export class GetDownloadUrlForFileUseCase implements IGetDownloadUrlForFileUseCase {
    private readonly s3UploadsStorageService: IUploadsStorageService;
    private readonly localUploadsStorageService: IUploadsStorageService;
    private readonly dataSourceDocsRepository: IDataSourceDocsRepository;
    private readonly usageQuotaPolicy: IUsageQuotaPolicy;
    private readonly projectActionAuthorizationPolicy: IProjectActionAuthorizationPolicy;

    constructor({
        s3UploadsStorageService,
        localUploadsStorageService,
        dataSourceDocsRepository,
        usageQuotaPolicy,
        projectActionAuthorizationPolicy,
    }: {
        s3UploadsStorageService: IUploadsStorageService,
        localUploadsStorageService: IUploadsStorageService,
        dataSourceDocsRepository: IDataSourceDocsRepository,
        usageQuotaPolicy: IUsageQuotaPolicy,
        projectActionAuthorizationPolicy: IProjectActionAuthorizationPolicy,
    }) {
        this.s3UploadsStorageService = s3UploadsStorageService;
        this.localUploadsStorageService = localUploadsStorageService;
        this.dataSourceDocsRepository = dataSourceDocsRepository;
        this.usageQuotaPolicy = usageQuotaPolicy;
        this.projectActionAuthorizationPolicy = projectActionAuthorizationPolicy;
    }

    /**
     * @returns the URL produced by the matching storage service.
     * @throws NotFoundError when the doc does not exist ('File not found') or
     *         is not a file doc ('Invalid file type').
     */
    async execute(request: z.infer<typeof inputSchema>): Promise<string> {
        const { fileId } = request;

        const file = await this.dataSourceDocsRepository.fetch(fileId);
        if (!file) {
            throw new NotFoundError('File not found');
        }

        await this.projectActionAuthorizationPolicy.authorize({
            caller: request.caller,
            userId: request.userId,
            apiKey: request.apiKey,
            projectId: file.projectId,
        });

        await this.usageQuotaPolicy.assertAndConsume(file.projectId);

        if (file.data.type === 'file_local') {
            // use the file id instead of path here
            return await this.localUploadsStorageService.getDownloadUrl(file.id);
        } else if (file.data.type === 'file_s3') {
            // NOTE(review): the S3 branch also passes the doc id, not
            // `file.data.s3Key` — confirm the S3 service resolves the object
            // key from the id.
            return await this.s3UploadsStorageService.getDownloadUrl(file.id);
        }

        // 'url' and 'text' docs have no downloadable file.
        throw new NotFoundError('Invalid file type');
    }
}

// ---- file: apps/rowboat/src/application/use-cases/data-sources/get-upload-urls-for-files.use-case.ts ----
import { z } from "zod";
import { IUploadsStorageService } from "@/src/application/services/uploads-storage.service.interface";
import { IDataSourcesRepository } from "@/src/application/repositories/data-sources.repository.interface";
import { IUsageQuotaPolicy } from "@/src/application/policies/usage-quota.policy.interface";
import { IProjectActionAuthorizationPolicy } from "@/src/application/policies/project-action-authorization.policy";
import { ObjectId } from "mongodb";
import { NotFoundError } from "@/src/entities/errors/common";

/** Request shape for upload-URL generation (typing only, not parsed here). */
const inputSchema = z.object({
    caller: z.enum(["user", "api"]),
    userId: z.string().optional(),
    apiKey: z.string().optional(),
    sourceId: z.string(),
    files: z.array(z.object({ name: z.string(), type: z.string(), size: z.number() })),
});

export interface IGetUploadUrlsForFilesUseCase {
    execute(request: z.infer<typeof inputSchema>): Promise<{ fileId: string, uploadUrl: string, path: string }[]>;
}

/**
 * Generates one upload URL per requested file for a file-backed data source.
 * A fresh ObjectId string is minted as the `fileId` of every file.
 */
export class GetUploadUrlsForFilesUseCase implements IGetUploadUrlsForFilesUseCase {
    private readonly s3UploadsStorageService: IUploadsStorageService;
    private readonly localUploadsStorageService: IUploadsStorageService;
    private readonly dataSourcesRepository: IDataSourcesRepository;
    private readonly usageQuotaPolicy: IUsageQuotaPolicy;
    private readonly projectActionAuthorizationPolicy: IProjectActionAuthorizationPolicy;

    constructor({
        s3UploadsStorageService,
        localUploadsStorageService,
        dataSourcesRepository,
        usageQuotaPolicy,
        projectActionAuthorizationPolicy,
    }: {
        s3UploadsStorageService: IUploadsStorageService,
        localUploadsStorageService: IUploadsStorageService,
        dataSourcesRepository: IDataSourcesRepository,
        usageQuotaPolicy: IUsageQuotaPolicy,
        projectActionAuthorizationPolicy: IProjectActionAuthorizationPolicy,
    }) {
        this.s3UploadsStorageService = s3UploadsStorageService;
        this.localUploadsStorageService = localUploadsStorageService;
        this.dataSourcesRepository = dataSourcesRepository;
        this.usageQuotaPolicy = usageQuotaPolicy;
        this.projectActionAuthorizationPolicy = projectActionAuthorizationPolicy;
    }

    /**
     * @returns one `{ fileId, uploadUrl, path }` entry per input file, in
     *          input order. Sources whose type is neither 'files_s3' nor
     *          'files_local' yield an empty array.
     * @throws NotFoundError when no data source exists for `request.sourceId`.
     */
    async execute(request: z.infer<typeof inputSchema>): Promise<{ fileId: string, uploadUrl: string, path: string }[]> {
        const { sourceId, files } = request;

        const source = await this.dataSourcesRepository.fetch(sourceId);
        if (!source) {
            throw new NotFoundError('Data source not found');
        }

        await this.projectActionAuthorizationPolicy.authorize({
            caller: request.caller,
            userId: request.userId,
            apiKey: request.apiKey,
            projectId: source.projectId,
        });

        await this.usageQuotaPolicy.assertAndConsume(source.projectId);

        const urls: { fileId: string, uploadUrl: string, path: string }[] = [];
        for (const file of files) {
            const fileId = new ObjectId().toString();

            if (source.data.type === 'files_s3') {
                // Key layout: datasources/files/<first 2 chars of projectId>/<projectId>/<sourceId>/<fileId>/<name>
                // NOTE(review): `file.name` is caller-supplied and goes into
                // the key unsanitized — confirm that is acceptable.
                const projectIdPrefix = source.projectId.slice(0, 2);
                const path = `datasources/files/${projectIdPrefix}/${source.projectId}/${sourceId}/${fileId}/${file.name}`;
                const uploadUrl = await this.s3UploadsStorageService.getUploadUrl(path, file.type);
                urls.push({ fileId, uploadUrl, path });
            } else if (source.data.type === 'files_local') {
                // Local uploads are addressed by file id; the upload URL is
                // reused as the reported `path`.
                const uploadUrl = await this.localUploadsStorageService.getUploadUrl(fileId, file.type);
                urls.push({ fileId, uploadUrl, path: uploadUrl });
            }
        }

        return urls;
    }
}

// ---- file: apps/rowboat/src/application/use-cases/data-sources/list-data-sources.use-case.ts ----
import { z } from "zod";
import { IUsageQuotaPolicy } from "@/src/application/policies/usage-quota.policy.interface";
import { IProjectActionAuthorizationPolicy } from "@/src/application/policies/project-action-authorization.policy";
import { IDataSourcesRepository } from "@/src/application/repositories/data-sources.repository.interface";
import { DataSource } from "@/src/entities/models/data-source";

/** Request shape for listing a project's sources (typing only, not parsed here). */
const inputSchema = z.object({
    caller: z.enum(["user", "api"]),
    userId: z.string().optional(),
    apiKey: z.string().optional(),
    projectId: z.string(),
});

export interface IListDataSourcesUseCase {
    execute(request: z.infer<typeof inputSchema>): Promise<z.infer<typeof DataSource>[]>;
}

/**
 * Returns every data source the repository lists for a project by walking all
 * pages of the repository's cursor-based `list`.
 */
export class ListDataSourcesUseCase implements IListDataSourcesUseCase {
    private readonly dataSourcesRepository: IDataSourcesRepository;
    private readonly usageQuotaPolicy: IUsageQuotaPolicy;
    private readonly projectActionAuthorizationPolicy: IProjectActionAuthorizationPolicy;

    constructor({
        dataSourcesRepository,
        usageQuotaPolicy,
        projectActionAuthorizationPolicy,
    }: {
        dataSourcesRepository: IDataSourcesRepository,
        usageQuotaPolicy: IUsageQuotaPolicy,
        projectActionAuthorizationPolicy: IProjectActionAuthorizationPolicy,
    }) {
        this.dataSourcesRepository = dataSourcesRepository;
        this.usageQuotaPolicy = usageQuotaPolicy;
        this.projectActionAuthorizationPolicy = projectActionAuthorizationPolicy;
    }

    /**
     * @returns all sources across all pages, in the order the repository
     *          yields them.
     */
    async execute(request: z.infer<typeof inputSchema>): Promise<z.infer<typeof DataSource>[]> {
        const { projectId } = request;

        await this.projectActionAuthorizationPolicy.authorize({
            caller: request.caller,
            userId: request.userId,
            apiKey: request.apiKey,
            projectId,
        });

        await this.usageQuotaPolicy.assertAndConsume(projectId);

        // list all sources for now
        // Explicit types keep the accumulator and cursor from being inferred
        // as implicit `any`.
        const sources: z.infer<typeof DataSource>[] = [];
        let cursor: string | undefined = undefined;
        do {
            const result = await this.dataSourcesRepository.list(projectId, undefined, cursor);
            sources.push(...result.items);
            // A null/absent cursor marks the last page and ends the loop.
            cursor = result.nextCursor ?? undefined;
        } while (cursor);

        return sources;
    }
}

// ---- file: apps/rowboat/src/application/use-cases/data-sources/list-docs-in-data-source.use-case.ts ----
import { z } from "zod";
import { IDataSourceDocsRepository } from "@/src/application/repositories/data-source-docs.repository.interface";
import { IDataSourcesRepository } from "@/src/application/repositories/data-sources.repository.interface";
import { IUsageQuotaPolicy } from "@/src/application/policies/usage-quota.policy.interface";
import { IProjectActionAuthorizationPolicy } from
"@/src/application/policies/project-action-authorization.policy";
import { DataSourceDoc } from "@/src/entities/models/data-source-doc";
import { NotFoundError } from "@/src/entities/errors/common";

/** Request shape for listing a source's docs (typing only, not parsed here). */
const inputSchema = z.object({
    caller: z.enum(["user", "api"]),
    userId: z.string().optional(),
    apiKey: z.string().optional(),
    sourceId: z.string(),
});

export interface IListDocsInDataSourceUseCase {
    execute(request: z.infer<typeof inputSchema>): Promise<z.infer<typeof DataSourceDoc>[]>;
}

/**
 * Returns every doc the docs repository lists for a data source by walking
 * all pages of its cursor-based `list`.
 */
export class ListDocsInDataSourceUseCase implements IListDocsInDataSourceUseCase {
    private readonly dataSourceDocsRepository: IDataSourceDocsRepository;
    private readonly dataSourcesRepository: IDataSourcesRepository;
    private readonly usageQuotaPolicy: IUsageQuotaPolicy;
    private readonly projectActionAuthorizationPolicy: IProjectActionAuthorizationPolicy;

    constructor({
        dataSourceDocsRepository,
        dataSourcesRepository,
        usageQuotaPolicy,
        projectActionAuthorizationPolicy,
    }: {
        dataSourceDocsRepository: IDataSourceDocsRepository,
        dataSourcesRepository: IDataSourcesRepository,
        usageQuotaPolicy: IUsageQuotaPolicy,
        projectActionAuthorizationPolicy: IProjectActionAuthorizationPolicy,
    }) {
        this.dataSourceDocsRepository = dataSourceDocsRepository;
        this.dataSourcesRepository = dataSourcesRepository;
        this.usageQuotaPolicy = usageQuotaPolicy;
        this.projectActionAuthorizationPolicy = projectActionAuthorizationPolicy;
    }

    /**
     * @returns all docs across all pages, in the order the repository yields
     *          them.
     * @throws NotFoundError when no data source exists for `request.sourceId`.
     */
    async execute(request: z.infer<typeof inputSchema>): Promise<z.infer<typeof DataSourceDoc>[]> {
        const { sourceId } = request;

        // The parent source supplies the project id to authorize against.
        const source = await this.dataSourcesRepository.fetch(sourceId);
        if (!source) {
            throw new NotFoundError(`Data source ${sourceId} not found`);
        }

        await this.projectActionAuthorizationPolicy.authorize({
            caller: request.caller,
            userId: request.userId,
            apiKey: request.apiKey,
            projectId: source.projectId,
        });

        await this.usageQuotaPolicy.assertAndConsume(source.projectId);

        // fetch all docs
        // Explicit types keep the accumulator and cursor from being inferred
        // as implicit `any`.
        const docs: z.infer<typeof DataSourceDoc>[] = [];
        let cursor: string | undefined = undefined;
        do {
            const result = await this.dataSourceDocsRepository.list(sourceId, undefined, cursor);
            docs.push(...result.items);
            // A null/absent cursor marks the last page and ends the loop.
            cursor = result.nextCursor ?? undefined;
        } while (cursor);

        return docs;
    }
}

// ---- file: apps/rowboat/src/application/use-cases/data-sources/recrawl-web-data-source.use-case.ts ----
import { z } from "zod";
import { IDataSourceDocsRepository } from "@/src/application/repositories/data-source-docs.repository.interface";
import { IDataSourcesRepository } from "@/src/application/repositories/data-sources.repository.interface";
import { IUsageQuotaPolicy } from "@/src/application/policies/usage-quota.policy.interface";
import { IProjectActionAuthorizationPolicy } from "@/src/application/policies/project-action-authorization.policy";
import { NotFoundError, BadRequestError } from "@/src/entities/errors/common";

/** Request shape for a recrawl (typing only, not parsed here). */
const inputSchema = z.object({
    caller: z.enum(["user", "api"]),
    userId: z.string().optional(),
    apiKey: z.string().optional(),
    sourceId: z.string(),
});

export interface IRecrawlWebDataSourceUseCase {
    execute(request: z.infer<typeof inputSchema>): Promise<void>;
}

/**
 * Re-queues a 'urls' data source: its docs are marked pending through the
 * docs repository and the source itself is set back to 'pending' with
 * `attempts` and `billingError` reset.
 */
export class RecrawlWebDataSourceUseCase implements IRecrawlWebDataSourceUseCase {
    private readonly dataSourceDocsRepository: IDataSourceDocsRepository;
    private readonly dataSourcesRepository: IDataSourcesRepository;
    private readonly usageQuotaPolicy: IUsageQuotaPolicy;
    private readonly projectActionAuthorizationPolicy: IProjectActionAuthorizationPolicy;

    constructor({
        dataSourceDocsRepository,
        dataSourcesRepository,
        usageQuotaPolicy,
        projectActionAuthorizationPolicy,
    }: {
        dataSourceDocsRepository: IDataSourceDocsRepository,
        dataSourcesRepository: IDataSourcesRepository,
        usageQuotaPolicy: IUsageQuotaPolicy,
        projectActionAuthorizationPolicy: IProjectActionAuthorizationPolicy,
    }) {
        this.dataSourceDocsRepository = dataSourceDocsRepository;
        this.dataSourcesRepository = dataSourcesRepository;
        this.usageQuotaPolicy = usageQuotaPolicy;
        this.projectActionAuthorizationPolicy = projectActionAuthorizationPolicy;
    }

    /**
     * @throws NotFoundError when no data source exists for `request.sourceId`.
     * @throws BadRequestError when the source's `data.type` is not 'urls'.
     */
    async execute(request: z.infer<typeof inputSchema>): Promise<void> {
        const source = await this.dataSourcesRepository.fetch(request.sourceId);
        if (!source) {
            throw new NotFoundError(`Data source ${request.sourceId} not found`);
        }

        // Only web (URL) sources can be recrawled.
        if (source.data.type !== 'urls') {
            throw new BadRequestError('Invalid data source type');
        }

        const { projectId } = source;

        await this.projectActionAuthorizationPolicy.authorize({
            caller: request.caller,
            userId: request.userId,
            apiKey: request.apiKey,
            projectId,
        });

        await this.usageQuotaPolicy.assertAndConsume(projectId);

        await this.dataSourceDocsRepository.markSourceDocsPending(request.sourceId);

        // NOTE(review): the trailing `true` is forwarded to the repository's
        // `update`; its meaning is defined by the repository interface, which
        // is not visible from this file — confirm.
        await this.dataSourcesRepository.update(request.sourceId, {
            status: 'pending',
            billingError: null,
            attempts: 0,
        }, true);
    }
}

// ---- file: apps/rowboat/src/application/use-cases/data-sources/toggle-data-source.use-case.ts ----
import { z } from "zod";
import { DataSource } from "@/src/entities/models/data-source";
import { IUsageQuotaPolicy } from "@/src/application/policies/usage-quota.policy.interface";
import { IProjectActionAuthorizationPolicy } from "@/src/application/policies/project-action-authorization.policy";
import { IDataSourcesRepository } from "@/src/application/repositories/data-sources.repository.interface";
import { NotFoundError } from "@/src/entities/errors/common";

/** Request shape for toggling a source (typing only, not parsed here). */
const inputSchema = z.object({
    caller: z.enum(["user", "api"]),
    userId: z.string().optional(),
    apiKey: z.string().optional(),
    sourceId: z.string(),
    active: z.boolean(),
});

export interface IToggleDataSourceUseCase {
    execute(request: z.infer<typeof inputSchema>): Promise<z.infer<typeof DataSource>>;
}

/** Sets the `active` flag of a data source to the requested value. */
export class ToggleDataSourceUseCase implements IToggleDataSourceUseCase {
    private readonly dataSourcesRepository: IDataSourcesRepository;
    private readonly usageQuotaPolicy: IUsageQuotaPolicy;
    private readonly projectActionAuthorizationPolicy: IProjectActionAuthorizationPolicy;

    constructor({
        dataSourcesRepository,
        usageQuotaPolicy,
        projectActionAuthorizationPolicy,
    }: {
        dataSourcesRepository: IDataSourcesRepository,
        usageQuotaPolicy: IUsageQuotaPolicy,
        projectActionAuthorizationPolicy: IProjectActionAuthorizationPolicy,
    }) {
        this.dataSourcesRepository = dataSourcesRepository;
        this.usageQuotaPolicy = usageQuotaPolicy;
        this.projectActionAuthorizationPolicy = projectActionAuthorizationPolicy;
    }

    /**
     * @returns the data source as returned by the repository's `update`.
     * @throws NotFoundError when no data source exists for `request.sourceId`.
     */
    async execute(request: z.infer<typeof inputSchema>): Promise<z.infer<typeof DataSource>> {
        const existing = await this.dataSourcesRepository.fetch(request.sourceId);
        if (!existing) {
            throw new NotFoundError(`Data source ${request.sourceId} not found`);
        }

        const { projectId } = existing;

        await this.projectActionAuthorizationPolicy.authorize({
            caller: request.caller,
            userId: request.userId,
            apiKey: request.apiKey,
            projectId,
        });

        await this.usageQuotaPolicy.assertAndConsume(projectId);

        // Unlike the other use cases in this folder, no third argument is
        // passed to `update` here.
        return await this.dataSourcesRepository.update(request.sourceId, { active: request.active });
    }
}

// ---- file: apps/rowboat/src/application/use-cases/data-sources/update-data-source.use-case.ts ----
import { z } from "zod";
import { DataSource } from
"@/src/entities/models/data-source";
import { IUsageQuotaPolicy } from "@/src/application/policies/usage-quota.policy.interface";
import { IProjectActionAuthorizationPolicy } from "@/src/application/policies/project-action-authorization.policy";
import { IDataSourcesRepository } from "@/src/application/repositories/data-sources.repository.interface";
import { NotFoundError } from "@/src/entities/errors/common";

/**
 * Request shape for updating a data source (typing only, not parsed here).
 * `description` is the only field exposed, and it is optional.
 */
const inputSchema = z.object({
    caller: z.enum(["user", "api"]),
    userId: z.string().optional(),
    apiKey: z.string().optional(),
    sourceId: z.string(),
    data: DataSource
        .pick({
            description: true,
        })
        .partial(),
});

export interface IUpdateDataSourceUseCase {
    execute(request: z.infer<typeof inputSchema>): Promise<z.infer<typeof DataSource>>;
}

/** Applies the user-editable fields in `request.data` to a data source. */
export class UpdateDataSourceUseCase implements IUpdateDataSourceUseCase {
    private readonly dataSourcesRepository: IDataSourcesRepository;
    private readonly usageQuotaPolicy: IUsageQuotaPolicy;
    private readonly projectActionAuthorizationPolicy: IProjectActionAuthorizationPolicy;

    constructor({
        dataSourcesRepository,
        usageQuotaPolicy,
        projectActionAuthorizationPolicy,
    }: {
        dataSourcesRepository: IDataSourcesRepository,
        usageQuotaPolicy: IUsageQuotaPolicy,
        projectActionAuthorizationPolicy: IProjectActionAuthorizationPolicy,
    }) {
        this.dataSourcesRepository = dataSourcesRepository;
        this.usageQuotaPolicy = usageQuotaPolicy;
        this.projectActionAuthorizationPolicy = projectActionAuthorizationPolicy;
    }

    /**
     * @returns the data source as returned by the repository's `update`.
     * @throws NotFoundError when no data source exists for `request.sourceId`.
     */
    async execute(request: z.infer<typeof inputSchema>): Promise<z.infer<typeof DataSource>> {
        const source = await this.dataSourcesRepository.fetch(request.sourceId);
        if (!source) {
            throw new NotFoundError(`Data source ${request.sourceId} not found`);
        }

        const { projectId } = source;

        await this.projectActionAuthorizationPolicy.authorize({
            caller: request.caller,
            userId: request.userId,
            apiKey: request.apiKey,
            projectId,
        });

        await this.usageQuotaPolicy.assertAndConsume(projectId);

        // NOTE(review): the trailing `true` is forwarded to the repository's
        // `update`; its meaning is defined by the repository interface, which
        // is not visible from this file — confirm.
        return await this.dataSourcesRepository.update(request.sourceId, request.data, true);
    }
}

// ---- file: apps/rowboat/src/entities/models/data-source-doc.ts ----
import { z } from "zod";

/**
 * A single document belonging to a data source. `data` is a union
 * discriminated on `type`: 'url', 'file_local', 'file_s3' or 'text'.
 */
export const DataSourceDoc = z.object({
    id: z.string(),
    sourceId: z.string(),
    projectId: z.string(),
    name: z.string(),
    version: z.number(),
    status: z.enum([
        'pending',
        'ready',
        'error',
        'deleted',
    ]),
    content: z.string().nullable(),
    createdAt: z.string().datetime(),
    lastUpdatedAt: z.string().datetime().nullable(),
    attempts: z.number(),
    error: z.string().nullable(),
    data: z.discriminatedUnion('type', [
        z.object({
            type: z.literal('url'),
            url: z.string(),
        }),
        z.object({
            type: z.literal('file_local'),
            name: z.string(),
            size: z.number(),
            mimeType: z.string(),
            path: z.string(),
        }),
        z.object({
            type: z.literal('file_s3'),
            name: z.string(),
            size: z.number(),
            mimeType: z.string(),
            s3Key: z.string(),
        }),
        z.object({
            type: z.literal('text'),
            content: z.string(),
        }),
    ]),
});

// ---- file: apps/rowboat/src/entities/models/data-source.ts ----
import { z } from "zod";

/**
 * A project-scoped data source. `data.type` selects the kind of source:
 * 'urls', 'files_local', 'files_s3' or 'text'. `active` defaults to true when
 * the schema is parsed without it.
 */
export const DataSource = z.object({
    id: z.string(),
    name: z.string(),
    description: z.string(),
    projectId: z.string(),
    active: z.boolean().default(true),
    status: z.enum([
        'pending',
        'ready',
        'error',
        'deleted',
    ]),
    version: z.number(),
    error: z.string().nullable(),
    billingError: z.string().nullable(),
    createdAt: z.string().datetime(),
    lastUpdatedAt: z.string().datetime().nullable(),
    attempts: z.number(),
    lastAttemptAt: z.string().datetime().nullable(),
    data: z.discriminatedUnion('type', [
        z.object({
            type: z.literal('urls'),
        }),
        z.object({
            type: z.literal('files_local'),
        }),
        z.object({
            type: z.literal('files_s3'),
        }),
        z.object({
            type: z.literal('text'),
        })
    ]),
});

// ---- file: apps/rowboat/src/infrastructure/repositories/mongodb.data-source-docs.repository.ts ----
import { z } from "zod";
import { Filter, ObjectId } from "mongodb";
import { db } from "@/app/lib/mongodb";
import { DataSourceDoc } from "@/src/entities/models/data-source-doc";
import {
    CreateSchema,
    IDataSourceDocsRepository,
    ListFiltersSchema,
    UpdateSchema,
} from "@/src/application/repositories/data-source-docs.repository.interface";
import { PaginatedList } from "@/src/entities/common/paginated-list";
import { NotFoundError } from "@/src/entities/errors/common";

/**
 * MongoDB document schema for DataSourceDoc.
 * Excludes the 'id' field as it's represented by MongoDB's '_id'.
 */
const DocSchema = DataSourceDoc.omit({ id: true });

/**
 * MongoDB implementation of the DataSourceDocs repository.
+ */ +export class MongoDBDataSourceDocsRepository implements IDataSourceDocsRepository { + private readonly collection = db.collection>("source_docs"); + + async bulkCreate(projectId: string, sourceId: string, data: z.infer[]): Promise { + const now = new Date().toISOString(); + + const result = await this.collection.insertMany(data.map(doc => { + return { + projectId, + sourceId, + name: doc.name, + version: 1, + createdAt: now, + lastUpdatedAt: null, + content: null, + attempts: 0, + error: null, + data: doc.data, + status: "pending", + } + })); + + return Object.values(result.insertedIds).map(id => id.toString()); + } + + async fetch(id: string): Promise | null> { + const result = await this.collection.findOne({ _id: new ObjectId(id) }); + if (!result) return null; + + const { _id, ...rest } = result; + return { + ...rest, + id: _id.toString(), + }; + } + + async bulkFetch(ids: string[]): Promise[]> { + const results = await this.collection.find({ _id: { $in: ids.map(id => new ObjectId(id)) } }).toArray(); + return results.map(result => { + const { _id, ...rest } = result; + return { ...rest, id: _id.toString() }; + }); + } + + async list( + sourceId: string, + filters?: z.infer, + cursor?: string, + limit: number = 50 + ): Promise>>> { + const query: Filter> = { sourceId, status: { $ne: "deleted" } }; + + if (filters?.status && filters.status.length > 0) { + query.status = { $in: filters.status }; + } + + if (cursor) { + query._id = { $lt: new ObjectId(cursor) }; + } + + const _limit = Math.min(limit, 50); + + const results = await this.collection + .find(query) + .sort({ _id: -1 }) + .limit(_limit + 1) + .toArray(); + + const hasNextPage = results.length > _limit; + const items = results.slice(0, _limit).map((doc) => { + const { _id, ...rest } = doc; + return { + ...rest, + id: _id.toString(), + }; + }); + + return { + items, + nextCursor: hasNextPage ? 
results[_limit - 1]._id.toString() : null, + }; + } + + async markSourceDocsPending(sourceId: string): Promise { + await this.collection.updateMany( + { sourceId }, + { + $set: { + status: "pending", + lastUpdatedAt: new Date().toISOString(), + attempts: 0, + }, + }, + ); + } + + async markAsDeleted(id: string): Promise { + await this.collection.updateOne( + { _id: new ObjectId(id) }, + { + $set: { + status: "deleted", + lastUpdatedAt: new Date().toISOString(), + }, + }, + ); + } + + async updateByVersion( + id: string, + version: number, + data: z.infer + ): Promise> { + const result = await this.collection.findOneAndUpdate( + { _id: new ObjectId(id), version }, + { + $set: { + ...data, + lastUpdatedAt: new Date().toISOString(), + }, + }, + { returnDocument: "after" } + ); + + if (!result) { + throw new NotFoundError(`DataSourceDoc ${id} not found or version mismatch`); + } + + const { _id, ...rest } = result; + return { + ...rest, + id: _id.toString(), + }; + } + + async delete(id: string): Promise { + const result = await this.collection.deleteOne({ _id: new ObjectId(id) }); + return result.deletedCount > 0; + } + + async deleteBySourceId(sourceId: string): Promise { + await this.collection.deleteMany({ sourceId }); + } + + async deleteByProjectId(projectId: string): Promise { + await this.collection.deleteMany({ projectId }); + } +} \ No newline at end of file diff --git a/apps/rowboat/src/infrastructure/repositories/mongodb.data-sources.repository.ts b/apps/rowboat/src/infrastructure/repositories/mongodb.data-sources.repository.ts new file mode 100644 index 00000000..30c9fdd7 --- /dev/null +++ b/apps/rowboat/src/infrastructure/repositories/mongodb.data-sources.repository.ts @@ -0,0 +1,218 @@ +import { z } from "zod"; +import { ObjectId } from "mongodb"; +import { db } from "@/app/lib/mongodb"; +import { DataSource } from "@/src/entities/models/data-source"; +import { + CreateSchema, + IDataSourcesRepository, + ListFiltersSchema, + ReleasePayloadSchema, + 
UpdateSchema,
+} from "@/src/application/repositories/data-sources.repository.interface";
+import { PaginatedList } from "@/src/entities/common/paginated-list";
+import { NotFoundError } from "@/src/entities/errors/common";
+
+/**
+ * MongoDB document schema for DataSource.
+ * Excludes the 'id' field as it's represented by MongoDB's '_id'.
+ */
+const DocSchema = DataSource.omit({ id: true });
+
+/**
+ * MongoDB implementation of the DataSources repository.
+ */
+export class MongoDBDataSourcesRepository implements IDataSourcesRepository {
+    private readonly collection = db.collection<z.infer<typeof DocSchema>>("sources");
+
+    async create(data: z.infer<typeof CreateSchema>): Promise<z.infer<typeof DataSource>> {
+        const now = new Date().toISOString();
+        const _id = new ObjectId();
+
+        const doc: z.infer<typeof DocSchema> = {
+            ...data,
+            active: true,
+            attempts: 0,
+            version: 1,
+            createdAt: now,
+            error: null,
+            billingError: null,
+            lastAttemptAt: null,
+            lastUpdatedAt: null,
+        };
+
+        await this.collection.insertOne({
+            ...doc,
+            _id,
+        });
+
+        return {
+            ...doc,
+            id: _id.toString(),
+        };
+    }
+
+    async fetch(id: string): Promise<z.infer<typeof DataSource> | null> {
+        const result = await this.collection.findOne({ _id: new ObjectId(id) });
+        if (!result) return null;
+
+        const { _id, ...rest } = result;
+        return {
+            ...rest,
+            id: _id.toString(),
+        };
+    }
+
+    async list(
+        projectId: string,
+        filters?: z.infer<typeof ListFiltersSchema>,
+        cursor?: string,
+        limit: number = 50
+    ): Promise<z.infer<ReturnType<typeof PaginatedList<typeof DataSource>>>> {
+        const query: any = { projectId, status: { $ne: "deleted" } };
+
+        // Default behavior: exclude deleted unless explicitly asked for
+        if (filters?.deleted === true) {
+            query.status = "deleted";
+        }
+
+        if (typeof filters?.active === "boolean") {
+            query.active = filters.active;
+        }
+
+        if (cursor) {
+            query._id = { $lt: new ObjectId(cursor) };
+        }
+
+        const _limit = Math.min(limit, 50);
+
+        const results = await this.collection
+            .find(query)
+            .sort({ _id: -1 })
+            .limit(_limit + 1)
+            .toArray();
+
+        const hasNextPage = results.length > _limit;
+        const items = results.slice(0, _limit).map((doc: any) => {
+            const { _id, ...rest } = doc;
+            return {
+                ...rest,
+                id: _id.toString(),
+            };
+        });
+
+        return {
+            items,
+            nextCursor: hasNextPage ? results[_limit - 1]._id.toString() : null,
+        };
+    }
+
+    async update(
+        id: string,
+        data: z.infer<typeof UpdateSchema>,
+        bumpVersion?: boolean
+    ): Promise<z.infer<typeof DataSource>> {
+        const now = new Date().toISOString();
+
+        const result = await this.collection.findOneAndUpdate(
+            { _id: new ObjectId(id) },
+            {
+                $set: {
+                    ...data,
+                    lastUpdatedAt: now,
+                },
+                ...(bumpVersion ? { $inc: { version: 1 } } : {}),
+            },
+            { returnDocument: "after" }
+        );
+
+        if (!result) {
+            throw new NotFoundError(`DataSource ${id} not found`);
+        }
+
+        const { _id, ...rest } = result;
+        return {
+            ...rest,
+            id: _id.toString(),
+        };
+    }
+
+    async delete(id: string): Promise<boolean> {
+        const result = await this.collection.deleteOne({ _id: new ObjectId(id) });
+        return result.deletedCount > 0;
+    }
+
+    async deleteByProjectId(projectId: string): Promise<void> {
+        await this.collection.deleteMany({ projectId });
+    }
+
+    async pollDeleteJob(): Promise<z.infer<typeof DataSource> | null> {
+        const result = await this.collection.findOneAndUpdate({
+            status: "deleted",
+            $or: [
+                { attempts: { $exists: false } },
+                { attempts: { $lte: 3 } }
+            ]
+        }, { $set: { lastAttemptAt: new Date().toISOString() }, $inc: { attempts: 1 } }, { returnDocument: "after", sort: { createdAt: 1 } });
+        if (!result) return null;
+
+        const { _id, ...rest } = result;
+        return { ...rest, id: _id.toString() };
+    }
+
+    async pollPendingJob(): Promise<z.infer<typeof DataSource> | null> {
+        const now = Date.now();
+
+        const result = await this.collection.findOneAndUpdate({
+            $and: [
+                {
+                    $or: [
+                        // if the job has never been attempted
+                        {
+                            status: "pending",
+                            attempts: 0,
+                        },
+                        // if the job was attempted but wasn't completed in the last hour
+                        {
+                            status: "pending",
+                            lastAttemptAt: { $lt: new Date(now - 60 * 60 * 1000).toISOString() },
+                        },
+                        // if the job errored out but hasn't been retried 3 times yet
+                        {
+                            status: "error",
+                            attempts: { $lt: 3 },
+                        },
+                        // if the job errored out but hasn't been retried in the last hr
+                        {
+                            status: "error",
+                            lastAttemptAt: { $lt: new Date(now - 60 * 60 * 1000).toISOString() },
+                        },
+                    ]
+                }
+            ]
+        }, {
+            $set: {
+                status: "pending",
+                lastAttemptAt: new Date().toISOString(),
+            },
+            $inc: {
+                attempts: 1
+            },
+        }, {
+            returnDocument: "after", sort: { createdAt: 1 }
+        });
+        if (!result) return null;
+
+        const { _id, ...rest } = result;
+        return { ...rest, id: _id.toString() };
+    }
+
+    async release(id: string, version: number, updates: z.infer<typeof ReleasePayloadSchema>): Promise<void> {
+        await this.collection.updateOne({
+            _id: new ObjectId(id),
+            version,
+        }, { $set: {
+            ...updates,
+            lastUpdatedAt: new Date().toISOString(),
+        } });
+    }
+}
\ No newline at end of file
diff --git a/apps/rowboat/src/infrastructure/services/local.uploads-storage.service.ts b/apps/rowboat/src/infrastructure/services/local.uploads-storage.service.ts
new file mode 100644
index 00000000..458f95af
--- /dev/null
+++ b/apps/rowboat/src/infrastructure/services/local.uploads-storage.service.ts
@@ -0,0 +1,59 @@
+import { IDataSourceDocsRepository } from "@/src/application/repositories/data-source-docs.repository.interface";
+import { IUploadsStorageService } from "@/src/application/services/uploads-storage.service.interface";
+import fs from "fs";
+import path from "path";
+import { NotFoundError } from "@/src/entities/errors/common";
+
+const UPLOADS_DIR = process.env.RAG_UPLOADS_DIR || '/uploads';
+
+/**
+ * Uploads storage backed by the local filesystem under UPLOADS_DIR.
+ */
+export class LocalUploadsStorageService implements IUploadsStorageService {
+    private readonly dataSourceDocsRepository: IDataSourceDocsRepository;
+
+    constructor({
+        dataSourceDocsRepository,
+    }: {
+        dataSourceDocsRepository: IDataSourceDocsRepository,
+    }) {
+        this.dataSourceDocsRepository = dataSourceDocsRepository;
+    }
+
+    /** Returns the app-relative upload route for `key`; `contentType` is not used here. */
+    async getUploadUrl(key: string, contentType: string): Promise<string> {
+        return `/api/uploads/${key}`;
+    }
+
+    /** Returns the app-relative download route for `fileId`. */
+    async getDownloadUrl(fileId: string): Promise<string> {
+        return `/api/uploads/${fileId}`;
+    }
+
+    /**
+     * Reads the stored bytes of a local-file doc.
+     * @throws NotFoundError when the doc is missing, is not a local file, or its
+     * stored path does not point at a file inside UPLOADS_DIR.
+     */
+    async getFileContents(fileId: string): Promise<Buffer> {
+        const file = await this.dataSourceDocsRepository.fetch(fileId);
+        if (!file) {
+            throw new NotFoundError('File not found');
+        }
+        if (file.data.type !== 'file_local') {
+            throw new NotFoundError('File is not a local file');
+        }
+        const relativePath = file.data.path.split('/api/uploads/')[1];
+        if (!relativePath) {
+            throw new NotFoundError('File not found');
+        }
+        // Resolve against the uploads root and refuse anything that escapes it (e.g. "../").
+        const root = path.resolve(UPLOADS_DIR);
+        const filePath = path.resolve(root, relativePath);
+        const rel = path.relative(root, filePath);
+        if (rel === '' || rel === '..' || rel.startsWith(`..${path.sep}`) || path.isAbsolute(rel)) {
+            throw new NotFoundError('File not found');
+        }
+        return await fs.promises.readFile(filePath);
+    }
+}
\ No newline at end of file
diff --git a/apps/rowboat/src/infrastructure/services/s3.uploads-storage.service.ts b/apps/rowboat/src/infrastructure/services/s3.uploads-storage.service.ts
new file mode 100644
index 00000000..8d13eb3f
--- /dev/null
+++ b/apps/rowboat/src/infrastructure/services/s3.uploads-storage.service.ts
@@ -0,0 +1,87 @@
+import { IDataSourceDocsRepository } from "@/src/application/repositories/data-source-docs.repository.interface";
+import { IUploadsStorageService } from "@/src/application/services/uploads-storage.service.interface";
+import { NotFoundError } from "@/src/entities/errors/common";
+import { S3Client, GetObjectCommand, PutObjectCommand } from "@aws-sdk/client-s3";
+import { getSignedUrl } from "@aws-sdk/s3-request-presigner";
+
+/**
+ * Uploads storage backed by an S3 bucket; uploads and downloads go through pre-signed URLs.
+ */
+export class S3UploadsStorageService implements IUploadsStorageService {
+    private readonly s3Client: S3Client;
+    private readonly bucket: string;
+    private readonly dataSourceDocsRepository: IDataSourceDocsRepository;
+
+    constructor({
+        dataSourceDocsRepository,
+    }: {
+        dataSourceDocsRepository: IDataSourceDocsRepository,
+    }) {
+        this.dataSourceDocsRepository = dataSourceDocsRepository;
+        this.s3Client = new S3Client({
+            // docker-compose exports RAG_UPLOADS_S3_REGION; UPLOADS_AWS_REGION stays as a fallback.
+            region: process.env.RAG_UPLOADS_S3_REGION || process.env.UPLOADS_AWS_REGION || 'us-east-1',
+            credentials: {
+                accessKeyId: process.env.AWS_ACCESS_KEY_ID || '',
+                secretAccessKey: process.env.AWS_SECRET_ACCESS_KEY || '',
+            },
+        });
+        this.bucket = process.env.RAG_UPLOADS_S3_BUCKET || '';
+    }
+
+    /** Returns a pre-signed PUT URL for `key` with `expiresIn: 600`. */
+    async getUploadUrl(key: string, contentType: string): Promise<string> {
+        const command = new PutObjectCommand({
+            Bucket: this.bucket,
+            Key: key,
+            ContentType: contentType,
+        });
+        return await getSignedUrl(this.s3Client, command, { expiresIn: 600 });
+    }
+
+    /**
+     * Returns a pre-signed GET URL for the doc's object with `expiresIn: 60`.
+     * @throws NotFoundError when the doc is missing or is not an S3 file.
+     */
+    async getDownloadUrl(fileId: string): Promise<string> {
+        const key = await this.resolveS3Key(fileId);
+        const command = new GetObjectCommand({
+            Bucket: this.bucket,
+            Key: key,
+        });
+        return await getSignedUrl(this.s3Client, command, { expiresIn: 60 });
+    }
+
+    /**
+     * Downloads the doc's object and returns its bytes.
+     * @throws NotFoundError when the doc is missing, is not an S3 file, or the response has no body.
+     */
+    async getFileContents(fileId: string): Promise<Buffer> {
+        const key = await this.resolveS3Key(fileId);
+        const command = new GetObjectCommand({
+            Bucket: this.bucket,
+            Key: key,
+        });
+        const response = await this.s3Client.send(command);
+        if (!response.Body) {
+            throw new NotFoundError('File not found');
+        }
+        const chunks: Uint8Array[] = [];
+        for await (const chunk of response.Body as AsyncIterable<Uint8Array>) {
+            chunks.push(chunk);
+        }
+        return Buffer.concat(chunks);
+    }
+
+    /** Looks up the doc and returns its S3 object key, rejecting missing or non-S3 docs. */
+    private async resolveS3Key(fileId: string): Promise<string> {
+        const file = await this.dataSourceDocsRepository.fetch(fileId);
+        if (!file) {
+            throw new NotFoundError('File not found');
+        }
+        if (file.data.type !== 'file_s3') {
+            throw new NotFoundError('File is not an S3 file');
+        }
+        return file.data.s3Key;
+    }
+}
\ No newline at end of file
diff --git a/apps/rowboat/src/interface-adapters/controllers/data-sources/add-docs-to-data-source.controller.ts b/apps/rowboat/src/interface-adapters/controllers/data-sources/add-docs-to-data-source.controller.ts
new file mode 100644
index 00000000..415d19a1
--- /dev/null
+++ b/apps/rowboat/src/interface-adapters/controllers/data-sources/add-docs-to-data-source.controller.ts
@@ -0,0 +1,33 @@
+import { BadRequestError } from "@/src/entities/errors/common";
+import z from "zod";
+import { IAddDocsToDataSourceUseCase } from "@/src/application/use-cases/data-sources/add-docs-to-data-source.use-case";
+import { CreateSchema as DocCreateSchema } from "@/src/application/repositories/data-source-docs.repository.interface";
+
+const inputSchema = z.object({
+    caller: z.enum(["user", "api"]),
+    userId: z.string().optional(),
+    apiKey: z.string().optional(),
+    sourceId:
z.string(),
+    docs: z.array(DocCreateSchema),
+});
+
+export interface IAddDocsToDataSourceController {
+    execute(request: z.infer<typeof inputSchema>): Promise<Awaited<ReturnType<IAddDocsToDataSourceUseCase["execute"]>>>;
+}
+
+export class AddDocsToDataSourceController implements IAddDocsToDataSourceController {
+    private readonly addDocsToDataSourceUseCase: IAddDocsToDataSourceUseCase;
+
+    constructor({ addDocsToDataSourceUseCase }: { addDocsToDataSourceUseCase: IAddDocsToDataSourceUseCase }) {
+        this.addDocsToDataSourceUseCase = addDocsToDataSourceUseCase;
+    }
+
+    async execute(request: z.infer<typeof inputSchema>): Promise<Awaited<ReturnType<IAddDocsToDataSourceUseCase["execute"]>>> {
+        const result = inputSchema.safeParse(request);
+        if (!result.success) {
+            throw new BadRequestError(`Invalid request: ${JSON.stringify(result.error)}`);
+        }
+        const { caller, userId, apiKey, sourceId, docs } = result.data;
+        return await this.addDocsToDataSourceUseCase.execute({ caller, userId, apiKey, sourceId, docs });
+    }
+}
\ No newline at end of file
diff --git a/apps/rowboat/src/interface-adapters/controllers/data-sources/create-data-source.controller.ts b/apps/rowboat/src/interface-adapters/controllers/data-sources/create-data-source.controller.ts
new file mode 100644
index 00000000..3c1cc270
--- /dev/null
+++ b/apps/rowboat/src/interface-adapters/controllers/data-sources/create-data-source.controller.ts
@@ -0,0 +1,37 @@
+import { BadRequestError } from "@/src/entities/errors/common";
+import z from "zod";
+import { DataSource } from "@/src/entities/models/data-source";
+import { ICreateDataSourceUseCase } from "@/src/application/use-cases/data-sources/create-data-source.use-case";
+import { CreateSchema } from "@/src/application/repositories/data-sources.repository.interface";
+
+const inputSchema = z.object({
+    caller: z.enum(["user", "api"]),
+    userId: z.string().optional(),
+    apiKey: z.string().optional(),
+    data: CreateSchema,
+});
+
+export interface ICreateDataSourceController {
+    execute(request: z.infer<typeof inputSchema>): Promise<z.infer<typeof DataSource>>;
+}
+
+/**
+ * Validates the raw request against `inputSchema` and forwards it to the create use case.
+ * `execute` throws BadRequestError when validation fails.
+ */
+export class CreateDataSourceController implements ICreateDataSourceController {
+    private readonly createDataSourceUseCase: ICreateDataSourceUseCase;
+
+    constructor({ createDataSourceUseCase }: { createDataSourceUseCase: ICreateDataSourceUseCase }) {
+        this.createDataSourceUseCase = createDataSourceUseCase;
+    }
+
+    async execute(request: z.infer<typeof inputSchema>): Promise<z.infer<typeof DataSource>> {
+        const result = inputSchema.safeParse(request);
+        if (!result.success) {
+            throw new BadRequestError(`Invalid request: ${JSON.stringify(result.error)}`);
+        }
+        const { caller, userId, apiKey, data } = result.data;
+        return await this.createDataSourceUseCase.execute({ caller, userId, apiKey, data });
+    }
+}
\ No newline at end of file
diff --git a/apps/rowboat/src/interface-adapters/controllers/data-sources/delete-data-source.controller.ts b/apps/rowboat/src/interface-adapters/controllers/data-sources/delete-data-source.controller.ts
new file mode 100644
index 00000000..b2946064
--- /dev/null
+++ b/apps/rowboat/src/interface-adapters/controllers/data-sources/delete-data-source.controller.ts
@@ -0,0 +1,36 @@
+import { BadRequestError } from "@/src/entities/errors/common";
+import z from "zod";
+import { IDeleteDataSourceUseCase } from "@/src/application/use-cases/data-sources/delete-data-source.use-case";
+
+const inputSchema = z.object({
+    caller: z.enum(["user", "api"]),
+    userId: z.string().optional(),
+    apiKey: z.string().optional(),
+    sourceId: z.string(),
+});
+
+export interface IDeleteDataSourceController {
+    execute(request: z.infer<typeof inputSchema>): Promise<Awaited<ReturnType<IDeleteDataSourceUseCase["execute"]>>>;
+}
+
+/**
+ * Validates the raw request against `inputSchema` and forwards it to the delete use case.
+ * `execute` throws BadRequestError when validation fails.
+ * The result type is taken from the use case's `execute` signature.
+ */
+export class DeleteDataSourceController implements IDeleteDataSourceController {
+    private readonly deleteDataSourceUseCase: IDeleteDataSourceUseCase;
+
+    constructor({ deleteDataSourceUseCase }: { deleteDataSourceUseCase: IDeleteDataSourceUseCase }) {
+        this.deleteDataSourceUseCase = deleteDataSourceUseCase;
+    }
+
+    async execute(request: z.infer<typeof inputSchema>): Promise<Awaited<ReturnType<IDeleteDataSourceUseCase["execute"]>>> {
+        const result = inputSchema.safeParse(request);
+        if (!result.success) {
+            throw new BadRequestError(`Invalid request: ${JSON.stringify(result.error)}`);
+        }
+        const { caller, userId, apiKey, sourceId } = result.data;
+        return await this.deleteDataSourceUseCase.execute({ caller, userId, apiKey, sourceId });
+    }
+}
\ No newline at end of file
diff --git a/apps/rowboat/src/interface-adapters/controllers/data-sources/delete-doc-from-data-source.controller.ts b/apps/rowboat/src/interface-adapters/controllers/data-sources/delete-doc-from-data-source.controller.ts
new file mode 100644
index 00000000..f3ab05c5
--- /dev/null
+++ b/apps/rowboat/src/interface-adapters/controllers/data-sources/delete-doc-from-data-source.controller.ts
@@ -0,0 +1,36 @@
+import { BadRequestError } from "@/src/entities/errors/common";
+import z from "zod";
+import { IDeleteDocFromDataSourceUseCase } from "@/src/application/use-cases/data-sources/delete-doc-from-data-source.use-case";
+
+const inputSchema = z.object({
+    caller: z.enum(["user", "api"]),
+    userId: z.string().optional(),
+    apiKey: z.string().optional(),
+    docId: z.string(),
+});
+
+export interface IDeleteDocFromDataSourceController {
+    execute(request: z.infer<typeof inputSchema>): Promise<Awaited<ReturnType<IDeleteDocFromDataSourceUseCase["execute"]>>>;
+}
+
+/**
+ * Validates the raw request against `inputSchema` and forwards it to the delete-doc use case.
+ * `execute` throws BadRequestError when validation fails.
+ * The result type is taken from the use case's `execute` signature.
+ */
+export class DeleteDocFromDataSourceController implements IDeleteDocFromDataSourceController {
+    private readonly deleteDocFromDataSourceUseCase: IDeleteDocFromDataSourceUseCase;
+
+    constructor({ deleteDocFromDataSourceUseCase }: { deleteDocFromDataSourceUseCase: IDeleteDocFromDataSourceUseCase }) {
+        this.deleteDocFromDataSourceUseCase = deleteDocFromDataSourceUseCase;
+    }
+
+    async execute(request: z.infer<typeof inputSchema>): Promise<Awaited<ReturnType<IDeleteDocFromDataSourceUseCase["execute"]>>> {
+        const result = inputSchema.safeParse(request);
+        if (!result.success) {
+            throw new BadRequestError(`Invalid request: ${JSON.stringify(result.error)}`);
+        }
+        const { caller, userId, apiKey, docId } = result.data;
+        return await this.deleteDocFromDataSourceUseCase.execute({ caller, userId, apiKey, docId });
+    }
+}
\ No newline at end of file
diff --git a/apps/rowboat/src/interface-adapters/controllers/data-sources/fetch-data-source.controller.ts
b/apps/rowboat/src/interface-adapters/controllers/data-sources/fetch-data-source.controller.ts
new file mode 100644
index 00000000..7fd96e59
--- /dev/null
+++ b/apps/rowboat/src/interface-adapters/controllers/data-sources/fetch-data-source.controller.ts
@@ -0,0 +1,37 @@
+import { BadRequestError } from "@/src/entities/errors/common";
+import z from "zod";
+import { DataSource } from "@/src/entities/models/data-source";
+import { IFetchDataSourceUseCase } from "@/src/application/use-cases/data-sources/fetch-data-source.use-case";
+
+const inputSchema = z.object({
+    caller: z.enum(["user", "api"]),
+    userId: z.string().optional(),
+    apiKey: z.string().optional(),
+    sourceId: z.string(),
+});
+
+export interface IFetchDataSourceController {
+    execute(request: z.infer<typeof inputSchema>): Promise<z.infer<typeof DataSource>>;
+}
+
+/**
+ * Validates the raw request against `inputSchema` and forwards it to the fetch use case.
+ * `execute` throws BadRequestError when validation fails.
+ */
+export class FetchDataSourceController implements IFetchDataSourceController {
+    private readonly fetchDataSourceUseCase: IFetchDataSourceUseCase;
+
+    constructor({ fetchDataSourceUseCase }: { fetchDataSourceUseCase: IFetchDataSourceUseCase }) {
+        this.fetchDataSourceUseCase = fetchDataSourceUseCase;
+    }
+
+    async execute(request: z.infer<typeof inputSchema>): Promise<z.infer<typeof DataSource>> {
+        const result = inputSchema.safeParse(request);
+        if (!result.success) {
+            throw new BadRequestError(`Invalid request: ${JSON.stringify(result.error)}`);
+        }
+
+        const { caller, userId, apiKey, sourceId } = result.data;
+        return await this.fetchDataSourceUseCase.execute({ caller, userId, apiKey, sourceId });
+    }
+}
\ No newline at end of file
diff --git a/apps/rowboat/src/interface-adapters/controllers/data-sources/get-download-url-for-file.controller.ts b/apps/rowboat/src/interface-adapters/controllers/data-sources/get-download-url-for-file.controller.ts
new file mode 100644
index 00000000..7a21dd10
--- /dev/null
+++ b/apps/rowboat/src/interface-adapters/controllers/data-sources/get-download-url-for-file.controller.ts
@@ -0,0 +1,36 @@
+import { BadRequestError } from "@/src/entities/errors/common";
+import z from "zod";
+import { IGetDownloadUrlForFileUseCase } from "@/src/application/use-cases/data-sources/get-download-url-for-file.use-case";
+
+const inputSchema = z.object({
+    caller: z.enum(["user", "api"]),
+    userId: z.string().optional(),
+    apiKey: z.string().optional(),
+    fileId: z.string(),
+});
+
+export interface IGetDownloadUrlForFileController {
+    execute(request: z.infer<typeof inputSchema>): Promise<Awaited<ReturnType<IGetDownloadUrlForFileUseCase["execute"]>>>;
+}
+
+/**
+ * Validates the raw request against `inputSchema` and forwards it to the download-URL use case.
+ * `execute` throws BadRequestError when validation fails.
+ * The result type is taken from the use case's `execute` signature.
+ */
+export class GetDownloadUrlForFileController implements IGetDownloadUrlForFileController {
+    private readonly getDownloadUrlForFileUseCase: IGetDownloadUrlForFileUseCase;
+
+    constructor({ getDownloadUrlForFileUseCase }: { getDownloadUrlForFileUseCase: IGetDownloadUrlForFileUseCase }) {
+        this.getDownloadUrlForFileUseCase = getDownloadUrlForFileUseCase;
+    }
+
+    async execute(request: z.infer<typeof inputSchema>): Promise<Awaited<ReturnType<IGetDownloadUrlForFileUseCase["execute"]>>> {
+        const result = inputSchema.safeParse(request);
+        if (!result.success) {
+            throw new BadRequestError(`Invalid request: ${JSON.stringify(result.error)}`);
+        }
+        const { caller, userId, apiKey, fileId } = result.data;
+        return await this.getDownloadUrlForFileUseCase.execute({ caller, userId, apiKey, fileId });
+    }
+}
\ No newline at end of file
diff --git a/apps/rowboat/src/interface-adapters/controllers/data-sources/get-upload-urls-for-files.controller.ts b/apps/rowboat/src/interface-adapters/controllers/data-sources/get-upload-urls-for-files.controller.ts
new file mode 100644
index 00000000..247a29f6
--- /dev/null
+++ b/apps/rowboat/src/interface-adapters/controllers/data-sources/get-upload-urls-for-files.controller.ts
@@ -0,0 +1,32 @@
+import { BadRequestError } from "@/src/entities/errors/common";
+import z from "zod";
+import { IGetUploadUrlsForFilesUseCase } from "@/src/application/use-cases/data-sources/get-upload-urls-for-files.use-case";
+
+const inputSchema = z.object({
+    caller: z.enum(["user", "api"]),
+    userId: z.string().optional(),
+    apiKey: z.string().optional(),
+    sourceId: z.string(),
+    files: z.array(z.object({ name: z.string(), type: z.string(), size: z.number() })),
+});
+
+export interface IGetUploadUrlsForFilesController {
+    execute(request: z.infer<typeof inputSchema>): Promise<{ fileId: string, uploadUrl: string, path: string }[]>;
+}
+
+export class GetUploadUrlsForFilesController implements IGetUploadUrlsForFilesController {
+    private readonly getUploadUrlsForFilesUseCase: IGetUploadUrlsForFilesUseCase;
+
+    constructor({ getUploadUrlsForFilesUseCase }: { getUploadUrlsForFilesUseCase: IGetUploadUrlsForFilesUseCase }) {
+        this.getUploadUrlsForFilesUseCase = getUploadUrlsForFilesUseCase;
+    }
+
+    async execute(request: z.infer<typeof inputSchema>): Promise<{ fileId: string, uploadUrl: string, path: string }[]> {
+        const result = inputSchema.safeParse(request);
+        if (!result.success) {
+            throw new BadRequestError(`Invalid request: ${JSON.stringify(result.error)}`);
+        }
+        const { caller, userId, apiKey, sourceId, files } = result.data;
+        return await this.getUploadUrlsForFilesUseCase.execute({ caller, userId, apiKey, sourceId, files });
+    }
+}
\ No newline at end of file
diff --git a/apps/rowboat/src/interface-adapters/controllers/data-sources/list-data-sources.controller.ts b/apps/rowboat/src/interface-adapters/controllers/data-sources/list-data-sources.controller.ts
new file mode 100644
index 00000000..39ed5eba
--- /dev/null
+++ b/apps/rowboat/src/interface-adapters/controllers/data-sources/list-data-sources.controller.ts
@@ -0,0 +1,36 @@
+import { BadRequestError } from "@/src/entities/errors/common";
+import z from "zod";
+import { DataSource } from "@/src/entities/models/data-source";
+import { IListDataSourcesUseCase } from "@/src/application/use-cases/data-sources/list-data-sources.use-case";
+
+const inputSchema = z.object({
+    caller: z.enum(["user", "api"]),
+    userId: z.string().optional(),
+    apiKey: z.string().optional(),
+    projectId: z.string(),
+});
+
+export interface IListDataSourcesController {
+    execute(request: z.infer<typeof inputSchema>): Promise<z.infer<typeof DataSource>[]>;
+}
+
+/**
+ * Validates the raw request against `inputSchema` and forwards it to the list use case.
+ * `execute` throws BadRequestError when validation fails.
+ */
+export class ListDataSourcesController implements IListDataSourcesController {
+    private readonly listDataSourcesUseCase: IListDataSourcesUseCase;
+
+    constructor({ listDataSourcesUseCase }: { listDataSourcesUseCase: IListDataSourcesUseCase }) {
+        this.listDataSourcesUseCase = listDataSourcesUseCase;
+    }
+
+    async execute(request: z.infer<typeof inputSchema>): Promise<z.infer<typeof DataSource>[]> {
+        const result = inputSchema.safeParse(request);
+        if (!result.success) {
+            throw new BadRequestError(`Invalid request: ${JSON.stringify(result.error)}`);
+        }
+        const { caller, userId, apiKey, projectId } = result.data;
+        return await this.listDataSourcesUseCase.execute({ caller, userId, apiKey, projectId });
+    }
+}
\ No newline at end of file
diff --git a/apps/rowboat/src/interface-adapters/controllers/data-sources/list-docs-in-data-source.controller.ts b/apps/rowboat/src/interface-adapters/controllers/data-sources/list-docs-in-data-source.controller.ts
new file mode 100644
index 00000000..3e05347c
--- /dev/null
+++ b/apps/rowboat/src/interface-adapters/controllers/data-sources/list-docs-in-data-source.controller.ts
@@ -0,0 +1,36 @@
+import { BadRequestError } from "@/src/entities/errors/common";
+import z from "zod";
+import { IListDocsInDataSourceUseCase } from "@/src/application/use-cases/data-sources/list-docs-in-data-source.use-case";
+import { DataSourceDoc } from "@/src/entities/models/data-source-doc";
+
+const inputSchema = z.object({
+    caller: z.enum(["user", "api"]),
+    userId: z.string().optional(),
+    apiKey: z.string().optional(),
+    sourceId: z.string(),
+});
+
+export interface IListDocsInDataSourceController {
+    execute(request: z.infer<typeof inputSchema>): Promise<z.infer<typeof DataSourceDoc>[]>;
+}
+
+/**
+ * Validates the raw request against `inputSchema` and forwards it to the list-docs use case.
+ * `execute` throws BadRequestError when validation fails.
+ */
+export class ListDocsInDataSourceController implements IListDocsInDataSourceController {
+    private readonly listDocsInDataSourceUseCase: IListDocsInDataSourceUseCase;
+
+    constructor({ listDocsInDataSourceUseCase }: { listDocsInDataSourceUseCase: IListDocsInDataSourceUseCase }) {
+        this.listDocsInDataSourceUseCase = listDocsInDataSourceUseCase;
+    }
+
+    async execute(request: z.infer<typeof inputSchema>): Promise<z.infer<typeof DataSourceDoc>[]> {
+        const result = inputSchema.safeParse(request);
+        if (!result.success) {
+            throw new BadRequestError(`Invalid request: ${JSON.stringify(result.error)}`);
+        }
+        const { caller, userId, apiKey, sourceId } = result.data;
+        return await this.listDocsInDataSourceUseCase.execute({ caller, userId, apiKey, sourceId });
+    }
+}
\ No newline at end of file
diff --git a/apps/rowboat/src/interface-adapters/controllers/data-sources/recrawl-web-data-source.controller.ts b/apps/rowboat/src/interface-adapters/controllers/data-sources/recrawl-web-data-source.controller.ts
new file mode 100644
index 00000000..63d0d15d
--- /dev/null
+++ b/apps/rowboat/src/interface-adapters/controllers/data-sources/recrawl-web-data-source.controller.ts
@@ -0,0 +1,36 @@
+import { BadRequestError } from "@/src/entities/errors/common";
+import z from "zod";
+import { IRecrawlWebDataSourceUseCase } from "@/src/application/use-cases/data-sources/recrawl-web-data-source.use-case";
+
+const inputSchema = z.object({
+    caller: z.enum(["user", "api"]),
+    userId: z.string().optional(),
+    apiKey: z.string().optional(),
+    sourceId: z.string(),
+});
+
+export interface IRecrawlWebDataSourceController {
+    execute(request: z.infer<typeof inputSchema>): Promise<Awaited<ReturnType<IRecrawlWebDataSourceUseCase["execute"]>>>;
+}
+
+/**
+ * Validates the raw request against `inputSchema` and forwards it to the recrawl use case.
+ * `execute` throws BadRequestError when validation fails.
+ * The result type is taken from the use case's `execute` signature.
+ */
+export class RecrawlWebDataSourceController implements IRecrawlWebDataSourceController {
+    private readonly recrawlWebDataSourceUseCase: IRecrawlWebDataSourceUseCase;
+
+    constructor({ recrawlWebDataSourceUseCase }: { recrawlWebDataSourceUseCase: IRecrawlWebDataSourceUseCase }) {
+        this.recrawlWebDataSourceUseCase = recrawlWebDataSourceUseCase;
+    }
+
+    async execute(request: z.infer<typeof inputSchema>): Promise<Awaited<ReturnType<IRecrawlWebDataSourceUseCase["execute"]>>> {
+        const result = inputSchema.safeParse(request);
+        if (!result.success) {
+            throw new BadRequestError(`Invalid request: ${JSON.stringify(result.error)}`);
+        }
+        const { caller, userId, apiKey, sourceId } = result.data;
+        return await this.recrawlWebDataSourceUseCase.execute({ caller, userId, apiKey, sourceId });
+    }
+}
\ No newline at end of file
diff --git
a/apps/rowboat/src/interface-adapters/controllers/data-sources/toggle-data-source.controller.ts b/apps/rowboat/src/interface-adapters/controllers/data-sources/toggle-data-source.controller.ts
new file mode 100644
index 00000000..1990f418
--- /dev/null
+++ b/apps/rowboat/src/interface-adapters/controllers/data-sources/toggle-data-source.controller.ts
@@ -0,0 +1,37 @@
+import { BadRequestError } from "@/src/entities/errors/common";
+import z from "zod";
+import { DataSource } from "@/src/entities/models/data-source";
+import { IToggleDataSourceUseCase } from "@/src/application/use-cases/data-sources/toggle-data-source.use-case";
+
+const inputSchema = z.object({
+    caller: z.enum(["user", "api"]),
+    userId: z.string().optional(),
+    apiKey: z.string().optional(),
+    sourceId: z.string(),
+    active: z.boolean(),
+});
+
+export interface IToggleDataSourceController {
+    execute(request: z.infer<typeof inputSchema>): Promise<z.infer<typeof DataSource>>;
+}
+
+/**
+ * Validates the raw request against `inputSchema` and forwards it to the toggle use case.
+ * `execute` throws BadRequestError when validation fails.
+ */
+export class ToggleDataSourceController implements IToggleDataSourceController {
+    private readonly toggleDataSourceUseCase: IToggleDataSourceUseCase;
+
+    constructor({ toggleDataSourceUseCase }: { toggleDataSourceUseCase: IToggleDataSourceUseCase }) {
+        this.toggleDataSourceUseCase = toggleDataSourceUseCase;
+    }
+
+    async execute(request: z.infer<typeof inputSchema>): Promise<z.infer<typeof DataSource>> {
+        const result = inputSchema.safeParse(request);
+        if (!result.success) {
+            throw new BadRequestError(`Invalid request: ${JSON.stringify(result.error)}`);
+        }
+        const { caller, userId, apiKey, sourceId, active } = result.data;
+        return await this.toggleDataSourceUseCase.execute({ caller, userId, apiKey, sourceId, active });
+    }
+}
\ No newline at end of file
diff --git a/apps/rowboat/src/interface-adapters/controllers/data-sources/update-data-source.controller.ts b/apps/rowboat/src/interface-adapters/controllers/data-sources/update-data-source.controller.ts
new file mode 100644
index 00000000..4e49aabd
--- /dev/null
+++
b/apps/rowboat/src/interface-adapters/controllers/data-sources/update-data-source.controller.ts
@@ -0,0 +1,42 @@
+import { BadRequestError } from "@/src/entities/errors/common";
+import z from "zod";
+import { DataSource } from "@/src/entities/models/data-source";
+import { IUpdateDataSourceUseCase } from "@/src/application/use-cases/data-sources/update-data-source.use-case";
+
+const inputSchema = z.object({
+    caller: z.enum(["user", "api"]),
+    userId: z.string().optional(),
+    apiKey: z.string().optional(),
+    sourceId: z.string(),
+    data: DataSource
+        .pick({
+            description: true,
+        })
+        .partial(),
+});
+
+export interface IUpdateDataSourceController {
+    execute(request: z.infer<typeof inputSchema>): Promise<z.infer<typeof DataSource>>;
+}
+
+/**
+ * Validates the raw request against `inputSchema` and forwards it to the update use case.
+ * `execute` throws BadRequestError when validation fails.
+ */
+export class UpdateDataSourceController implements IUpdateDataSourceController {
+    private readonly updateDataSourceUseCase: IUpdateDataSourceUseCase;
+
+    constructor({ updateDataSourceUseCase }: { updateDataSourceUseCase: IUpdateDataSourceUseCase }) {
+        this.updateDataSourceUseCase = updateDataSourceUseCase;
+    }
+
+    async execute(request: z.infer<typeof inputSchema>): Promise<z.infer<typeof DataSource>> {
+        const result = inputSchema.safeParse(request);
+        if (!result.success) {
+            throw new BadRequestError(`Invalid request: ${JSON.stringify(result.error)}`);
+        }
+
+        const { caller, userId, apiKey, sourceId, data } = result.data;
+        return await this.updateDataSourceUseCase.execute({ caller, userId, apiKey, sourceId, data });
+    }
+}
\ No newline at end of file
diff --git a/docker-compose.yml b/docker-compose.yml index e9cfc176..60d1bc15 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -149,28 +149,29 @@ services: - QDRANT_API_KEY=${QDRANT_API_KEY} restart: no - rag_files_worker: + rag-worker: build: context: ./apps/rowboat dockerfile: scripts.Dockerfile - command: ["npm", "run", "ragFilesWorker"] - profiles: [ "rag_files_worker" ] + command: ["npm", "run", "rag-worker"] + profiles: [ "rag-worker" ] environment: + - GOOGLE_API_KEY=${GOOGLE_API_KEY} + - AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID} + - 
AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY} + - RAG_UPLOADS_S3_BUCKET=${RAG_UPLOADS_S3_BUCKET} + - RAG_UPLOADS_S3_REGION=${RAG_UPLOADS_S3_REGION} + - RAG_UPLOADS_DIR=/app/uploads + - USE_GEMINI_FILE_PARSING=${USE_GEMINI_FILE_PARSING} + - FIRECRAWL_API_KEY=${FIRECRAWL_API_KEY} - OPENAI_API_KEY=${OPENAI_API_KEY} - EMBEDDING_PROVIDER_BASE_URL=${EMBEDDING_PROVIDER_BASE_URL} - EMBEDDING_PROVIDER_API_KEY=${EMBEDDING_PROVIDER_API_KEY} - EMBEDDING_MODEL=${EMBEDDING_MODEL} - MONGODB_CONNECTION_STRING=mongodb://mongo:27017/rowboat - REDIS_URL=redis://redis:6379 - - GOOGLE_API_KEY=${GOOGLE_API_KEY} - - AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID} - - AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY} - - RAG_UPLOADS_S3_BUCKET=${RAG_UPLOADS_S3_BUCKET} - - RAG_UPLOADS_S3_REGION=${RAG_UPLOADS_S3_REGION} - QDRANT_URL=http://qdrant:6333 - QDRANT_API_KEY=${QDRANT_API_KEY} - - RAG_UPLOADS_DIR=/app/uploads - - USE_GEMINI_FILE_PARSING=${USE_GEMINI_FILE_PARSING} - USE_BILLING=${USE_BILLING} - BILLING_API_URL=${BILLING_API_URL} - BILLING_API_KEY=${BILLING_API_KEY} @@ -178,47 +179,6 @@ services: volumes: - uploads:/app/uploads - rag_urls_worker: - build: - context: ./apps/rowboat - dockerfile: scripts.Dockerfile - command: ["npm", "run", "ragUrlsWorker"] - profiles: [ "rag_urls_worker" ] - environment: - - OPENAI_API_KEY=${OPENAI_API_KEY} - - EMBEDDING_PROVIDER_BASE_URL=${EMBEDDING_PROVIDER_BASE_URL} - - EMBEDDING_PROVIDER_API_KEY=${EMBEDDING_PROVIDER_API_KEY} - - EMBEDDING_MODEL=${EMBEDDING_MODEL} - - MONGODB_CONNECTION_STRING=mongodb://mongo:27017/rowboat - - REDIS_URL=redis://redis:6379 - - FIRECRAWL_API_KEY=${FIRECRAWL_API_KEY} - - QDRANT_URL=http://qdrant:6333 - - QDRANT_API_KEY=${QDRANT_API_KEY} - - USE_BILLING=${USE_BILLING} - - BILLING_API_URL=${BILLING_API_URL} - - BILLING_API_KEY=${BILLING_API_KEY} - restart: unless-stopped - - rag_text_worker: - build: - context: ./apps/rowboat - dockerfile: scripts.Dockerfile - command: ["npm", "run", "ragTextWorker"] - profiles: [ "rag_text_worker" 
] - environment: - - OPENAI_API_KEY=${OPENAI_API_KEY} - - EMBEDDING_PROVIDER_BASE_URL=${EMBEDDING_PROVIDER_BASE_URL} - - EMBEDDING_PROVIDER_API_KEY=${EMBEDDING_PROVIDER_API_KEY} - - EMBEDDING_MODEL=${EMBEDDING_MODEL} - - MONGODB_CONNECTION_STRING=mongodb://mongo:27017/rowboat - - REDIS_URL=redis://redis:6379 - - QDRANT_URL=http://qdrant:6333 - - QDRANT_API_KEY=${QDRANT_API_KEY} - - USE_BILLING=${USE_BILLING} - - BILLING_API_URL=${BILLING_API_URL} - - BILLING_API_KEY=${BILLING_API_KEY} - restart: unless-stopped - jobs-worker: build: context: ./apps/rowboat diff --git a/start.sh b/start.sh index 45052199..2ee2f895 100755 --- a/start.sh +++ b/start.sh @@ -26,13 +26,7 @@ export USE_KLAVIS_TOOLS=true CMD="docker compose" CMD="$CMD --profile setup_qdrant" CMD="$CMD --profile qdrant" -CMD="$CMD --profile rag_text_worker" -CMD="$CMD --profile rag_files_worker" - -# enable rag urls worker -if [ "$USE_RAG_SCRAPING" = "true" ]; then - CMD="$CMD --profile rag_urls_worker" -fi +CMD="$CMD --profile rag-worker" # Add more mappings as needed # if [ "$SOME_OTHER_ENV" = "true" ]; then