From 58d3c5eebbe5783475150dc0697a57522a1f8f08 Mon Sep 17 00:00:00 2001
From: ramnique <30795890+ramnique@users.noreply.github.com>
Date: Wed, 5 Feb 2025 16:49:48 +0530
Subject: [PATCH 1/3] autosave on undo/redo
---
.../app/projects/[projectId]/workflow/workflow_editor.tsx | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/apps/rowboat/app/projects/[projectId]/workflow/workflow_editor.tsx b/apps/rowboat/app/projects/[projectId]/workflow/workflow_editor.tsx
index db04437b..8ecbf0b8 100644
--- a/apps/rowboat/app/projects/[projectId]/workflow/workflow_editor.tsx
+++ b/apps/rowboat/app/projects/[projectId]/workflow/workflow_editor.tsx
@@ -149,6 +149,8 @@ function reducer(state: State, action: Action): State {
const inverse = state.inversePatches[state.currentIndex - 1];
draft.present = applyPatches(state.present, inverse);
draft.currentIndex--;
+ draft.present.pendingChanges = true;
+ draft.present.chatKey++;
});
break;
}
@@ -158,7 +160,9 @@ function reducer(state: State, action: Action): State {
const patch = state.patches[state.currentIndex];
draft.present = applyPatches(state.present, patch);
draft.currentIndex++;
- });
+ draft.present.pendingChanges = true;
+ draft.present.chatKey++;
+ });
break;
}
case "update_workflow_name": {
From a1adbd722c6b1bba70c79f96b66d4f0cf6bf8bc6 Mon Sep 17 00:00:00 2001
From: ramnique <30795890+ramnique@users.noreply.github.com>
Date: Wed, 5 Feb 2025 18:12:34 +0530
Subject: [PATCH 2/3] use only firecrawl for scraping, don't rewrite
---
apps/rowboat/app/scripts/crawlUrls.ts | 15 ++++++++++++++-
1 file changed, 14 insertions(+), 1 deletion(-)
diff --git a/apps/rowboat/app/scripts/crawlUrls.ts b/apps/rowboat/app/scripts/crawlUrls.ts
index 62536ef0..02474d77 100644
--- a/apps/rowboat/app/scripts/crawlUrls.ts
+++ b/apps/rowboat/app/scripts/crawlUrls.ts
@@ -685,6 +685,18 @@ async function saveWebpagesToMongodb(logger: PrefixLogger, job: WithId => {
+ return batch;
+ }
+ });
+}
+
async function rewrite(_logger: PrefixLogger) {
const logger = _logger.child('rewrite');
@@ -950,7 +962,8 @@ async function mongodb(logger: PrefixLogger, job: WithId
Date: Wed, 5 Feb 2025 18:50:11 +0530
Subject: [PATCH 3/3] enable scrape data source rag
---
apps/rowboat/app/actions.ts | 14 ++++++++++++--
.../rowboat/app/projects/[projectId]/layout.tsx | 4 +++-
apps/rowboat/app/projects/[projectId]/menu.tsx | 12 ++++++------
apps/rowboat/app/projects/[projectId]/nav.tsx | 4 +++-
.../projects/[projectId]/sources/new/form.tsx | 17 +++++++++--------
.../projects/[projectId]/sources/new/page.tsx | 7 +++++++
6 files changed, 40 insertions(+), 18 deletions(-)
diff --git a/apps/rowboat/app/actions.ts b/apps/rowboat/app/actions.ts
index fef6f4ab..af06afc0 100644
--- a/apps/rowboat/app/actions.ts
+++ b/apps/rowboat/app/actions.ts
@@ -435,8 +435,18 @@ export async function createCrawlDataSource(projectId: string, formData: FormDat
export async function createUrlsDataSource(projectId: string, formData: FormData) {
await projectAuthCheck(projectId);
const urls = formData.get('urls') as string;
- // take first 100 urls
- const limitedUrls = urls.split('\n').slice(0, 100).map((url) => url.trim());
+ // keep only entries that parse with new URL(), then take the first 100
+ const limitedUrls = urls.split('\n')
+ .map((url) => url.trim())
+ .filter((url) => {
+ try {
+ new URL(url);
+ return true;
+ } catch (e) {
+ return false;
+ }
+ })
+ .slice(0, 100);
const name = formData.get('name') as string;
const result = await dataSourcesCollection.insertOne({
diff --git a/apps/rowboat/app/projects/[projectId]/layout.tsx b/apps/rowboat/app/projects/[projectId]/layout.tsx
index 6b0bb0a9..108d21bd 100644
--- a/apps/rowboat/app/projects/[projectId]/layout.tsx
+++ b/apps/rowboat/app/projects/[projectId]/layout.tsx
@@ -7,8 +7,10 @@ export default async function Layout({
params: { projectId: string }
children: React.ReactNode
}) {
+ const useDataSources = process.env.USE_DATA_SOURCES === 'true';
+
return
-
+
{children}
diff --git a/apps/rowboat/app/projects/[projectId]/menu.tsx b/apps/rowboat/app/projects/[projectId]/menu.tsx
index 23f77e78..f5e15944 100644
--- a/apps/rowboat/app/projects/[projectId]/menu.tsx
+++ b/apps/rowboat/app/projects/[projectId]/menu.tsx
@@ -3,7 +3,7 @@ import { usePathname } from "next/navigation";
import { Tooltip } from "@nextui-org/react";
import Link from "next/link";
import clsx from "clsx";
-import { SettingsIcon, WorkflowIcon } from "lucide-react";
+import { DatabaseIcon, SettingsIcon, WorkflowIcon } from "lucide-react";
function NavLink({ href, label, icon, collapsed, selected = false }: { href: string, label: string, icon: React.ReactNode, collapsed: boolean, selected?: boolean }) {
return
}
selected={pathname.startsWith(`/projects/${projectId}/workflow`)}
/>
- {/*
-
-
+ icon={}
selected={pathname.startsWith(`/projects/${projectId}/sources`)}
- /> */}
+ />}
(null);
@@ -57,6 +59,6 @@ export function Nav({
}
-
+
;
}
\ No newline at end of file
diff --git a/apps/rowboat/app/projects/[projectId]/sources/new/form.tsx b/apps/rowboat/app/projects/[projectId]/sources/new/form.tsx
index e9c0ba5f..d47cb9a3 100644
--- a/apps/rowboat/app/projects/[projectId]/sources/new/form.tsx
+++ b/apps/rowboat/app/projects/[projectId]/sources/new/form.tsx
@@ -10,9 +10,9 @@ export function Form({
}: {
projectId: string;
}) {
- const [sourceType, setSourceType] = useState("crawl");
+ const [sourceType, setSourceType] = useState("");
- const createCrawlDataSourceWithProjectId = createCrawlDataSource.bind(null, projectId);
+ // const createCrawlDataSourceWithProjectId = createCrawlDataSource.bind(null, projectId);
const createUrlsDataSourceWithProjectId = createUrlsDataSource.bind(null, projectId);
function handleSourceTypeChange(event: React.ChangeEvent) {
@@ -26,22 +26,23 @@ export function Form({
selectedKeys={[sourceType]}
onChange={handleSourceTypeChange}
>
- }
>
Crawl URLs
-
+ */}
}
>
- Specify URLs
+ Scrape URLs
- {sourceType === "crawl" && }
+ } */}
{sourceType === "urls" &&
- Expect about 5-10 minutes to scrape 100 pages
- - Only the first 100 URLs will be scraped
+ - Only the first 100 (valid) URLs will be scraped