From 58d3c5eebbe5783475150dc0697a57522a1f8f08 Mon Sep 17 00:00:00 2001 From: ramnique <30795890+ramnique@users.noreply.github.com> Date: Wed, 5 Feb 2025 16:49:48 +0530 Subject: [PATCH 1/3] autosave on undo/redo --- .../app/projects/[projectId]/workflow/workflow_editor.tsx | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/apps/rowboat/app/projects/[projectId]/workflow/workflow_editor.tsx b/apps/rowboat/app/projects/[projectId]/workflow/workflow_editor.tsx index db04437b..8ecbf0b8 100644 --- a/apps/rowboat/app/projects/[projectId]/workflow/workflow_editor.tsx +++ b/apps/rowboat/app/projects/[projectId]/workflow/workflow_editor.tsx @@ -149,6 +149,8 @@ function reducer(state: State, action: Action): State { const inverse = state.inversePatches[state.currentIndex - 1]; draft.present = applyPatches(state.present, inverse); draft.currentIndex--; + draft.present.pendingChanges = true; + draft.present.chatKey++; }); break; } @@ -158,7 +160,9 @@ function reducer(state: State, action: Action): State { const patch = state.patches[state.currentIndex]; draft.present = applyPatches(state.present, patch); draft.currentIndex++; - }); + draft.present.pendingChanges = true; + draft.present.chatKey++; + }); break; } case "update_workflow_name": { From a1adbd722c6b1bba70c79f96b66d4f0cf6bf8bc6 Mon Sep 17 00:00:00 2001 From: ramnique <30795890+ramnique@users.noreply.github.com> Date: Wed, 5 Feb 2025 18:12:34 +0530 Subject: [PATCH 2/3] use only firecrawl for scraping, dont rewrite --- apps/rowboat/app/scripts/crawlUrls.ts | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/apps/rowboat/app/scripts/crawlUrls.ts b/apps/rowboat/app/scripts/crawlUrls.ts index 62536ef0..02474d77 100644 --- a/apps/rowboat/app/scripts/crawlUrls.ts +++ b/apps/rowboat/app/scripts/crawlUrls.ts @@ -685,6 +685,18 @@ async function saveWebpagesToMongodb(logger: PrefixLogger, job: WithId => { + return batch; + } + }); +} + async function rewrite(_logger: PrefixLogger) { const logger = _logger.child('rewrite'); @@ -950,7 +962,8 @@ async function mongodb(logger: PrefixLogger, job: WithId Date: Wed, 5 Feb 2025 18:50:11 +0530 Subject: [PATCH 3/3] enable scrape data source rag --- apps/rowboat/app/actions.ts | 14 ++++++++++++-- .../rowboat/app/projects/[projectId]/layout.tsx | 4 +++- apps/rowboat/app/projects/[projectId]/menu.tsx | 12 ++++++------ apps/rowboat/app/projects/[projectId]/nav.tsx | 4 +++- .../projects/[projectId]/sources/new/form.tsx | 17 +++++++++-------- .../projects/[projectId]/sources/new/page.tsx | 7 +++++++ 6 files changed, 40 insertions(+), 18 deletions(-) diff --git a/apps/rowboat/app/actions.ts b/apps/rowboat/app/actions.ts index fef6f4ab..af06afc0 100644 --- a/apps/rowboat/app/actions.ts +++ b/apps/rowboat/app/actions.ts @@ -435,8 +435,18 @@ export async function createCrawlDataSource(projectId: string, formData: FormDat export async function createUrlsDataSource(projectId: string, formData: FormData) { await projectAuthCheck(projectId); const urls = formData.get('urls') as string; - // take first 100 urls - const limitedUrls = urls.split('\n').slice(0, 100).map((url) => url.trim()); + // take first 100 valid urls (as in parse them) + const limitedUrls = urls.split('\n') + .map((url) => url.trim()) + .filter((url) => { + try { + new URL(url); + return true; + } catch (e) { + return false; + } + }) + .slice(0, 100); const name = formData.get('name') as string; const result = await dataSourcesCollection.insertOne({ diff --git a/apps/rowboat/app/projects/[projectId]/layout.tsx b/apps/rowboat/app/projects/[projectId]/layout.tsx index 6b0bb0a9..108d21bd 100644 --- a/apps/rowboat/app/projects/[projectId]/layout.tsx +++ b/apps/rowboat/app/projects/[projectId]/layout.tsx @@ -7,8 +7,10 @@ export default async function Layout({ params: { projectId: string } children: React.ReactNode }) { + const useDataSources = process.env.USE_DATA_SOURCES === 'true'; + return
-
; } \ No newline at end of file diff --git a/apps/rowboat/app/projects/[projectId]/sources/new/form.tsx b/apps/rowboat/app/projects/[projectId]/sources/new/form.tsx index e9c0ba5f..d47cb9a3 100644 --- a/apps/rowboat/app/projects/[projectId]/sources/new/form.tsx +++ b/apps/rowboat/app/projects/[projectId]/sources/new/form.tsx @@ -10,9 +10,9 @@ export function Form({ }: { projectId: string; }) { - const [sourceType, setSourceType] = useState("crawl"); + const [sourceType, setSourceType] = useState(""); - const createCrawlDataSourceWithProjectId = createCrawlDataSource.bind(null, projectId); + // const createCrawlDataSourceWithProjectId = createCrawlDataSource.bind(null, projectId); const createUrlsDataSourceWithProjectId = createUrlsDataSource.bind(null, projectId); function handleSourceTypeChange(event: React.ChangeEvent) { @@ -26,22 +26,23 @@ export function Form({ selectedKeys={[sourceType]} onChange={handleSourceTypeChange} > - } > Crawl URLs - + */} } > - Specify URLs + Scrape URLs - {sourceType === "crawl" &&
@@ -95,7 +96,7 @@ export function Form({ , }} /> -
} + } */} {sourceType === "urls" &&
Note:

  • Expect about 5-10 minutes to scrape 100 pages
  • -
  • Only the first 100 URLs will be scraped
  • +
  • Only the first 100 (valid) URLs will be scraped