feat: add unit tests, Docker polish, and workbench UX improvements

Unit tests: Consumer class (7), recursive-splitter (10), parseJsonResponse (11) — 28 total.
Docker: add 5 commented LLM provider services, dev compose override, .env.example.
Workbench: chat persistence, error boundary, disconnect banner, prompts error handling.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
elpresidank 2026-04-07 03:51:29 -05:00
parent c7eefee607
commit 72870a7e2e
17 changed files with 718 additions and 33 deletions

View file

@ -0,0 +1,95 @@
import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
import { parseJsonResponse } from "../extract/knowledge-extract.js";
/**
 * Unit tests for `parseJsonResponse`.
 *
 * The cases below pin the observable contract of the parser: well-formed JSON
 * is returned as parsed, markdown fences and surrounding prose are tolerated,
 * a truncated array is cut back to its last complete element, and input with
 * no recoverable JSON yields `null`.
 */
describe("parseJsonResponse", () => {
  // Suppress console.warn from the function under test so the test output
  // stays clean; the spy is removed again after every test.
  beforeEach(() => {
    vi.spyOn(console, "warn").mockImplementation(() => {});
  });

  afterEach(() => {
    vi.restoreAllMocks();
  });

  // ── Valid JSON array ────────────────────────────────────────────
  it("parses a valid JSON array", () => {
    const result = parseJsonResponse<{ a: number }[]>('[{"a":1}]');
    expect(result).toEqual([{ a: 1 }]);
  });

  // ── JSON with markdown fences ──────────────────────────────────
  it("strips markdown fences and parses JSON", () => {
    const input = '```json\n[{"a":1}]\n```';
    const result = parseJsonResponse<{ a: number }[]>(input);
    expect(result).toEqual([{ a: 1 }]);
  });

  // ── JSON embedded in surrounding text ──────────────────────────
  it("extracts JSON array embedded in surrounding text", () => {
    const input = 'Here is the result: [{"a":1}] hope that helps';
    const result = parseJsonResponse<{ a: number }[]>(input);
    expect(result).toEqual([{ a: 1 }]);
  });

  // ── Truncated array ────────────────────────────────────────────
  it("repairs truncated array by closing at last complete object", () => {
    // The second element is cut off mid-object, so only the first survives.
    const input = '[{"a":1},{"b":2';
    const result = parseJsonResponse<Record<string, number>[]>(input);
    expect(result).toEqual([{ a: 1 }]);
  });

  // ── Single object (not array) ──────────────────────────────────
  it("parses a single object directly (valid JSON passes Attempt 1)", () => {
    const input = '{"a":1}';
    const result = parseJsonResponse<{ a: number }>(input);
    // A bare object is valid JSON, so Attempt 1 (JSON.parse) succeeds directly
    // and the object is returned without being wrapped in an array.
    expect(result).toEqual({ a: 1 });
  });

  it("wraps a single object in an array when embedded in non-JSON text", () => {
    // When the object is surrounded by garbage, Attempt 1 and 2 fail,
    // so Attempt 4 extracts the object and wraps it in an array
    const input = 'some text {"a":1} more text';
    const result = parseJsonResponse<{ a: number }[]>(input);
    expect(result).toEqual([{ a: 1 }]);
  });

  // ── Complete garbage ───────────────────────────────────────────
  it("returns null for complete garbage", () => {
    const result = parseJsonResponse("not json at all");
    expect(result).toBeNull();
  });

  // ── Empty string ───────────────────────────────────────────────
  it("returns null for empty string", () => {
    const result = parseJsonResponse("");
    expect(result).toBeNull();
  });

  // ── Fenced single object with language tag ─────────────────────
  it("parses JSON inside fences with language tag (single object)", () => {
    const input = '```json\n{"key":"value"}\n```';
    // The type argument is the bare object shape: once the fences are
    // stripped the remaining text is a valid JSON object, which is returned
    // as-is rather than wrapped in an array.
    const result = parseJsonResponse<{ key: string }>(input);
    expect(result).toEqual({ key: "value" });
  });

  // ── Multiple objects in valid array ────────────────────────────
  it("parses a multi-element array correctly", () => {
    const input = '[{"name":"Alice"},{"name":"Bob"},{"name":"Carol"}]';
    const result = parseJsonResponse<{ name: string }[]>(input);
    expect(result).toEqual([
      { name: "Alice" },
      { name: "Bob" },
      { name: "Carol" },
    ]);
  });

  // ── Fences without language tag ────────────────────────────────
  it("strips fences without a language tag", () => {
    const input = '```\n[{"x":42}]\n```';
    const result = parseJsonResponse<{ x: number }[]>(input);
    expect(result).toEqual([{ x: 42 }]);
  });
});

View file

@ -0,0 +1,97 @@
import { describe, it, expect } from "vitest";
import { recursiveSplit } from "../chunking/recursive-splitter.js";
/**
 * Unit tests for `recursiveSplit(text, chunkSize, chunkOverlap)`.
 *
 * The cases exercise the separator fallback order the test names describe
 * (paragraph break, newline, space, then individual characters), the handling
 * of empty input, and the effect of the overlap argument on adjacent chunks.
 */
describe("recursiveSplit", () => {
  // ── Short text returns single chunk ──────────────────────────────
  it("returns single chunk when text is shorter than chunkSize", () => {
    const result = recursiveSplit("Hello world", 100, 10);
    expect(result).toEqual(["Hello world"]);
  });

  // ── Empty/whitespace text returns empty array ────────────────────
  it("returns empty array for empty string", () => {
    expect(recursiveSplit("", 100, 10)).toEqual([]);
  });

  it("returns empty array for whitespace-only text", () => {
    expect(recursiveSplit(" \n\n \n ", 100, 10)).toEqual([]);
  });

  // ── Splits on paragraph boundary (\n\n) first ───────────────────
  it("splits on paragraph boundary (\\n\\n) first", () => {
    const text = "Paragraph one content here.\n\nParagraph two content here.";
    const result = recursiveSplit(text, 30, 0);
    expect(result.length).toBeGreaterThanOrEqual(2);
    // Each chunk should contain content from its respective paragraph
    expect(result[0]).toContain("Paragraph one");
    expect(result[result.length - 1]).toContain("Paragraph two");
  });

  // ── Splits on \n when no \n\n present ────────────────────────────
  it("splits on newline when no paragraph boundary present", () => {
    const text = "Line one content.\nLine two content.\nLine three content.";
    const result = recursiveSplit(text, 25, 0);
    expect(result.length).toBeGreaterThanOrEqual(2);
    expect(result[0]).toContain("Line one");
  });

  // ── Splits on spaces when no newlines present ────────────────────
  it("splits on spaces when no newlines present", () => {
    const text = "word1 word2 word3 word4 word5 word6 word7 word8 word9 word10";
    const result = recursiveSplit(text, 20, 0);
    expect(result.length).toBeGreaterThanOrEqual(2);
    // Each chunk should be at most roughly chunkSize: the bound of 30 is
    // deliberately looser than the requested 20 to leave the splitting
    // algorithm some tolerance.
    for (const chunk of result) {
      expect(chunk.length).toBeLessThanOrEqual(30);
    }
  });

  // ── Character-level split as last resort ─────────────────────────
  it("splits at character level as last resort", () => {
    // A single long word with no separators
    const text = "abcdefghijklmnopqrstuvwxyz0123456789abcdefghijklmnopqrstuvwxyz";
    const result = recursiveSplit(text, 10, 0);
    expect(result.length).toBeGreaterThanOrEqual(2);
    // With zero overlap, the reassembled text should match the original
    expect(result.join("")).toBe(text);
  });

  // ── Overlap: second chunk starts with tail of first ──────────────
  it("applies overlap so second chunk starts with tail of first", () => {
    const text = "First paragraph here.\n\nSecond paragraph here.";
    const result = recursiveSplit(text, 25, 5);
    // This assertion throws when fewer than two chunks come back, so the
    // overlap check below always runs against real chunks and needs no guard.
    expect(result.length).toBeGreaterThanOrEqual(2);
    // The second chunk should start with the last 5 chars of the first
    const firstTail = result[0].slice(-5);
    expect(result[1].startsWith(firstTail)).toBe(true);
  });

  // ── Large text produces multiple chunks ──────────────────────────
  it("large text produces multiple chunks of approximately chunkSize", () => {
    // Create a large block of text with paragraph separators
    const paragraphs = Array.from(
      { length: 20 },
      (_, i) => `This is paragraph number ${i + 1} with some filler content to make it longer.`,
    );
    const text = paragraphs.join("\n\n");
    const result = recursiveSplit(text, 100, 10);
    // Only the chunk count is checked here; individual chunk sizes are not.
    expect(result.length).toBeGreaterThan(5);
  });

  // ── chunkOverlap=0 produces no overlap ───────────────────────────
  it("chunkOverlap=0 produces no overlap between chunks", () => {
    const text = "AAAA\n\nBBBB\n\nCCCC\n\nDDDD";
    const result = recursiveSplit(text, 8, 0);
    expect(result.length).toBeGreaterThanOrEqual(2);
    // With zero overlap, no chunk after the first should start with the
    // previous chunk's tail. The paragraphs use distinct letters, so a match
    // could only come from overlap, never from naturally shared content.
    for (let i = 1; i < result.length; i++) {
      const prevTail = result[i - 1].slice(-3);
      expect(result[i].startsWith(prevTail)).toBe(false);
    }
  });
});