feat: real PDF pipeline test — end-to-end knowledge extraction working

Add full pipeline test that generates a real PDF, processes it through
the entire pipeline, and verifies knowledge lands in FalkorDB:

- Create test PDF generator using pdf-lib (2-page doc about Acme Corp)
- Add testFullPipeline() to integration tests with store verification
- Fix FalkorDB connection: createClient returns an unconnected client,
  so call connect() explicitly in both TriplesStore and TriplesQuery

Results: PDF decoded (2 pages) → chunked (2 chunks) → extracted
(4 relationships) → 16 triples stored in FalkorDB including:
  alice-johnson → is-a-senior-engineer → acme-corporation
  cloudsync → uses-aws-for-hosting → amazon-web-services
  provenance: pages → prov:wasDerivedFrom → source document

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
elpresidank 2026-04-07 02:19:12 -05:00
parent 5bc7a1b6fc
commit 50fb311d2d
6 changed files with 269 additions and 1 deletions

View file

@ -33,6 +33,7 @@ function createTerm(value: string): Term {
export class FalkorDBTriplesQuery {
private graph: Graph;
private connectPromise: Promise<void>;
constructor(config: FalkorDBQueryConfig = {}) {
const url = config.url ?? process.env.FALKORDB_URL ?? "redis://localhost:6379";
@ -40,6 +41,13 @@ export class FalkorDBTriplesQuery {
const client = createClient({ url });
this.graph = new Graph(client, database);
this.connectPromise = client.connect().then(() => {
console.log(`[FalkorDBTriplesQuery] Connected to ${url}, graph: ${database}`);
});
}
/**
 * Waits for the connection attempt stored in `connectPromise` to settle.
 *
 * Called at the start of query methods so no query is issued before the
 * client has connected. Any rejection carried by `connectPromise`
 * propagates to the caller.
 */
private async ensureConnected(): Promise<void> {
  const pendingConnection = this.connectPromise;
  await pendingConnection;
}
async queryTriples(
@ -48,6 +56,7 @@ export class FalkorDBTriplesQuery {
o?: Term,
limit = 100,
): Promise<Triple[]> {
await this.ensureConnected();
const sv = termToValue(s);
const pv = termToValue(p);
const ov = termToValue(o);