2026-05-10 23:12:26 +02:00
import { readdir , readFile , realpath , rm , stat , writeFile , mkdtemp } from 'node:fs/promises' ;
import { createRequire } from 'node:module' ;
import { tmpdir } from 'node:os' ;
import { dirname , join , relative , resolve } from 'node:path' ;
import { performance } from 'node:perf_hooks' ;
import { fileURLToPath } from 'node:url' ;
const require = createRequire ( import . meta . url ) ;
const scriptDir = dirname ( fileURLToPath ( import . meta . url ) ) ;
const contextDir = resolve ( scriptDir , '..' ) ;
2026-05-10 23:51:24 +02:00
const ktxRoot = resolve ( contextDir , '../..' ) ;
const docsDir = join ( ktxRoot , 'docs' ) ;
2026-05-10 23:12:26 +02:00
const reportPath = join ( docsDir , 'hybrid-search-pglite-spike.md' ) ;
async function timed ( label , fn ) {
const started = performance . now ( ) ;
const value = await fn ( ) ;
const durationMs = Number ( ( performance . now ( ) - started ) . toFixed ( 2 ) ) ;
return { label , durationMs , value } ;
}
async function directoryBytes ( path ) {
const entry = await stat ( path ) ;
if ( entry . isFile ( ) ) {
return entry . size ;
}
if ( ! entry . isDirectory ( ) ) {
return 0 ;
}
const children = await readdir ( path ) ;
const childSizes = await Promise . all ( children . map ( ( child ) => directoryBytes ( join ( path , child ) ) ) ) ;
return childSizes . reduce ( ( sum , size ) => sum + size , 0 ) ;
}
async function resolvePackageJson ( packageName ) {
let currentDir = dirname ( require . resolve ( packageName ) ) ;
while ( currentDir !== dirname ( currentDir ) ) {
const packageJsonPath = join ( currentDir , 'package.json' ) ;
try {
const packageJson = JSON . parse ( await readFile ( packageJsonPath , 'utf8' ) ) ;
if ( packageJson . name === packageName ) {
return { packageJsonPath , packageJson } ;
}
} catch ( error ) {
if ( error ? . code !== 'ENOENT' ) {
throw error ;
}
}
currentDir = dirname ( currentDir ) ;
}
throw new Error ( ` Could not resolve package.json for ${ packageName } ` ) ;
}
async function packageInfo ( packageName ) {
const { packageJsonPath , packageJson } = await resolvePackageJson ( packageName ) ;
const packageDir = await realpath ( dirname ( packageJsonPath ) ) ;
return {
name : packageName ,
version : packageJson . version ,
2026-05-10 23:51:24 +02:00
path : relative ( ktxRoot , packageDir ) ,
2026-05-10 23:12:26 +02:00
bytes : await directoryBytes ( packageDir ) ,
} ;
}
async function createDb ( PGlite , vector , pg _trgm , dataDir ) {
const db = await PGlite . create ( {
dataDir ,
extensions : {
vector ,
pg _trgm ,
} ,
} ) ;
await db . exec ( `
CREATE EXTENSION IF NOT EXISTS vector ;
CREATE EXTENSION IF NOT EXISTS pg _trgm ;
CREATE TABLE IF NOT EXISTS spike _documents (
id TEXT PRIMARY KEY ,
search _text TEXT NOT NULL ,
metadata JSONB NOT NULL DEFAULT '{}' : : jsonb ,
embedding vector ( 3 ) NOT NULL
) ;
CREATE INDEX IF NOT EXISTS spike _documents _fts _idx
ON spike _documents
USING GIN ( to _tsvector ( 'english' , search _text ) ) ;
CREATE INDEX IF NOT EXISTS spike _documents _vector _idx
ON spike _documents
USING ivfflat ( embedding vector _cosine _ops )
WITH ( lists = 1 ) ;
CREATE TABLE IF NOT EXISTS spike _dictionary _values (
connection _id TEXT NOT NULL ,
source _name TEXT NOT NULL ,
column _name TEXT NOT NULL ,
value TEXT NOT NULL ,
PRIMARY KEY ( connection _id , source _name , column _name , value )
) ;
CREATE INDEX IF NOT EXISTS spike _dictionary _values _trgm _idx
ON spike _dictionary _values
USING GIN ( value gin _trgm _ops ) ;
` );
return db ;
}
async function seed ( db ) {
await db . query (
`
INSERT INTO spike _documents ( id , search _text , metadata , embedding )
VALUES
( $1 , $2 , $3 : : jsonb , $4 : : vector ) ,
( $5 , $6 , $7 : : jsonb , $8 : : vector ) ,
( $9 , $10 , $11 : : jsonb , $12 : : vector )
ON CONFLICT ( id ) DO UPDATE
SET search _text = EXCLUDED . search _text ,
metadata = EXCLUDED . metadata ,
embedding = EXCLUDED . embedding
` ,
[
'warehouse/orders' ,
'orders paid revenue refund status customer' ,
JSON . stringify ( { connectionId : 'warehouse' , sourceName : 'orders' } ) ,
JSON . stringify ( [ 1 , 0 , 0 ] ) ,
'finance/orders' ,
'orders finance bookings gross margin' ,
JSON . stringify ( { connectionId : 'finance' , sourceName : 'orders' } ) ,
JSON . stringify ( [ 0.72 , 0.28 , 0 ] ) ,
'warehouse/customers' ,
'customers accounts lifecycle region' ,
JSON . stringify ( { connectionId : 'warehouse' , sourceName : 'customers' } ) ,
JSON . stringify ( [ 0 , 1 , 0 ] ) ,
] ,
) ;
await db . query ( `
INSERT INTO spike _dictionary _values ( connection _id , source _name , column _name , value )
VALUES
( 'warehouse' , 'orders' , 'status' , 'refunded' ) ,
( 'warehouse' , 'orders' , 'status' , 'paid' ) ,
( 'warehouse' , 'customers' , 'region' , 'emea' )
ON CONFLICT DO NOTHING
` );
}
async function closeDb ( db ) {
if ( typeof db . close === 'function' ) {
await db . close ( ) ;
}
}
async function main ( ) {
const importTimer = await timed ( 'dynamic import @electric-sql/pglite' , async ( ) => {
const [ { PGlite } , { vector } , { pg _trgm } ] = await Promise . all ( [
import ( '@electric-sql/pglite' ) ,
import ( '@electric-sql/pglite/vector' ) ,
import ( '@electric-sql/pglite/contrib/pg_trgm' ) ,
] ) ;
return { PGlite , vector , pg _trgm } ;
} ) ;
const { PGlite , vector , pg _trgm } = importTimer . value ;
2026-05-10 23:51:24 +02:00
const tempDir = await mkdtemp ( join ( tmpdir ( ) , 'ktx-pglite-report-' ) ) ;
2026-05-10 23:12:26 +02:00
const dataDir = join ( tempDir , 'pgdata' ) ;
let db ;
let reopened ;
try {
const createTimer = await timed ( 'create persistent PGlite database and load extensions' , async ( ) => {
db = await createDb ( PGlite , vector , pg _trgm , dataDir ) ;
return true ;
} ) ;
const seedTimer = await timed ( 'seed hybrid search fixture' , async ( ) => seed ( db ) ) ;
const ftsTimer = await timed ( 'Postgres FTS query' , ( ) =>
db . query (
`
SELECT id
FROM spike _documents
WHERE to _tsvector ( 'english' , search _text ) @ @ websearch _to _tsquery ( 'english' , $1 )
ORDER BY ts _rank _cd ( to _tsvector ( 'english' , search _text ) , websearch _to _tsquery ( 'english' , $1 ) ) DESC , id ASC
LIMIT 1
` ,
[ 'paid orders' ] ,
) ,
) ;
const vectorTimer = await timed ( 'pgvector cosine query' , ( ) =>
db . query (
`
SELECT id , 1 - ( embedding <= > $1 : : vector ) AS similarity
FROM spike _documents
ORDER BY embedding <= > $1 : : vector , id ASC
LIMIT 1
` ,
[ JSON . stringify ( [ 1 , 0 , 0 ] ) ] ,
) ,
) ;
const trigramTimer = await timed ( 'pg_trgm dictionary query' , ( ) =>
db . query (
`
SELECT connection _id || '/' || source _name AS id , value , similarity ( value , $1 ) AS score
FROM spike _dictionary _values
WHERE similarity ( value , $1 ) > 0
ORDER BY score DESC , id ASC , value ASC
LIMIT 1
` ,
[ 'refund' ] ,
) ,
) ;
const sameInstanceTimer = await timed ( 'same instance parallel reads' , ( ) =>
Promise . all ( Array . from ( { length : 4 } , ( ) => db . query ( 'SELECT COUNT(*)::int AS count FROM spike_documents' ) ) ) ,
) ;
let secondOpenStatus = 'opened' ;
let secondOpenMessage = 'Second direct opener executed SELECT 1.' ;
let second ;
try {
second = await createDb ( PGlite , vector , pg _trgm , dataDir ) ;
await second . query ( 'SELECT 1' ) ;
} catch ( error ) {
secondOpenStatus = 'blocked' ;
secondOpenMessage = error instanceof Error ? error . message : String ( error ) ;
} finally {
if ( second ) {
await closeDb ( second ) ;
}
}
await closeDb ( db ) ;
db = undefined ;
const reopenTimer = await timed ( 'reopen persistent PGlite database' , async ( ) => {
reopened = await createDb ( PGlite , vector , pg _trgm , dataDir ) ;
return reopened . query ( 'SELECT COUNT(*)::int AS count FROM spike_documents' ) ;
} ) ;
const packages = await Promise . all ( [
packageInfo ( '@electric-sql/pglite' ) ,
packageInfo ( '@electric-sql/pglite-socket' ) ,
] ) ;
const result = {
generatedAt : new Date ( ) . toISOString ( ) ,
node : process . version ,
packages ,
timingsMs : {
import : importTimer . durationMs ,
createAndExtensions : createTimer . durationMs ,
seed : seedTimer . durationMs ,
ftsQuery : ftsTimer . durationMs ,
vectorQuery : vectorTimer . durationMs ,
trigramQuery : trigramTimer . durationMs ,
sameInstanceParallelReads : sameInstanceTimer . durationMs ,
reopen : reopenTimer . durationMs ,
} ,
topResults : {
fts : ftsTimer . value . rows [ 0 ] ? . id ? ? null ,
vector : vectorTimer . value . rows [ 0 ] ? . id ? ? null ,
trigram : trigramTimer . value . rows [ 0 ] ? . id ? ? null ,
persistedRowCount : reopenTimer . value . rows [ 0 ] ? . count ? ? null ,
} ,
concurrency : {
sameInstanceReadCounts : sameInstanceTimer . value . map ( ( queryResult ) => queryResult . rows [ 0 ] ? . count ? ? null ) ,
secondDirectOpenStatus : secondOpenStatus ,
secondDirectOpenMessage : secondOpenMessage ,
} ,
} ;
const totalPackageBytes = packages . reduce ( ( sum , pkg ) => sum + pkg . bytes , 0 ) ;
const recommendation =
secondOpenStatus === 'opened'
? 'Prototype a PGlite backend behind an explicit owner process or socket before exposing CLI plus MCP concurrent access.'
: 'Use a socket or owner-process architecture for any PGlite backend prototype because direct second opener access was blocked.' ;
const markdown = ` # Hybrid Search PGlite Spike
Generated : $ { result . generatedAt }
# # Summary
PGlite loaded in Node $ { result . node } , enabled vector and pg _trgm extensions , executed Postgres FTS , pgvector cosine ranking , pg _trgm dictionary ranking , and reopened a persistent filesystem database .
Recommendation : $ { recommendation }
# # Package Footprint
| Package | Version | Approx bytes | Resolved path |
| -- - | -- - | -- - : | -- - |
$ { packages . map ( ( pkg ) => ` | \` ${ pkg . name } \` | \` ${ pkg . version } \` | ${ pkg . bytes } | \` ${ pkg . path } \` | ` ) . join ( '\n' ) }
Total measured package bytes : $ { totalPackageBytes }
# # Timings
| Probe | Duration ms |
| -- - | -- - : |
$ { Object . entries ( result . timingsMs )
. map ( ( [ name , ms ] ) => ` | ${ name } | ${ ms } | ` )
. join ( '\n' ) }
# # Search Feature Results
| Probe | Top result |
| -- - | -- - |
| Postgres FTS | \ ` ${ result . topResults . fts } \` |
| pgvector cosine | \ ` ${ result . topResults . vector } \` |
| pg _trgm dictionary | \ ` ${ result . topResults . trigram } \` |
| Reopened persisted row count | \ ` ${ result . topResults . persistedRowCount } \` |
# # Concurrency Observation
Same - instance parallel read counts : \ ` ${ result . concurrency . sameInstanceReadCounts . join ( ', ' ) } \`
Second direct opener status : \ ` ${ result . concurrency . secondDirectOpenStatus } \`
Second direct opener message :
\ ` \` \` text
$ { result . concurrency . secondDirectOpenMessage }
\ ` \` \`
# # Decision
The SQLite backend remains the production default . The next PGlite step , if approved , is an owner - process or socket - backed prototype that reuses the existing \ ` SearchBackendCapabilities \` and backend conformance helpers without changing the public CLI surface.
` ;
await writeFile ( reportPath , markdown ) ;
process . stdout . write ( ` Wrote ${ relative ( process . cwd ( ) , reportPath ) } \n ` ) ;
process . stdout . write ( JSON . stringify ( result , null , 2 ) ) ;
process . stdout . write ( '\n' ) ;
} finally {
if ( db ) {
await closeDb ( db ) ;
}
if ( reopened ) {
await closeDb ( reopened ) ;
}
await rm ( tempDir , { recursive : true , force : true } ) ;
}
}
main ( ) . catch ( ( error ) => {
console . error ( error ) ;
process . exitCode = 1 ;
} ) ;