fix: bundle pdf-parse, xlsx, papaparse, mammoth for packaged Electron app

These four packages are loaded via _importDynamic (new Function pattern) in
builtin-tools.ts to prevent esbuild from statically bundling pdfjs-dist's DOM
polyfills into the Electron main process. As a result, esbuild cannot inline
them into main.cjs, and they are not available at runtime in the packaged app.

Two changes to fix this:

1. bundle.mjs: mark the four packages as esbuild external so the generated
   main.cjs emits require() calls for them rather than inlining them.

2. forge.config.cjs: after bundling, recursively collect all transitive and
   optional dependencies of the four packages from the pnpm store and copy
   them into .package/node_modules/. Optional deps are included because
   @napi-rs/canvas (required by pdfjs-dist for DOMMatrix polyfills) ships
   its native binaries as optional platform-specific packages.

Fixes: pdf-parse, xlsx, papaparse, mammoth all fail with
'Cannot find package' in the packaged .app (issue #486).
This commit is contained in:
Deepak Bhagat 2026-04-21 20:39:07 +05:30
parent 2133d7226f
commit bcf354a4e2
2 changed files with 60 additions and 2 deletions

View file

@ -7,6 +7,13 @@
* - Bundling inlines all dependencies into a single file, eliminating node_modules
*
* This script is called by the generateAssets hook in forge.config.cjs before packaging.
*
* Why pdf-parse, xlsx, papaparse, mammoth are marked external:
* - builtin-tools.ts loads these via _importDynamic (new Function pattern) to prevent
* esbuild from statically bundling pdfjs-dist's DOM polyfills into the main process.
* - Because esbuild cannot see through the dynamic import, these packages must be
* available as real node_modules at runtime instead.
* - forge.config.cjs copies them into .package/node_modules after bundling.
*/
import * as esbuild from 'esbuild';
@ -16,13 +23,17 @@ import * as esbuild from 'esbuild';
// and we use define to replace all import.meta.url references with it.
const cjsBanner = `var __import_meta_url = require('url').pathToFileURL(__filename).href;`;
// These packages are loaded at runtime via _importDynamic and cannot be bundled.
// They must be present in node_modules alongside the app bundle.
const RUNTIME_EXTERNAL = ['pdf-parse', 'xlsx', 'papaparse', 'mammoth'];
await esbuild.build({
entryPoints: ['./dist/main.js'],
bundle: true,
platform: 'node',
target: 'node20',
outfile: './.package/dist/main.cjs',
external: ['electron'], // Provided by Electron runtime
external: ['electron', ...RUNTIME_EXTERNAL],
// Use CommonJS format - many dependencies use require() which doesn't work
// well with esbuild's ESM shim. CJS handles dynamic requires natively.
format: 'cjs',
@ -34,4 +45,4 @@ await esbuild.build({
},
});
console.log('✅ Main process bundled to .package/dist-bundle/main.js');
console.log('✅ Main process bundled to .package/dist/main.cjs');

View file

@ -170,6 +170,53 @@ module.exports = {
fs.mkdirSync(rendererDest, { recursive: true });
fs.cpSync(rendererSrc, rendererDest, { recursive: true });
// Copy runtime-external packages and their transitive deps into .package/node_modules.
// These are loaded via _importDynamic at runtime and cannot be bundled
// by esbuild (doing so would pull pdfjs-dist DOM polyfills into the
// Electron main process). They must exist as real node_modules.
// This list mirrors the packages that bundle.mjs marks as esbuild externals;
// the two lists need to stay in sync.
const runtimeRoots = ['pdf-parse', 'xlsx', 'papaparse', 'mammoth'];
// pnpm's .pnpm directory, resolved two levels above this config file.
// NOTE(review): presumably that is the workspace root — confirm if this package moves.
const pnpmModules = path.join(__dirname, '../../node_modules/.pnpm');
// Destination node_modules inside the staging directory (packageDir is defined outside this excerpt).
const destModules = path.join(packageDir, 'node_modules');
// recursive: true makes this a no-op when the directory already exists.
fs.mkdirSync(destModules, { recursive: true });
/**
 * Translate a package name into the prefix pnpm uses for its entry in the
 * .pnpm directory, where @scope/pkg is stored as @scope+pkg@version.
 * Inside that versioned dir, the package lives at node_modules/@scope/pkg.
 *
 * @param {string} pkg - Package name, optionally scoped.
 * @returns {string} The name with its first '/' replaced by '+'; names
 *   without a '/' are returned unchanged.
 */
function pnpmDirName(pkg) {
  const slashAt = pkg.indexOf('/');
  if (slashAt === -1) {
    return pkg;
  }
  return `${pkg.slice(0, slashAt)}+${pkg.slice(slashAt + 1)}`;
}
/**
 * Walk the dependency graph of a package as recorded in the pnpm store and
 * gather every package name reached along the way.
 *
 * Both `dependencies` and `optionalDependencies` are followed. Optional deps
 * are included because @napi-rs/canvas (needed by pdfjs-dist) ships its native
 * binaries as optional platform-specific packages.
 *
 * A name is recorded even when no store entry or package.json is found for
 * it; the walk simply stops descending at that name.
 *
 * @param {string} pkgName - Package to start from.
 * @param {Set<string>} [visited] - Names already seen; mutated in place and
 *   shared across the recursive calls.
 * @returns {Set<string>} The same `visited` set that was passed in (or created).
 */
function collectDeps(pkgName, visited = new Set()) {
  if (!visited.has(pkgName)) {
    visited.add(pkgName);
    // When several versions of a package are present, the first directory listed wins.
    const storePrefix = `${pnpmDirName(pkgName)}@`;
    const storeEntry = fs.readdirSync(pnpmModules).find((entry) => entry.startsWith(storePrefix));
    if (storeEntry !== undefined) {
      const manifestPath = path.join(pnpmModules, storeEntry, 'node_modules', pkgName, 'package.json');
      if (fs.existsSync(manifestPath)) {
        const manifest = JSON.parse(fs.readFileSync(manifestPath, 'utf8'));
        const depNames = Object.keys({ ...manifest.dependencies, ...manifest.optionalDependencies });
        depNames.forEach((depName) => collectDeps(depName, visited));
      }
    }
  }
  return visited;
}
// Resolve the full set of package names reachable from the runtime roots.
const allPkgs = new Set();
for (const root of runtimeRoots) collectDeps(root, allPkgs);

// List the pnpm store once; nothing below writes into it, so the listing
// stays valid for the whole loop.
const storeEntries = fs.readdirSync(pnpmModules);

// Count real copies rather than reporting allPkgs.size: allPkgs also holds
// names that have no entry in the store (e.g. optional platform-specific
// packages that may not be installed here) and names already staged, both of
// which are skipped below.
let copiedCount = 0;
for (const pkg of allPkgs) {
  const storePrefix = `${pnpmDirName(pkg)}@`;
  // When several versions are present, the first directory listed wins.
  const storeEntry = storeEntries.find((entry) => entry.startsWith(storePrefix));
  if (storeEntry === undefined) continue;
  const pkgSrc = path.join(pnpmModules, storeEntry, 'node_modules', pkg);
  if (!fs.existsSync(pkgSrc)) continue;
  const pkgDest = path.join(destModules, pkg);
  if (fs.existsSync(pkgDest)) continue;
  // Scoped packages (@scope/pkg) need their parent dir created first;
  // with recursive: true this is a no-op when the directory already exists.
  fs.mkdirSync(path.dirname(pkgDest), { recursive: true });
  fs.cpSync(pkgSrc, pkgDest, { recursive: true });
  copiedCount += 1;
}
console.log(`📦 Copied ${copiedCount} packages (parseFile runtime deps) → .package/node_modules/`);
console.log('✅ All assets staged in .package/');
},
}