fix(git): refuse squash-merge into a dirty main working tree

The auto_commit:false path (stageSquashMergeIntoMain) leaves main staged, but the
shared squash helper assumed a clean target. A later ingest/memory run merging
into that dirty index would 'git commit' the prior run's staged files under the
new run's commit (contamination), and conflict cleanup's 'reset --hard HEAD'
would discard them (data loss).

Guard applySquashToIndex: if the target worktree has uncommitted tracked changes,
refuse before merging and return a 'dirty' result (untracked/gitignored files are
ignored — the squash never commits them). Callers surface it cleanly: the bundle
runner fails the run with an actionable message; the memory agent rolls back its
eager DB writes (like a conflict) so the DB never gets ahead of main. Main is
left untouched in every case.
This commit is contained in:
Andrey Avtomonov 2026-06-09 14:27:53 +02:00
parent 36ee12ff06
commit f446d207ba
5 changed files with 135 additions and 10 deletions

View file

@ -27,13 +27,21 @@ export interface WorktreeEntry {
head: string | null;
}
/**
* `dirty` means the target (main) working tree had uncommitted tracked changes before the
* merge typically residue from a prior `auto_commit: false` run that was never committed.
* Squashing into it would either commit those unrelated changes or, on conflict cleanup,
* `reset --hard` them away, so the merge is refused and the tree is left untouched.
*/
export type SquashMergeResult =
| { ok: true; squashSha: string; touchedPaths: string[] }
| { ok: false; conflict: true; conflictPaths: string[] };
| { ok: false; conflict: true; conflictPaths: string[] }
| { ok: false; dirty: true; dirtyPaths: string[] };
export type StageSquashResult =
| { ok: true; touchedPaths: string[]; stagedTree: string }
| { ok: false; conflict: true; conflictPaths: string[] };
| { ok: false; conflict: true; conflictPaths: string[] }
| { ok: false; dirty: true; dirtyPaths: string[] };
function mergeErrorMessage(error: unknown): string {
if (error instanceof Error) {
@ -731,7 +739,28 @@ export class GitService {
*/
private async applySquashToIndex(
branch: string,
): Promise<{ ok: true; touchedPaths: string[] } | { ok: false; conflict: true; conflictPaths: string[] }> {
): Promise<
| { ok: true; touchedPaths: string[] }
| { ok: false; conflict: true; conflictPaths: string[] }
| { ok: false; dirty: true; dirtyPaths: string[] }
> {
// The squash flow commits the whole index and, on conflict, `reset --hard`s the tree, so
// it must start from a clean target. Refuse if the target has uncommitted tracked changes
// (e.g. residue from a prior `auto_commit: false` run) rather than committing them under
// this run's message or discarding them. Untracked files (gitignored .ktx state, stray
// files) are not committed by the squash and are intentionally ignored here.
const dirtyPaths = (await this.git.raw(['status', '--porcelain', '--untracked-files=no']))
.split('\n')
.map((line) => line.slice(3).trim())
.filter(Boolean);
if (dirtyPaths.length > 0) {
this.logger.warn(
`applySquashToIndex: target worktree has ${dirtyPaths.length} uncommitted change(s) ` +
`(${dirtyPaths.slice(0, 5).join(', ')}); refusing to squash ${branch} to avoid contaminating or discarding them`,
);
return { ok: false, dirty: true, dirtyPaths };
}
// Diff of HEAD..branch (two dots) lists commits/files reachable from `branch` that
// aren't on HEAD — i.e. exactly what the squash would apply. Three dots (HEAD...branch)
// is symmetric difference and would mis-classify cases where main moved ahead.

View file

@ -2698,6 +2698,13 @@ export class IngestBundleRunner {
});
if (!squashResult.merge.ok) {
await this.deps.runs.markFailed(runRow.id);
if ('dirty' in squashResult.merge) {
throw new Error(
'The project working tree has uncommitted changes ' +
`(${squashResult.merge.dirtyPaths.slice(0, 5).join(', ')}); commit or discard them before ingesting ` +
'(this typically means a previous run with storage.git.auto_commit: false was left staged).',
);
}
throw new Error(`squash merge conflict: ${squashResult.merge.conflictPaths.join(', ')}`);
}
const touchedPaths = squashResult.merge.touchedPaths;

View file

@ -271,8 +271,18 @@ export class MemoryAgentService {
);
if (!mergeResult.ok) {
// Both conflict and dirty-target mean "not landed" — roll back the eager session
// writes so the DB doesn't get ahead of main, and leave main untouched.
sessionOutcome = 'conflict';
sessionConflictPaths = mergeResult.conflictPaths;
if ('dirty' in mergeResult) {
this.logger.warn(
`[memory-agent] chat=${chatId} not landed: project working tree has uncommitted changes ` +
`(${mergeResult.dirtyPaths.slice(0, 5).join(', ')}); commit or discard them and re-run ` +
'(a previous run with auto_commit disabled may have been left staged).',
);
} else {
sessionConflictPaths = mergeResult.conflictPaths;
}
await this.rollbackDbForAbortedSession(session, actions);
} else if (mergeResult.touchedPaths.length === 0) {
sessionOutcome = 'empty';

View file

@ -449,8 +449,8 @@ describe('GitService', () => {
const result = await service.stageSquashMergeIntoMain('session/stage-conflict');
expect(result.ok).toBe(false);
if (result.ok) {
throw new Error('unreachable');
if (result.ok || !('conflict' in result)) {
throw new Error('expected a conflict');
}
expect(result.conflictPaths).toContain('conflict.md');
expect(await service.revParseHead()).toBe(mainHead);
@ -460,6 +460,67 @@ describe('GitService', () => {
});
});
describe('refuses to squash into a dirty main worktree', () => {
it('reports dirty without committing, merging, or discarding pre-existing staged changes', async () => {
const { commitHash: baseSha } = await writeAndCommit('seed.md', 'seed');
const parent = await realpath(join(tempDir, '..'));
const wtDir = join(parent, `wt-${Date.now()}-dirty`);
await service.addWorktree(wtDir, 'session/dirty', baseSha);
const scoped = service.forWorktree(wtDir);
await writeFile(join(wtDir, 'from-branch.yaml'), 'b: 1\n', 'utf-8');
await scoped.commitFile('from-branch.yaml', 'wip', 'System User', 'system@example.com');
// Residue from a prior auto_commit:false run: a staged change on main.
await writeFile(join(tempDir, 'pending.md'), 'pending work\n', 'utf-8');
await createSimpleGit(tempDir).add('pending.md');
const result = await service.squashMergeIntoMain('session/dirty', 'System User', 'system@example.com', 'msg');
expect(result.ok).toBe(false);
if (result.ok || !('dirty' in result)) {
throw new Error('expected a dirty refusal');
}
expect(result.dirtyPaths).toContain('pending.md');
// Main is untouched: HEAD unchanged, branch not merged, pending change preserved.
expect(await service.revParseHead()).toBe(baseSha);
await expect(readFile(join(tempDir, 'pending.md'), 'utf-8')).resolves.toBe('pending work\n');
const staged = await createSimpleGit(tempDir).raw(['diff', '--cached', '--name-only']);
expect(staged).toContain('pending.md');
const branchFileLanded = await stat(join(tempDir, 'from-branch.yaml'))
.then(() => true)
.catch(() => false);
expect(branchFileLanded).toBe(false);
await service.removeWorktree(wtDir).catch(() => undefined);
await rm(wtDir, { recursive: true, force: true }).catch(() => undefined);
});
it('stageSquashMergeIntoMain also refuses on a dirty main', async () => {
const { commitHash: baseSha } = await writeAndCommit('seed.md', 'seed');
const parent = await realpath(join(tempDir, '..'));
const wtDir = join(parent, `wt-${Date.now()}-dirty-stage`);
await service.addWorktree(wtDir, 'session/dirty-stage', baseSha);
const scoped = service.forWorktree(wtDir);
await writeFile(join(wtDir, 'from-branch.yaml'), 'b: 1\n', 'utf-8');
await scoped.commitFile('from-branch.yaml', 'wip', 'System User', 'system@example.com');
await writeFile(join(tempDir, 'pending.md'), 'pending work\n', 'utf-8');
await createSimpleGit(tempDir).add('pending.md');
const result = await service.stageSquashMergeIntoMain('session/dirty-stage');
expect(result.ok).toBe(false);
if (result.ok || !('dirty' in result)) {
throw new Error('expected a dirty refusal');
}
expect(result.dirtyPaths).toContain('pending.md');
expect(await service.revParseHead()).toBe(baseSha);
await service.removeWorktree(wtDir).catch(() => undefined);
await rm(wtDir, { recursive: true, force: true }).catch(() => undefined);
});
});
describe('squashMergeIntoMain', () => {
it('merges a session branch as one commit on main, returning the new SHA + touched paths', async () => {
const { commitHash: baseSha } = await writeAndCommit('seed.md', 'seed');
@ -544,8 +605,8 @@ describe('GitService', () => {
);
expect(result.ok).toBe(false);
if (result.ok) {
throw new Error('unreachable');
if (result.ok || !('conflict' in result)) {
throw new Error('expected a conflict');
}
expect(result.conflict).toBe(true);
expect(result.conflictPaths).toContain('shared.yaml');
@ -576,8 +637,8 @@ describe('GitService', () => {
);
expect(result.ok).toBe(false);
if (result.ok) {
throw new Error('unreachable');
if (result.ok || !('conflict' in result)) {
throw new Error('expected a conflict');
}
expect(result.conflict).toBe(true);
expect(result.conflictPaths).toEqual(['knowledge.md']);

View file

@ -365,6 +365,24 @@ describe('MemoryAgentService.ingest — session-branch orchestration', () => {
expect(result.actions).toEqual([]);
});
it('dirty-target path: rolls back DB and does not land when main has uncommitted changes', async () => {
const mocks = buildMocks();
mocks.gitService.squashMergeIntoMain.mockResolvedValue({
ok: false,
dirty: true,
dirtyPaths: ['pending.md'],
});
const svc = buildService(mocks);
const result = await svc.ingest(baseInput);
// Treated as a not-landed abort: rolled back, no commit, no message-enhancement job.
expect(mocks.sessionWorktreeService.cleanup).toHaveBeenCalledWith(expect.any(Object), 'conflict', expect.any(Object));
expect(mocks.configService.enqueueCommitMessageJobForExternalCommit).not.toHaveBeenCalled();
expect(result.commitHash).toBeNull();
expect(result.actions).toEqual([]);
});
it('crash path: post-loop step throws → cleanup(crash), commitHash=null', async () => {
const mocks = buildMocks();
// Force the cross-ref reconciler to throw, escaping into the outer try/catch and