Skip to content

Commit e522b04

Browse files
committed
fix(sync): harden rollback checkpoint recovery
1 parent 2b272c5 commit e522b04

File tree

2 files changed

+112
-5
lines changed

2 files changed

+112
-5
lines changed

lib/codex-cli/sync.ts

Lines changed: 35 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ import {
66
type AccountStorageV3,
77
findMatchingAccountIndex,
88
getLastAccountsSaveTimestamp,
9+
getRedactedFilesystemErrorLabel,
910
getStoragePath,
1011
type NamedBackupMetadata,
1112
normalizeAccountStorage,
@@ -35,6 +36,8 @@ import {
3536
const log = createLogger("codex-cli-sync");
3637
const RETRYABLE_SELECTION_TIMESTAMP_CODES = new Set(["EBUSY", "EPERM"]);
3738
export const SELECTION_TIMESTAMP_READ_MAX_ATTEMPTS = 4;
39+
const RETRYABLE_ROLLBACK_SAVE_CODES = new Set(["EBUSY", "EAGAIN"]);
40+
const ROLLBACK_SAVE_MAX_ATTEMPTS = 5;
3841

3942
function createEmptyStorage(): AccountStorageV3 {
4043
return {
@@ -442,12 +445,11 @@ async function loadRollbackSnapshot(
442445
storage: normalized,
443446
};
444447
} catch (error) {
448+
const errorLabel = getRedactedFilesystemErrorLabel(error);
445449
const reason =
446450
(error as NodeJS.ErrnoException).code === "ENOENT"
447-
? `Rollback checkpoint is missing at ${snapshot.path}.`
448-
: `Failed to read rollback checkpoint: ${
449-
error instanceof Error ? error.message : String(error)
450-
}`;
451+
? `Rollback checkpoint file not found (snapshot: ${snapshot.name}).`
452+
: `Failed to read rollback checkpoint for snapshot ${snapshot.name} [${errorLabel}].`;
451453
return {
452454
status: "unavailable",
453455
reason,
@@ -456,6 +458,34 @@ async function loadRollbackSnapshot(
456458
}
457459
}
458460

461+
/**
 * Decides whether a failed rollback save should be attempted again.
 *
 * A failure is retryable when its `code` is listed in
 * `RETRYABLE_ROLLBACK_SAVE_CODES`, or when it is `EPERM` and the process is
 * running on Windows (`process.platform === "win32"`).
 * NOTE(review): presumably EPERM is treated as transient on Windows because
 * short-lived file locks surface that way there — confirm.
 *
 * @param error - The value caught from the failed save. It may be anything,
 *   including `null`, `undefined`, or a primitive.
 * @returns `true` when the save may be retried, `false` otherwise.
 */
function isRetryableRollbackSaveError(error: unknown): boolean {
	// `throw null` / `throw undefined` are legal; reading `.code` off them
	// would raise a TypeError here and hide the original failure.
	if (error === null || error === undefined) {
		return false;
	}
	const code = (error as NodeJS.ErrnoException).code;
	// Anything without a string error code is never retried.
	if (typeof code !== "string") {
		return false;
	}
	if (RETRYABLE_ROLLBACK_SAVE_CODES.has(code)) {
		return true;
	}
	return code === "EPERM" && process.platform === "win32";
}
471+
472+
/**
 * Persists the rollback storage via `saveAccounts`, retrying failures that
 * `isRetryableRollbackSaveError` classifies as retryable.
 *
 * At most `ROLLBACK_SAVE_MAX_ATTEMPTS` saves are attempted. Between attempts
 * the function sleeps for `10 * 2 ** attempt` (attempt counted from zero).
 *
 * @param storage - The account storage to write back.
 * @throws The caught error, unchanged, when it is not retryable or when the
 *   final attempt fails.
 */
async function saveRollbackStorageWithRetry(storage: AccountStorageV3): Promise<void> {
	let attempt = 0;
	while (attempt < ROLLBACK_SAVE_MAX_ATTEMPTS) {
		try {
			await saveAccounts(storage);
			return;
		} catch (error) {
			// Classify first, then check the budget, matching the original order.
			const retryable = isRetryableRollbackSaveError(error);
			const attemptsExhausted = attempt + 1 >= ROLLBACK_SAVE_MAX_ATTEMPTS;
			if (!retryable || attemptsExhausted) {
				throw error;
			}
		}
		// Only reached after a retryable failure with attempts remaining.
		await sleep(10 * 2 ** attempt);
		attempt += 1;
	}
}
488+
459489
export async function getLatestCodexCliSyncRollbackPlan(): Promise<CodexCliSyncRollbackPlan> {
460490
const lastManualRun = await findLatestManualRollbackRun();
461491
if (!lastManualRun) {
@@ -484,7 +514,7 @@ export async function rollbackLatestCodexCliSync(
484514
}
485515

486516
try {
487-
await saveAccounts(resolvedPlan.storage);
517+
await saveRollbackStorageWithRetry(resolvedPlan.storage);
488518
return {
489519
status: "restored",
490520
reason: resolvedPlan.reason,

test/codex-cli-sync.test.ts

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1699,10 +1699,15 @@ describe("codex-cli sync", () => {
16991699
const plan = await getLatestCodexCliSyncRollbackPlan();
17001700
expect(plan.status).toBe("unavailable");
17011701
expect(plan.reason).toContain("missing");
1702+
expect(plan.reason).toContain(missingRun.rollbackSnapshot?.name ?? "");
1703+
expect(plan.reason).not.toContain(missingRun.rollbackSnapshot?.path ?? "");
17021704

17031705
const rollbackResult = await rollbackLatestCodexCliSync(plan);
17041706
expect(rollbackResult.status).toBe("unavailable");
17051707
expect(rollbackResult.reason).toContain("missing");
1708+
expect(rollbackResult.reason).not.toContain(
1709+
missingRun.rollbackSnapshot?.path ?? "",
1710+
);
17061711
});
17071712

17081713
it.each([
@@ -1821,6 +1826,78 @@ describe("codex-cli sync", () => {
18211826
saveSpy.mockRestore();
18221827
});
18231828

1829+
// Verifies that a rollback whose first save fails with a retryable code
// (EBUSY) is retried and ends up restored after exactly two save calls.
it("retries transient rollback save failures before succeeding", async () => {
	// Checkpoint file that the recorded run's rollback snapshot points at.
	const snapshotPath = join(tempDir, "rollback-retry-snapshot.json");
	await writeFile(
		snapshotPath,
		JSON.stringify(
			{
				version: 3,
				accounts: [
					{
						accountId: "acc_old",
						accountIdSource: "token",
						email: "old@example.com",
						refreshToken: "refresh-old",
						accessToken: "access-old",
						addedAt: 1,
						lastUsed: 1,
					},
				],
				activeIndex: 0,
				activeIndexByFamily: { codex: 0 },
			} satisfies AccountStorageV3,
			null,
			2,
		),
		"utf-8",
	);

	// Manual sync run recorded in history so a rollback plan can be built.
	const recordedRun: CodexCliSyncRun = {
		outcome: "changed",
		runAt: 10,
		sourcePath: accountsPath,
		targetPath: targetStoragePath,
		summary: {
			sourceAccountCount: 1,
			targetAccountCountBefore: 1,
			targetAccountCountAfter: 1,
			addedAccountCount: 0,
			updatedAccountCount: 1,
			unchangedAccountCount: 0,
			destinationOnlyPreservedCount: 0,
			selectionChanged: false,
		},
		trigger: "manual",
		rollbackSnapshot: {
			name: "accounts-codex-cli-sync-snapshot-retry",
			path: snapshotPath,
		},
	};
	await appendSyncHistoryEntry({
		kind: "codex-cli-sync",
		recordedAt: recordedRun.runAt,
		run: recordedRun,
	});

	// First save rejects with a retryable code, the second resolves.
	const transientError = Object.assign(new Error("save busy"), {
		code: "EBUSY",
	});
	const saveSpy = vi
		.spyOn(storageModule, "saveAccounts")
		.mockRejectedValueOnce(transientError)
		.mockResolvedValueOnce(undefined);

	try {
		const plan = await getLatestCodexCliSyncRollbackPlan();
		expect(plan.status).toBe("ready");

		const rollbackResult = await rollbackLatestCodexCliSync(plan);
		expect(rollbackResult.status).toBe("restored");
		expect(saveSpy).toHaveBeenCalledTimes(2);
	} finally {
		// Restore even when an expectation fails so the mocked saveAccounts
		// does not leak into later tests.
		saveSpy.mockRestore();
	}
});
1900+
18241901
it("re-reads Codex CLI state on apply when forceRefresh is requested", async () => {
18251902
await writeFile(
18261903
accountsPath,

0 commit comments

Comments
 (0)