diff --git a/README.md b/README.md index 8d7db61b..ea682379 100644 --- a/README.md +++ b/README.md @@ -184,9 +184,11 @@ jsdiff's diff functions all take an old text and a new text and perform three st Once all patches have been applied or an error occurs, the `options.complete(err)` callback is made. -* `parsePatch(diffStr)` - Parses a patch into structured data +* `parsePatch(diffStr)` - Parses a unified diff format patch into a structured patch object. - Return a JSON object representation of the a patch, suitable for use with the `applyPatch` method. This parses to the same structure returned by `structuredPatch`. + Return a JSON object representation of the patch, suitable for use with the `applyPatch` method. This parses to the same structure returned by `structuredPatch`, except that `oldFileName` and `newFileName` may be `undefined` if the patch doesn't contain enough information to determine them (e.g. a hunk-only patch with no file headers). + + `parsePatch` has some understanding of [Git's particular dialect of unified diff format](https://git-scm.com/docs/git-diff#generate_patch_text_with_p). In particular, it can extract filenames from the patch headers or extended headers of Git patches that contain no hunks and no file headers, including ones representing a file being renamed without changes. (However, it ignores extended headers that describe things irrelevant to jsdiff's patch representation, like `similarity index` and `index` lines.) * `reversePatch(patch)` - Returns a new structured patch which when applied will undo the original `patch`. @@ -360,6 +362,70 @@ applyPatches(patch, { }); ``` +##### Applying a multi-file Git patch that may include renames + +[Git patches](https://git-scm.com/docs/git-diff#generate_patch_text_with_p) can include file renames and copies (with or without content changes), which need to be handled in the callbacks you provide to `applyPatches`.
`parsePatch` sets `isRename` or `isCopy` on the structured patch object so you can distinguish these cases. Patches can also potentially include file *swaps* (renaming `a → b` and `b → a`), in which case it is incorrect to simply apply each change atomically in sequence. The pattern with the `pendingWrites` Map below handles all of these nuances: + +``` +const {applyPatches} = require('diff'); +const patch = fs.readFileSync("git-diff.patch").toString(); +const DELETE = Symbol('delete'); +const pendingWrites = new Map(); // filePath → {content, mode} or DELETE sentinel +applyPatches(patch, { + loadFile: (patch, callback) => { + if (patch.isCreate) { + // Newly created file — no old content to load + callback(undefined, ''); + return; + } + try { + // Git diffs use a/ and b/ prefixes; strip them to get the real path + const filePath = patch.oldFileName.replace(/^a\//, ''); + callback(undefined, fs.readFileSync(filePath).toString()); + } catch (e) { + callback(`No such file: ${patch.oldFileName}`); + } + }, + patched: (patch, patchedContent, callback) => { + if (patchedContent === false) { + callback(`Failed to apply patch to ${patch.oldFileName}`); + return; + } + const oldPath = patch.oldFileName.replace(/^a\//, ''); + const newPath = patch.newFileName.replace(/^b\//, ''); + if (patch.isDelete) { + if (!pendingWrites.has(oldPath)) { + pendingWrites.set(oldPath, DELETE); + } + } else { + pendingWrites.set(newPath, {content: patchedContent, mode: patch.newMode}); + // For renames, delete the old file (but not for copies, + // where the old file should be kept) + if (patch.isRename && !pendingWrites.has(oldPath)) { + pendingWrites.set(oldPath, DELETE); + } + } + callback(); + }, + complete: (err) => { + if (err) { + console.log("Failed with error:", err); + return; + } + for (const [filePath, entry] of pendingWrites) { + if (entry === DELETE) { + fs.unlinkSync(filePath); + } else { + fs.writeFileSync(filePath, entry.content); + if (entry.mode) { + 
fs.chmodSync(filePath, parseInt(entry.mode, 8) & 0o777); + } + } + } + } +}); +``` + ## Compatibility jsdiff should support all ES5 environments. If you find one that it doesn't support, please [open an issue](https://github.com/kpdecker/jsdiff/issues). diff --git a/release-notes.md b/release-notes.md index 68e5db6d..2862cfc7 100644 --- a/release-notes.md +++ b/release-notes.md @@ -1,5 +1,43 @@ # Release Notes +## 9.0.0 (prerelease) + +TODO: +- Tidy up AI slop below the --- +- Note support for parsing quoted filenames in +++ and --- headers (even outside Git patches as `diff -u` outputs these) + - Also in formatPatch +- Note fixes to #640 and #648 +- Note fix to formatPatch in case where file name is undefined (prev emitted 'undefined' literally) + +--- + + +- **`parsePatch` now robustly handles Git-style diffs.** Previously, `parsePatch` had inconsistent regex usage that caused several bugs when parsing `diff --git` output: + * Multi-file Git diffs containing hunk-less entries (e.g. mode-only changes, binary files, rename-only entries without content changes) could cause file entries to be merged together or lost entirely. + * Git extended headers (`rename from`/`rename to`, `copy from`/`copy to`, `old mode`/`new mode`, `index`, etc.) were not parsed and could cause parse errors. + * `diff --git` was not consistently recognized as a diff header, leading to missing `index` entries and other subtle issues. + + The parser now: + * Correctly recognizes `diff --git` headers and parses filenames from them, including C-style quoted filenames (used by Git when paths contain tabs, newlines, backslashes, or double quotes). + * Consumes Git extended headers (`rename from`/`rename to`, `copy from`/`copy to`, mode changes, similarity index, etc.) without choking. + * Handles hunk-less entries (rename-only, mode-only, binary) as distinct file entries rather than merging them into adjacent entries. 
+ * Sets metadata flags on the resulting `StructuredPatch`: `isGit` (always, for Git diffs), `isRename`, `isCopy`, `isCreate`, `isDelete` (when the corresponding extended headers are present), and `oldMode`/`newMode` (parsed from `old mode`, `new mode`, `deleted file mode`, or `new file mode` headers). This lets consumers distinguish renames (where the old file should be deleted) from copies (where it should be kept), detect file creations and deletions, and preserve file mode information. + * Uses consistent, centralized helper functions for header detection instead of duplicated regexes. + +- **`reversePatch` now correctly reverses copy patches.** Reversing a copy produces a deletion (the reversed patch has `newFileName` set to `'/dev/null'` and `isCopy`/`isRename` unset), since undoing a copy means deleting the file that was created. Reversing a rename still produces a rename in the opposite direction, as before. + +- **`formatPatch` now supports Git-style patches.** When a `StructuredPatch` has `isGit: true`, `formatPatch` emits a `diff --git` header (instead of `Index:` / underline) and the appropriate Git extended headers (`rename from`/`rename to`, `copy from`/`copy to`, `deleted file mode`, `new file mode`, `old mode`/`new mode`) based on the patch's metadata flags. File headers (`---`/`+++`) are omitted on hunk-less Git patches (e.g. pure renames, mode-only changes), matching Git's own output. This means `parsePatch` output can be round-tripped through `formatPatch`. + +- **`formatPatch` now gracefully handles patches with undefined filenames** instead of emitting nonsensical headers like `--- undefined`. If `oldFileName` or `newFileName` is `undefined`, the `---`/`+++` file headers and the `Index:` line are silently omitted. This is consistent with how such patches can arise from parsing Git diffs that lack `---`/`+++` lines. 
+ +- **README: added documentation and example for applying Git patches that include renames, copies, deletions, and file creations** using `applyPatches`. + +### Breaking changes + +- **The `oldFileName` and `newFileName` fields of `StructuredPatch` are now typed as `string | undefined` instead of `string`.** This reflects the reality that `parsePatch` can produce patches without filenames (e.g. when parsing a Git diff with an unparseable `diff --git` header and no `---`/`+++` fallback). TypeScript users who access these fields without null checks will see type errors and should update their code to handle the `undefined` case. + +- **`StructuredPatch` has new optional fields for Git metadata:** `isGit`, `isRename`, `isCopy`, `isCreate`, `isDelete`, `oldMode`, and `newMode`. These are set by `parsePatch` when parsing Git diffs. Code that does exact deep-equality checks (e.g. `assert.deepEqual`) against `StructuredPatch` objects from `parsePatch` may need updating to account for the new fields. + ## 8.0.4 (prerelease) - [#667](https://github.com/kpdecker/jsdiff/pull/667) - **fix another bug in `diffWords` when used with an `Intl.Segmenter`**. If the text to be diffed included a combining mark after a whitespace character (i.e. roughly speaking, an accented space), `diffWords` would previously crash. Now this case is handled correctly. diff --git a/src/patch/create.ts b/src/patch/create.ts index 13c2a2b1..86737c06 100644 --- a/src/patch/create.ts +++ b/src/patch/create.ts @@ -1,6 +1,77 @@ import {diffLines} from '../diff/line.js'; import type { StructuredPatch, DiffLinesOptionsAbortable, DiffLinesOptionsNonabortable, AbortableDiffOptions, ChangeObject } from '../types.js'; +/** + * Returns true if the filename contains characters that require C-style + * quoting (as used by Git and GNU diffutils in diff output). 
+ */ +function needsQuoting(s: string): boolean { + for (let i = 0; i < s.length; i++) { + const c = s.charCodeAt(i); + if (c < 0x20 || c > 0x7e || s[i] === '"' || s[i] === '\\') { + return true; + } + } + return false; +} + +/** + * C-style quotes a filename, encoding special characters as escape sequences + * and non-ASCII bytes as octal escapes. This is the inverse of + * `parseQuotedFileName` in parse.ts. + * + * Non-ASCII bytes are encoded as UTF-8 before being emitted as octal escapes. + * This matches the behaviour of both Git and GNU diffutils, which always emit + * UTF-8 octal escapes regardless of the underlying filesystem encoding (e.g. + * Git for Windows converts from NTFS's UTF-16 to UTF-8 internally). + * + * If the filename doesn't need quoting, returns it as-is. + */ +function quoteFileNameIfNeeded(s: string): string { + if (!needsQuoting(s)) { + return s; + } + + let result = '"'; + const bytes = new TextEncoder().encode(s); + let i = 0; + while (i < bytes.length) { + const b = bytes[i]; + + // See https://en.wikipedia.org/wiki/Escape_sequences_in_C#Escape_sequences + if (b === 0x07) { + result += '\\a'; + } else if (b === 0x08) { + result += '\\b'; + } else if (b === 0x09) { + result += '\\t'; + } else if (b === 0x0a) { + result += '\\n'; + } else if (b === 0x0b) { + result += '\\v'; + } else if (b === 0x0c) { + result += '\\f'; + } else if (b === 0x0d) { + result += '\\r'; + } else if (b === 0x22) { + result += '\\"'; + } else if (b === 0x5c) { + result += '\\\\'; + } else if (b >= 0x20 && b <= 0x7e) { + // Just a printable ASCII character that is neither a double quote nor a + // backslash; no need to escape it. 
+ result += String.fromCharCode(b); + } else { + // Either part of a non-ASCII character or a control character without a + // special escape sequence; needs escaping as a 3-digit octal escape + result += '\\' + b.toString(8).padStart(3, '0'); + } + i++; + } + result += '"'; + return result; +} + type StructuredPatchCallbackAbortable = (patch: StructuredPatch | undefined) => void; type StructuredPatchCallbackNonabortable = (patch: StructuredPatch) => void; @@ -292,15 +363,44 @@ export function formatPatch(patch: StructuredPatch | StructuredPatch[], headerOp } const ret = []; - if (headerOptions.includeIndex && patch.oldFileName == patch.newFileName) { - ret.push('Index: ' + patch.oldFileName); - } - if (headerOptions.includeUnderline) { - ret.push('==================================================================='); + + if (patch.isGit) { + // Emit Git-style diff --git header and extended headers + ret.push('diff --git ' + quoteFileNameIfNeeded(patch.oldFileName ?? '') + ' ' + quoteFileNameIfNeeded(patch.newFileName ?? '')); + if (patch.isDelete) { + ret.push('deleted file mode ' + (patch.oldMode ?? '100644')); + } + if (patch.isCreate) { + ret.push('new file mode ' + (patch.newMode ?? '100644')); + } + if (patch.oldMode && patch.newMode && !patch.isDelete && !patch.isCreate) { + ret.push('old mode ' + patch.oldMode); + ret.push('new mode ' + patch.newMode); + } + if (patch.isRename) { + ret.push('rename from ' + quoteFileNameIfNeeded((patch.oldFileName ?? '').replace(/^a\//, ''))); + ret.push('rename to ' + quoteFileNameIfNeeded((patch.newFileName ?? '').replace(/^b\//, ''))); + } + if (patch.isCopy) { + ret.push('copy from ' + quoteFileNameIfNeeded((patch.oldFileName ?? '').replace(/^a\//, ''))); + ret.push('copy to ' + quoteFileNameIfNeeded((patch.newFileName ??
'').replace(/^b\//, ''))); + } + } else { + if (headerOptions.includeIndex && patch.oldFileName == patch.newFileName && patch.oldFileName !== undefined) { + ret.push('Index: ' + patch.oldFileName); + } + if (headerOptions.includeUnderline) { + ret.push('==================================================================='); + } } - if (headerOptions.includeFileHeaders) { - ret.push('--- ' + patch.oldFileName + (typeof patch.oldHeader === 'undefined' ? '' : '\t' + patch.oldHeader)); - ret.push('+++ ' + patch.newFileName + (typeof patch.newHeader === 'undefined' ? '' : '\t' + patch.newHeader)); + + // Emit --- / +++ file headers. For Git patches with no hunks (e.g. + // pure renames, mode-only changes), Git omits these, so we do too. + const hasHunks = patch.hunks.length > 0; + if (headerOptions.includeFileHeaders && patch.oldFileName !== undefined && patch.newFileName !== undefined + && (!patch.isGit || hasHunks)) { + ret.push('--- ' + quoteFileNameIfNeeded(patch.oldFileName) + (typeof patch.oldHeader === 'undefined' ? '' : '\t' + patch.oldHeader)); + ret.push('+++ ' + quoteFileNameIfNeeded(patch.newFileName) + (typeof patch.newHeader === 'undefined' ? '' : '\t' + patch.newHeader)); } for (let i = 0; i < patch.hunks.length; i++) { diff --git a/src/patch/parse.ts b/src/patch/parse.ts index 29356d61..eb18a6b8 100755 --- a/src/patch/parse.ts +++ b/src/patch/parse.ts @@ -10,40 +10,173 @@ export function parsePatch(uniDiff: string): StructuredPatch[] { list: Partial[] = []; let i = 0; + // These helper functions identify line types that can appear between files + // in a multi-file patch. Keeping them in one place avoids subtle + // inconsistencies from having the same regexes duplicated in multiple places. + + // Matches `diff --git ...` lines specifically. 
+ function isGitDiffHeader(line: string): boolean { + return (/^diff --git /).test(line); + } + + // Matches lines that denote the start of a new diff's section in a + // multi-file patch: `diff --git ...`, `Index: ...`, or `diff -r ...`. + function isDiffHeader(line: string): boolean { + return isGitDiffHeader(line) + || (/^Index:\s/).test(line) + || (/^diff(?: -r \w+)+\s/).test(line); + } + + // Matches `--- ...` and `+++ ...` file header lines. + function isFileHeader(line: string): boolean { + return (/^(---|\+\+\+)\s/).test(line); + } + + // Matches `@@ ...` hunk header lines. + function isHunkHeader(line: string): boolean { + return (/^@@\s/).test(line); + } + function parseIndex() { const index: Partial = {}; + index.hunks = []; list.push(index); // Parse diff metadata + let seenDiffHeader = false; while (i < diffstr.length) { const line = diffstr[i]; - // File header found, end parsing diff metadata - if ((/^(---|\+\+\+|@@)\s/).test(line)) { + // File header (---, +++) or hunk header (@@) found; end parsing diff metadata + if (isFileHeader(line) || isHunkHeader(line)) { break; } - // Try to parse the line as a diff header, like - // Index: README.md - // or - // diff -r 9117c6561b0b -r 273ce12ad8f1 .hgignore - // or - // Index: something with multiple words - // and extract the filename (or whatever else is used as an index name) - // from the end (i.e. 'README.md', '.hgignore', or - // 'something with multiple words' in the examples above). + // The next two branches handle recognized diff headers. Note that + // isDiffHeader deliberately does NOT match arbitrary `diff` + // commands like `diff -u -p -r1.1 -r1.2`, because in some + // formats (e.g. CVS diffs) such lines appear as metadata within + // a single file's header section, after an `Index:` line. See the + // diffx documentation (https://diffx.org) for examples. // - // TODO: It seems awkward that we indiscriminately trim off trailing - // whitespace here. 
Theoretically, couldn't that be meaningful - - // e.g. if the patch represents a diff of a file whose name ends - // with a space? Seems wrong to nuke it. - // But this behaviour has been around since v2.2.1 in 2015, so if - // it's going to change, it should be done cautiously and in a new - // major release, for backwards-compat reasons. - // -- ExplodingCabbage - const headerMatch = (/^(?:Index:|diff(?: -r \w+)+)\s+/).exec(line); - if (headerMatch) { - index.index = line.substring(headerMatch[0].length).trim(); + // In both branches: if we've already seen a diff header for *this* + // file and now we encounter another one, it must belong to the + // next file, so break. + + if (isGitDiffHeader(line)) { + if (seenDiffHeader) { + return; + } + seenDiffHeader = true; + index.isGit = true; + + // Parse the old and new filenames from the `diff --git` header and + // tentatively set oldFileName and newFileName from them. These may + // be overridden below by `rename from` / `rename to` or `copy from` / + // `copy to` extended headers, or by --- and +++ lines. But for Git + // diffs that lack all of those (e.g. mode-only changes, binary + // file changes without rename), these are the only filenames we + // get. + // parseGitDiffHeader returns null if the header can't be parsed + // (e.g. unterminated quoted filename, or unexpected format). In + // that case we skip setting filenames here; they may still be + // set from --- / +++ or rename from / rename to lines below. + const paths = parseGitDiffHeader(line); + if (paths) { + index.oldFileName = paths.oldFileName; + index.newFileName = paths.newFileName; + } + + // Consume Git extended headers (`old mode`, `new mode`, `rename from`, + // `rename to`, `similarity index`, `index`, `Binary files ... differ`, + // etc.) + i++; + while (i < diffstr.length) { + const extLine = diffstr[i]; + + // Stop consuming extended headers if we hit a file header, + // hunk header, or another diff header. 
+ if (isFileHeader(extLine) || isHunkHeader(extLine) || isDiffHeader(extLine)) { + break; + } + + // Parse `rename from` / `rename to` lines - these give us + // unambiguous filenames. These lines don't include the + // a/ and b/ prefixes that appear in the `diff --git` header + // and --- / +++ lines, so we add them for consistency. + // Git C-style quotes filenames containing special characters + // (tabs, newlines, backslashes, double quotes), so we must + // unquote them when present. + const renameFromMatch = (/^rename from (.*)/).exec(extLine); + if (renameFromMatch) { + index.oldFileName = 'a/' + unquoteIfQuoted(renameFromMatch[1]); + index.isRename = true; + } + const renameToMatch = (/^rename to (.*)/).exec(extLine); + if (renameToMatch) { + index.newFileName = 'b/' + unquoteIfQuoted(renameToMatch[1]); + index.isRename = true; + } + + // Parse copy from / copy to lines similarly + const copyFromMatch = (/^copy from (.*)/).exec(extLine); + if (copyFromMatch) { + index.oldFileName = 'a/' + unquoteIfQuoted(copyFromMatch[1]); + index.isCopy = true; + } + const copyToMatch = (/^copy to (.*)/).exec(extLine); + if (copyToMatch) { + index.newFileName = 'b/' + unquoteIfQuoted(copyToMatch[1]); + index.isCopy = true; + } + + const newFileModeMatch = (/^new file mode (\d+)/).exec(extLine); + if (newFileModeMatch) { + index.isCreate = true; + index.newMode = newFileModeMatch[1]; + } + const deletedFileModeMatch = (/^deleted file mode (\d+)/).exec(extLine); + if (deletedFileModeMatch) { + index.isDelete = true; + index.oldMode = deletedFileModeMatch[1]; + } + const oldModeMatch = (/^old mode (\d+)/).exec(extLine); + if (oldModeMatch) { + index.oldMode = oldModeMatch[1]; + } + const newModeMatch = (/^new mode (\d+)/).exec(extLine); + if (newModeMatch) { + index.newMode = newModeMatch[1]; + } + + i++; + } + continue; + } else if (isDiffHeader(line)) { + if (seenDiffHeader) { + return; + } + seenDiffHeader = true; + + // For Mercurial-style headers like + // diff -r 
9117c6561b0b -r 273ce12ad8f1 .hgignore + // or Index: headers like + // Index: something with multiple words + // we extract the trailing filename as the index. + // + // TODO: It seems awkward that we indiscriminately trim off + // trailing whitespace here. Theoretically, couldn't that + // be meaningful - e.g. if the patch represents a diff of a + // file whose name ends with a space? Seems wrong to nuke + // it. But this behaviour has been around since v2.2.1 in + // 2015, so if it's going to change, it should be done + // cautiously and in a new major release, for + // backwards-compat reasons. + // -- ExplodingCabbage + const headerMatch = (/^(?:Index:|diff(?: -r \w+)+)\s+/).exec(line); + if (headerMatch) { + index.index = line.substring(headerMatch[0].length).trim(); + } } i++; @@ -54,14 +187,11 @@ export function parsePatch(uniDiff: string): StructuredPatch[] { parseFileHeader(index); parseFileHeader(index); - // Parse hunks - index.hunks = []; - while (i < diffstr.length) { const line = diffstr[i]; - if ((/^(Index:\s|diff\s|---\s|\+\+\+\s|===================================================================)/).test(line)) { + if (isDiffHeader(line) || isFileHeader(line) || (/^===================================================================/).test(line)) { break; - } else if ((/^@@/).test(line)) { + } else if (isHunkHeader(line)) { index.hunks.push(parseHunk()); } else if (line) { throw new Error('Unknown line ' + (i + 1) + ' ' + JSON.stringify(line)); @@ -71,6 +201,190 @@ export function parsePatch(uniDiff: string): StructuredPatch[] { } } + /** + * Parses the old and new filenames from a `diff --git` header line. 
+ * + * The format is: + * diff --git a/<old-path> b/<new-path> + * + * When filenames contain special characters (including newlines, tabs, + * backslashes, or double quotes), Git quotes them with C-style escaping: + * diff --git "a/file\twith\ttabs.txt" "b/file\twith\ttabs.txt" + * + * When filenames don't contain special characters and the old and new names + * are the same, we can unambiguously split on ` b/` by finding where the + * two halves (including their a/ and b/ prefixes) yield matching bare names. + * + * A pathological case exists in which we cannot reliably determine the paths + * from the `diff --git` header. This case is when the following are true: + * - the old and new file paths differ + * - they are both unquoted (i.e. contain no special characters) + * - at least one of the underlying file paths includes the substring ` b/` + * In this scenario, we do not know which occurrence of ` b/` indicates the + * start of the new file path, so the header is inherently ambiguous. We thus + * select a possible interpretation arbitrarily and return that. + * + * Fortunately, this ambiguity should never matter, because in any patch + * genuinely output by Git in which this pathological scenario occurs, there + * must also be `rename from`/`rename to` or `copy from`/`copy to` extended + * headers present below the `diff --git` header. `parseIndex` will parse + * THOSE headers, from which we CAN unambiguously determine the filenames, + * and will discard the result returned by this function. + * + * Returns null if the header can't be parsed at all — e.g. a quoted filename + * has an unterminated quote, or if the unquoted header doesn't match the + * expected `a/... b/...` format. In that case, the caller (parseIndex) + * skips setting oldFileName/newFileName from this header, but they may + * still be set later from `---`/`+++` lines or `rename from`/`rename to` + * extended headers; if none of those are present either, they'll remain + * undefined in the output.
+ */ + function parseGitDiffHeader(line: string): { oldFileName: string, newFileName: string } | null { + // Strip the "diff --git " prefix + const rest = line.substring('diff --git '.length); + + // Handle quoted paths: "a/path" "b/path" + // Git quotes paths when they contain characters like newlines, tabs, + // backslashes, or double quotes (but notably not spaces). + if (rest.startsWith('"')) { + const oldPath = parseQuotedFileName(rest); + if (oldPath === null) { return null; } + const afterOld = rest.substring(oldPath.rawLength + 1); // +1 for space + let newFileName: string; + if (afterOld.startsWith('"')) { + const newPath = parseQuotedFileName(afterOld); + if (newPath === null) { return null; } + newFileName = newPath.fileName; + } else { + newFileName = afterOld; + } + return { + oldFileName: oldPath.fileName, + newFileName + }; + } + + // Check if the second path is quoted + // e.g. diff --git a/simple "b/renamed\nnewline.txt" + const quoteIdx = rest.indexOf('"'); + if (quoteIdx > 0) { + const oldFileName = rest.substring(0, quoteIdx - 1); + const newPath = parseQuotedFileName(rest.substring(quoteIdx)); + if (newPath === null) { return null; } + return { + oldFileName, + newFileName: newPath.fileName + }; + } + + // Unquoted paths. Try to find the split point. + // The format is: a/ b/ + // + // Note the potential ambiguity caused by the possibility of the file paths + // themselves containing the substring ` b/`, plus the pathological case + // described in the comment above. + // + // Strategy: find all occurrences of " b/" and split on the middle + // one. When old and new names are the same (which is the only case where + // we can't rely on extended headers later in the patch so HAVE to get + // this right), this will always be the correct split. 
+ if (rest.startsWith('a/')) { + const splits = []; + let idx = 0; + while (true) { + idx = rest.indexOf(' b/', idx + 1); + if (idx === -1) { break; } + splits.push(idx); + } + if (splits.length > 0) { + const mid = splits[Math.floor(splits.length / 2)]; + return { + oldFileName: rest.substring(0, mid), + newFileName: rest.substring(mid + 1) + }; + } + } + + // Fallback: can't parse, return null + return null; + } + + /** + * If `s` starts with a double quote, unquotes it using C-style escape + * rules (as used by Git). Otherwise returns `s` as-is. + */ + function unquoteIfQuoted(s: string): string { + if (s.startsWith('"')) { + const parsed = parseQuotedFileName(s); + if (parsed) { + return parsed.fileName; + } + } + return s; + } + + /** + * Parses a C-style quoted filename as used by Git or GNU `diff -u`. + * Returns the unescaped filename and the raw length consumed (including quotes). + */ + function parseQuotedFileName(s: string): { fileName: string, rawLength: number } | null { + if (!s.startsWith('"')) { return null; } + let result = ''; + let j = 1; // skip opening quote + while (j < s.length) { + if (s[j] === '"') { + return { fileName: result, rawLength: j + 1 }; + } + if (s[j] === '\\' && j + 1 < s.length) { + j++; + switch (s[j]) { + case 'a': result += '\x07'; break; + case 'b': result += '\b'; break; + case 'f': result += '\f'; break; + case 'n': result += '\n'; break; + case 'r': result += '\r'; break; + case 't': result += '\t'; break; + case 'v': result += '\v'; break; + case '\\': result += '\\'; break; + case '"': result += '"'; break; + case '0': case '1': case '2': case '3': + case '4': case '5': case '6': case '7': { + // C-style octal escapes represent raw bytes. Collect + // consecutive octal-escaped bytes and decode as UTF-8. 
+ // Validate that we have a full 3-digit octal escape + if (j + 2 >= s.length || s[j + 1] < '0' || s[j + 1] > '7' || s[j + 2] < '0' || s[j + 2] > '7') { + return null; + } + const bytes = [parseInt(s.substring(j, j + 3), 8)]; + j += 3; + while (s[j] === '\\' && s[j + 1] >= '0' && s[j + 1] <= '7') { + if (j + 3 >= s.length || s[j + 2] < '0' || s[j + 2] > '7' || s[j + 3] < '0' || s[j + 3] > '7') { + return null; + } + bytes.push(parseInt(s.substring(j + 1, j + 4), 8)); + j += 4; + } + result += new TextDecoder('utf-8').decode(new Uint8Array(bytes)); + continue; // j already points at the next character + } + // Note that in C, there are also three kinds of hex escape sequences: + // - \xhh + // - \uhhhh + // - \Uhhhhhhhh + // We do not bother to parse them here because, so far as we know, + // they are never emitted by any tools that generate unified diff + // format diffs, and so for now jsdiff does not consider them legal. + default: return null; + } + } else { + result += s[j]; + } + j++; + } + // Unterminated quote + return null; + } + // Parses the --- and +++ headers, if none are found, no lines // are consumed. 
function parseFileHeader(index: Partial) { @@ -79,9 +393,11 @@ export function parsePatch(uniDiff: string): StructuredPatch[] { const prefix = fileHeaderMatch[1], data = diffstr[i].substring(3).trim().split('\t', 2), header = (data[1] || '').trim(); - let fileName = data[0].replace(/\\\\/g, '\\'); - if (fileName.startsWith('"') && fileName.endsWith('"')) { - fileName = fileName.substr(1, fileName.length - 2); + let fileName = data[0]; + if (fileName.startsWith('"')) { + fileName = unquoteIfQuoted(fileName); + } else { + fileName = fileName.replace(/\\\\/g, '\\'); } if (prefix === '---') { index.oldFileName = fileName; diff --git a/src/patch/reverse.ts b/src/patch/reverse.ts index 65a5abc3..4cd62e44 100644 --- a/src/patch/reverse.ts +++ b/src/patch/reverse.ts @@ -13,7 +13,7 @@ export function reversePatch(structuredPatch: StructuredPatch | StructuredPatch[ return structuredPatch.map(patch => reversePatch(patch)).reverse(); } - return { + const reversed: StructuredPatch = { ...structuredPatch, oldFileName: structuredPatch.newFileName, oldHeader: structuredPatch.newHeader, @@ -33,4 +33,18 @@ export function reversePatch(structuredPatch: StructuredPatch | StructuredPatch[ }; }) }; + + if (structuredPatch.isCopy) { + // Reversing a copy means deleting the file that was created by the copy. + // The "old" file in the reversed patch is the copy destination (which + // exists and should be removed), and the "new" file is /dev/null. + reversed.newFileName = '/dev/null'; + reversed.newHeader = undefined; + delete reversed.isCopy; + delete reversed.isRename; + } + // Reversing a rename is just a rename in the opposite direction; + // isRename stays set and the filenames are already swapped above. 
+ + return reversed; } diff --git a/src/types.ts b/src/types.ts index 1ae11370..ebaf3821 100644 --- a/src/types.ts +++ b/src/types.ts @@ -225,12 +225,50 @@ export type AllDiffOptions = DiffJsonOptions; export interface StructuredPatch { - oldFileName: string, - newFileName: string, + oldFileName: string | undefined, + newFileName: string | undefined, oldHeader: string | undefined, newHeader: string | undefined, hunks: StructuredPatchHunk[], index?: string, + /** + * Set to true when the patch was parsed from a Git-style diff (one with a + * `diff --git` header). Controls whether `formatPatch` emits a `diff --git` + * header (instead of `Index:` / underline headers) when formatting the patch. + */ + isGit?: boolean, + /** + * Set to true when parsing a Git diff that includes `rename from`/`rename to` + * extended headers, indicating the file was renamed (and the old file no + * longer exists). Consumers applying the patch should delete the old file. + */ + isRename?: boolean, + /** + * Set to true when parsing a Git diff that includes `copy from`/`copy to` + * extended headers, indicating the file was copied (and the old file still + * exists). Consumers applying the patch should NOT delete the old file. + */ + isCopy?: boolean, + /** + * Set to true when parsing a Git diff that includes a `new file mode` extended + * header, indicating the file was newly created. + */ + isCreate?: boolean, + /** + * Set to true when parsing a Git diff that includes a `deleted file mode` + * extended header, indicating the file was deleted. + */ + isDelete?: boolean, + /** + * The file mode (e.g. `'100644'`, `'100755'`) of the old file, parsed from + * Git extended headers (`old mode` or `deleted file mode`). + */ + oldMode?: string, + /** + * The file mode (e.g. `'100644'`, `'100755'`) of the new file, parsed from + * Git extended headers (`new mode` or `new file mode`). 
+ */ + newMode?: string, } export interface StructuredPatchHunk { diff --git a/test/patch/create.js b/test/patch/create.js index 204818d8..b47efe8a 100644 --- a/test/patch/create.js +++ b/test/patch/create.js @@ -1175,6 +1175,196 @@ describe('patch/create', function() { // eslint-disable-next-line dot-notation expect(() => formatPatch(patchArray, OMIT_HEADERS)).to.throw(); }); + + it('should silently skip headers when filenames are undefined', function() { + const patchWithNoFilenames = { + oldFileName: undefined, + newFileName: undefined, + oldHeader: undefined, + newHeader: undefined, + hunks: [{ + oldStart: 1, oldLines: 1, + newStart: 1, newLines: 1, + lines: ['-old', '+new'] + }] + }; + // All header options should silently skip headers when filenames + // are undefined, rather than emitting "--- undefined" etc. + const expectedOutput = + '@@ -1,1 +1,1 @@\n' + + '-old\n' + + '+new\n'; + const expectedWithUnderline = + '===================================================================\n' + + '@@ -1,1 +1,1 @@\n' + + '-old\n' + + '+new\n'; + expect(formatPatch(patchWithNoFilenames, OMIT_HEADERS)).to.equal(expectedOutput); + expect(formatPatch(patchWithNoFilenames, FILE_HEADERS_ONLY)).to.equal(expectedOutput); + // INCLUDE_HEADERS still emits the underline, just skips Index and file headers + expect(formatPatch(patchWithNoFilenames, INCLUDE_HEADERS)).to.equal(expectedWithUnderline); + expect(formatPatch(patchWithNoFilenames)).to.equal(expectedWithUnderline); + // includeIndex: true with undefined filenames should also skip silently + expect(formatPatch(patchWithNoFilenames, { + includeIndex: true, + includeUnderline: false, + includeFileHeaders: false + })).to.equal(expectedOutput); + }); + + it('should emit diff --git header for patches with isGit flag', function() { + const patch = { + oldFileName: 'a/file.txt', + newFileName: 'b/file.txt', + oldHeader: '', + newHeader: '', + isGit: true, + hunks: [{ + oldStart: 1, oldLines: 1, + newStart: 1, newLines: 1, + 
lines: ['-old', '+new'] + }] + }; + expect(formatPatch(patch)).to.equal( + 'diff --git a/file.txt b/file.txt\n' + + '--- a/file.txt\t\n' + + '+++ b/file.txt\t\n' + + '@@ -1,1 +1,1 @@\n' + + '-old\n' + + '+new\n' + ); + }); + + it('should emit rename headers for patches with isGit and isRename', function() { + const patch = { + oldFileName: 'a/old.txt', + newFileName: 'b/new.txt', + oldHeader: undefined, + newHeader: undefined, + isGit: true, + isRename: true, + hunks: [] + }; + expect(formatPatch(patch)).to.equal( + 'diff --git a/old.txt b/new.txt\n' + + 'rename from old.txt\n' + + 'rename to new.txt\n' + ); + }); + + it('should emit copy headers for patches with isGit and isCopy', function() { + const patch = { + oldFileName: 'a/original.txt', + newFileName: 'b/copy.txt', + oldHeader: undefined, + newHeader: undefined, + isGit: true, + isCopy: true, + hunks: [] + }; + expect(formatPatch(patch)).to.equal( + 'diff --git a/original.txt b/copy.txt\n' + + 'copy from original.txt\n' + + 'copy to copy.txt\n' + ); + }); + + it('should emit new file mode header for patches with isGit and isCreate', function() { + const patch = { + oldFileName: '/dev/null', + newFileName: 'b/newfile.txt', + oldHeader: '', + newHeader: '', + isGit: true, + isCreate: true, + hunks: [{ + oldStart: 1, oldLines: 0, + newStart: 1, newLines: 1, + lines: ['+hello'] + }] + }; + expect(formatPatch(patch)).to.equal( + 'diff --git /dev/null b/newfile.txt\n' + + 'new file mode 100644\n' + + '--- /dev/null\t\n' + + '+++ b/newfile.txt\t\n' + + '@@ -0,0 +1,1 @@\n' + + '+hello\n' + ); + }); + + it('should emit deleted file mode header for patches with isGit and isDelete', function() { + const patch = { + oldFileName: 'a/doomed.txt', + newFileName: '/dev/null', + oldHeader: '', + newHeader: '', + isGit: true, + isDelete: true, + hunks: [{ + oldStart: 1, oldLines: 1, + newStart: 1, newLines: 0, + lines: ['-goodbye'] + }] + }; + expect(formatPatch(patch)).to.equal( + 'diff --git a/doomed.txt /dev/null\n' + + 
'deleted file mode 100644\n' + + '--- a/doomed.txt\t\n' + + '+++ /dev/null\t\n' + + '@@ -1,1 +0,0 @@\n' + + '-goodbye\n' + ); + }); + + it('should emit rename headers with file headers when hunks are present', function() { + const patch = { + oldFileName: 'a/old.txt', + newFileName: 'b/new.txt', + oldHeader: '', + newHeader: '', + isGit: true, + isRename: true, + hunks: [{ + oldStart: 1, oldLines: 1, + newStart: 1, newLines: 1, + lines: ['-aaa', '+bbb'] + }] + }; + expect(formatPatch(patch)).to.equal( + 'diff --git a/old.txt b/new.txt\n' + + 'rename from old.txt\n' + + 'rename to new.txt\n' + + '--- a/old.txt\t\n' + + '+++ b/new.txt\t\n' + + '@@ -1,1 +1,1 @@\n' + + '-aaa\n' + + '+bbb\n' + ); + }); + + it('should round-trip a Git rename patch through formatPatch and parsePatch', function() { + const original = { + oldFileName: 'a/old.txt', + newFileName: 'b/new.txt', + oldHeader: '', + newHeader: '', + isGit: true, + isRename: true, + hunks: [{ + oldStart: 1, oldLines: 1, + newStart: 1, newLines: 1, + lines: ['-aaa', '+bbb'] + }] + }; + const formatted = formatPatch(original); + const parsed = parsePatch(formatted); + expect(parsed).to.have.length(1); + expect(parsed[0].oldFileName).to.equal('a/old.txt'); + expect(parsed[0].newFileName).to.equal('b/new.txt'); + expect(parsed[0].isGit).to.equal(true); + expect(parsed[0].isRename).to.equal(true); + }); }); }); }); diff --git a/test/patch/parse.js b/test/patch/parse.js index ed604ab1..f5c86616 100644 --- a/test/patch/parse.js +++ b/test/patch/parse.js @@ -710,5 +710,768 @@ line3 // eslint-disable-next-line dot-notation expect(() => {parsePatch(patchStr);}).to.throw('Hunk at line 5 contained invalid line line3'); }); + + it('should parse a single-file diff --git patch', function() { + expect(parsePatch( +`diff --git a/file.txt b/file.txt +index abc1234..def5678 100644 +--- a/file.txt ++++ b/file.txt +@@ -1,3 +1,4 @@ + line1 + line2 ++line3 + line4`)) + .to.eql([{ + oldFileName: 'a/file.txt', + oldHeader: '', + 
newFileName: 'b/file.txt', + newHeader: '', + isGit: true, + hunks: [ + { + oldStart: 1, oldLines: 3, + newStart: 1, newLines: 4, + lines: [ + ' line1', + ' line2', + '+line3', + ' line4' + ] + } + ] + }]); + }); + + it('should parse a multi-file diff --git patch', function() { + expect(parsePatch( +`diff --git a/file1.txt b/file1.txt +index abc1234..def5678 100644 +--- a/file1.txt ++++ b/file1.txt +@@ -1,3 +1,4 @@ + line1 + line2 ++line3 + line4 +diff --git a/file2.txt b/file2.txt +index 1234567..abcdef0 100644 +--- a/file2.txt ++++ b/file2.txt +@@ -1,3 +1,4 @@ + lineA + lineB ++lineC + lineD`)) + .to.eql([{ + oldFileName: 'a/file1.txt', + oldHeader: '', + newFileName: 'b/file1.txt', + newHeader: '', + isGit: true, + hunks: [ + { + oldStart: 1, oldLines: 3, + newStart: 1, newLines: 4, + lines: [ + ' line1', + ' line2', + '+line3', + ' line4' + ] + } + ] + }, { + oldFileName: 'a/file2.txt', + oldHeader: '', + newFileName: 'b/file2.txt', + newHeader: '', + isGit: true, + hunks: [ + { + oldStart: 1, oldLines: 3, + newStart: 1, newLines: 4, + lines: [ + ' lineA', + ' lineB', + '+lineC', + ' lineD' + ] + } + ] + }]); + }); + + it('should parse a diff --git rename with no content change', function() { + expect(parsePatch( +`diff --git a/README.md b/README-2.md +similarity index 100% +rename from README.md +rename to README-2.md`)) + .to.eql([{ + oldFileName: 'a/README.md', + newFileName: 'b/README-2.md', + isGit: true, + hunks: [], + isRename: true + }]); + }); + + it('should parse a diff --git rename with content change', function() { + expect(parsePatch( +`diff --git a/old-name.txt b/new-name.txt +similarity index 85% +rename from old-name.txt +rename to new-name.txt +index abc1234..def5678 100644 +--- a/old-name.txt ++++ b/new-name.txt +@@ -1,3 +1,4 @@ + line1 + line2 ++line3 + line4`)) + .to.eql([{ + oldFileName: 'a/old-name.txt', + oldHeader: '', + newFileName: 'b/new-name.txt', + newHeader: '', + isGit: true, + isRename: true, + hunks: [ + { + oldStart: 1, 
oldLines: 3, + newStart: 1, newLines: 4, + lines: [ + ' line1', + ' line2', + '+line3', + ' line4' + ] + } + ] + }]); + }); + + it('should parse a diff --git mode-only change', function() { + expect(parsePatch( +`diff --git a/script.sh b/script.sh +old mode 100644 +new mode 100755`)) + .to.eql([{ + oldFileName: 'a/script.sh', + newFileName: 'b/script.sh', + isGit: true, + oldMode: '100644', + newMode: '100755', + hunks: [] + }]); + }); + + it('should parse a diff --git binary file change', function() { + expect(parsePatch( +`diff --git a/image.png b/image.png +index abc1234..def5678 100644 +Binary files a/image.png and b/image.png differ`)) + .to.eql([{ + oldFileName: 'a/image.png', + newFileName: 'b/image.png', + isGit: true, + hunks: [] + }]); + }); + + it('should not lose files when a diff --git binary change is followed by a text change', function() { + expect(parsePatch( +`diff --git a/file1.txt b/file1.txt +--- a/file1.txt ++++ b/file1.txt +@@ -1 +1 @@ +-old ++new +diff --git a/image.png b/image.png +Binary files a/image.png and b/image.png differ +diff --git a/file3.txt b/file3.txt +--- a/file3.txt ++++ b/file3.txt +@@ -1 +1 @@ +-foo ++bar`)) + .to.eql([{ + oldFileName: 'a/file1.txt', + oldHeader: '', + newFileName: 'b/file1.txt', + newHeader: '', + isGit: true, + hunks: [ + { + oldStart: 1, oldLines: 1, + newStart: 1, newLines: 1, + lines: ['-old', '+new'] + } + ] + }, { + oldFileName: 'a/image.png', + newFileName: 'b/image.png', + isGit: true, + hunks: [] + }, { + oldFileName: 'a/file3.txt', + oldHeader: '', + newFileName: 'b/file3.txt', + newHeader: '', + isGit: true, + hunks: [ + { + oldStart: 1, oldLines: 1, + newStart: 1, newLines: 1, + lines: ['-foo', '+bar'] + } + ] + }]); + }); + + it('should not lose files when a diff --git mode-only change is in the middle', function() { + expect(parsePatch( +`diff --git a/file1.txt b/file1.txt +--- a/file1.txt ++++ b/file1.txt +@@ -1,3 +1,4 @@ + line1 + line2 ++line3 + line4 +diff --git a/script.sh b/script.sh 
+old mode 100644 +new mode 100755 +diff --git a/file3.txt b/file3.txt +--- a/file3.txt ++++ b/file3.txt +@@ -1,2 +1,3 @@ + aaa ++bbb + ccc`)) + .to.eql([{ + oldFileName: 'a/file1.txt', + oldHeader: '', + newFileName: 'b/file1.txt', + newHeader: '', + isGit: true, + hunks: [ + { + oldStart: 1, oldLines: 3, + newStart: 1, newLines: 4, + lines: [ + ' line1', + ' line2', + '+line3', + ' line4' + ] + } + ] + }, { + oldFileName: 'a/script.sh', + newFileName: 'b/script.sh', + isGit: true, + oldMode: '100644', + newMode: '100755', + hunks: [] + }, { + oldFileName: 'a/file3.txt', + oldHeader: '', + newFileName: 'b/file3.txt', + newHeader: '', + isGit: true, + hunks: [ + { + oldStart: 1, oldLines: 2, + newStart: 1, newLines: 3, + lines: [ + ' aaa', + '+bbb', + ' ccc' + ] + } + ] + }]); + }); + + it('should parse a diff --git copy', function() { + expect(parsePatch( +`diff --git a/original.txt b/copy.txt +similarity index 100% +copy from original.txt +copy to copy.txt`)) + .to.eql([{ + oldFileName: 'a/original.txt', + newFileName: 'b/copy.txt', + isGit: true, + hunks: [], + isCopy: true + }]); + }); + + it('should parse a diff --git new file', function() { + expect(parsePatch( +`diff --git a/newfile.txt b/newfile.txt +new file mode 100644 +index 0000000..abc1234 +--- /dev/null ++++ b/newfile.txt +@@ -0,0 +1,2 @@ ++hello ++world`)) + .to.eql([{ + oldFileName: '/dev/null', + oldHeader: '', + newFileName: 'b/newfile.txt', + newHeader: '', + isGit: true, + isCreate: true, + newMode: '100644', + hunks: [ + { + oldStart: 1, oldLines: 0, + newStart: 1, newLines: 2, + lines: ['+hello', '+world'] + } + ] + }]); + }); + + it('should parse a diff --git deleted file', function() { + expect(parsePatch( +`diff --git a/old.txt b/old.txt +deleted file mode 100644 +index ce01362..0000000 +--- a/old.txt ++++ /dev/null +@@ -1 +0,0 @@ +-goodbye`)) + .to.eql([{ + oldFileName: 'a/old.txt', + oldHeader: '', + newFileName: '/dev/null', + newHeader: '', + isGit: true, + isDelete: true, + oldMode: 
'100644', + hunks: [ + { + oldStart: 1, oldLines: 1, + newStart: 1, newLines: 0, + lines: ['-goodbye'] + } + ] + }]); + }); + + it('should parse a diff --git empty file creation (no --- / +++ or hunks)', function() { + expect(parsePatch( +`diff --git a/empty.txt b/empty.txt +new file mode 100644 +index 0000000..e69de29`)) + .to.eql([{ + oldFileName: 'a/empty.txt', + newFileName: 'b/empty.txt', + isGit: true, + isCreate: true, + newMode: '100644', + hunks: [] + }]); + }); + + it('should parse a diff --git empty file deletion (no --- / +++ or hunks)', function() { + expect(parsePatch( +`diff --git a/empty.txt b/empty.txt +deleted file mode 100644 +index e69de29..0000000`)) + .to.eql([{ + oldFileName: 'a/empty.txt', + newFileName: 'b/empty.txt', + isGit: true, + isDelete: true, + oldMode: '100644', + hunks: [] + }]); + }); + + it('should parse diff --git with quoted filenames containing spaces', function() { + expect(parsePatch( +`diff --git "a/file with spaces.txt" "b/file with spaces.txt" +index abc1234..def5678 100644 +--- "a/file with spaces.txt" ++++ "b/file with spaces.txt" +@@ -1 +1 @@ +-old ++new`)) + .to.eql([{ + oldFileName: 'a/file with spaces.txt', + oldHeader: '', + newFileName: 'b/file with spaces.txt', + newHeader: '', + isGit: true, + hunks: [ + { + oldStart: 1, oldLines: 1, + newStart: 1, newLines: 1, + lines: ['-old', '+new'] + } + ] + }]); + }); + + it('should parse diff --git rename with quoted filenames', function() { + expect(parsePatch( +`diff --git "a/old name.txt" "b/new name.txt" +similarity index 100% +rename from old name.txt +rename to new name.txt`)) + .to.eql([{ + oldFileName: 'a/old name.txt', + newFileName: 'b/new name.txt', + isGit: true, + hunks: [], + isRename: true + }]); + }); + + it('should unquote C-style quoted filenames in rename from/to', function() { + expect(parsePatch( +`diff --git "a/file\\twith\\ttabs.txt" b/normal.txt +similarity index 100% +rename from "file\\twith\\ttabs.txt" +rename to normal.txt`)) + .to.eql([{ + 
oldFileName: 'a/file\twith\ttabs.txt', + newFileName: 'b/normal.txt', + isGit: true, + hunks: [], + isRename: true + }]); + }); + + it('should handle all Git C-style escape sequences in quoted filenames', function() { + expect(parsePatch( +`diff --git "a/\\a\\b\\f\\r\\v\\001file.txt" "b/\\a\\b\\f\\r\\v\\001file.txt" +old mode 100644 +new mode 100755`)) + .to.eql([{ + oldFileName: 'a/\x07\b\f\r\v\x01file.txt', + newFileName: 'b/\x07\b\f\r\v\x01file.txt', + isGit: true, + oldMode: '100644', + newMode: '100755', + hunks: [] + }]); + }); + + it('should handle multi-byte UTF-8 octal escapes in quoted filenames (emoji)', function() { + // 🎉 is U+1F389, UTF-8 bytes F0 9F 8E 89 = octal 360 237 216 211 + expect(parsePatch( +`diff --git "a/file\\360\\237\\216\\211.txt" "b/file\\360\\237\\216\\211.txt" +new file mode 100644 +index 0000000..ce01362 +--- /dev/null ++++ "b/file\\360\\237\\216\\211.txt" +@@ -0,0 +1 @@ ++hello`)) + .to.eql([{ + oldFileName: '/dev/null', + oldHeader: '', + newFileName: 'b/file🎉.txt', + newHeader: '', + isGit: true, + isCreate: true, + newMode: '100644', + hunks: [ + { + oldStart: 1, oldLines: 0, + newStart: 1, newLines: 1, + lines: ['+hello'] + } + ] + }]); + }); + + it('should handle multi-byte UTF-8 octal escapes in quoted filenames (accented latin)', function() { + // é is U+00E9, UTF-8 bytes C3 A9 = octal 303 251 + expect(parsePatch( +`diff --git "a/caf\\303\\251.txt" "b/caf\\303\\251.txt" +old mode 100644 +new mode 100755`)) + .to.eql([{ + oldFileName: 'a/café.txt', + newFileName: 'b/café.txt', + isGit: true, + oldMode: '100644', + newMode: '100755', + hunks: [] + }]); + }); + + it('should unquote C-style quoted filenames in copy from/to', function() { + expect(parsePatch( +`diff --git a/original.txt "b/copy\\nwith\\nnewlines.txt" +similarity index 100% +copy from original.txt +copy to "copy\\nwith\\nnewlines.txt"`)) + .to.eql([{ + oldFileName: 'a/original.txt', + newFileName: 'b/copy\nwith\nnewlines.txt', + isGit: true, + hunks: [], + isCopy: 
true + }]); + }); + + it('should let --- and +++ lines override filenames from diff --git header', function() { + // When --- and +++ are present, they should take precedence over + // the filenames parsed from the diff --git header line. + expect(parsePatch( +`diff --git a/file.txt b/file.txt +--- a/file.txt ++++ b/file.txt +@@ -1 +1 @@ +-old ++new`)) + .to.eql([{ + oldFileName: 'a/file.txt', + oldHeader: '', + newFileName: 'b/file.txt', + newHeader: '', + isGit: true, + hunks: [ + { + oldStart: 1, oldLines: 1, + newStart: 1, newLines: 1, + lines: ['-old', '+new'] + } + ] + }]); + }); + + it('should not be confused by a diff --git rename followed by files with hunks', function() { + expect(parsePatch( +`diff --git a/old.txt b/new.txt +similarity index 100% +rename from old.txt +rename to new.txt +diff --git a/other.txt b/other.txt +--- a/other.txt ++++ b/other.txt +@@ -1 +1 @@ +-aaa ++bbb`)) + .to.eql([{ + oldFileName: 'a/old.txt', + newFileName: 'b/new.txt', + isGit: true, + hunks: [], + isRename: true + }, { + oldFileName: 'a/other.txt', + oldHeader: '', + newFileName: 'b/other.txt', + newHeader: '', + isGit: true, + hunks: [ + { + oldStart: 1, oldLines: 1, + newStart: 1, newLines: 1, + lines: ['-aaa', '+bbb'] + } + ] + }]); + }); + + it('should parse diff --git with unquoted filenames containing spaces (same old and new)', function() { + expect(parsePatch( +`diff --git a/file with spaces.txt b/file with spaces.txt +old mode 100644 +new mode 100755`)) + .to.eql([{ + oldFileName: 'a/file with spaces.txt', + newFileName: 'b/file with spaces.txt', + isGit: true, + oldMode: '100644', + newMode: '100755', + hunks: [] + }]); + }); + + it('should parse diff --git rename with unquoted filenames containing spaces', function() { + // The diff --git line alone is ambiguous when filenames contain spaces + // and old != new, but rename from / rename to resolve the ambiguity. 
+ expect(parsePatch( +`diff --git a/file with spaces.txt b/another file with spaces.txt +similarity index 100% +rename from file with spaces.txt +rename to another file with spaces.txt`)) + .to.eql([{ + oldFileName: 'a/file with spaces.txt', + newFileName: 'b/another file with spaces.txt', + isGit: true, + hunks: [], + isRename: true + }]); + }); + + it('should handle diff --git with a filename containing " b/"', function() { + // The filename literally contains " b/" which is also the separator + // between the old and new paths. Since old === new, the parser can + // find the unique split where both halves match. + expect(parsePatch( +`diff --git a/x b/y.txt b/x b/y.txt +old mode 100644 +new mode 100755`)) + .to.eql([{ + oldFileName: 'a/x b/y.txt', + newFileName: 'b/x b/y.txt', + isGit: true, + oldMode: '100644', + newMode: '100755', + hunks: [] + }]); + }); + + it('should handle diff --git rename where filenames contain " b/"', function() { + // rename from / rename to lines are unambiguous (one filename per + // line) so " b/" in the name is not a problem for them. The + // diff --git header IS ambiguous, but rename from/to override it. + expect(parsePatch( +`diff --git a/x b/old.txt b/x b/new.txt +similarity index 100% +rename from x b/old.txt +rename to x b/new.txt`)) + .to.eql([{ + oldFileName: 'a/x b/old.txt', + newFileName: 'b/x b/new.txt', + isGit: true, + hunks: [], + isRename: true + }]); + }); + + it('should handle diff --git rename where filenames contain " b/", without rename from/to', function() { + // Without rename from/to, the diff --git header is ambiguous when + // filenames contain " b/". But --- and +++ lines resolve it. 
+ expect(parsePatch( +`diff --git a/x b/old.txt b/x b/new.txt +--- a/x b/old.txt ++++ b/x b/new.txt +@@ -1 +1 @@ +-hello ++world`)) + .to.eql([{ + oldFileName: 'a/x b/old.txt', + oldHeader: '', + newFileName: 'b/x b/new.txt', + newHeader: '', + isGit: true, + hunks: [ + { + oldStart: 1, oldLines: 1, + newStart: 1, newLines: 1, + lines: ['-hello', '+world'] + } + ] + }]); + }); + + // So far as we know, Git never actually produces diff --git headers that + // can't be parsed (e.g. with unterminated quotes or missing a/b prefixes). + // But we test these cases to confirm parsePatch doesn't crash and instead + // gracefully falls back to getting filenames from --- / +++ lines. + + it('should handle an unparseable diff --git header with unterminated quote', function() { + expect(parsePatch( +`diff --git "a/unterminated +--- a/file.txt ++++ b/file.txt +@@ -1 +1 @@ +-old ++new`)) + .to.eql([{ + oldFileName: 'a/file.txt', + oldHeader: '', + newFileName: 'b/file.txt', + newHeader: '', + isGit: true, + hunks: [ + { + oldStart: 1, oldLines: 1, + newStart: 1, newLines: 1, + lines: ['-old', '+new'] + } + ] + }]); + }); + + it('should handle an unparseable diff --git header with no a/b prefixes', function() { + expect(parsePatch( +`diff --git file.txt file.txt +--- a/file.txt ++++ b/file.txt +@@ -1 +1 @@ +-old ++new`)) + .to.eql([{ + oldFileName: 'a/file.txt', + oldHeader: '', + newFileName: 'b/file.txt', + newHeader: '', + isGit: true, + hunks: [ + { + oldStart: 1, oldLines: 1, + newStart: 1, newLines: 1, + lines: ['-old', '+new'] + } + ] + }]); + }); + + it('should handle an incomplete octal escape in a quoted filename', function() { + // The quoted filename has a truncated octal escape (\36 instead of \360). + // parseQuotedFileName should return null, so parseGitDiffHeader returns + // null and we fall back to --- / +++ lines for filenames. 
+ expect(parsePatch( +`diff --git "a/file\\36" "b/file\\36" +--- a/file.txt ++++ b/file.txt +@@ -1 +1 @@ +-old ++new`)) + .to.eql([{ + oldFileName: 'a/file.txt', + oldHeader: '', + newFileName: 'b/file.txt', + newHeader: '', + isGit: true, + hunks: [ + { + oldStart: 1, oldLines: 1, + newStart: 1, newLines: 1, + lines: ['-old', '+new'] + } + ] + }]); + }); + + it('should handle an unparseable diff --git header with no --- or +++ fallback', function() { + // When both the diff --git header is unparseable AND there are no + // --- / +++ lines, filenames remain undefined. + expect(parsePatch( +`diff --git file.txt file.txt +old mode 100644 +new mode 100755`)) + .to.eql([{ + isGit: true, + oldMode: '100644', + newMode: '100755', + hunks: [] + }]); + }); }); }); diff --git a/test/patch/readme-rename-example.js b/test/patch/readme-rename-example.js new file mode 100644 index 00000000..833b255c --- /dev/null +++ b/test/patch/readme-rename-example.js @@ -0,0 +1,240 @@ +import {applyPatches} from '../../libesm/patch/apply.js'; + +import {expect} from 'chai'; +import fs from 'fs'; +import os from 'os'; +import path from 'path'; + +describe('README Git rename example', function() { + let tmpDir; + let originalCwd; + + beforeEach(function() { + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'jsdiff-readme-test-')); + originalCwd = process.cwd(); + process.chdir(tmpDir); + }); + + afterEach(function() { + process.chdir(originalCwd); + fs.rmSync(tmpDir, {recursive: true, force: true}); + }); + + /** + * Extract the Git rename example code from the README and return it as a + * function that takes (applyPatches, patch, fs, path) and runs the example. 
+ */ + function getReadmeExampleFn() { + const readme = fs.readFileSync( + path.join(__dirname, '../../README.md'), + 'utf-8' + ); + + // Find the heading + const headingIndex = readme.indexOf('##### Applying a multi-file Git patch that may include renames'); + if (headingIndex === -1) { + throw new Error('Could not find the Git rename example heading in README.md'); + } + + // Find the code block after the heading + const afterHeading = readme.substring(headingIndex); + const codeBlockStart = afterHeading.indexOf('\n```\n'); + if (codeBlockStart === -1) { + throw new Error('Could not find the code block in the Git rename example'); + } + const codeStart = codeBlockStart + 4; // skip past the \n```\n + const codeBlockEnd = afterHeading.indexOf('\n```\n', codeStart); + if (codeBlockEnd === -1) { + throw new Error('Could not find the end of the code block in the Git rename example'); + } + + let code = afterHeading.substring(codeStart, codeBlockEnd); + + // Strip the require line — we'll provide applyPatches as an argument. + // Strip the fs.readFileSync for the patch — we'll provide patch as an argument. 
+ code = code + .replace(/const \{applyPatches\}.*\n/, '') + .replace(/const patch = .*\n/, ''); + + // eslint-disable-next-line no-new-func + return new Function('applyPatches', 'patch', 'fs', 'path', code); + } + + it('should handle a simple rename with content change', function() { + fs.writeFileSync('old.txt', 'line1\nline2\nline3\n'); + + const patch = +`diff --git a/old.txt b/new.txt +similarity index 80% +rename from old.txt +rename to new.txt +--- a/old.txt ++++ b/new.txt +@@ -1,3 +1,3 @@ + line1 +-line2 ++line2modified + line3 +`; + + getReadmeExampleFn()(applyPatches, patch, fs, path); + + expect(fs.existsSync('old.txt')).to.equal(false); + expect(fs.readFileSync('new.txt', 'utf-8')) + .to.equal('line1\nline2modified\nline3\n'); + }); + + it('should handle a swap rename (a→b, b→a)', function() { + fs.writeFileSync('a.txt', 'content of a\n'); + fs.writeFileSync('b.txt', 'content of b\n'); + + const patch = +`diff --git a/a.txt b/b.txt +similarity index 100% +rename from a.txt +rename to b.txt +diff --git a/b.txt b/a.txt +similarity index 100% +rename from b.txt +rename to a.txt +`; + + getReadmeExampleFn()(applyPatches, patch, fs, path); + + expect(fs.readFileSync('a.txt', 'utf-8')).to.equal('content of b\n'); + expect(fs.readFileSync('b.txt', 'utf-8')).to.equal('content of a\n'); + }); + + it('should handle a swap rename with content changes', function() { + fs.writeFileSync('a.txt', 'aaa\n'); + fs.writeFileSync('b.txt', 'bbb\n'); + + const patch = +`diff --git a/a.txt b/b.txt +similarity index 50% +rename from a.txt +rename to b.txt +--- a/a.txt ++++ b/b.txt +@@ -1 +1 @@ +-aaa ++aaa-modified +diff --git a/b.txt b/a.txt +similarity index 50% +rename from b.txt +rename to a.txt +--- a/b.txt ++++ b/a.txt +@@ -1 +1 @@ +-bbb ++bbb-modified +`; + + getReadmeExampleFn()(applyPatches, patch, fs, path); + + expect(fs.readFileSync('a.txt', 'utf-8')).to.equal('bbb-modified\n'); + expect(fs.readFileSync('b.txt', 'utf-8')).to.equal('aaa-modified\n'); + }); + + 
it('should handle a three-way rotation (a→b, b→c, c→a)', function() { + fs.writeFileSync('a.txt', 'content of a\n'); + fs.writeFileSync('b.txt', 'content of b\n'); + fs.writeFileSync('c.txt', 'content of c\n'); + + const patch = +`diff --git a/a.txt b/b.txt +similarity index 100% +rename from a.txt +rename to b.txt +diff --git a/b.txt b/c.txt +similarity index 100% +rename from b.txt +rename to c.txt +diff --git a/c.txt b/a.txt +similarity index 100% +rename from c.txt +rename to a.txt +`; + + getReadmeExampleFn()(applyPatches, patch, fs, path); + + expect(fs.readFileSync('a.txt', 'utf-8')).to.equal('content of c\n'); + expect(fs.readFileSync('b.txt', 'utf-8')).to.equal('content of a\n'); + expect(fs.readFileSync('c.txt', 'utf-8')).to.equal('content of b\n'); + }); + + it('should handle a file deletion', function() { + fs.writeFileSync('doomed.txt', 'goodbye\n'); + + const patch = +`diff --git a/doomed.txt b/doomed.txt +deleted file mode 100644 +index 2b31011..0000000 +--- a/doomed.txt ++++ /dev/null +@@ -1 +0,0 @@ +-goodbye +`; + + getReadmeExampleFn()(applyPatches, patch, fs, path); + + expect(fs.existsSync('doomed.txt')).to.equal(false); + }); + + it('should handle a file creation', function() { + const patch = +`diff --git a/brand-new.txt b/brand-new.txt +new file mode 100644 +index 0000000..fa49b07 +--- /dev/null ++++ b/brand-new.txt +@@ -0,0 +1 @@ ++hello world +`; + + getReadmeExampleFn()(applyPatches, patch, fs, path); + + expect(fs.readFileSync('brand-new.txt', 'utf-8')).to.equal('hello world\n'); + }); + + it('should create a new executable file with correct mode', function() { + const patch = +`diff --git a/run.sh b/run.sh +new file mode 100755 +index 0000000..abc1234 +--- /dev/null ++++ b/run.sh +@@ -0,0 +1,2 @@ ++#!/bin/bash ++echo hello +`; + + getReadmeExampleFn()(applyPatches, patch, fs, path); + + expect(fs.readFileSync('run.sh', 'utf-8')).to.equal('#!/bin/bash\necho hello\n'); + const mode = fs.statSync('run.sh').mode & 0o777; + 
expect(mode).to.equal(0o755); + }); + + it('should set the mode when a file is modified with a mode change', function() { + fs.writeFileSync('script.sh', 'echo old\n'); + fs.chmodSync('script.sh', 0o644); + + const patch = +`diff --git a/script.sh b/script.sh +old mode 100644 +new mode 100755 +--- a/script.sh ++++ b/script.sh +@@ -1 +1 @@ +-echo old ++echo new +`; + + getReadmeExampleFn()(applyPatches, patch, fs, path); + + expect(fs.readFileSync('script.sh', 'utf-8')).to.equal('echo new\n'); + const mode = fs.statSync('script.sh').mode & 0o777; + expect(mode).to.equal(0o755); + }); +}); diff --git a/test/patch/reverse.js b/test/patch/reverse.js index d151b439..bd8e6213 100644 --- a/test/patch/reverse.js +++ b/test/patch/reverse.js @@ -61,7 +61,7 @@ describe('patch/reverse', function() { '+bar\n' ); expect(formatPatch(reversePatch(patch))).to.equal( - '===================================================================\n' + + 'diff --git b/README.md a/README.md\n' + '--- b/README.md\t\n' + '+++ a/README.md\t\n' + '@@ -1,7 +1,5 @@\n' + @@ -79,7 +79,7 @@ describe('patch/reverse', function() { '-\n' + '-bar\n' + '\n' + - '===================================================================\n' + + 'diff --git b/CONTRIBUTING.md a/CONTRIBUTING.md\n' + '--- b/CONTRIBUTING.md\t\n' + '+++ a/CONTRIBUTING.md\t\n' + '@@ -2,8 +2,6 @@\n' + @@ -93,5 +93,90 @@ describe('patch/reverse', function() { ' Generally we like to see pull requests that\n' ); }); + + it('should reverse a rename patch into a rename in the opposite direction', function() { + const patch = parsePatch( + 'diff --git a/old.txt b/new.txt\n' + + 'similarity index 85%\n' + + 'rename from old.txt\n' + + 'rename to new.txt\n' + + '--- a/old.txt\n' + + '+++ b/new.txt\n' + + '@@ -1,3 +1,3 @@\n' + + ' line1\n' + + '-line2\n' + + '+line2modified\n' + + ' line3\n' + ); + const reversed = reversePatch(patch); + expect(reversed).to.have.length(1); + expect(reversed[0].oldFileName).to.equal('b/new.txt'); + 
expect(reversed[0].newFileName).to.equal('a/old.txt'); + expect(reversed[0].isRename).to.equal(true); + expect(reversed[0].isCopy).to.equal(undefined); + expect(reversed[0].hunks).to.have.length(1); + expect(reversed[0].hunks[0].lines).to.eql([ + ' line1', + '+line2', + '-line2modified', + ' line3' + ]); + }); + + it('should reverse a copy patch into a deletion', function() { + const patch = parsePatch( + 'diff --git a/original.txt b/copy.txt\n' + + 'similarity index 85%\n' + + 'copy from original.txt\n' + + 'copy to copy.txt\n' + + '--- a/original.txt\n' + + '+++ b/copy.txt\n' + + '@@ -1,3 +1,3 @@\n' + + ' line1\n' + + '-line2\n' + + '+line2modified\n' + + ' line3\n' + ); + const reversed = reversePatch(patch); + expect(reversed).to.have.length(1); + // Reversing a copy means deleting the copy destination + expect(reversed[0].oldFileName).to.equal('b/copy.txt'); + expect(reversed[0].newFileName).to.equal('/dev/null'); + expect(reversed[0].newHeader).to.equal(undefined); + expect(reversed[0].isRename).to.equal(undefined); + expect(reversed[0].isCopy).to.equal(undefined); + }); + + it('should reverse a hunk-less copy into a deletion', function() { + const patch = parsePatch( + 'diff --git a/original.txt b/copy.txt\n' + + 'similarity index 100%\n' + + 'copy from original.txt\n' + + 'copy to copy.txt\n' + ); + const reversed = reversePatch(patch); + expect(reversed).to.have.length(1); + expect(reversed[0].oldFileName).to.equal('b/copy.txt'); + expect(reversed[0].newFileName).to.equal('/dev/null'); + expect(reversed[0].isRename).to.equal(undefined); + expect(reversed[0].isCopy).to.equal(undefined); + expect(reversed[0].hunks).to.eql([]); + }); + + it('should reverse a hunk-less rename', function() { + const patch = parsePatch( + 'diff --git a/old.txt b/new.txt\n' + + 'similarity index 100%\n' + + 'rename from old.txt\n' + + 'rename to new.txt\n' + ); + const reversed = reversePatch(patch); + expect(reversed).to.have.length(1); + 
expect(reversed[0].oldFileName).to.equal('b/new.txt'); + expect(reversed[0].newFileName).to.equal('a/old.txt'); + expect(reversed[0].isRename).to.equal(true); + expect(reversed[0].isCopy).to.equal(undefined); + expect(reversed[0].hunks).to.eql([]); + }); }); });