Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
135 changes: 123 additions & 12 deletions codex-rs/apply-patch/src/invocation.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,10 @@ use crate::ApplyPatchArgs;
use crate::ApplyPatchError;
use crate::ApplyPatchFileChange;
use crate::ApplyPatchFileUpdate;
use crate::ApplyPatchOptions;
use crate::IoError;
use crate::MaybeApplyPatchVerified;
use crate::PRESERVE_CRLF_FLAG;
use crate::parser::Hunk;
use crate::parser::ParseError;
use crate::parser::parse_patch;
Expand Down Expand Up @@ -100,23 +102,59 @@ fn extract_apply_patch_from_shell(
}

// TODO: make private once we remove tests in lib.rs
#[cfg_attr(not(test), allow(dead_code))]
pub fn maybe_parse_apply_patch(argv: &[String]) -> MaybeApplyPatch {
maybe_parse_apply_patch_with_options(argv, ApplyPatchOptions::default())
}

pub fn maybe_parse_apply_patch_with_options(
argv: &[String],
options: ApplyPatchOptions,
) -> MaybeApplyPatch {
match argv {
// Direct invocation: apply_patch <patch>
[cmd, body] if APPLY_PATCH_COMMANDS.contains(&cmd.as_str()) => match parse_patch(body) {
Ok(source) => MaybeApplyPatch::Body(source),
Err(e) => MaybeApplyPatch::PatchParseError(e),
},
[cmd, body] if APPLY_PATCH_COMMANDS.contains(&cmd.as_str()) => {
let body = if options.preserve_crlf {
body.to_string()
} else {
body.replace("\r\n", "\n")
};
match parse_patch(&body) {
Ok(source) => MaybeApplyPatch::Body(source),
Err(e) => MaybeApplyPatch::PatchParseError(e),
}
}
// Direct invocation with CRLF preservation: apply_patch --preserve-crlf <patch>
[cmd, flag, body]
if APPLY_PATCH_COMMANDS.contains(&cmd.as_str()) && flag == PRESERVE_CRLF_FLAG =>
{
let body = if options.preserve_crlf {
body.to_string()
} else {
body.replace("\r\n", "\n")
};
match parse_patch(&body) {
Ok(source) => MaybeApplyPatch::Body(source),
Err(e) => MaybeApplyPatch::PatchParseError(e),
}
}
// Shell heredoc form: (optional `cd <path> &&`) apply_patch <<'EOF' ...
_ => match parse_shell_script(argv) {
Some((shell, script)) => match extract_apply_patch_from_shell(shell, script) {
Ok((body, workdir)) => match parse_patch(&body) {
Ok(mut source) => {
source.workdir = workdir;
MaybeApplyPatch::Body(source)
Ok((body, workdir)) => {
let body = if options.preserve_crlf {
body
} else {
body.replace("\r\n", "\n")
};
match parse_patch(&body) {
Ok(mut source) => {
source.workdir = workdir;
MaybeApplyPatch::Body(source)
}
Err(e) => MaybeApplyPatch::PatchParseError(e),
}
Err(e) => MaybeApplyPatch::PatchParseError(e),
},
}
Err(ExtractHeredocError::CommandDidNotStartWithApplyPatch) => {
MaybeApplyPatch::NotApplyPatch
}
Expand All @@ -130,6 +168,14 @@ pub fn maybe_parse_apply_patch(argv: &[String]) -> MaybeApplyPatch {
/// cwd must be an absolute path so that we can resolve relative paths in the
/// patch.
pub fn maybe_parse_apply_patch_verified(argv: &[String], cwd: &Path) -> MaybeApplyPatchVerified {
maybe_parse_apply_patch_verified_with_options(argv, cwd, ApplyPatchOptions::default())
}

pub fn maybe_parse_apply_patch_verified_with_options(
argv: &[String],
cwd: &Path,
options: ApplyPatchOptions,
) -> MaybeApplyPatchVerified {
// Detect a raw patch body passed directly as the command or as the body of a shell
// script. In these cases, report an explicit error rather than applying the patch.
if let [body] = argv
Expand All @@ -143,7 +189,7 @@ pub fn maybe_parse_apply_patch_verified(argv: &[String], cwd: &Path) -> MaybeApp
return MaybeApplyPatchVerified::CorrectnessError(ApplyPatchError::ImplicitInvocation);
}

match maybe_parse_apply_patch(argv) {
match maybe_parse_apply_patch_with_options(argv, options) {
MaybeApplyPatch::Body(ApplyPatchArgs {
patch,
hunks,
Expand Down Expand Up @@ -221,7 +267,9 @@ pub fn maybe_parse_apply_patch_verified(argv: &[String], cwd: &Path) -> MaybeApp
///
/// Supported top‑level forms (must be the only top‑level statement):
/// - `apply_patch <<'EOF'\n...\nEOF`
/// - `apply_patch --preserve-crlf <<'EOF'\n...\nEOF`
/// - `cd <path> && apply_patch <<'EOF'\n...\nEOF`
/// - `cd <path> && apply_patch --preserve-crlf <<'EOF'\n...\nEOF`
///
/// Notes about matching:
/// - Parsed with Tree‑sitter Bash and a strict query that uses anchors so the
Expand All @@ -243,7 +291,9 @@ fn extract_apply_patch_from_bash(
// whole-script forms, each expressed as a single top-level statement:
//
// 1. apply_patch <<'EOF'\n...\nEOF
// 2. cd <path> && apply_patch <<'EOF'\n...\nEOF
// 2. apply_patch --preserve-crlf <<'EOF'\n...\nEOF
// 3. cd <path> && apply_patch <<'EOF'\n...\nEOF
// 4. cd <path> && apply_patch --preserve-crlf <<'EOF'\n...\nEOF
//
// Key ideas when reading the query:
// - dots (`.`) between named nodes enforces adjacency among named children and
Expand Down Expand Up @@ -279,6 +329,21 @@ fn extract_apply_patch_from_bash(
.))
.)

(
program
. (redirected_statement
body: (command
name: (command_name (word) @apply_name)
argument: (word) @apply_flag .)
(#any-of? @apply_name "apply_patch" "applypatch")
(#eq? @apply_flag "--preserve-crlf")
redirect: (heredoc_redirect
. (heredoc_start)
. (heredoc_body) @heredoc
. (heredoc_end)
.))
.)

(
program
. (redirected_statement
Expand All @@ -302,6 +367,32 @@ fn extract_apply_patch_from_bash(
. (heredoc_end)
.))
.)

(
program
. (redirected_statement
body: (list
. (command
name: (command_name (word) @cd_name) .
argument: [
(word) @cd_path
(string (string_content) @cd_path)
(raw_string) @cd_raw_string
] .)
"&&"
. (command
name: (command_name (word) @apply_name)
argument: (word) @apply_flag)
.)
(#eq? @cd_name "cd")
(#any-of? @apply_name "apply_patch" "applypatch")
(#eq? @apply_flag "--preserve-crlf")
redirect: (heredoc_redirect
. (heredoc_start)
. (heredoc_body) @heredoc
. (heredoc_end)
.))
.)
"#,
)
.expect("valid bash query")
Expand Down Expand Up @@ -522,6 +613,26 @@ mod tests {
}
}

#[test]
fn test_literal_with_preserve_crlf_flag() {
let args = strs_to_strings(&[
"apply_patch",
PRESERVE_CRLF_FLAG,
r#"*** Begin Patch
*** Add File: foo
+hi
*** End Patch
"#,
]);

match maybe_parse_apply_patch(&args) {
MaybeApplyPatch::Body(ApplyPatchArgs { hunks, .. }) => {
assert_eq!(hunks, expected_single_add());
}
result => panic!("expected MaybeApplyPatch::Body got {result:?}"),
}
}

#[test]
fn test_heredoc() {
assert_match(&heredoc_script(""), None);
Expand Down
25 changes: 24 additions & 1 deletion codex-rs/apply-patch/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,19 @@ use similar::TextDiff;
use thiserror::Error;

pub use invocation::maybe_parse_apply_patch_verified;
pub use invocation::maybe_parse_apply_patch_verified_with_options;
pub use standalone_executable::main;

use crate::invocation::ExtractHeredocError;

/// Detailed instructions for gpt-4.1 on how to use the `apply_patch` tool.
pub const APPLY_PATCH_TOOL_INSTRUCTIONS: &str = include_str!("../apply_patch_tool_instructions.md");

#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub struct ApplyPatchOptions {
pub preserve_crlf: bool,
}

/// Special argv[1] flag used when the Codex executable self-invokes to run the
/// internal `apply_patch` path.
///
Expand All @@ -34,6 +40,9 @@ pub const APPLY_PATCH_TOOL_INSTRUCTIONS: &str = include_str!("../apply_patch_too
/// dispatcher.
pub const CODEX_CORE_APPLY_PATCH_ARG1: &str = "--codex-run-as-apply-patch";

/// Flag used to preserve CRLF line endings when applying CRLF patches.
pub const PRESERVE_CRLF_FLAG: &str = "--preserve-crlf";

#[derive(Debug, Error, PartialEq)]
pub enum ApplyPatchError {
#[error(transparent)]
Expand Down Expand Up @@ -185,7 +194,21 @@ pub fn apply_patch(
stdout: &mut impl std::io::Write,
stderr: &mut impl std::io::Write,
) -> Result<(), ApplyPatchError> {
let hunks = match parse_patch(patch) {
apply_patch_with_options(patch, ApplyPatchOptions::default(), stdout, stderr)
}

pub fn apply_patch_with_options(
patch: &str,
options: ApplyPatchOptions,
stdout: &mut impl std::io::Write,
stderr: &mut impl std::io::Write,
) -> Result<(), ApplyPatchError> {
let patch = if options.preserve_crlf {
patch.to_string()
} else {
patch.replace("\r\n", "\n")
};
let hunks = match parse_patch(&patch) {
Ok(source) => source.hunks,
Err(e) => {
match &e {
Expand Down
54 changes: 41 additions & 13 deletions codex-rs/apply-patch/src/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@ enum ParseMode {
}

fn parse_patch_text(patch: &str, mode: ParseMode) -> Result<ApplyPatchArgs, ParseError> {
let lines: Vec<&str> = patch.trim().lines().collect();
let lines: Vec<&str> = patch.trim().split('\n').collect();
let lines: &[&str] = match check_patch_boundaries_strict(&lines) {
Ok(()) => &lines,
Err(e) => match mode {
Expand Down Expand Up @@ -206,7 +206,9 @@ fn check_patch_boundaries_lenient<'a>(
) -> Result<&'a [&'a str], ParseError> {
match original_lines {
[first, .., last] => {
if (first == &"<<EOF" || first == &"<<'EOF'" || first == &"<<\"EOF\"")
let first = first.trim();
let last = last.trim();
if (first == "<<EOF" || first == "<<'EOF'" || first == "<<\"EOF\"")
&& last.ends_with("EOF")
&& original_lines.len() >= 4
{
Expand Down Expand Up @@ -282,9 +284,11 @@ fn parse_one_hunk(lines: &[&str], line_number: usize) -> Result<(Hunk, usize), P
let mut parsed_lines = 1;

// Optional: move file line
let move_path = remaining_lines
.first()
.and_then(|x| x.strip_prefix(MOVE_TO_MARKER));
let move_path = remaining_lines.first().and_then(|x| {
x.strip_suffix('\r')
.unwrap_or(x)
.strip_prefix(MOVE_TO_MARKER)
});

if move_path.is_some() {
remaining_lines = &remaining_lines[1..];
Expand Down Expand Up @@ -353,9 +357,10 @@ fn parse_update_file_chunk(
}
// If we see an explicit context marker @@ or @@ <context>, consume it; otherwise, optionally
// allow treating the chunk as starting directly with diff lines.
let (change_context, start_index) = if lines[0] == EMPTY_CHANGE_CONTEXT_MARKER {
let first_line = lines[0].strip_suffix('\r').unwrap_or(lines[0]);
let (change_context, start_index) = if first_line == EMPTY_CHANGE_CONTEXT_MARKER {
(None, 1)
} else if let Some(context) = lines[0].strip_prefix(CHANGE_CONTEXT_MARKER) {
} else if let Some(context) = first_line.strip_prefix(CHANGE_CONTEXT_MARKER) {
(Some(context.to_string()), 1)
} else {
if !allow_missing_context {
Expand Down Expand Up @@ -383,7 +388,8 @@ fn parse_update_file_chunk(
};
let mut parsed_lines = 0;
for line in &lines[start_index..] {
match *line {
let line_contents = line.strip_suffix('\r').unwrap_or(line);
match line_contents {
EOF_MARKER => {
if parsed_lines == 0 {
return Err(InvalidHunkError {
Expand All @@ -395,22 +401,22 @@ fn parse_update_file_chunk(
parsed_lines += 1;
break;
}
line_contents => {
_ => {
match line_contents.chars().next() {
None => {
// Interpret this as an empty line.
chunk.old_lines.push(String::new());
chunk.new_lines.push(String::new());
}
Some(' ') => {
chunk.old_lines.push(line_contents[1..].to_string());
chunk.new_lines.push(line_contents[1..].to_string());
chunk.old_lines.push(line[1..].to_string());
chunk.new_lines.push(line[1..].to_string());
}
Some('+') => {
chunk.new_lines.push(line_contents[1..].to_string());
chunk.new_lines.push(line[1..].to_string());
}
Some('-') => {
chunk.old_lines.push(line_contents[1..].to_string());
chunk.old_lines.push(line[1..].to_string());
}
_ => {
if parsed_lines == 0 {
Expand Down Expand Up @@ -669,6 +675,28 @@ fn test_parse_patch_lenient() {
);
}

#[test]
fn test_parse_patch_preserves_crlf() {
let patch_text = "*** Begin Patch\r\n*** Update File: sample.txt\r\n@@\r\n-one\r\n+uno\r\n two\r\n*** End Patch\r\n";
assert_eq!(
parse_patch_text(patch_text, ParseMode::Strict),
Ok(ApplyPatchArgs {
hunks: vec![UpdateFile {
path: PathBuf::from("sample.txt"),
move_path: None,
chunks: vec![UpdateFileChunk {
change_context: None,
old_lines: vec!["one\r".to_string(), "two\r".to_string()],
new_lines: vec!["uno\r".to_string(), "two\r".to_string()],
is_end_of_file: false,
}],
}],
patch: patch_text.trim().to_string(),
workdir: None,
})
);
}

#[test]
fn test_parse_one_hunk() {
assert_eq!(
Expand Down
Loading
Loading