From 14ad0ea9b231589b385a8e5acbe0222f648ff4e6 Mon Sep 17 00:00:00 2001 From: Mykhailo Chalyi Date: Sun, 15 Mar 2026 01:30:08 +0000 Subject: [PATCH] feat(find): add -printf format flag support (#633) Support %f (filename), %p (path), %P (relative path), %s (size), %m (octal mode), %M (symbolic mode), %y (type), %d (depth), %T@ (mtime), and escape sequences \n, \t, \0, \\. --- crates/bashkit/src/builtins/ls.rs | 132 ++++++++- crates/bashkit/src/interpreter/mod.rs | 268 ++++++++++++++++++ .../tests/spec_cases/bash/find.test.sh | 48 ++++ 3 files changed, 445 insertions(+), 3 deletions(-) diff --git a/crates/bashkit/src/builtins/ls.rs b/crates/bashkit/src/builtins/ls.rs index 47907c8a..722b7764 100644 --- a/crates/bashkit/src/builtins/ls.rs +++ b/crates/bashkit/src/builtins/ls.rs @@ -297,11 +297,12 @@ struct FindOptions { type_filter: Option, max_depth: Option, min_depth: Option, + printf_format: Option, } /// The find builtin - search for files. /// -/// Usage: find [PATH...] [-name PATTERN] [-type TYPE] [-maxdepth N] [-mindepth N] [-exec CMD {} \;] +/// Usage: find [PATH...] [-name PATTERN] [-type TYPE] [-maxdepth N] [-mindepth N] [-printf FMT] [-exec CMD {} \;] /// /// Options: /// -name PATTERN Match filename against PATTERN (supports * and ?) 
@@ -309,6 +310,7 @@ struct FindOptions {
 /// -maxdepth N Descend at most N levels
 /// -mindepth N Do not apply tests at levels less than N
 /// -print Print matching paths (default)
+/// -printf FMT Print using format string (%f %p %P %s %m %M %y %d %T@)
 /// -exec CMD {} \; Execute CMD for each match ({} = path)
 /// -exec CMD {} + Execute CMD once with all matches
 pub struct Find;
@@ -322,6 +324,7 @@ impl Builtin for Find {
             type_filter: None,
             max_depth: None,
             min_depth: None,
+            printf_format: None,
         };
 
         // Parse arguments
@@ -394,6 +397,16 @@ impl Builtin for Find {
                 "-print" | "-print0" => {
                     // Default action, ignore
                 }
+                "-printf" => {
+                    i += 1;
+                    if i >= ctx.args.len() {
+                        return Ok(ExecResult::err(
+                            "find: missing argument to '-printf'\n".to_string(),
+                            1,
+                        ));
+                    }
+                    opts.printf_format = Some(ctx.args[i].clone());
+                }
                 "-exec" | "-execdir" => {
                     // -exec is handled at interpreter level (execute_find);
                     // skip args here for fallback path
@@ -484,8 +497,12 @@ fn find_recursive<'a>(
 
         // Output if matches (or if no filters, show everything)
         if type_matches && name_matches && above_min_depth {
-            output.push_str(display_path);
-            output.push('\n');
+            if let Some(ref fmt) = opts.printf_format {
+                output.push_str(&find_printf_format(fmt, display_path, &metadata));
+            } else {
+                output.push_str(display_path);
+                output.push('\n');
+            }
         }
 
         // Recurse into directories
@@ -525,6 +542,107 @@ fn find_recursive<'a>(
     })
 }
 
+/// Format a path using find's -printf format string.
+///
+/// Directives: `%f` (basename), `%p` (path as displayed), `%P` (path without a
+/// leading `./`), `%s` (size), `%m` (octal permission bits), `%M` (symbolic mode),
+/// `%y` (type: `d`, `l` or `f`), `%d` (depth), `%T@` (mtime as whole seconds since
+/// the Unix epoch) and `%%`. Escapes: `\n`, `\t`, `\0`, `\\`.
+///
+/// Anything unrecognized is copied through unchanged, including a lone `%` or `\`
+/// at the very end of the format. No newline is appended automatically.
+fn find_printf_format(fmt: &str, display_path: &str, metadata: &crate::fs::Metadata) -> String {
+    let mut out = String::with_capacity(fmt.len());
+    let mut chars = fmt.chars().peekable();
+    while let Some(c) = chars.next() {
+        match c {
+            '\\' => match chars.next() {
+                Some('n') => out.push('\n'),
+                Some('t') => out.push('\t'),
+                Some('0') => out.push('\0'),
+                Some('\\') => out.push('\\'),
+                Some(other) => {
+                    out.push('\\');
+                    out.push(other);
+                }
+                // Keep a backslash that ends the format instead of dropping it.
+                None => out.push('\\'),
+            },
+            '%' => match chars.next() {
+                None => out.push('%'),
+                // `file_name()` is `None` for paths such as "." or "/"; print those as-is.
+                Some('f') => match std::path::Path::new(display_path).file_name() {
+                    Some(name) => out.push_str(&name.to_string_lossy()),
+                    None => out.push_str(display_path),
+                },
+                Some('p') => out.push_str(display_path),
+                Some('P') => {
+                    // In builtin context, display_path is already relative
+                    out.push_str(display_path.strip_prefix("./").unwrap_or(display_path));
+                }
+                Some('s') => out.push_str(&metadata.size.to_string()),
+                Some('m') => out.push_str(&format!("{:o}", metadata.mode & 0o7777)),
+                Some('M') => {
+                    out.push(if metadata.file_type.is_dir() {
+                        'd'
+                    } else if metadata.file_type.is_symlink() {
+                        'l'
+                    } else {
+                        '-'
+                    });
+                    // One rwx triplet each for owner, group and others.
+                    for shift in [6, 3, 0] {
+                        let bits = (metadata.mode >> shift) & 7;
+                        out.push(if bits & 4 != 0 { 'r' } else { '-' });
+                        out.push(if bits & 2 != 0 { 'w' } else { '-' });
+                        out.push(if bits & 1 != 0 { 'x' } else { '-' });
+                    }
+                }
+                Some('y') => out.push(if metadata.file_type.is_dir() {
+                    'd'
+                } else if metadata.file_type.is_symlink() {
+                    'l'
+                } else {
+                    'f'
+                }),
+                Some('d') => {
+                    // Approximate depth from display_path: the search root is not
+                    // known here, so every component after a leading "./" counts.
+                    let base = display_path.strip_prefix("./").unwrap_or(display_path);
+                    let depth = if base == "." || base.is_empty() {
+                        0
+                    } else {
+                        base.matches('/').count() + 1
+                    };
+                    out.push_str(&depth.to_string());
+                }
+                Some('T') => {
+                    // Only `%T@` is implemented. Any other `%T` is emitted literally
+                    // and the character after it is then processed as usual.
+                    if chars.peek() == Some(&'@') {
+                        chars.next();
+                        let secs = metadata
+                            .modified
+                            .duration_since(std::time::UNIX_EPOCH)
+                            .map(|d| d.as_secs())
+                            .unwrap_or(0);
+                        out.push_str(&secs.to_string());
+                    } else {
+                        out.push_str("%T");
+                    }
+                }
+                Some('%') => out.push('%'),
+                Some(other) => {
+                    out.push('%');
+                    out.push(other);
+                }
+            },
+            other => out.push(other),
+        }
+    }
+    out
+}
+
 /// Simple glob pattern matching for find -name
 pub(crate) fn glob_match(value: &str, pattern: &str) -> bool {
     let mut value_chars = value.chars().peekable();
diff --git a/crates/bashkit/src/interpreter/mod.rs b/crates/bashkit/src/interpreter/mod.rs
index f5312343..b2d4ffda 100644
--- a/crates/bashkit/src/interpreter/mod.rs
+++ b/crates/bashkit/src/interpreter/mod.rs
@@ -2465,8 +2465,10 @@ impl Interpreter {
         let mut name_pattern: Option = None;
         let mut type_filter: Option = None;
         let mut max_depth: Option = None;
+        let mut min_depth: Option<usize> = None;
         let mut exec_args: Vec = Vec::new();
         let mut exec_batch = false;
+        let mut printf_format: Option<String> = None;
 
         // Parse arguments
         let mut i = 0;
@@ -2516,7 +2518,35 @@ impl Interpreter {
                         }
                     }
                 }
+                "-mindepth" => {
+                    i += 1;
+                    if i >= args.len() {
+                        return Ok(ExecResult::err(
+                            "find: missing argument to '-mindepth'\n".to_string(),
+                            1,
+                        ));
+                    }
+                    match args[i].parse::<usize>() {
+                        Ok(n) => min_depth = Some(n),
+                        Err(_) => {
+                            return Ok(ExecResult::err(
+                                format!("find: invalid mindepth value '{}'\n", args[i]),
+                                1,
+                            ));
+                        }
+                    }
+                }
                 "-print" | "-print0" => {}
+                "-printf" => {
+                    i += 1;
+                    if i >= args.len() {
+                        return Ok(ExecResult::err(
+                            "find: missing argument to '-printf'\n".to_string(),
+                            1,
+                        ));
+                    }
+                    printf_format = Some(args[i].clone());
+                }
                 "-exec" | "-execdir" => {
                     i += 1;
                     while i < args.len() {
@@ -2572,6 +2602,27 @@ impl Interpreter {
                 .await?;
         }
 
+        // Filter by mindepth
+        if let Some(min) = min_depth {
+            // Depth is measured from the search root each path lies under, so the
+            // filter also works when several starting points are given.
+            matched_paths.retain(|p| Self::find_depth_below_root(p, &search_paths) >= min);
+        }
+
+        // Handle -printf output
+        if let Some(ref fmt) = printf_format {
+            let mut output = String::new();
+            for found_path in &matched_paths {
+                let resolved = self.resolve_path(found_path);
+                let formatted = self
+                    .find_printf_format(fmt, found_path, &resolved, &search_paths)
+                    .await;
+                output.push_str(&formatted);
+            }
+            let result = ExecResult::ok(output);
+            return self.apply_redirections(result, redirects).await;
+        }
+
         // Execute commands for matched paths
         if exec_args.is_empty() {
             // No exec command parsed, just print
@@ -2739,6 +2790,142 @@ impl Interpreter {
         })
     }
 
+    /// Format a single path using a `-printf` format string.
+    ///
+    /// Supported specifiers: `%f` (filename), `%p` (full path), `%P` (relative path),
+    /// `%s` (size), `%m` (octal mode), `%M` (symbolic mode), `%T@` (mtime epoch),
+    /// `%y` (type char), `%d` (depth). Escapes: `\n`, `\t`, `\0`, `\\`.
+    ///
+    /// `%P` and `%d` are measured from the entry of `search_paths` that contains
+    /// `display_path`. Unknown directives and escapes are copied through unchanged,
+    /// as is a lone `%` or `\` at the end of the format. When `resolved_path` cannot
+    /// be stat'ed, size, mode and mtime print as `0` and `%y` prints `f`.
+    async fn find_printf_format(
+        &self,
+        fmt: &str,
+        display_path: &str,
+        resolved_path: &Path,
+        search_paths: &[String],
+    ) -> String {
+        let meta = self.fs.stat(resolved_path).await.ok();
+        let file_type = meta.as_ref().map(|m| &m.file_type);
+        let mode = meta.as_ref().map(|m| m.mode).unwrap_or(0);
+
+        let mut out = String::with_capacity(fmt.len());
+        let mut chars = fmt.chars().peekable();
+        while let Some(c) = chars.next() {
+            match c {
+                '\\' => match chars.next() {
+                    Some('n') => out.push('\n'),
+                    Some('t') => out.push('\t'),
+                    Some('0') => out.push('\0'),
+                    Some('\\') => out.push('\\'),
+                    Some(other) => {
+                        out.push('\\');
+                        out.push(other);
+                    }
+                    // Keep a backslash that ends the format instead of dropping it.
+                    None => out.push('\\'),
+                },
+                '%' => match chars.next() {
+                    None => out.push('%'),
+                    // Filename (basename); `file_name()` is `None` for paths such as
+                    // "." or "/", which are printed as-is.
+                    Some('f') => match Path::new(display_path).file_name() {
+                        Some(name) => out.push_str(&name.to_string_lossy()),
+                        None => out.push_str(display_path),
+                    },
+                    Some('p') => out.push_str(display_path),
+                    Some('P') => {
+                        out.push_str(Self::find_path_below_root(display_path, search_paths));
+                    }
+                    Some('s') => {
+                        let size = meta.as_ref().map(|m| m.size).unwrap_or(0);
+                        out.push_str(&size.to_string());
+                    }
+                    Some('m') => out.push_str(&format!("{:o}", mode & 0o7777)),
+                    Some('M') => {
+                        // Symbolic mode like ls -l (e.g., -rw-r--r--)
+                        out.push(match file_type {
+                            Some(ft) if ft.is_dir() => 'd',
+                            Some(ft) if ft.is_symlink() => 'l',
+                            _ => '-',
+                        });
+                        for shift in [6, 3, 0] {
+                            let bits = (mode >> shift) & 7;
+                            out.push(if bits & 4 != 0 { 'r' } else { '-' });
+                            out.push(if bits & 2 != 0 { 'w' } else { '-' });
+                            out.push(if bits & 1 != 0 { 'x' } else { '-' });
+                        }
+                    }
+                    Some('y') => out.push(match file_type {
+                        Some(ft) if ft.is_dir() => 'd',
+                        Some(ft) if ft.is_symlink() => 'l',
+                        _ => 'f',
+                    }),
+                    Some('d') => {
+                        let depth = Self::find_depth_below_root(display_path, search_paths);
+                        out.push_str(&depth.to_string());
+                    }
+                    Some('T') => {
+                        // Only `%T@` (mtime as seconds since epoch) is implemented.
+                        // Any other `%T` is emitted literally and the character after
+                        // it is then processed as usual.
+                        if chars.peek() == Some(&'@') {
+                            chars.next();
+                            let secs = meta
+                                .as_ref()
+                                .and_then(|m| {
+                                    m.modified
+                                        .duration_since(std::time::UNIX_EPOCH)
+                                        .ok()
+                                        .map(|d| d.as_secs())
+                                })
+                                .unwrap_or(0);
+                            out.push_str(&secs.to_string());
+                        } else {
+                            out.push_str("%T");
+                        }
+                    }
+                    Some('%') => out.push('%'),
+                    Some(other) => {
+                        out.push('%');
+                        out.push(other);
+                    }
+                },
+                other => out.push(other),
+            }
+        }
+        out
+    }
+
+    /// Return `path` relative to the search root it lies under.
+    ///
+    /// The first entry of `search_paths` that is a prefix of `path` ending on a
+    /// component boundary wins, so root `/tmp/a` never claims `/tmp/ab/x`. If no
+    /// entry qualifies, the first search path (or `.`) is stripped when it is a
+    /// plain string prefix, which matches the earlier single-root behavior.
+    fn find_path_below_root<'p>(path: &'p str, search_paths: &[String]) -> &'p str {
+        let matched = search_paths.iter().find_map(|root| {
+            path.strip_prefix(root.as_str())
+                .filter(|rest| rest.is_empty() || rest.starts_with('/') || root.ends_with('/'))
+        });
+        let rest = matched.unwrap_or_else(|| {
+            let first = search_paths.first().map(|s| s.as_str()).unwrap_or(".");
+            path.strip_prefix(first).unwrap_or(path)
+        });
+        rest.trim_start_matches('/')
+    }
+
+    /// Depth of `path` below its search root; the root itself has depth 0.
+    fn find_depth_below_root(path: &str, search_paths: &[String]) -> usize {
+        let rel = Self::find_path_below_root(path, search_paths);
+        if rel.is_empty() {
+            0
+        } else {
+            rel.matches('/').count() + 1
+        }
+    }
+
     /// Execute `bash` or `sh` command - interpret scripts using this interpreter.
/// /// Supports: @@ -10614,4 +10831,55 @@ echo "count=$COUNT" assert_eq!(result.exit_code, 0); assert!(!result.stdout.trim().is_empty()); } + + // find -printf tests + + #[tokio::test] + async fn test_find_printf_filename() { + let result = run_script( + r#"mkdir -p /tmp/fp1 && touch /tmp/fp1/hello.txt && find /tmp/fp1 -type f -printf '%f\n'"#, + ) + .await; + assert_eq!(result.exit_code, 0); + assert_eq!(result.stdout.trim(), "hello.txt"); + } + + #[tokio::test] + async fn test_find_printf_path() { + let result = run_script( + r#"mkdir -p /tmp/fp2 && touch /tmp/fp2/a.txt && find /tmp/fp2 -type f -printf '%p\n'"#, + ) + .await; + assert_eq!(result.exit_code, 0); + assert_eq!(result.stdout.trim(), "/tmp/fp2/a.txt"); + } + + #[tokio::test] + async fn test_find_printf_size() { + let result = run_script( + r#"mkdir -p /tmp/fp3 && echo -n "hello" > /tmp/fp3/five.txt && find /tmp/fp3 -type f -printf '%s\n'"#, + ) + .await; + assert_eq!(result.exit_code, 0); + assert_eq!(result.stdout.trim(), "5"); + } + + #[tokio::test] + async fn test_find_printf_type() { + let result = + run_script(r#"mkdir -p /tmp/fp4/sub && find /tmp/fp4 -maxdepth 0 -printf '%y\n'"#) + .await; + assert_eq!(result.exit_code, 0); + assert_eq!(result.stdout.trim(), "d"); + } + + #[tokio::test] + async fn test_find_printf_combined() { + let result = run_script( + r#"mkdir -p /tmp/fp5 && touch /tmp/fp5/x.txt && find /tmp/fp5 -type f -printf '%f %y\n'"#, + ) + .await; + assert_eq!(result.exit_code, 0); + assert_eq!(result.stdout.trim(), "x.txt f"); + } } diff --git a/crates/bashkit/tests/spec_cases/bash/find.test.sh b/crates/bashkit/tests/spec_cases/bash/find.test.sh index be2779e0..25a69d40 100644 --- a/crates/bashkit/tests/spec_cases/bash/find.test.sh +++ b/crates/bashkit/tests/spec_cases/bash/find.test.sh @@ -100,6 +100,54 @@ find /tmp/md2test -mindepth 2 -type f | sort /tmp/md2test/a/mid.txt ### end +### find_printf_filename +# find -printf '%f\n' should print basenames +mkdir -p /tmp/pf1 +touch 
/tmp/pf1/alpha.txt +touch /tmp/pf1/beta.txt +find /tmp/pf1 -type f -printf '%f\n' | sort +### expect +alpha.txt +beta.txt +### end + +### find_printf_path +# find -printf '%p\n' should print full paths (same as -print) +mkdir -p /tmp/pf2 +touch /tmp/pf2/file.txt +find /tmp/pf2 -type f -printf '%p\n' +### expect +/tmp/pf2/file.txt +### end + +### find_printf_type +# find -printf '%y' should print type chars +mkdir -p /tmp/pf3/sub +touch /tmp/pf3/sub/file.txt +find /tmp/pf3 -maxdepth 1 -printf '%y %f\n' | sort +### expect +d pf3 +d sub +### end + +### find_printf_size +# find -printf '%s' should print file size +mkdir -p /tmp/pf4 +echo -n "hello" > /tmp/pf4/five.txt +find /tmp/pf4 -type f -printf '%f %s\n' +### expect +five.txt 5 +### end + +### find_printf_escapes +# find -printf should handle escape sequences +mkdir -p /tmp/pf5 +touch /tmp/pf5/a.txt +find /tmp/pf5 -type f -printf '%f\t%y\n' +### expect +a.txt f +### end + ### ls_recursive # ls -R should list nested directories mkdir -p /tmp/lsrec/a/b