From 35983bf71953559ccb9af77f75a680dd51cf02c1 Mon Sep 17 00:00:00 2001 From: huanghsiang_cheng Date: Wed, 18 Mar 2026 15:53:31 -0700 Subject: [PATCH 1/3] Allow single array argument --- datafusion/functions-nested/src/arrays_zip.rs | 6 +++--- datafusion/sqllogictest/test_files/array.slt | 18 ++++++++++++++++-- 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/datafusion/functions-nested/src/arrays_zip.rs b/datafusion/functions-nested/src/arrays_zip.rs index 5035439286fbb..9c63fc1191b12 100644 --- a/datafusion/functions-nested/src/arrays_zip.rs +++ b/datafusion/functions-nested/src/arrays_zip.rs @@ -111,7 +111,7 @@ impl ScalarUDFImpl for ArraysZip { fn return_type(&self, arg_types: &[DataType]) -> Result { if arg_types.is_empty() { - return exec_err!("arrays_zip requires at least two arguments"); + return exec_err!("arrays_zip requires at least one argument"); } let mut fields = Vec::with_capacity(arg_types.len()); @@ -157,8 +157,8 @@ impl ScalarUDFImpl for ArraysZip { /// lengths, shorter arrays are padded with NULLs. /// Supports List, LargeList, and Null input types. fn arrays_zip_inner(args: &[ArrayRef]) -> Result { - if args.len() < 2 { - return exec_err!("arrays_zip requires at least two arguments"); + if args.is_empty() { + return exec_err!("arrays_zip requires at least one argument"); } let num_rows = args[0].len(); diff --git a/datafusion/sqllogictest/test_files/array.slt b/datafusion/sqllogictest/test_files/array.slt index a0da989990d53..9eb3d66403ecb 100644 --- a/datafusion/sqllogictest/test_files/array.slt +++ b/datafusion/sqllogictest/test_files/array.slt @@ -7346,9 +7346,11 @@ select arrays_zip([42], ['hello']); ---- [{1: 42, 2: hello}] -# error: too few arguments -statement error +# single argument +query ? select arrays_zip([1, 2, 3]); +---- +[{1: 1}, {1: 2}, {1: 3}] # arrays_zip with LargeList inputs query ? @@ -7368,6 +7370,12 @@ select arrays_zip( ---- [{1: 1, 2: 10}, {1: 2, 2: 20}, {1: NULL, 2: 30}] +# single argument from LargeList +query ? +select arrays_zip(arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)')); +---- +[{1: 1}, {1: 2}, {1: 3}] + # arrays_zip with FixedSizeList inputs query ? select arrays_zip( @@ -7377,6 +7385,12 @@ select arrays_zip( ---- [{1: 1, 2: 10}, {1: 2, 2: 20}, {1: 3, 2: 30}] +# single argument from FixedSizeList +query ? +select arrays_zip(arrow_cast(make_array(1, 2, 3), 'FixedSizeList(3, Int64)')); +---- +[{1: 1}, {1: 2}, {1: 3}] + # arrays_zip mixing List and LargeList query ? select arrays_zip( From f36fe972e66747220bf304f8ed60e636f4787a9d Mon Sep 17 00:00:00 2001 From: huanghsiang_cheng Date: Wed, 18 Mar 2026 16:12:44 -0700 Subject: [PATCH 2/3] Update user doc --- datafusion/functions-nested/src/arrays_zip.rs | 9 ++++----- docs/source/user-guide/sql/scalar_functions.md | 7 +++---- 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/datafusion/functions-nested/src/arrays_zip.rs b/datafusion/functions-nested/src/arrays_zip.rs index 9c63fc1191b12..1cb6d0cc9b9f4 100644 --- a/datafusion/functions-nested/src/arrays_zip.rs +++ b/datafusion/functions-nested/src/arrays_zip.rs @@ -49,30 +49,29 @@ struct ListColumnView { make_udf_expr_and_func!( ArraysZip, arrays_zip, - "combines multiple arrays into a single array of structs.", + "combines one or multiple arrays into a single array of structs.", arrays_zip_udf ); #[user_doc( doc_section(label = "Array Functions"), description = "Returns an array of structs created by combining the elements of each input array at the same index. If the arrays have different lengths, shorter arrays are padded with NULLs.", - syntax_example = "arrays_zip(array1, array2[, ..., array_n])", + syntax_example = "arrays_zip(array1[, ..., array_n])", sql_example = r#"```sql > select arrays_zip([1, 2, 3], ['a', 'b', 'c']); +---------------------------------------------------+ | arrays_zip([1, 2, 3], ['a', 'b', 'c']) | +---------------------------------------------------+ -| [{c0: 1, c1: a}, {c0: 2, c1: b}, {c0: 3, c1: c}] | +| [{1: 1, 2: a}, {1: 2, 2: b}, {1: 3, 2: c}] | +---------------------------------------------------+ > select arrays_zip([1, 2], [3, 4, 5]); +---------------------------------------------------+ | arrays_zip([1, 2], [3, 4, 5]) | +---------------------------------------------------+ -| [{c0: 1, c1: 3}, {c0: 2, c1: 4}, {c0: , c1: 5}] | +| [{1: 1, 2: 3}, {1: 2, 2: 4}, {1: , 2: 5}] | +---------------------------------------------------+ ```"#, argument(name = "array1", description = "First array expression."), - argument(name = "array2", description = "Second array expression."), argument(name = "array_n", description = "Subsequent array expressions.") )] #[derive(Debug, PartialEq, Eq, Hash)] diff --git a/docs/source/user-guide/sql/scalar_functions.md b/docs/source/user-guide/sql/scalar_functions.md index 15ce5878808ea..891848a18f812 100644 --- a/docs/source/user-guide/sql/scalar_functions.md +++ b/docs/source/user-guide/sql/scalar_functions.md @@ -4274,13 +4274,12 @@ _Alias of [array_has_any](#array_has_any)._ Returns an array of structs created by combining the elements of each input array at the same index. If the arrays have different lengths, shorter arrays are padded with NULLs. ```sql -arrays_zip(array1, array2[, ..., array_n]) +arrays_zip(array1[, ..., array_n]) ``` #### Arguments - **array1**: First array expression. -- **array2**: Second array expression. - **array_n**: Subsequent array expressions. #### Example @@ -4290,13 +4289,13 @@ arrays_zip(array1, array2[, ..., array_n]) +---------------------------------------------------+ | arrays_zip([1, 2, 3], ['a', 'b', 'c']) | +---------------------------------------------------+ -| [{c0: 1, c1: a}, {c0: 2, c1: b}, {c0: 3, c1: c}] | +| [{1: 1, 2: a}, {1: 2, 2: b}, {1: 3, 2: c}] | +---------------------------------------------------+ > select arrays_zip([1, 2], [3, 4, 5]); +---------------------------------------------------+ | arrays_zip([1, 2], [3, 4, 5]) | +---------------------------------------------------+ -| [{c0: 1, c1: 3}, {c0: 2, c1: 4}, {c0: , c1: 5}] | +| [{1: 1, 2: 3}, {1: 2, 2: 4}, {1: , 2: 5}] | +---------------------------------------------------+ ``` From ca8c67b73d9f78ca232fbfbf78e7ed15bbedd8cd Mon Sep 17 00:00:00 2001 From: huanghsiang_cheng Date: Wed, 18 Mar 2026 16:41:22 -0700 Subject: [PATCH 3/3] More single argument tests --- datafusion/sqllogictest/test_files/array.slt | 44 ++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/datafusion/sqllogictest/test_files/array.slt b/datafusion/sqllogictest/test_files/array.slt index 9eb3d66403ecb..d35909bd1a436 100644 --- a/datafusion/sqllogictest/test_files/array.slt +++ b/datafusion/sqllogictest/test_files/array.slt @@ -7324,6 +7324,50 @@ select arrays_zip(a, b) from (values ([1, 2], [10, 20]), (null, [30, 40]), ([5, [{1: NULL, 2: 30}, {1: NULL, 2: 40}] [{1: 5, 2: NULL}, {1: 6, 2: NULL}] +# column-level test with single argument +query ? +select arrays_zip(a) from (values ([1, 2], [10, 20]), (null, [30, 40]), ([5, 6], null)) as t(a, b); +---- +[{1: 1}, {1: 2}] +NULL +[{1: 5}, {1: 6}] + +query ? +select arrays_zip(b) from (values ([1, 2], [10, 20]), (null, [30, 40]), ([5, 6], null)) as t(a, b); +---- +[{1: 10}, {1: 20}] +[{1: 30}, {1: 40}] +NULL + +# null input +query ? +select arrays_zip(null) +---- +NULL + +# single empty array +query ? +select arrays_zip([]) +---- +[] + + +# single array of null +query ? +select arrays_zip([null]) +---- +[{1: NULL}] + +query ? +select arrays_zip([NULL::int]) +---- +[{1: NULL}] + +query ? +select arrays_zip([NULL::int[]]) +---- +[{1: NULL}] + # alias: list_zip query ? select list_zip([1, 2], [3, 4]);