diff --git a/datafusion/functions-nested/src/arrays_zip.rs b/datafusion/functions-nested/src/arrays_zip.rs index 5035439286fbb..1cb6d0cc9b9f4 100644 --- a/datafusion/functions-nested/src/arrays_zip.rs +++ b/datafusion/functions-nested/src/arrays_zip.rs @@ -49,30 +49,29 @@ struct ListColumnView { make_udf_expr_and_func!( ArraysZip, arrays_zip, - "combines multiple arrays into a single array of structs.", + "combines one or multiple arrays into a single array of structs.", arrays_zip_udf ); #[user_doc( doc_section(label = "Array Functions"), description = "Returns an array of structs created by combining the elements of each input array at the same index. If the arrays have different lengths, shorter arrays are padded with NULLs.", - syntax_example = "arrays_zip(array1, array2[, ..., array_n])", + syntax_example = "arrays_zip(array1[, ..., array_n])", sql_example = r#"```sql > select arrays_zip([1, 2, 3], ['a', 'b', 'c']); +---------------------------------------------------+ | arrays_zip([1, 2, 3], ['a', 'b', 'c']) | +---------------------------------------------------+ -| [{c0: 1, c1: a}, {c0: 2, c1: b}, {c0: 3, c1: c}] | +| [{1: 1, 2: a}, {1: 2, 2: b}, {1: 3, 2: c}] | +---------------------------------------------------+ > select arrays_zip([1, 2], [3, 4, 5]); +---------------------------------------------------+ | arrays_zip([1, 2], [3, 4, 5]) | +---------------------------------------------------+ -| [{c0: 1, c1: 3}, {c0: 2, c1: 4}, {c0: , c1: 5}] | +| [{1: 1, 2: 3}, {1: 2, 2: 4}, {1: , 2: 5}] | +---------------------------------------------------+ ```"#, argument(name = "array1", description = "First array expression."), - argument(name = "array2", description = "Second array expression."), argument(name = "array_n", description = "Subsequent array expressions.") )] #[derive(Debug, PartialEq, Eq, Hash)] @@ -111,7 +110,7 @@ impl ScalarUDFImpl for ArraysZip { fn return_type(&self, arg_types: &[DataType]) -> Result { if arg_types.is_empty() { - return exec_err!("arrays_zip requires at least two arguments"); + return exec_err!("arrays_zip requires at least one argument"); } let mut fields = Vec::with_capacity(arg_types.len()); @@ -157,8 +156,8 @@ impl ScalarUDFImpl for ArraysZip { /// lengths, shorter arrays are padded with NULLs. /// Supports List, LargeList, and Null input types. fn arrays_zip_inner(args: &[ArrayRef]) -> Result { - if args.len() < 2 { - return exec_err!("arrays_zip requires at least two arguments"); + if args.is_empty() { + return exec_err!("arrays_zip requires at least one argument"); } let num_rows = args[0].len(); diff --git a/datafusion/sqllogictest/test_files/array.slt b/datafusion/sqllogictest/test_files/array.slt index a0da989990d53..d35909bd1a436 100644 --- a/datafusion/sqllogictest/test_files/array.slt +++ b/datafusion/sqllogictest/test_files/array.slt @@ -7324,6 +7324,50 @@ select arrays_zip(a, b) from (values ([1, 2], [10, 20]), (null, [30, 40]), ([5, [{1: NULL, 2: 30}, {1: NULL, 2: 40}] [{1: 5, 2: NULL}, {1: 6, 2: NULL}] +# column-level test with single argument +query ? +select arrays_zip(a) from (values ([1, 2], [10, 20]), (null, [30, 40]), ([5, 6], null)) as t(a, b); +---- +[{1: 1}, {1: 2}] +NULL +[{1: 5}, {1: 6}] + +query ? +select arrays_zip(b) from (values ([1, 2], [10, 20]), (null, [30, 40]), ([5, 6], null)) as t(a, b); +---- +[{1: 10}, {1: 20}] +[{1: 30}, {1: 40}] +NULL + +# null input +query ? +select arrays_zip(null) +---- +NULL + +# single empty array +query ? +select arrays_zip([]) +---- +[] + + +# single array of null +query ? +select arrays_zip([null]) +---- +[{1: NULL}] + +query ? +select arrays_zip([NULL::int]) +---- +[{1: NULL}] + +query ? +select arrays_zip([NULL::int[]]) +---- +[{1: NULL}] + # alias: list_zip query ? select list_zip([1, 2], [3, 4]); @@ -7346,9 +7390,11 @@ select arrays_zip([42], ['hello']); ---- [{1: 42, 2: hello}] -# error: too few arguments -statement error +# single argument +query ? select arrays_zip([1, 2, 3]); +---- +[{1: 1}, {1: 2}, {1: 3}] # arrays_zip with LargeList inputs query ? @@ -7368,6 +7414,12 @@ select arrays_zip( ---- [{1: 1, 2: 10}, {1: 2, 2: 20}, {1: NULL, 2: 30}] +# single argument from LargeList +query ? +select arrays_zip(arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)')); +---- +[{1: 1}, {1: 2}, {1: 3}] + # arrays_zip with FixedSizeList inputs query ? select arrays_zip( @@ -7377,6 +7429,12 @@ select arrays_zip( ---- [{1: 1, 2: 10}, {1: 2, 2: 20}, {1: 3, 2: 30}] +# single argument from FixedSizeList +query ? +select arrays_zip(arrow_cast(make_array(1, 2, 3), 'FixedSizeList(3, Int64)')); +---- +[{1: 1}, {1: 2}, {1: 3}] + # arrays_zip mixing List and LargeList query ? select arrays_zip( diff --git a/docs/source/user-guide/sql/scalar_functions.md b/docs/source/user-guide/sql/scalar_functions.md index 15ce5878808ea..891848a18f812 100644 --- a/docs/source/user-guide/sql/scalar_functions.md +++ b/docs/source/user-guide/sql/scalar_functions.md @@ -4274,13 +4274,12 @@ _Alias of [array_has_any](#array_has_any)._ Returns an array of structs created by combining the elements of each input array at the same index. If the arrays have different lengths, shorter arrays are padded with NULLs. ```sql -arrays_zip(array1, array2[, ..., array_n]) +arrays_zip(array1[, ..., array_n]) ``` #### Arguments - **array1**: First array expression. -- **array2**: Second array expression. - **array_n**: Subsequent array expressions. #### Example @@ -4290,13 +4289,13 @@ arrays_zip(array1, array2[, ..., array_n]) +---------------------------------------------------+ | arrays_zip([1, 2, 3], ['a', 'b', 'c']) | +---------------------------------------------------+ -| [{c0: 1, c1: a}, {c0: 2, c1: b}, {c0: 3, c1: c}] | +| [{1: 1, 2: a}, {1: 2, 2: b}, {1: 3, 2: c}] | +---------------------------------------------------+ > select arrays_zip([1, 2], [3, 4, 5]); +---------------------------------------------------+ | arrays_zip([1, 2], [3, 4, 5]) | +---------------------------------------------------+ -| [{c0: 1, c1: 3}, {c0: 2, c1: 4}, {c0: , c1: 5}] | +| [{1: 1, 2: 3}, {1: 2, 2: 4}, {1: , 2: 5}] | +---------------------------------------------------+ ```