From 1970fc91da9a835248a8bf081cbb341ff2c57a7b Mon Sep 17 00:00:00 2001 From: Cristhian Lopez Vidal Date: Mon, 23 Mar 2026 16:23:23 -0700 Subject: [PATCH 1/2] feat(clickhouse): support PARTITION BY after ORDER BY in CREATE TABLE ClickHouse DDL allows PARTITION BY to appear after ORDER BY, which differs from standard SQL ordering. This change makes the parser accept both orderings when using the ClickHouseDialect or GenericDialect. Fixes a parse failure for production ClickHouse CREATE TABLE statements like: CREATE TABLE t (...) ENGINE = MergeTree() ORDER BY (...) PARTITION BY expr Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/parser/mod.rs | 13 ++++++++++++- tests/sqlparser_clickhouse.rs | 22 ++++++++++++++++++++++ 2 files changed, 34 insertions(+), 1 deletion(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index cefc0c6f6..cca7071af 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -8435,6 +8435,17 @@ impl<'a> Parser<'a> { None }; + // ClickHouse allows PARTITION BY after ORDER BY + // https://clickhouse.com/docs/en/sql-reference/statements/create/table#partition-by + let partition_by = if create_table_config.partition_by.is_none() + && dialect_of!(self is ClickHouseDialect | GenericDialect) + && self.parse_keywords(&[Keyword::PARTITION, Keyword::BY]) + { + Some(Box::new(self.parse_expr()?)) + } else { + create_table_config.partition_by + }; + let on_commit = if self.parse_keywords(&[Keyword::ON, Keyword::COMMIT]) { Some(self.parse_create_table_on_commit()?) 
} else { @@ -8505,7 +8516,7 @@ impl<'a> Parser<'a> { .on_commit(on_commit) .on_cluster(on_cluster) .clustered_by(clustered_by) - .partition_by(create_table_config.partition_by) + .partition_by(partition_by) .cluster_by(create_table_config.cluster_by) .inherits(create_table_config.inherits) .partition_of(partition_of) diff --git a/tests/sqlparser_clickhouse.rs b/tests/sqlparser_clickhouse.rs index 82f79577b..5ba6e35ed 100644 --- a/tests/sqlparser_clickhouse.rs +++ b/tests/sqlparser_clickhouse.rs @@ -233,6 +233,28 @@ fn parse_create_table() { ); } +#[test] +fn parse_create_table_partition_by_after_order_by() { + // ClickHouse DDL places PARTITION BY after ORDER BY. + // MergeTree() is canonicalized to MergeTree and Int64 is rendered as INT64 (Int32 is unchanged). + clickhouse().one_statement_parses_to( + concat!( + "CREATE TABLE IF NOT EXISTS \"MyTable\" (`col1` Int64, `col2` Int32) ", + "ENGINE = MergeTree() ", + "PRIMARY KEY (toDate(toDateTime(`col2`)), `col1`, `col2`) ", + "ORDER BY (toDate(toDateTime(`col2`)), `col1`, `col2`) ", + "PARTITION BY col1 % 64" + ), + concat!( + "CREATE TABLE IF NOT EXISTS \"MyTable\" (`col1` INT64, `col2` Int32) ", + "ENGINE = MergeTree ", + "PRIMARY KEY (toDate(toDateTime(`col2`)), `col1`, `col2`) ", + "ORDER BY (toDate(toDateTime(`col2`)), `col1`, `col2`) ", + "PARTITION BY col1 % 64" + ), + ); +} + #[test] fn parse_insert_into_function() { clickhouse().verified_stmt(r#"INSERT INTO TABLE FUNCTION remote('localhost', default.simple_table) VALUES (100, 'inserted via remote()')"#); From 097a9292ba43e0eefd34ce5338a8123f62c06de4 Mon Sep 17 00:00:00 2001 From: Cristhian Lopez Vidal Date: Mon, 23 Mar 2026 16:37:08 -0700 Subject: [PATCH 2/2] feat(clickhouse): add ARRAY JOIN, LEFT/INNER ARRAY JOIN support ClickHouse supports ARRAY JOIN clauses for unnesting arrays inline. This adds JoinOperator variants for ARRAY JOIN, LEFT ARRAY JOIN, and INNER ARRAY JOIN. 
These joins take a table expression (the array to unnest) rather than a standard table reference, and do not use ON/USING constraints. Also adds Spanned impls for the new variants in spans.rs. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/ast/query.rs | 15 ++++++ src/ast/spans.rs | 3 ++ src/keywords.rs | 2 + src/parser/mod.rs | 27 ++++++++++ tests/sqlparser_clickhouse.rs | 94 +++++++++++++++++++++++++++++++++++ 5 files changed, 141 insertions(+) diff --git a/src/ast/query.rs b/src/ast/query.rs index ca74db440..143697842 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -2755,6 +2755,13 @@ impl fmt::Display for Join { self.relation, suffix(constraint) )), + JoinOperator::ArrayJoin => f.write_fmt(format_args!("ARRAY JOIN {}", self.relation)), + JoinOperator::LeftArrayJoin => { + f.write_fmt(format_args!("LEFT ARRAY JOIN {}", self.relation)) + } + JoinOperator::InnerArrayJoin => { + f.write_fmt(format_args!("INNER ARRAY JOIN {}", self.relation)) + } } } } @@ -2809,6 +2816,14 @@ pub enum JoinOperator { /// /// See <https://dev.mysql.com/doc/refman/8.4/en/join.html>. StraightJoin(JoinConstraint), + /// ClickHouse: `ARRAY JOIN` for unnesting arrays inline. + /// + /// See <https://clickhouse.com/docs/en/sql-reference/statements/select/array-join>. + ArrayJoin, + /// ClickHouse: `LEFT ARRAY JOIN` for unnesting arrays inline (preserves rows with empty arrays). + LeftArrayJoin, + /// ClickHouse: `INNER ARRAY JOIN` for unnesting arrays inline (filters rows with empty arrays). 
+ InnerArrayJoin, } #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] diff --git a/src/ast/spans.rs b/src/ast/spans.rs index 2af57d98e..a2da7cd82 100644 --- a/src/ast/spans.rs +++ b/src/ast/spans.rs @@ -2234,6 +2234,9 @@ impl Spanned for JoinOperator { JoinOperator::Anti(join_constraint) => join_constraint.span(), JoinOperator::Semi(join_constraint) => join_constraint.span(), JoinOperator::StraightJoin(join_constraint) => join_constraint.span(), + JoinOperator::ArrayJoin => Span::empty(), + JoinOperator::LeftArrayJoin => Span::empty(), + JoinOperator::InnerArrayJoin => Span::empty(), } } } diff --git a/src/keywords.rs b/src/keywords.rs index f0f37b1c0..a5ecd7f00 100644 --- a/src/keywords.rs +++ b/src/keywords.rs @@ -1231,6 +1231,8 @@ pub const RESERVED_FOR_TABLE_ALIAS: &[Keyword] = &[ Keyword::FOR, // for MYSQL PARTITION SELECTION Keyword::PARTITION, + // for Clickhouse ARRAY JOIN (ARRAY must not be parsed as a table alias) + Keyword::ARRAY, // for Clickhouse PREWHERE Keyword::PREWHERE, Keyword::SETTINGS, diff --git a/src/parser/mod.rs b/src/parser/mod.rs index cca7071af..111d4732a 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -15342,6 +15342,33 @@ impl<'a> Parser<'a> { constraint: self.parse_join_constraint(false)?, }, } + } else if dialect_of!(self is ClickHouseDialect | GenericDialect) + && self.parse_keywords(&[Keyword::INNER, Keyword::ARRAY, Keyword::JOIN]) + { + // ClickHouse: INNER ARRAY JOIN + Join { + relation: self.parse_table_factor()?, + global, + join_operator: JoinOperator::InnerArrayJoin, + } + } else if dialect_of!(self is ClickHouseDialect | GenericDialect) + && self.parse_keywords(&[Keyword::LEFT, Keyword::ARRAY, Keyword::JOIN]) + { + // ClickHouse: LEFT ARRAY JOIN + Join { + relation: self.parse_table_factor()?, + global, + join_operator: JoinOperator::LeftArrayJoin, + } + } else if dialect_of!(self is ClickHouseDialect | GenericDialect) + && self.parse_keywords(&[Keyword::ARRAY, Keyword::JOIN]) + { + // ClickHouse: ARRAY 
JOIN + Join { + relation: self.parse_table_factor()?, + global, + join_operator: JoinOperator::ArrayJoin, + } } else { let natural = self.parse_keyword(Keyword::NATURAL); let peek_keyword = if let Token::Word(w) = &self.peek_token_ref().token { diff --git a/tests/sqlparser_clickhouse.rs b/tests/sqlparser_clickhouse.rs index 5ba6e35ed..f16a1f8e4 100644 --- a/tests/sqlparser_clickhouse.rs +++ b/tests/sqlparser_clickhouse.rs @@ -253,6 +253,43 @@ fn parse_create_table_partition_by_after_order_by() { "PARTITION BY col1 % 64" ), ); + + // PARTITION BY after ORDER BY works with both ClickHouseDialect and GenericDialect + clickhouse_and_generic() + .verified_stmt("CREATE TABLE t (a INT) ENGINE = MergeTree ORDER BY a PARTITION BY a"); + + // Arithmetic expression in PARTITION BY (roundtrip) + clickhouse_and_generic() + .verified_stmt("CREATE TABLE t (a INT) ENGINE = MergeTree ORDER BY a PARTITION BY a % 64"); + + // AST: partition_by is populated with the correct expression + match clickhouse_and_generic() + .verified_stmt("CREATE TABLE t (a INT) ENGINE = MergeTree ORDER BY a PARTITION BY a % 64") + { + Statement::CreateTable(CreateTable { partition_by, .. 
}) => { + assert_eq!( + partition_by, + Some(Box::new(BinaryOp { + left: Box::new(Identifier(Ident::new("a"))), + op: BinaryOperator::Modulo, + right: Box::new(Expr::Value( + Value::Number("64".parse().unwrap(), false).with_empty_span(), + )), + })) + ); + } + _ => unreachable!(), + } + + // Function call expression in PARTITION BY (ClickHouse-specific function) + clickhouse().verified_stmt( + "CREATE TABLE t (d DATE) ENGINE = MergeTree ORDER BY d PARTITION BY toYYYYMM(d)", + ); + + // Negative: PARTITION BY with no expression should fail + clickhouse_and_generic() + .parse_sql_statements("CREATE TABLE t (a INT) ENGINE = MergeTree ORDER BY a PARTITION BY") + .expect_err("PARTITION BY with no expression should fail"); } #[test] @@ -1751,6 +1788,63 @@ fn test_parse_not_null_in_column_options() { ); } +#[test] +fn parse_array_join() { + // ARRAY JOIN works with both ClickHouseDialect and GenericDialect (roundtrip) + clickhouse_and_generic().verified_stmt("SELECT x FROM t ARRAY JOIN arr AS x"); + + // AST: join_operator is the unit variant ArrayJoin (no constraint) + match clickhouse_and_generic().verified_stmt("SELECT x FROM t ARRAY JOIN arr AS x") { + Statement::Query(query) => { + let select = query.body.as_select().unwrap(); + let join = &select.from[0].joins[0]; + assert_eq!(join.join_operator, JoinOperator::ArrayJoin); + } + _ => unreachable!(), + } + + // Combined: regular JOIN followed by ARRAY JOIN + clickhouse_and_generic() + .verified_stmt("SELECT x FROM t JOIN u ON t.id = u.id ARRAY JOIN arr AS x"); + + // Negative: ARRAY JOIN with no table expression should fail + clickhouse_and_generic() + .parse_sql_statements("SELECT x FROM t ARRAY JOIN") + .expect_err("ARRAY JOIN requires a table expression"); +} + +#[test] +fn parse_left_array_join() { + // LEFT ARRAY JOIN preserves rows with empty/null arrays (roundtrip) + clickhouse_and_generic().verified_stmt("SELECT x FROM t LEFT ARRAY JOIN arr AS x"); + + // AST: join_operator is LeftArrayJoin + match 
clickhouse_and_generic().verified_stmt("SELECT x FROM t LEFT ARRAY JOIN arr AS x") { + Statement::Query(query) => { + let select = query.body.as_select().unwrap(); + let join = &select.from[0].joins[0]; + assert_eq!(join.join_operator, JoinOperator::LeftArrayJoin); + } + _ => unreachable!(), + } +} + +#[test] +fn parse_inner_array_join() { + // INNER ARRAY JOIN filters rows with empty/null arrays (roundtrip) + clickhouse_and_generic().verified_stmt("SELECT x FROM t INNER ARRAY JOIN arr AS x"); + + // AST: join_operator is InnerArrayJoin + match clickhouse_and_generic().verified_stmt("SELECT x FROM t INNER ARRAY JOIN arr AS x") { + Statement::Query(query) => { + let select = query.body.as_select().unwrap(); + let join = &select.from[0].joins[0]; + assert_eq!(join.join_operator, JoinOperator::InnerArrayJoin); + } + _ => unreachable!(), + } +} + fn clickhouse() -> TestedDialects { TestedDialects::new(vec![Box::new(ClickHouseDialect {})]) }