diff --git a/src/ast/query.rs b/src/ast/query.rs
index ca74db440..143697842 100644
--- a/src/ast/query.rs
+++ b/src/ast/query.rs
@@ -2755,6 +2755,13 @@ impl fmt::Display for Join {
                 self.relation,
                 suffix(constraint)
             )),
+            JoinOperator::ArrayJoin => f.write_fmt(format_args!("ARRAY JOIN {}", self.relation)),
+            JoinOperator::LeftArrayJoin => {
+                f.write_fmt(format_args!("LEFT ARRAY JOIN {}", self.relation))
+            }
+            JoinOperator::InnerArrayJoin => {
+                f.write_fmt(format_args!("INNER ARRAY JOIN {}", self.relation))
+            }
         }
     }
 }
@@ -2809,6 +2816,14 @@ pub enum JoinOperator {
     ///
     /// See <https://dev.mysql.com/doc/refman/8.4/en/join.html>.
     StraightJoin(JoinConstraint),
+    /// ClickHouse: `ARRAY JOIN` for unnesting arrays inline.
+    ///
+    /// See <https://clickhouse.com/docs/en/sql-reference/statements/select/array-join>.
+    ArrayJoin,
+    /// ClickHouse: `LEFT ARRAY JOIN` for unnesting arrays inline (preserves rows with empty arrays).
+    LeftArrayJoin,
+    /// ClickHouse: `INNER ARRAY JOIN` for unnesting arrays inline (filters rows with empty arrays).
+    InnerArrayJoin,
 }
 
 #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
diff --git a/src/ast/spans.rs b/src/ast/spans.rs
index 2af57d98e..a2da7cd82 100644
--- a/src/ast/spans.rs
+++ b/src/ast/spans.rs
@@ -2234,6 +2234,9 @@ impl Spanned for JoinOperator {
             JoinOperator::Anti(join_constraint) => join_constraint.span(),
             JoinOperator::Semi(join_constraint) => join_constraint.span(),
             JoinOperator::StraightJoin(join_constraint) => join_constraint.span(),
+            JoinOperator::ArrayJoin => Span::empty(),
+            JoinOperator::LeftArrayJoin => Span::empty(),
+            JoinOperator::InnerArrayJoin => Span::empty(),
         }
     }
 }
diff --git a/src/keywords.rs b/src/keywords.rs
index f0f37b1c0..a5ecd7f00 100644
--- a/src/keywords.rs
+++ b/src/keywords.rs
@@ -1231,6 +1231,8 @@ pub const RESERVED_FOR_TABLE_ALIAS: &[Keyword] = &[
     Keyword::FOR,
     // for MYSQL PARTITION SELECTION
     Keyword::PARTITION,
+    // for Clickhouse ARRAY JOIN (ARRAY must not be parsed as a table alias)
+    Keyword::ARRAY,
     // for Clickhouse PREWHERE
     Keyword::PREWHERE,
     Keyword::SETTINGS,
diff --git a/src/parser/mod.rs b/src/parser/mod.rs
index cefc0c6f6..111d4732a 100644
--- a/src/parser/mod.rs
+++ b/src/parser/mod.rs
@@ -8435,6 +8435,17 @@ impl<'a> Parser<'a> {
             None
         };
 
+        // ClickHouse allows PARTITION BY after ORDER BY
+        // https://clickhouse.com/docs/en/sql-reference/statements/create/table#partition-by
+        let partition_by = if create_table_config.partition_by.is_none()
+            && dialect_of!(self is ClickHouseDialect | GenericDialect)
+            && self.parse_keywords(&[Keyword::PARTITION, Keyword::BY])
+        {
+            Some(Box::new(self.parse_expr()?))
+        } else {
+            create_table_config.partition_by
+        };
+
         let on_commit = if self.parse_keywords(&[Keyword::ON, Keyword::COMMIT]) {
             Some(self.parse_create_table_on_commit()?)
         } else {
@@ -8505,7 +8516,7 @@ impl<'a> Parser<'a> {
             .on_commit(on_commit)
             .on_cluster(on_cluster)
             .clustered_by(clustered_by)
-            .partition_by(create_table_config.partition_by)
+            .partition_by(partition_by)
             .cluster_by(create_table_config.cluster_by)
             .inherits(create_table_config.inherits)
             .partition_of(partition_of)
@@ -15331,6 +15342,33 @@ impl<'a> Parser<'a> {
                         constraint: self.parse_join_constraint(false)?,
                     },
                 }
+            } else if dialect_of!(self is ClickHouseDialect | GenericDialect)
+                && self.parse_keywords(&[Keyword::INNER, Keyword::ARRAY, Keyword::JOIN])
+            {
+                // ClickHouse: INNER ARRAY JOIN
+                Join {
+                    relation: self.parse_table_factor()?,
+                    global,
+                    join_operator: JoinOperator::InnerArrayJoin,
+                }
+            } else if dialect_of!(self is ClickHouseDialect | GenericDialect)
+                && self.parse_keywords(&[Keyword::LEFT, Keyword::ARRAY, Keyword::JOIN])
+            {
+                // ClickHouse: LEFT ARRAY JOIN
+                Join {
+                    relation: self.parse_table_factor()?,
+                    global,
+                    join_operator: JoinOperator::LeftArrayJoin,
+                }
+            } else if dialect_of!(self is ClickHouseDialect | GenericDialect)
+                && self.parse_keywords(&[Keyword::ARRAY, Keyword::JOIN])
+            {
+                // ClickHouse: ARRAY JOIN
+                Join {
+                    relation: self.parse_table_factor()?,
+                    global,
+                    join_operator: JoinOperator::ArrayJoin,
+                }
             } else {
                 let natural = self.parse_keyword(Keyword::NATURAL);
                 let peek_keyword = if let Token::Word(w) = &self.peek_token_ref().token {
diff --git a/tests/sqlparser_clickhouse.rs b/tests/sqlparser_clickhouse.rs
index 82f79577b..f16a1f8e4 100644
--- a/tests/sqlparser_clickhouse.rs
+++ b/tests/sqlparser_clickhouse.rs
@@ -233,6 +233,65 @@ fn parse_create_table() {
     );
 }
 
+#[test]
+fn parse_create_table_partition_by_after_order_by() {
+    // ClickHouse DDL places PARTITION BY after ORDER BY.
+    // `MergeTree()` is canonicalized to `MergeTree`; `Int64` is rendered as `INT64`.
+    clickhouse().one_statement_parses_to(
+        concat!(
+            "CREATE TABLE IF NOT EXISTS \"MyTable\" (`col1` Int64, `col2` Int32) ",
+            "ENGINE = MergeTree() ",
+            "PRIMARY KEY (toDate(toDateTime(`col2`)), `col1`, `col2`) ",
+            "ORDER BY (toDate(toDateTime(`col2`)), `col1`, `col2`) ",
+            "PARTITION BY col1 % 64"
+        ),
+        concat!(
+            "CREATE TABLE IF NOT EXISTS \"MyTable\" (`col1` INT64, `col2` Int32) ",
+            "ENGINE = MergeTree ",
+            "PRIMARY KEY (toDate(toDateTime(`col2`)), `col1`, `col2`) ",
+            "ORDER BY (toDate(toDateTime(`col2`)), `col1`, `col2`) ",
+            "PARTITION BY col1 % 64"
+        ),
+    );
+
+    // PARTITION BY after ORDER BY works with both ClickHouseDialect and GenericDialect
+    clickhouse_and_generic()
+        .verified_stmt("CREATE TABLE t (a INT) ENGINE = MergeTree ORDER BY a PARTITION BY a");
+
+    // Arithmetic expression in PARTITION BY (roundtrip)
+    clickhouse_and_generic()
+        .verified_stmt("CREATE TABLE t (a INT) ENGINE = MergeTree ORDER BY a PARTITION BY a % 64");
+
+    // AST: partition_by is populated with the correct expression
+    match clickhouse_and_generic()
+        .verified_stmt("CREATE TABLE t (a INT) ENGINE = MergeTree ORDER BY a PARTITION BY a % 64")
+    {
+        Statement::CreateTable(CreateTable { partition_by, .. }) => {
+            assert_eq!(
+                partition_by,
+                Some(Box::new(BinaryOp {
+                    left: Box::new(Identifier(Ident::new("a"))),
+                    op: BinaryOperator::Modulo,
+                    right: Box::new(Expr::Value(
+                        Value::Number("64".parse().unwrap(), false).with_empty_span(),
+                    )),
+                }))
+            );
+        }
+        _ => unreachable!(),
+    }
+
+    // Function call expression in PARTITION BY (ClickHouse-specific function)
+    clickhouse().verified_stmt(
+        "CREATE TABLE t (d DATE) ENGINE = MergeTree ORDER BY d PARTITION BY toYYYYMM(d)",
+    );
+
+    // Negative: PARTITION BY with no expression should fail
+    clickhouse_and_generic()
+        .parse_sql_statements("CREATE TABLE t (a INT) ENGINE = MergeTree ORDER BY a PARTITION BY")
+        .expect_err("PARTITION BY with no expression should fail");
+}
+
 #[test]
 fn parse_insert_into_function() {
     clickhouse().verified_stmt(r#"INSERT INTO TABLE FUNCTION remote('localhost', default.simple_table) VALUES (100, 'inserted via remote()')"#);
@@ -1729,6 +1788,63 @@ fn test_parse_not_null_in_column_options() {
     );
 }
 
+#[test]
+fn parse_array_join() {
+    // ARRAY JOIN works with both ClickHouseDialect and GenericDialect (roundtrip)
+    clickhouse_and_generic().verified_stmt("SELECT x FROM t ARRAY JOIN arr AS x");
+
+    // AST: join_operator is the unit variant ArrayJoin (no constraint)
+    match clickhouse_and_generic().verified_stmt("SELECT x FROM t ARRAY JOIN arr AS x") {
+        Statement::Query(query) => {
+            let select = query.body.as_select().unwrap();
+            let join = &select.from[0].joins[0];
+            assert_eq!(join.join_operator, JoinOperator::ArrayJoin);
+        }
+        _ => unreachable!(),
+    }
+
+    // Combined: regular JOIN followed by ARRAY JOIN
+    clickhouse_and_generic()
+        .verified_stmt("SELECT x FROM t JOIN u ON t.id = u.id ARRAY JOIN arr AS x");
+
+    // Negative: ARRAY JOIN with no table expression should fail
+    clickhouse_and_generic()
+        .parse_sql_statements("SELECT x FROM t ARRAY JOIN")
+        .expect_err("ARRAY JOIN requires a table expression");
+}
+
+#[test]
+fn parse_left_array_join() {
+    // LEFT ARRAY JOIN preserves rows with empty/null arrays (roundtrip)
+    clickhouse_and_generic().verified_stmt("SELECT x FROM t LEFT ARRAY JOIN arr AS x");
+
+    // AST: join_operator is LeftArrayJoin
+    match clickhouse_and_generic().verified_stmt("SELECT x FROM t LEFT ARRAY JOIN arr AS x") {
+        Statement::Query(query) => {
+            let select = query.body.as_select().unwrap();
+            let join = &select.from[0].joins[0];
+            assert_eq!(join.join_operator, JoinOperator::LeftArrayJoin);
+        }
+        _ => unreachable!(),
+    }
+}
+
+#[test]
+fn parse_inner_array_join() {
+    // INNER ARRAY JOIN filters rows with empty/null arrays (roundtrip)
+    clickhouse_and_generic().verified_stmt("SELECT x FROM t INNER ARRAY JOIN arr AS x");
+
+    // AST: join_operator is InnerArrayJoin
+    match clickhouse_and_generic().verified_stmt("SELECT x FROM t INNER ARRAY JOIN arr AS x") {
+        Statement::Query(query) => {
+            let select = query.body.as_select().unwrap();
+            let join = &select.from[0].joins[0];
+            assert_eq!(join.join_operator, JoinOperator::InnerArrayJoin);
+        }
+        _ => unreachable!(),
+    }
+}
+
 fn clickhouse() -> TestedDialects {
     TestedDialects::new(vec![Box::new(ClickHouseDialect {})])
 }