From c9858ff733bf533c49c4fc9431a359f9360cdd51 Mon Sep 17 00:00:00 2001 From: Marcelo Altmann Date: Tue, 16 Sep 2025 23:35:08 -0300 Subject: [PATCH] Add support for comment hints This commit adds support for comment hints supported by MySQL. It parses and consumes the optional version number after the `!` character and returns all tokens inside a comment hint. --- src/dialect/generic.rs | 4 ++ src/dialect/mod.rs | 6 +++ src/dialect/mysql.rs | 5 ++ src/tokenizer.rs | 106 +++++++++++++++++++++++++++++++++++++++-- 4 files changed, 116 insertions(+), 5 deletions(-) diff --git a/src/dialect/generic.rs b/src/dialect/generic.rs index 345d63fe4..a39ad9423 100644 --- a/src/dialect/generic.rs +++ b/src/dialect/generic.rs @@ -176,6 +176,10 @@ impl Dialect for GenericDialect { true } + fn supports_multiline_comment_hints(&self) -> bool { + true + } + fn supports_user_host_grantee(&self) -> bool { true } diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs index 477d60f83..1f395008b 100644 --- a/src/dialect/mod.rs +++ b/src/dialect/mod.rs @@ -1086,6 +1086,12 @@ pub trait Dialect: Debug + Any { false } + /// Returns true if the dialect supports optimizer hints in multiline comments + /// e.g. `/*!50110 KEY_BLOCK_SIZE = 1024*/` + fn supports_multiline_comment_hints(&self) -> bool { + false + } + /// Returns true if this dialect supports treating the equals operator `=` within a `SelectItem` /// as an alias assignment operator, rather than a boolean expression. 
/// For example: the following statements are equivalent for such a dialect: diff --git a/src/dialect/mysql.rs b/src/dialect/mysql.rs index ad3ba6f3a..b5295df79 100644 --- a/src/dialect/mysql.rs +++ b/src/dialect/mysql.rs @@ -88,6 +88,11 @@ impl Dialect for MySqlDialect { true } + /// see + fn supports_multiline_comment_hints(&self) -> bool { + true + } + fn parse_infix( &self, parser: &mut crate::parser::Parser, diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 506dee1d7..f84917f68 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -945,10 +945,66 @@ impl<'a> Tokenizer<'a> { while let Some(token) = self.next_token(&mut state, buf.last().map(|t| &t.token))? { let span = location.span_to(state.location()); - buf.push(TokenWithSpan { token, span }); + // Check if this is a multiline comment hint that should be expanded + match &token { + Token::Whitespace(Whitespace::MultiLineComment(comment)) + if self.dialect.supports_multiline_comment_hints() + && comment.starts_with('!') => + { + // Re-tokenize the hints and add them to the buffer + self.tokenize_comment_hints(comment, span, buf)?; + } + _ => { + buf.push(TokenWithSpan { token, span }); + } + } + + location = state.location(); + } + Ok(()) + } + + /// Re-tokenize optimizer hints from a multiline comment and add them to the buffer. + /// For example, `/*!50110 KEY_BLOCK_SIZE = 1024*/` becomes tokens for `KEY_BLOCK_SIZE = 1024` + fn tokenize_comment_hints( + &self, + comment: &str, + span: Span, + buf: &mut Vec, + ) -> Result<(), TokenizerError> { + // Strip the leading '!' 
and any version digits (e.g., "50110") + let hint_content = comment + .strip_prefix('!') + .unwrap_or(comment) + .trim_start_matches(|c: char| c.is_ascii_digit()) + .trim(); + + // If there's no content after stripping, nothing to tokenize + if hint_content.is_empty() { + return Ok(()); + } + // Create a new tokenizer for the hint content + let mut inner = Tokenizer::new(self.dialect, hint_content).with_unescape(self.unescape); + + // Create a state for tracking position within the hint + let mut state = State { + peekable: hint_content.chars().peekable(), + line: span.start.line, + col: span.start.column, + }; + + // Tokenize the hint content and add tokens to the buffer + let mut location = state.location(); + while let Some(token) = inner.next_token(&mut state, buf.last().map(|t| &t.token))? { + let token_span = location.span_to(state.location()); + buf.push(TokenWithSpan { + token, + span: token_span, + }); location = state.location(); } + Ok(()) } @@ -980,7 +1036,7 @@ impl<'a> Tokenizer<'a> { /// Get the next token or return None fn next_token( - &self, + &mut self, chars: &mut State, prev_token: Option<&Token>, ) -> Result, TokenizerError> { @@ -2227,13 +2283,12 @@ impl<'a> Tokenizer<'a> { } fn tokenize_multiline_comment( - &self, + &mut self, chars: &mut State, ) -> Result, TokenizerError> { let mut s = String::new(); let mut nested = 1; let supports_nested_comments = self.dialect.supports_nested_comments(); - loop { match chars.next() { Some('/') if matches!(chars.peek(), Some('*')) && supports_nested_comments => { @@ -4218,6 +4273,47 @@ mod tests { Token::Whitespace(Whitespace::Space), Token::make_word("y", None), ], - ) + ); + } + + #[test] + fn tokenize_multiline_comment_with_comment_hint() { + let sql = String::from("0/*! 
word */1"); + + let dialect = MySqlDialect {}; + let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); + let expected = vec![ + Token::Number("0".to_string(), false), + Token::Word(Word { + value: "word".to_string(), + quote_style: None, + keyword: Keyword::NoKeyword, + }), + Token::Number("1".to_string(), false), + ]; + compare(expected, tokens); + } + + #[test] + fn tokenize_multiline_comment_with_comment_hint_and_version() { + let sql_multi = String::from("0 /*!50110 KEY_BLOCK_SIZE = 1024*/ 1"); + let dialect = MySqlDialect {}; + let tokens = Tokenizer::new(&dialect, &sql_multi).tokenize().unwrap(); + let expected = vec![ + Token::Number("0".to_string(), false), + Token::Whitespace(Whitespace::Space), + Token::Word(Word { + value: "KEY_BLOCK_SIZE".to_string(), + quote_style: None, + keyword: Keyword::KEY_BLOCK_SIZE, + }), + Token::Whitespace(Whitespace::Space), + Token::Eq, + Token::Whitespace(Whitespace::Space), + Token::Number("1024".to_string(), false), + Token::Whitespace(Whitespace::Space), + Token::Number("1".to_string(), false), + ]; + compare(expected, tokens); } }