mirror of
https://github.com/apache/datafusion-sqlparser-rs.git
synced 2025-12-23 11:12:51 +00:00
Merge f96249cc1d into f84887d004
This commit is contained in:
commit
4bf7a7e1aa
4 changed files with 102 additions and 3 deletions
|
|
@ -156,6 +156,10 @@ impl Dialect for GenericDialect {
|
|||
true
|
||||
}
|
||||
|
||||
/// The generic dialect opts in to C-style hint comments (`/*! hint */`).
fn supports_c_style_hints(&self) -> bool {
|
||||
true
|
||||
}
|
||||
|
||||
/// The generic dialect opts in to user/host grantees.
// NOTE(review): presumably this enables MySQL-style `user@host` grantee names in
// GRANT/REVOKE statements; confirm against the `Dialect` trait documentation.
fn supports_user_host_grantee(&self) -> bool {
|
||||
true
|
||||
}
|
||||
|
|
|
|||
|
|
@ -900,6 +900,12 @@ pub trait Dialect: Debug + Any {
|
|||
false
|
||||
}
|
||||
|
||||
/// Returns true if the dialect supports C-style hint comments, i.e. block
/// comments that start with `/*!`, optionally followed by version digits.
|
||||
/// e.g. `/*! hint */` or `/*!50110 KEY_BLOCK_SIZE = 1024 */`. Defaults to `false`.
|
||||
fn supports_c_style_hints(&self) -> bool {
|
||||
false
|
||||
}
|
||||
|
||||
/// Returns true if this dialect supports treating the equals operator `=` within a `SelectItem`
|
||||
/// as an alias assignment operator, rather than a boolean expression.
|
||||
/// For example: the following statements are equivalent for such a dialect:
|
||||
|
|
|
|||
|
|
@ -84,6 +84,11 @@ impl Dialect for MySqlDialect {
|
|||
true
|
||||
}
|
||||
|
||||
/// MySQL supports C-style hint comments such as `/*! hint */` and
/// `/*!50110 KEY_BLOCK_SIZE = 1024 */`;
/// see <https://dev.mysql.com/doc/refman/8.4/en/comments.html>
|
||||
fn supports_c_style_hints(&self) -> bool {
|
||||
true
|
||||
}
|
||||
|
||||
fn parse_infix(
|
||||
&self,
|
||||
parser: &mut crate::parser::Parser,
|
||||
|
|
|
|||
|
|
@ -821,6 +821,8 @@ pub struct Tokenizer<'a> {
|
|||
/// If true (the default), the tokenizer will un-escape literal
|
||||
/// SQL strings See [`Tokenizer::with_unescape`] for more details.
|
||||
unescape: bool,
|
||||
/// Tokens injected back into the stream (e.g. from MySQL C-style hints)
|
||||
pending_tokens: Vec<Token>,
|
||||
}
|
||||
|
||||
impl<'a> Tokenizer<'a> {
|
||||
|
|
@ -845,6 +847,7 @@ impl<'a> Tokenizer<'a> {
|
|||
dialect,
|
||||
query,
|
||||
unescape: true,
|
||||
pending_tokens: Vec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -947,10 +950,16 @@ impl<'a> Tokenizer<'a> {
|
|||
|
||||
/// Get the next token or return None
|
||||
fn next_token(
|
||||
&self,
|
||||
&mut self,
|
||||
chars: &mut State,
|
||||
prev_token: Option<&Token>,
|
||||
) -> Result<Option<Token>, TokenizerError> {
|
||||
// Return any previously injected tokens first
|
||||
{
|
||||
if let Some(tok) = self.pending_tokens.pop() {
|
||||
return Ok(Some(tok));
|
||||
}
|
||||
}
|
||||
match chars.peek() {
|
||||
Some(&ch) => match ch {
|
||||
' ' => self.consume_and_return(chars, Token::Whitespace(Whitespace::Space)),
|
||||
|
|
@ -2190,13 +2199,14 @@ impl<'a> Tokenizer<'a> {
|
|||
}
|
||||
|
||||
fn tokenize_multiline_comment(
|
||||
&self,
|
||||
&mut self,
|
||||
chars: &mut State,
|
||||
) -> Result<Option<Token>, TokenizerError> {
|
||||
let mut s = String::new();
|
||||
let mut nested = 1;
|
||||
let mut c_style_comments = false;
|
||||
let supports_nested_comments = self.dialect.supports_nested_comments();
|
||||
|
||||
let supports_c_style_comments = self.dialect.supports_c_style_hints();
|
||||
loop {
|
||||
match chars.next() {
|
||||
Some('/') if matches!(chars.peek(), Some('*')) && supports_nested_comments => {
|
||||
|
|
@ -2205,10 +2215,24 @@ impl<'a> Tokenizer<'a> {
|
|||
s.push('*');
|
||||
nested += 1;
|
||||
}
|
||||
Some('!') if supports_c_style_comments => {
|
||||
c_style_comments = true;
|
||||
// consume only version digits (leave following whitespace/content intact)
|
||||
while let Some(&c) = chars.peek() {
|
||||
if c.is_ascii_digit() {
|
||||
chars.next();
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
Some('*') if matches!(chars.peek(), Some('/')) => {
|
||||
chars.next(); // consume the '/'
|
||||
nested -= 1;
|
||||
if nested == 0 {
|
||||
if c_style_comments {
|
||||
break self.inject_tokens_from_c_style_hints_and_return_first(s);
|
||||
}
|
||||
break Ok(Some(Token::Whitespace(Whitespace::MultiLineComment(s))));
|
||||
}
|
||||
s.push('*');
|
||||
|
|
@ -2227,6 +2251,26 @@ impl<'a> Tokenizer<'a> {
|
|||
}
|
||||
}
|
||||
|
||||
/// Tokenize the given string using the same dialect/unescape settings and inject
|
||||
/// the resulting tokens back into this tokenizer so they are returned before
|
||||
/// any further characters from the main stream. Returns the first injected token.
|
||||
fn inject_tokens_from_c_style_hints_and_return_first(
|
||||
&mut self,
|
||||
inner_sql: String,
|
||||
) -> Result<Option<Token>, TokenizerError> {
|
||||
let trimmed = inner_sql.trim();
|
||||
if trimmed.is_empty() {
|
||||
return Ok(None);
|
||||
}
|
||||
let mut inner = Tokenizer::new(self.dialect, trimmed).with_unescape(self.unescape);
|
||||
let tokens = inner.tokenize()?;
|
||||
// push in reverse so we can pop from the end efficiently
|
||||
for t in tokens.into_iter().rev() {
|
||||
self.pending_tokens.push(t);
|
||||
}
|
||||
Ok(self.pending_tokens.pop())
|
||||
}
|
||||
|
||||
fn parse_quoted_ident(&self, chars: &mut State, quote_end: char) -> (String, Option<char>) {
|
||||
let mut last_char = None;
|
||||
let mut s = String::new();
|
||||
|
|
@ -4147,4 +4191,44 @@ mod tests {
|
|||
panic!("Tokenizer should have failed on {sql}, but it succeeded with {tokens:?}");
|
||||
}
|
||||
}
|
||||
#[test]
fn tokenize_multiline_comment_with_c_style_comment() {
    // A `/*! ... */` hint without a version number: its body is tokenized in
    // place and the surrounding comment markers disappear.
    let query = "0/*! word */1";
    let actual = Tokenizer::new(&MySqlDialect {}, query)
        .tokenize()
        .unwrap();

    let number = |digits: &str| Token::Number(String::from(digits), false);
    let expected = vec![
        number("0"),
        Token::Word(Word {
            value: String::from("word"),
            quote_style: None,
            keyword: Keyword::NoKeyword,
        }),
        number("1"),
    ];

    compare(expected, actual);
}
|
||||
|
||||
#[test]
fn tokenize_multiline_comment_with_c_style_comment_and_version() {
    // A versioned hint: the digits after `!` are dropped and the remaining body
    // is tokenized in place between the surrounding tokens.
    let query = "0 /*!50110 KEY_BLOCK_SIZE = 1024*/ 1";
    let actual = Tokenizer::new(&MySqlDialect {}, query)
        .tokenize()
        .unwrap();

    let number = |digits: &str| Token::Number(String::from(digits), false);
    let space = || Token::Whitespace(Whitespace::Space);
    let expected = vec![
        number("0"),
        space(),
        Token::Word(Word {
            value: String::from("KEY_BLOCK_SIZE"),
            quote_style: None,
            keyword: Keyword::KEY_BLOCK_SIZE,
        }),
        space(),
        Token::Eq,
        space(),
        number("1024"),
        space(),
        number("1"),
    ];

    compare(expected, actual);
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue