diff --git a/src/ast/mod.rs b/src/ast/mod.rs index a8c93439..81d0e22f 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -437,6 +437,8 @@ pub enum Expr { Nested(Box), /// A literal value, such as string, number, date or NULL Value(Value), + /// A string literal preceded by a character set introducer, such as `_utf8mb4 'abc'` or `_latin1 X'4D7953514C'` (MySQL); displayed as the introducer, a space, then the value + IntroducedString { introducer: String, value: Value }, /// A constant of form ` 'value'`. /// This can represent ANSI SQL `DATE`, `TIME`, and `TIMESTAMP` literals (such as `DATE '2020-01-01'`), /// as well as constants of other types (a non-standard PostgreSQL extension). @@ -696,6 +698,7 @@ impl fmt::Display for Expr { Expr::Collate { expr, collation } => write!(f, "{expr} COLLATE {collation}"), Expr::Nested(ast) => write!(f, "({ast})"), Expr::Value(v) => write!(f, "{v}"), + Expr::IntroducedString { introducer, value } => write!(f, "{introducer} {value}"), Expr::TypedString { data_type, value } => { write!(f, "{data_type}")?; write!(f, " '{}'", &value::escape_single_quote_string(value)) diff --git a/src/parser.rs b/src/parser.rs index 58252d3f..eaf47d42 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -734,6 +734,17 @@ impl<'a> Parser<'a> { Ok(Expr::CompoundIdentifier(id_parts)) } } + // string introducer: a word starting with `_` followed by a single-quoted, double-quoted or hex string literal; see https://dev.mysql.com/doc/refman/8.0/en/charset-introducer.html + Token::SingleQuotedString(_) + | Token::DoubleQuotedString(_) + | Token::HexStringLiteral(_) + if w.value.starts_with('_') => + { + Ok(Expr::IntroducedString { + introducer: w.value, + value: self.parse_introduced_string_value()?, + }) + } _ => Ok(Expr::Identifier(w.to_ident())), }, }, // End of Token::Word @@ -784,7 +795,6 @@ impl<'a> Parser<'a> { self.prev_token(); Ok(Expr::Value(self.parse_value()?)) } - Token::LParen => { let expr = if self.parse_keyword(Keyword::SELECT) || self.parse_keyword(Keyword::WITH) { @@ -4142,6 +4152,23 @@ impl<'a> Parser<'a> { } } + fn parse_introduced_string_value(&mut self) -> Result { + let next_token = self.next_token(); + let location = next_token.location; + match next_token.token { + Token::SingleQuotedString(ref s) 
=> Ok(Value::SingleQuotedString(s.to_string())), + Token::DoubleQuotedString(ref s) => Ok(Value::DoubleQuotedString(s.to_string())), + Token::HexStringLiteral(ref s) => Ok(Value::HexStringLiteral(s.to_string())), + unexpected => self.expected( + "a string value", + TokenWithLocation { + token: unexpected, + location, + }, + ), + } + } + /// Parse an unsigned literal integer/long pub fn parse_literal_uint(&mut self) -> Result { let next_token = self.next_token(); diff --git a/src/tokenizer.rs b/src/tokenizer.rs index eef4cb7b..9780de04 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -546,12 +546,12 @@ impl<'a> Tokenizer<'a> { // identifier or keyword ch if self.dialect.is_identifier_start(ch) => { chars.next(); // consume the first char - let s = self.tokenize_word(ch, chars); + let word = self.tokenize_word(ch, chars); // TODO: implement parsing of exponent here - if s.chars().all(|x| ('0'..='9').contains(&x) || x == '.') { + if word.chars().all(|x| ('0'..='9').contains(&x) || x == '.') { let mut inner_state = State { - peekable: s.chars().peekable(), + peekable: word.chars().peekable(), line: 0, col: 0, }; @@ -562,7 +562,8 @@ impl<'a> Tokenizer<'a> { s += s2.as_str(); return Ok(Some(Token::Number(s, false))); } - Ok(Some(Token::make_word(&s, None))) + + Ok(Some(Token::make_word(&word, None))) } // single quoted string '\'' => { diff --git a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs index 3bf74255..04c86cb1 100644 --- a/tests/sqlparser_mysql.rs +++ b/tests/sqlparser_mysql.rs @@ -1264,3 +1264,44 @@ fn parse_values() { mysql().verified_stmt("VALUES ROW(1, true, 'a')"); mysql().verified_stmt("SELECT a, c FROM (VALUES ROW(1, true, 'a'), ROW(2, false, 'b'), ROW(3, false, 'c')) AS t (a, b, c)"); } + +#[test] +fn parse_hex_string_introducer() { + assert_eq!( + mysql().verified_stmt("SELECT _latin1 X'4D7953514C'"), + Statement::Query(Box::new(Query { + with: None, + body: Box::new(SetExpr::Select(Box::new(Select { + distinct: false, + top: None, + 
projection: vec![SelectItem::UnnamedExpr(Expr::IntroducedString { + introducer: "_latin1".to_string(), + value: Value::HexStringLiteral("4D7953514C".to_string()) + })], + from: vec![], + lateral_views: vec![], + selection: None, + group_by: vec![], + cluster_by: vec![], + distribute_by: vec![], + sort_by: vec![], + having: None, + qualify: None, + into: None + }))), + order_by: vec![], + limit: None, + offset: None, + fetch: None, + locks: vec![], + })) + ) +} + +#[test] +fn parse_string_introducers() { + mysql().verified_stmt("SELECT _binary 'abc'"); + mysql().one_statement_parses_to("SELECT _utf8'abc'", "SELECT _utf8 'abc'"); + mysql().one_statement_parses_to("SELECT _utf8mb4'abc'", "SELECT _utf8mb4 'abc'"); + mysql().verified_stmt("SELECT _binary 'abc', _utf8mb4 'abc'"); +}