Support MySQL Character Set Introducers (#788)

* MySQL Character Set Introducers

* Documentation fix

* Parsing string introducer from Token::word

* Fixed lint

* fix clippy

---------

Co-authored-by: Maciej Skrzypkowski <maciej.skrzypkowski@satoricyber.com>
Co-authored-by: Andrew Lamb <andrew@nerdnetworks.org>
This commit is contained in:
Maciej Skrzypkowski 2023-02-17 19:38:43 +01:00 committed by GitHub
parent b31ede7733
commit 488e8a8156
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 77 additions and 5 deletions

View file

@ -437,6 +437,8 @@ pub enum Expr {
Nested(Box<Expr>),
/// A literal value, such as string, number, date or NULL
Value(Value),
/// A string literal preceded by a MySQL character set introducer, e.g. `_utf8mb4 'abc'`.
/// See <https://dev.mysql.com/doc/refman/8.0/en/charset-introducer.html>
IntroducedString { introducer: String, value: Value },
/// A constant of form `<data_type> 'value'`.
/// This can represent ANSI SQL `DATE`, `TIME`, and `TIMESTAMP` literals (such as `DATE '2020-01-01'`),
/// as well as constants of other types (a non-standard PostgreSQL extension).
@ -696,6 +698,7 @@ impl fmt::Display for Expr {
Expr::Collate { expr, collation } => write!(f, "{expr} COLLATE {collation}"),
Expr::Nested(ast) => write!(f, "({ast})"),
Expr::Value(v) => write!(f, "{v}"),
Expr::IntroducedString { introducer, value } => write!(f, "{introducer} {value}"),
Expr::TypedString { data_type, value } => {
write!(f, "{data_type}")?;
write!(f, " '{}'", &value::escape_single_quote_string(value))

View file

@ -734,6 +734,17 @@ impl<'a> Parser<'a> {
Ok(Expr::CompoundIdentifier(id_parts))
}
}
// string introducer https://dev.mysql.com/doc/refman/8.0/en/charset-introducer.html
Token::SingleQuotedString(_)
| Token::DoubleQuotedString(_)
| Token::HexStringLiteral(_)
if w.value.starts_with('_') =>
{
Ok(Expr::IntroducedString {
introducer: w.value,
value: self.parse_introduced_string_value()?,
})
}
_ => Ok(Expr::Identifier(w.to_ident())),
},
}, // End of Token::Word
@ -784,7 +795,6 @@ impl<'a> Parser<'a> {
self.prev_token();
Ok(Expr::Value(self.parse_value()?))
}
Token::LParen => {
let expr =
if self.parse_keyword(Keyword::SELECT) || self.parse_keyword(Keyword::WITH) {
@ -4142,6 +4152,23 @@ impl<'a> Parser<'a> {
}
}
/// Parses the string literal that follows a character set introducer.
///
/// Consumes the next token. A single-quoted, double-quoted or hex string
/// literal is converted into the matching [`Value`] variant; any other token
/// is handed to `self.expected` together with the description
/// `"a string value"`.
fn parse_introduced_string_value(&mut self) -> Result<Value, ParserError> {
    // Split the token into owned parts so the literal text can be moved
    // straight into the resulting `Value`.
    let consumed = self.next_token();
    let (token, location) = (consumed.token, consumed.location);
    match token {
        Token::SingleQuotedString(text) => Ok(Value::SingleQuotedString(text)),
        Token::DoubleQuotedString(text) => Ok(Value::DoubleQuotedString(text)),
        Token::HexStringLiteral(text) => Ok(Value::HexStringLiteral(text)),
        // Re-assemble the token with its location for the error report.
        other => {
            let found = TokenWithLocation {
                token: other,
                location,
            };
            self.expected("a string value", found)
        }
    }
}
/// Parse an unsigned literal integer/long
pub fn parse_literal_uint(&mut self) -> Result<u64, ParserError> {
let next_token = self.next_token();

View file

@ -546,12 +546,12 @@ impl<'a> Tokenizer<'a> {
// identifier or keyword
ch if self.dialect.is_identifier_start(ch) => {
chars.next(); // consume the first char
let s = self.tokenize_word(ch, chars);
let word = self.tokenize_word(ch, chars);
// TODO: implement parsing of exponent here
if s.chars().all(|x| ('0'..='9').contains(&x) || x == '.') {
if word.chars().all(|x| ('0'..='9').contains(&x) || x == '.') {
let mut inner_state = State {
peekable: s.chars().peekable(),
peekable: word.chars().peekable(),
line: 0,
col: 0,
};
@ -562,7 +562,8 @@ impl<'a> Tokenizer<'a> {
s += s2.as_str();
return Ok(Some(Token::Number(s, false)));
}
Ok(Some(Token::make_word(&s, None)))
Ok(Some(Token::make_word(&word, None)))
}
// single quoted string
'\'' => {

View file

@ -1264,3 +1264,44 @@ fn parse_values() {
mysql().verified_stmt("VALUES ROW(1, true, 'a')");
mysql().verified_stmt("SELECT a, c FROM (VALUES ROW(1, true, 'a'), ROW(2, false, 'b'), ROW(3, false, 'c')) AS t (a, b, c)");
}
/// Checks the AST produced for a hex string literal preceded by an introducer.
#[test]
fn parse_hex_string_introducer() {
    let sql = "SELECT _latin1 X'4D7953514C'";

    // The single projected expression: introducer plus hex literal payload.
    let introduced = Expr::IntroducedString {
        introducer: "_latin1".to_string(),
        value: Value::HexStringLiteral("4D7953514C".to_string()),
    };

    // A bare SELECT with nothing but the projection populated.
    let select = Select {
        distinct: false,
        top: None,
        projection: vec![SelectItem::UnnamedExpr(introduced)],
        from: vec![],
        lateral_views: vec![],
        selection: None,
        group_by: vec![],
        cluster_by: vec![],
        distribute_by: vec![],
        sort_by: vec![],
        having: None,
        qualify: None,
        into: None,
    };

    let expected = Statement::Query(Box::new(Query {
        with: None,
        body: Box::new(SetExpr::Select(Box::new(select))),
        order_by: vec![],
        limit: None,
        offset: None,
        fetch: None,
        locks: vec![],
    }));

    assert_eq!(mysql().verified_stmt(sql), expected)
}
/// Exercises introducers in front of quoted string literals, with and without
/// whitespace between the introducer and the literal.
#[test]
fn parse_string_introducers() {
    mysql().verified_stmt("SELECT _binary 'abc'");

    // Inputs with the introducer written directly against the literal, paired
    // with the text passed to `one_statement_parses_to` as the expected form.
    let unspaced = [
        ("SELECT _utf8'abc'", "SELECT _utf8 'abc'"),
        ("SELECT _utf8mb4'abc'", "SELECT _utf8mb4 'abc'"),
    ];
    for &(sql, canonical) in &unspaced {
        mysql().one_statement_parses_to(sql, canonical);
    }

    // Two introduced strings in one projection list.
    mysql().verified_stmt("SELECT _binary 'abc', _utf8mb4 'abc'");
}