mirror of
https://github.com/apache/datafusion-sqlparser-rs.git
synced 2025-08-17 04:30:16 +00:00
Support MySQL Character Set Introducers (#788)
* MySQL Character Set Introducers
* Documentation fix
* Parsing string introducer from Token::word
* Fixed lint
* fix clippy

---------

Co-authored-by: Maciej Skrzypkowski <maciej.skrzypkowski@satoricyber.com>
Co-authored-by: Andrew Lamb <andrew@nerdnetworks.org>
This commit is contained in:
parent
b31ede7733
commit
488e8a8156
4 changed files with 77 additions and 5 deletions
|
@ -437,6 +437,8 @@ pub enum Expr {
|
||||||
Nested(Box<Expr>),
|
Nested(Box<Expr>),
|
||||||
/// A literal value, such as string, number, date or NULL
|
/// A literal value, such as string, number, date or NULL
|
||||||
Value(Value),
|
Value(Value),
|
||||||
|
/// <https://dev.mysql.com/doc/refman/8.0/en/charset-introducer.html>
|
||||||
|
IntroducedString { introducer: String, value: Value },
|
||||||
/// A constant of form `<data_type> 'value'`.
|
/// A constant of form `<data_type> 'value'`.
|
||||||
/// This can represent ANSI SQL `DATE`, `TIME`, and `TIMESTAMP` literals (such as `DATE '2020-01-01'`),
|
/// This can represent ANSI SQL `DATE`, `TIME`, and `TIMESTAMP` literals (such as `DATE '2020-01-01'`),
|
||||||
/// as well as constants of other types (a non-standard PostgreSQL extension).
|
/// as well as constants of other types (a non-standard PostgreSQL extension).
|
||||||
|
@ -696,6 +698,7 @@ impl fmt::Display for Expr {
|
||||||
Expr::Collate { expr, collation } => write!(f, "{expr} COLLATE {collation}"),
|
Expr::Collate { expr, collation } => write!(f, "{expr} COLLATE {collation}"),
|
||||||
Expr::Nested(ast) => write!(f, "({ast})"),
|
Expr::Nested(ast) => write!(f, "({ast})"),
|
||||||
Expr::Value(v) => write!(f, "{v}"),
|
Expr::Value(v) => write!(f, "{v}"),
|
||||||
|
Expr::IntroducedString { introducer, value } => write!(f, "{introducer} {value}"),
|
||||||
Expr::TypedString { data_type, value } => {
|
Expr::TypedString { data_type, value } => {
|
||||||
write!(f, "{data_type}")?;
|
write!(f, "{data_type}")?;
|
||||||
write!(f, " '{}'", &value::escape_single_quote_string(value))
|
write!(f, " '{}'", &value::escape_single_quote_string(value))
|
||||||
|
|
|
@ -734,6 +734,17 @@ impl<'a> Parser<'a> {
|
||||||
Ok(Expr::CompoundIdentifier(id_parts))
|
Ok(Expr::CompoundIdentifier(id_parts))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
// string introducer https://dev.mysql.com/doc/refman/8.0/en/charset-introducer.html
|
||||||
|
Token::SingleQuotedString(_)
|
||||||
|
| Token::DoubleQuotedString(_)
|
||||||
|
| Token::HexStringLiteral(_)
|
||||||
|
if w.value.starts_with('_') =>
|
||||||
|
{
|
||||||
|
Ok(Expr::IntroducedString {
|
||||||
|
introducer: w.value,
|
||||||
|
value: self.parse_introduced_string_value()?,
|
||||||
|
})
|
||||||
|
}
|
||||||
_ => Ok(Expr::Identifier(w.to_ident())),
|
_ => Ok(Expr::Identifier(w.to_ident())),
|
||||||
},
|
},
|
||||||
}, // End of Token::Word
|
}, // End of Token::Word
|
||||||
|
@ -784,7 +795,6 @@ impl<'a> Parser<'a> {
|
||||||
self.prev_token();
|
self.prev_token();
|
||||||
Ok(Expr::Value(self.parse_value()?))
|
Ok(Expr::Value(self.parse_value()?))
|
||||||
}
|
}
|
||||||
|
|
||||||
Token::LParen => {
|
Token::LParen => {
|
||||||
let expr =
|
let expr =
|
||||||
if self.parse_keyword(Keyword::SELECT) || self.parse_keyword(Keyword::WITH) {
|
if self.parse_keyword(Keyword::SELECT) || self.parse_keyword(Keyword::WITH) {
|
||||||
|
@ -4142,6 +4152,23 @@ impl<'a> Parser<'a> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Parse the string literal that follows a MySQL character set introducer,
/// e.g. the `X'4D7953514C'` in `_latin1 X'4D7953514C'`.
///
/// Takes the next token via `self.next_token()` and accepts a single-quoted
/// string, a double-quoted string, or a hex string literal, returning the
/// matching `Value` variant built from a copy of the token's text.
///
/// Any other token is handed to `self.expected("a string value", ...)`
/// together with its original location.
/// NOTE(review): `expected` is not visible here; presumably it always
/// yields a `ParserError` describing the unexpected token -- confirm.
fn parse_introduced_string_value(&mut self) -> Result<Value, ParserError> {
|
||||||
|
let next_token = self.next_token();
|
||||||
|
// Keep the location so the fallback arm can rebuild a `TokenWithLocation`.
let location = next_token.location;
|
||||||
|
match next_token.token {
|
||||||
|
Token::SingleQuotedString(ref s) => Ok(Value::SingleQuotedString(s.to_string())),
|
||||||
|
Token::DoubleQuotedString(ref s) => Ok(Value::DoubleQuotedString(s.to_string())),
|
||||||
|
Token::HexStringLiteral(ref s) => Ok(Value::HexStringLiteral(s.to_string())),
|
||||||
|
// Not a string-like token: report it along with where it was found.
unexpected => self.expected(
|
||||||
|
"a string value",
|
||||||
|
TokenWithLocation {
|
||||||
|
token: unexpected,
|
||||||
|
location,
|
||||||
|
},
|
||||||
|
),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// Parse an unsigned literal integer/long
|
/// Parse an unsigned literal integer/long
|
||||||
pub fn parse_literal_uint(&mut self) -> Result<u64, ParserError> {
|
pub fn parse_literal_uint(&mut self) -> Result<u64, ParserError> {
|
||||||
let next_token = self.next_token();
|
let next_token = self.next_token();
|
||||||
|
|
|
@ -546,12 +546,12 @@ impl<'a> Tokenizer<'a> {
|
||||||
// identifier or keyword
|
// identifier or keyword
|
||||||
ch if self.dialect.is_identifier_start(ch) => {
|
ch if self.dialect.is_identifier_start(ch) => {
|
||||||
chars.next(); // consume the first char
|
chars.next(); // consume the first char
|
||||||
let s = self.tokenize_word(ch, chars);
|
let word = self.tokenize_word(ch, chars);
|
||||||
|
|
||||||
// TODO: implement parsing of exponent here
|
// TODO: implement parsing of exponent here
|
||||||
if s.chars().all(|x| ('0'..='9').contains(&x) || x == '.') {
|
if word.chars().all(|x| ('0'..='9').contains(&x) || x == '.') {
|
||||||
let mut inner_state = State {
|
let mut inner_state = State {
|
||||||
peekable: s.chars().peekable(),
|
peekable: word.chars().peekable(),
|
||||||
line: 0,
|
line: 0,
|
||||||
col: 0,
|
col: 0,
|
||||||
};
|
};
|
||||||
|
@ -562,7 +562,8 @@ impl<'a> Tokenizer<'a> {
|
||||||
s += s2.as_str();
|
s += s2.as_str();
|
||||||
return Ok(Some(Token::Number(s, false)));
|
return Ok(Some(Token::Number(s, false)));
|
||||||
}
|
}
|
||||||
Ok(Some(Token::make_word(&s, None)))
|
|
||||||
|
Ok(Some(Token::make_word(&word, None)))
|
||||||
}
|
}
|
||||||
// single quoted string
|
// single quoted string
|
||||||
'\'' => {
|
'\'' => {
|
||||||
|
|
|
@ -1264,3 +1264,44 @@ fn parse_values() {
|
||||||
mysql().verified_stmt("VALUES ROW(1, true, 'a')");
|
mysql().verified_stmt("VALUES ROW(1, true, 'a')");
|
||||||
mysql().verified_stmt("SELECT a, c FROM (VALUES ROW(1, true, 'a'), ROW(2, false, 'b'), ROW(3, false, 'c')) AS t (a, b, c)");
|
mysql().verified_stmt("SELECT a, c FROM (VALUES ROW(1, true, 'a'), ROW(2, false, 'b'), ROW(3, false, 'c')) AS t (a, b, c)");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Checks the AST produced for an introducer followed by a hex string literal:
// the statement returned by `mysql().verified_stmt(...)` for
// `SELECT _latin1 X'4D7953514C'` must equal a plain query whose only
// projection is an `Expr::IntroducedString` with introducer `_latin1` and a
// `Value::HexStringLiteral` holding `4D7953514C`.
// NOTE(review): `verified_stmt` is defined elsewhere; presumably it also
// asserts that the statement round-trips to the same SQL text -- confirm.
#[test]
|
||||||
|
fn parse_hex_string_introducer() {
|
||||||
|
assert_eq!(
|
||||||
|
mysql().verified_stmt("SELECT _latin1 X'4D7953514C'"),
|
||||||
|
Statement::Query(Box::new(Query {
|
||||||
|
with: None,
|
||||||
|
body: Box::new(SetExpr::Select(Box::new(Select {
|
||||||
|
distinct: false,
|
||||||
|
top: None,
|
||||||
|
// The single projected expression is the introduced hex string.
projection: vec![SelectItem::UnnamedExpr(Expr::IntroducedString {
|
||||||
|
introducer: "_latin1".to_string(),
|
||||||
|
value: Value::HexStringLiteral("4D7953514C".to_string())
|
||||||
|
})],
|
||||||
|
from: vec![],
|
||||||
|
lateral_views: vec![],
|
||||||
|
selection: None,
|
||||||
|
group_by: vec![],
|
||||||
|
cluster_by: vec![],
|
||||||
|
distribute_by: vec![],
|
||||||
|
sort_by: vec![],
|
||||||
|
having: None,
|
||||||
|
qualify: None,
|
||||||
|
into: None
|
||||||
|
}))),
|
||||||
|
order_by: vec![],
|
||||||
|
limit: None,
|
||||||
|
offset: None,
|
||||||
|
fetch: None,
|
||||||
|
locks: vec![],
|
||||||
|
}))
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Exercises introducers in front of quoted strings with the MySQL dialect.
// The `one_statement_parses_to` cases pair input written without a space
// between introducer and literal (`_utf8'abc'`) with the expected output that
// has a single space (`_utf8 'abc'`); the last case puts two introduced
// strings in one projection list.
// NOTE(review): `verified_stmt` / `one_statement_parses_to` are defined
// elsewhere; their exact checks are assumed from their names -- confirm.
#[test]
|
||||||
|
fn parse_string_introducers() {
|
||||||
|
mysql().verified_stmt("SELECT _binary 'abc'");
|
||||||
|
mysql().one_statement_parses_to("SELECT _utf8'abc'", "SELECT _utf8 'abc'");
|
||||||
|
mysql().one_statement_parses_to("SELECT _utf8mb4'abc'", "SELECT _utf8mb4 'abc'");
|
||||||
|
mysql().verified_stmt("SELECT _binary 'abc', _utf8mb4 'abc'");
|
||||||
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue