mirror of
https://github.com/apache/datafusion-sqlparser-rs.git
synced 2025-08-16 20:20:15 +00:00
Support MySQL Character Set Introducers (#788)
* MySQL Character Set Introducers
* Documentation fix
* Parsing string introducer from Token::word
* Fixed lint
* fix clippy

---------

Co-authored-by: Maciej Skrzypkowski <maciej.skrzypkowski@satoricyber.com>
Co-authored-by: Andrew Lamb <andrew@nerdnetworks.org>
This commit is contained in:
parent
b31ede7733
commit
488e8a8156
4 changed files with 77 additions and 5 deletions
|
@ -437,6 +437,8 @@ pub enum Expr {
|
|||
Nested(Box<Expr>),
|
||||
/// A literal value, such as string, number, date or NULL
|
||||
Value(Value),
|
||||
/// <https://dev.mysql.com/doc/refman/8.0/en/charset-introducer.html>
|
||||
IntroducedString { introducer: String, value: Value },
|
||||
/// A constant of form `<data_type> 'value'`.
|
||||
/// This can represent ANSI SQL `DATE`, `TIME`, and `TIMESTAMP` literals (such as `DATE '2020-01-01'`),
|
||||
/// as well as constants of other types (a non-standard PostgreSQL extension).
|
||||
|
@ -696,6 +698,7 @@ impl fmt::Display for Expr {
|
|||
Expr::Collate { expr, collation } => write!(f, "{expr} COLLATE {collation}"),
|
||||
Expr::Nested(ast) => write!(f, "({ast})"),
|
||||
Expr::Value(v) => write!(f, "{v}"),
|
||||
Expr::IntroducedString { introducer, value } => write!(f, "{introducer} {value}"),
|
||||
Expr::TypedString { data_type, value } => {
|
||||
write!(f, "{data_type}")?;
|
||||
write!(f, " '{}'", &value::escape_single_quote_string(value))
|
||||
|
|
|
@ -734,6 +734,17 @@ impl<'a> Parser<'a> {
|
|||
Ok(Expr::CompoundIdentifier(id_parts))
|
||||
}
|
||||
}
|
||||
// string introducer https://dev.mysql.com/doc/refman/8.0/en/charset-introducer.html
|
||||
Token::SingleQuotedString(_)
|
||||
| Token::DoubleQuotedString(_)
|
||||
| Token::HexStringLiteral(_)
|
||||
if w.value.starts_with('_') =>
|
||||
{
|
||||
Ok(Expr::IntroducedString {
|
||||
introducer: w.value,
|
||||
value: self.parse_introduced_string_value()?,
|
||||
})
|
||||
}
|
||||
_ => Ok(Expr::Identifier(w.to_ident())),
|
||||
},
|
||||
}, // End of Token::Word
|
||||
|
@ -784,7 +795,6 @@ impl<'a> Parser<'a> {
|
|||
self.prev_token();
|
||||
Ok(Expr::Value(self.parse_value()?))
|
||||
}
|
||||
|
||||
Token::LParen => {
|
||||
let expr =
|
||||
if self.parse_keyword(Keyword::SELECT) || self.parse_keyword(Keyword::WITH) {
|
||||
|
@ -4142,6 +4152,23 @@ impl<'a> Parser<'a> {
|
|||
}
|
||||
}
|
||||
|
||||
fn parse_introduced_string_value(&mut self) -> Result<Value, ParserError> {
|
||||
let next_token = self.next_token();
|
||||
let location = next_token.location;
|
||||
match next_token.token {
|
||||
Token::SingleQuotedString(ref s) => Ok(Value::SingleQuotedString(s.to_string())),
|
||||
Token::DoubleQuotedString(ref s) => Ok(Value::DoubleQuotedString(s.to_string())),
|
||||
Token::HexStringLiteral(ref s) => Ok(Value::HexStringLiteral(s.to_string())),
|
||||
unexpected => self.expected(
|
||||
"a string value",
|
||||
TokenWithLocation {
|
||||
token: unexpected,
|
||||
location,
|
||||
},
|
||||
),
|
||||
}
|
||||
}
|
||||
|
||||
/// Parse an unsigned literal integer/long
|
||||
pub fn parse_literal_uint(&mut self) -> Result<u64, ParserError> {
|
||||
let next_token = self.next_token();
|
||||
|
|
|
@ -546,12 +546,12 @@ impl<'a> Tokenizer<'a> {
|
|||
// identifier or keyword
|
||||
ch if self.dialect.is_identifier_start(ch) => {
|
||||
chars.next(); // consume the first char
|
||||
let s = self.tokenize_word(ch, chars);
|
||||
let word = self.tokenize_word(ch, chars);
|
||||
|
||||
// TODO: implement parsing of exponent here
|
||||
if s.chars().all(|x| ('0'..='9').contains(&x) || x == '.') {
|
||||
if word.chars().all(|x| ('0'..='9').contains(&x) || x == '.') {
|
||||
let mut inner_state = State {
|
||||
peekable: s.chars().peekable(),
|
||||
peekable: word.chars().peekable(),
|
||||
line: 0,
|
||||
col: 0,
|
||||
};
|
||||
|
@ -562,7 +562,8 @@ impl<'a> Tokenizer<'a> {
|
|||
s += s2.as_str();
|
||||
return Ok(Some(Token::Number(s, false)));
|
||||
}
|
||||
Ok(Some(Token::make_word(&s, None)))
|
||||
|
||||
Ok(Some(Token::make_word(&word, None)))
|
||||
}
|
||||
// single quoted string
|
||||
'\'' => {
|
||||
|
|
|
@ -1264,3 +1264,44 @@ fn parse_values() {
|
|||
mysql().verified_stmt("VALUES ROW(1, true, 'a')");
|
||||
mysql().verified_stmt("SELECT a, c FROM (VALUES ROW(1, true, 'a'), ROW(2, false, 'b'), ROW(3, false, 'c')) AS t (a, b, c)");
|
||||
}
|
||||
|
||||
#[test]
fn parse_hex_string_introducer() {
    // Parse first, so the statement under test is produced before the
    // expected AST is built (same evaluation order as a direct `assert_eq!`).
    let parsed = mysql().verified_stmt("SELECT _latin1 X'4D7953514C'");

    // The only projected expression: the `_latin1` introducer applied to a
    // hex string literal.
    let introduced = Expr::IntroducedString {
        introducer: String::from("_latin1"),
        value: Value::HexStringLiteral(String::from("4D7953514C")),
    };

    // A bare SELECT with no FROM clause and every optional clause empty.
    let select = Select {
        distinct: false,
        top: None,
        projection: vec![SelectItem::UnnamedExpr(introduced)],
        from: vec![],
        lateral_views: vec![],
        selection: None,
        group_by: vec![],
        cluster_by: vec![],
        distribute_by: vec![],
        sort_by: vec![],
        having: None,
        qualify: None,
        into: None,
    };

    let expected = Statement::Query(Box::new(Query {
        with: None,
        body: Box::new(SetExpr::Select(Box::new(select))),
        order_by: vec![],
        limit: None,
        offset: None,
        fetch: None,
        locks: vec![],
    }));

    assert_eq!(parsed, expected)
}
|
||||
|
||||
#[test]
fn parse_string_introducers() {
    // Introducer followed by a space and a quoted literal.
    mysql().verified_stmt("SELECT _binary 'abc'");

    // Introducer written immediately before the literal. Each pair is
    // (input SQL, expected form with a space between introducer and literal).
    let adjacent_cases = [
        ("SELECT _utf8'abc'", "SELECT _utf8 'abc'"),
        ("SELECT _utf8mb4'abc'", "SELECT _utf8mb4 'abc'"),
    ];
    for (sql, canonical) in adjacent_cases {
        mysql().one_statement_parses_to(sql, canonical);
    }

    // Two introduced strings in a single projection list.
    mysql().verified_stmt("SELECT _binary 'abc', _utf8mb4 'abc'");
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue