mirror of
https://github.com/apache/datafusion-sqlparser-rs.git
synced 2025-07-07 17:04:59 +00:00
Ignore escaped LIKE wildcards in MySQL (#1735)
This commit is contained in:
parent
ed416548dc
commit
a629ddf89b
5 changed files with 89 additions and 15 deletions
|
@ -201,6 +201,33 @@ pub trait Dialect: Debug + Any {
|
|||
false
|
||||
}
|
||||
|
||||
/// Determine whether the dialect strips the backslash when escaping LIKE wildcards (%, _).
|
||||
///
|
||||
/// [MySQL] has a special case when escaping single quoted strings which leaves these unescaped
|
||||
/// so they can be used in LIKE patterns without double-escaping (as is necessary in other
|
||||
/// escaping dialects, such as [Snowflake]). Generally, special characters have escaping rules
|
||||
/// causing them to be replaced with a different byte sequences (e.g. `'\0'` becoming the zero
|
||||
/// byte), and the default if an escaped character does not have a specific escaping rule is to
|
||||
/// strip the backslash (e.g. there is no rule for `h`, so `'\h' = 'h'`). MySQL's special case
|
||||
/// for ignoring LIKE wildcard escapes is to *not* strip the backslash, so that `'\%' = '\\%'`.
|
||||
/// This applies to all string literals though, not just those used in LIKE patterns.
|
||||
///
|
||||
/// ```text
|
||||
/// mysql> select '\_', hex('\\'), hex('_'), hex('\_');
|
||||
/// +----+-----------+----------+-----------+
|
||||
/// | \_ | hex('\\') | hex('_') | hex('\_') |
|
||||
/// +----+-----------+----------+-----------+
|
||||
/// | \_ | 5C | 5F | 5C5F |
|
||||
/// +----+-----------+----------+-----------+
|
||||
/// 1 row in set (0.00 sec)
|
||||
/// ```
|
||||
///
|
||||
/// [MySQL]: https://dev.mysql.com/doc/refman/8.4/en/string-literals.html
|
||||
/// [Snowflake]: https://docs.snowflake.com/en/sql-reference/functions/like#usage-notes
|
||||
fn ignores_wildcard_escapes(&self) -> bool {
|
||||
false
|
||||
}
|
||||
|
||||
/// Determine if the dialect supports string literals with `U&` prefix.
|
||||
/// This is used to specify Unicode code points in string literals.
|
||||
/// For example, in PostgreSQL, the following is a valid string literal:
|
||||
|
|
|
@ -62,6 +62,10 @@ impl Dialect for MySqlDialect {
|
|||
true
|
||||
}
|
||||
|
||||
fn ignores_wildcard_escapes(&self) -> bool {
|
||||
true
|
||||
}
|
||||
|
||||
fn supports_numeric_prefix(&self) -> bool {
|
||||
true
|
||||
}
|
||||
|
|
|
@ -2011,8 +2011,13 @@ impl<'a> Tokenizer<'a> {
|
|||
num_consecutive_quotes = 0;
|
||||
|
||||
if let Some(next) = chars.peek() {
|
||||
if !self.unescape {
|
||||
// In no-escape mode, the given query has to be saved completely including backslashes.
|
||||
if !self.unescape
|
||||
|| (self.dialect.ignores_wildcard_escapes()
|
||||
&& (*next == '%' || *next == '_'))
|
||||
{
|
||||
// In no-escape mode, the given query has to be saved completely
|
||||
// including backslashes. Similarly, with ignore_like_wildcard_escapes,
|
||||
// the backslash is not stripped.
|
||||
s.push(ch);
|
||||
s.push(*next);
|
||||
chars.next(); // consume next
|
||||
|
@ -3585,6 +3590,9 @@ mod tests {
|
|||
(r#"'\\a\\b\'c'"#, r#"\\a\\b\'c"#, r#"\a\b'c"#),
|
||||
(r#"'\'abcd'"#, r#"\'abcd"#, r#"'abcd"#),
|
||||
(r#"'''a''b'"#, r#"''a''b"#, r#"'a'b"#),
|
||||
(r#"'\q'"#, r#"\q"#, r#"q"#),
|
||||
(r#"'\%\_'"#, r#"\%\_"#, r#"%_"#),
|
||||
(r#"'\\%\\_'"#, r#"\\%\\_"#, r#"\%\_"#),
|
||||
] {
|
||||
let tokens = Tokenizer::new(&dialect, sql)
|
||||
.with_unescape(false)
|
||||
|
@ -3618,6 +3626,16 @@ mod tests {
|
|||
|
||||
compare(expected, tokens);
|
||||
}
|
||||
|
||||
// MySQL special case for LIKE escapes
|
||||
for (sql, expected) in [(r#"'\%'"#, r#"\%"#), (r#"'\_'"#, r#"\_"#)] {
|
||||
let dialect = MySqlDialect {};
|
||||
let tokens = Tokenizer::new(&dialect, sql).tokenize().unwrap();
|
||||
|
||||
let expected = vec![Token::SingleQuotedString(expected.to_string())];
|
||||
|
||||
compare(expected, tokens);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
|
@ -10387,15 +10387,8 @@ fn parse_with_recursion_limit() {
|
|||
|
||||
#[test]
|
||||
fn parse_escaped_string_with_unescape() {
|
||||
fn assert_mysql_query_value(sql: &str, quoted: &str) {
|
||||
let stmt = TestedDialects::new(vec![
|
||||
Box::new(MySqlDialect {}),
|
||||
Box::new(BigQueryDialect {}),
|
||||
Box::new(SnowflakeDialect {}),
|
||||
])
|
||||
.one_statement_parses_to(sql, "");
|
||||
|
||||
match stmt {
|
||||
fn assert_mysql_query_value(dialects: &TestedDialects, sql: &str, quoted: &str) {
|
||||
match dialects.one_statement_parses_to(sql, "") {
|
||||
Statement::Query(query) => match *query.body {
|
||||
SetExpr::Select(value) => {
|
||||
let expr = expr_from_projection(only(&value.projection));
|
||||
|
@ -10411,17 +10404,38 @@ fn parse_escaped_string_with_unescape() {
|
|||
_ => unreachable!(),
|
||||
};
|
||||
}
|
||||
|
||||
let escaping_dialects =
|
||||
&all_dialects_where(|dialect| dialect.supports_string_literal_backslash_escape());
|
||||
let no_wildcard_exception = &all_dialects_where(|dialect| {
|
||||
dialect.supports_string_literal_backslash_escape() && !dialect.ignores_wildcard_escapes()
|
||||
});
|
||||
let with_wildcard_exception = &all_dialects_where(|dialect| {
|
||||
dialect.supports_string_literal_backslash_escape() && dialect.ignores_wildcard_escapes()
|
||||
});
|
||||
|
||||
let sql = r"SELECT 'I\'m fine'";
|
||||
assert_mysql_query_value(sql, "I'm fine");
|
||||
assert_mysql_query_value(escaping_dialects, sql, "I'm fine");
|
||||
|
||||
let sql = r#"SELECT 'I''m fine'"#;
|
||||
assert_mysql_query_value(sql, "I'm fine");
|
||||
assert_mysql_query_value(escaping_dialects, sql, "I'm fine");
|
||||
|
||||
let sql = r#"SELECT 'I\"m fine'"#;
|
||||
assert_mysql_query_value(sql, "I\"m fine");
|
||||
assert_mysql_query_value(escaping_dialects, sql, "I\"m fine");
|
||||
|
||||
let sql = r"SELECT 'Testing: \0 \\ \% \_ \b \n \r \t \Z \a \h \ '";
|
||||
assert_mysql_query_value(sql, "Testing: \0 \\ % _ \u{8} \n \r \t \u{1a} \u{7} h ");
|
||||
assert_mysql_query_value(
|
||||
no_wildcard_exception,
|
||||
sql,
|
||||
"Testing: \0 \\ % _ \u{8} \n \r \t \u{1a} \u{7} h ",
|
||||
);
|
||||
|
||||
// check MySQL doesn't remove backslash from escaped LIKE wildcards
|
||||
assert_mysql_query_value(
|
||||
with_wildcard_exception,
|
||||
sql,
|
||||
"Testing: \0 \\ \\% \\_ \u{8} \n \r \t \u{1a} \u{7} h ",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
|
@ -2627,6 +2627,17 @@ fn parse_rlike_and_regexp() {
|
|||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parse_like_with_escape() {
|
||||
// verify backslash is not stripped for escaped wildcards
|
||||
mysql().verified_only_select(r#"SELECT 'a\%c' LIKE 'a\%c'"#);
|
||||
mysql().verified_only_select(r#"SELECT 'a\_c' LIKE 'a\_c'"#);
|
||||
mysql().verified_only_select(r#"SELECT '%\_\%' LIKE '%\_\%'"#);
|
||||
mysql().verified_only_select(r#"SELECT '\_\%' LIKE CONCAT('\_', '\%')"#);
|
||||
mysql().verified_only_select(r#"SELECT 'a%c' LIKE 'a$%c' ESCAPE '$'"#);
|
||||
mysql().verified_only_select(r#"SELECT 'a_c' LIKE 'a#_c' ESCAPE '#'"#);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parse_kill() {
|
||||
let stmt = mysql_and_generic().verified_stmt("KILL CONNECTION 5");
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue