Ignore escaped LIKE wildcards in MySQL (#1735)

This commit is contained in:
Michael Victor Zink 2025-02-28 22:07:39 -08:00 committed by GitHub
parent ed416548dc
commit a629ddf89b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 89 additions and 15 deletions

View file

@ -201,6 +201,33 @@ pub trait Dialect: Debug + Any {
false
}
/// Returns true if the dialect keeps the backslash in front of an escaped LIKE wildcard
/// (`%`, `_`) inside a string literal instead of stripping it. Defaults to `false`.
///
/// [MySQL] has a special case when escaping single quoted strings which leaves these unescaped
/// so they can be used in LIKE patterns without double-escaping (as is necessary in other
/// escaping dialects, such as [Snowflake]). Generally, special characters have escaping rules
/// causing them to be replaced with a different byte sequence (e.g. `'\0'` becoming the zero
/// byte), and the default if an escaped character does not have a specific escaping rule is to
/// strip the backslash (e.g. there is no rule for `h`, so `'\h' = 'h'`). MySQL's special case
/// for ignoring LIKE wildcard escapes is to *not* strip the backslash, so that `'\%' = '\\%'`.
/// This applies to all string literals though, not just those used in LIKE patterns.
///
/// ```text
/// mysql> select '\_', hex('\\'), hex('_'), hex('\_');
/// +----+-----------+----------+-----------+
/// | \_ | hex('\\') | hex('_') | hex('\_') |
/// +----+-----------+----------+-----------+
/// | \_ | 5C        | 5F       | 5C5F      |
/// +----+-----------+----------+-----------+
/// 1 row in set (0.00 sec)
/// ```
///
/// [MySQL]: https://dev.mysql.com/doc/refman/8.4/en/string-literals.html
/// [Snowflake]: https://docs.snowflake.com/en/sql-reference/functions/like#usage-notes
fn ignores_wildcard_escapes(&self) -> bool {
false
}
/// Determine if the dialect supports string literals with `U&` prefix.
/// This is used to specify Unicode code points in string literals.
/// For example, in PostgreSQL, the following is a valid string literal:

View file

@ -62,6 +62,10 @@ impl Dialect for MySqlDialect {
true
}
/// MySQL keeps the backslash in front of escaped LIKE wildcards (`\%`, `\_`) in string
/// literals rather than stripping it, so this override returns `true`.
fn ignores_wildcard_escapes(&self) -> bool {
true
}
fn supports_numeric_prefix(&self) -> bool {
true
}

View file

@ -2011,8 +2011,13 @@ impl<'a> Tokenizer<'a> {
num_consecutive_quotes = 0;
if let Some(next) = chars.peek() {
if !self.unescape {
// In no-escape mode, the given query has to be saved completely including backslashes.
if !self.unescape
|| (self.dialect.ignores_wildcard_escapes()
&& (*next == '%' || *next == '_'))
{
// In no-escape mode, the given query has to be saved completely
// including backslashes. Similarly, when the dialect reports
// `ignores_wildcard_escapes`, the backslash before `%` or `_` is not stripped.
s.push(ch);
s.push(*next);
chars.next(); // consume next
@ -3585,6 +3590,9 @@ mod tests {
(r#"'\\a\\b\'c'"#, r#"\\a\\b\'c"#, r#"\a\b'c"#),
(r#"'\'abcd'"#, r#"\'abcd"#, r#"'abcd"#),
(r#"'''a''b'"#, r#"''a''b"#, r#"'a'b"#),
(r#"'\q'"#, r#"\q"#, r#"q"#),
(r#"'\%\_'"#, r#"\%\_"#, r#"%_"#),
(r#"'\\%\\_'"#, r#"\\%\\_"#, r#"\%\_"#),
] {
let tokens = Tokenizer::new(&dialect, sql)
.with_unescape(false)
@ -3618,6 +3626,16 @@ mod tests {
compare(expected, tokens);
}
// MySQL special case for LIKE escapes
for (sql, expected) in [(r#"'\%'"#, r#"\%"#), (r#"'\_'"#, r#"\_"#)] {
let dialect = MySqlDialect {};
let tokens = Tokenizer::new(&dialect, sql).tokenize().unwrap();
let expected = vec![Token::SingleQuotedString(expected.to_string())];
compare(expected, tokens);
}
}
#[test]

View file

@ -10387,15 +10387,8 @@ fn parse_with_recursion_limit() {
#[test]
fn parse_escaped_string_with_unescape() {
fn assert_mysql_query_value(sql: &str, quoted: &str) {
let stmt = TestedDialects::new(vec![
Box::new(MySqlDialect {}),
Box::new(BigQueryDialect {}),
Box::new(SnowflakeDialect {}),
])
.one_statement_parses_to(sql, "");
match stmt {
fn assert_mysql_query_value(dialects: &TestedDialects, sql: &str, quoted: &str) {
match dialects.one_statement_parses_to(sql, "") {
Statement::Query(query) => match *query.body {
SetExpr::Select(value) => {
let expr = expr_from_projection(only(&value.projection));
@ -10411,17 +10404,38 @@ fn parse_escaped_string_with_unescape() {
_ => unreachable!(),
};
}
let escaping_dialects =
&all_dialects_where(|dialect| dialect.supports_string_literal_backslash_escape());
let no_wildcard_exception = &all_dialects_where(|dialect| {
dialect.supports_string_literal_backslash_escape() && !dialect.ignores_wildcard_escapes()
});
let with_wildcard_exception = &all_dialects_where(|dialect| {
dialect.supports_string_literal_backslash_escape() && dialect.ignores_wildcard_escapes()
});
let sql = r"SELECT 'I\'m fine'";
assert_mysql_query_value(sql, "I'm fine");
assert_mysql_query_value(escaping_dialects, sql, "I'm fine");
let sql = r#"SELECT 'I''m fine'"#;
assert_mysql_query_value(sql, "I'm fine");
assert_mysql_query_value(escaping_dialects, sql, "I'm fine");
let sql = r#"SELECT 'I\"m fine'"#;
assert_mysql_query_value(sql, "I\"m fine");
assert_mysql_query_value(escaping_dialects, sql, "I\"m fine");
let sql = r"SELECT 'Testing: \0 \\ \% \_ \b \n \r \t \Z \a \h \ '";
assert_mysql_query_value(sql, "Testing: \0 \\ % _ \u{8} \n \r \t \u{1a} \u{7} h ");
assert_mysql_query_value(
no_wildcard_exception,
sql,
"Testing: \0 \\ % _ \u{8} \n \r \t \u{1a} \u{7} h ",
);
// check MySQL doesn't remove backslash from escaped LIKE wildcards
assert_mysql_query_value(
with_wildcard_exception,
sql,
"Testing: \0 \\ \\% \\_ \u{8} \n \r \t \u{1a} \u{7} h ",
);
}
#[test]

View file

@ -2627,6 +2627,17 @@ fn parse_rlike_and_regexp() {
}
}
#[test]
fn parse_like_with_escape() {
    // Each statement must round-trip unchanged under the MySQL dialect: the
    // backslash in front of an escaped wildcard has to survive parsing and
    // re-serialization. The last two cases use an explicit ESCAPE character.
    let round_trip_cases = [
        r#"SELECT 'a\%c' LIKE 'a\%c'"#,
        r#"SELECT 'a\_c' LIKE 'a\_c'"#,
        r#"SELECT '%\_\%' LIKE '%\_\%'"#,
        r#"SELECT '\_\%' LIKE CONCAT('\_', '\%')"#,
        r#"SELECT 'a%c' LIKE 'a$%c' ESCAPE '$'"#,
        r#"SELECT 'a_c' LIKE 'a#_c' ESCAPE '#'"#,
    ];
    for statement in round_trip_cases {
        mysql().verified_only_select(statement);
    }
}
#[test]
fn parse_kill() {
let stmt = mysql_and_generic().verified_stmt("KILL CONNECTION 5");