Only support escape literals for Postgres, Redshift and generic dialects (#1674)

2025-07-07 17:04:59 +00:00 · 2025-01-24 09:02:53 +01:00 · 2025-01-24 09:02:53 +01:00 · ef072be9e1
commit ef072be9e1
parent 4f7154288e
6 changed files with 69 additions and 2 deletions
--- a/src/dialect/generic.rs
+++ b/src/dialect/generic.rs
@ -139,4 +139,8 @@ impl Dialect for GenericDialect {
    fn supports_user_host_grantee(&self) -> bool {
        true
    }
+
+    fn supports_string_escape_constant(&self) -> bool {
+        true
+    }
 }
--- a/src/dialect/mod.rs
+++ b/src/dialect/mod.rs
@ -840,6 +840,13 @@ pub trait Dialect: Debug + Any {
    fn supports_timestamp_versioning(&self) -> bool {
        false
    }
+
+    /// Returns true if this dialect supports escape string constants (the `E'...'` or `e'...'` syntax)
+    ///
+    /// Postgres: <https://www.postgresql.org/docs/current/sql-syntax-lexical.html#SQL-SYNTAX-STRINGS-ESCAPE>
+    fn supports_string_escape_constant(&self) -> bool {
+        false
+    }
 }

 /// This represents the operators for which precedence must be defined
--- a/src/dialect/postgresql.rs
+++ b/src/dialect/postgresql.rs
@ -245,6 +245,10 @@ impl Dialect for PostgreSqlDialect {
    fn supports_nested_comments(&self) -> bool {
        true
    }
+
+    fn supports_string_escape_constant(&self) -> bool {
+        true
+    }
 }

 pub fn parse_create(parser: &mut Parser) -> Option<Result<Statement, ParserError>> {
--- a/src/dialect/redshift.rs
+++ b/src/dialect/redshift.rs
@ -109,4 +109,8 @@ impl Dialect for RedshiftSqlDialect {
    fn supports_partiql(&self) -> bool {
        true
    }
+
+    fn supports_string_escape_constant(&self) -> bool {
+        true
+    }
 }
--- a/src/test_utils.rs
+++ b/src/test_utils.rs
@ -240,13 +240,17 @@ impl TestedDialects {

    /// Check that the tokenizer returns the expected tokens for the given SQL.
    pub fn tokenizes_to(&self, sql: &str, expected: Vec<Token>) {
+        if self.dialects.is_empty() {
+            panic!("No dialects to test");
+        }
+
        self.dialects.iter().for_each(|dialect| {
            let mut tokenizer = Tokenizer::new(&**dialect, sql);
            if let Some(options) = &self.options {
                tokenizer = tokenizer.with_unescape(options.unescape);
            }
            let tokens = tokenizer.tokenize().unwrap();
-            assert_eq!(expected, tokens);
+            assert_eq!(expected, tokens, "Tokenized differently for {:?}", dialect);
        });
    }
 }
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@ -985,7 +985,7 @@ impl<'a> Tokenizer<'a> {
                    }
                }
                // PostgreSQL accepts "escape" string constants, which are an extension to the SQL standard.
-                x @ 'e' | x @ 'E' => {
+                x @ 'e' | x @ 'E' if self.dialect.supports_string_escape_constant() => {
                    let starting_loc = chars.location();
                    chars.next(); // consume, to check the next char
                    match chars.peek() {
@ -3573,4 +3573,48 @@ mod tests {
                ],
            );
    }
+
+    #[test]
+    fn test_string_escape_constant_not_supported() {
+        all_dialects_where(|dialect| !dialect.supports_string_escape_constant()).tokenizes_to(
+            "select e'...'",
+            vec![
+                Token::make_keyword("select"),
+                Token::Whitespace(Whitespace::Space),
+                Token::make_word("e", None),
+                Token::SingleQuotedString("...".to_string()),
+            ],
+        );
+
+        all_dialects_where(|dialect| !dialect.supports_string_escape_constant()).tokenizes_to(
+            "select E'...'",
+            vec![
+                Token::make_keyword("select"),
+                Token::Whitespace(Whitespace::Space),
+                Token::make_word("E", None),
+                Token::SingleQuotedString("...".to_string()),
+            ],
+        );
+    }
+
+    #[test]
+    fn test_string_escape_constant_supported() {
+        all_dialects_where(|dialect| dialect.supports_string_escape_constant()).tokenizes_to(
+            "select e'\\''",
+            vec![
+                Token::make_keyword("select"),
+                Token::Whitespace(Whitespace::Space),
+                Token::EscapedStringLiteral("'".to_string()),
+            ],
+        );
+
+        all_dialects_where(|dialect| dialect.supports_string_escape_constant()).tokenizes_to(
+            "select E'\\''",
+            vec![
+                Token::make_keyword("select"),
+                Token::Whitespace(Whitespace::Space),
+                Token::EscapedStringLiteral("'".to_string()),
+            ],
+        );
+    }
 }