From ef072be9e1b1ecbf8032bd2040131a9d5b00de5d Mon Sep 17 00:00:00 2001 From: Hans Ott Date: Fri, 24 Jan 2025 09:02:53 +0100 Subject: [PATCH] Only support escape literals for Postgres, Redshift and generic dialect (#1674) --- src/dialect/generic.rs | 4 ++++ src/dialect/mod.rs | 7 ++++++ src/dialect/postgresql.rs | 4 ++++ src/dialect/redshift.rs | 4 ++++ src/test_utils.rs | 6 ++++- src/tokenizer.rs | 46 ++++++++++++++++++++++++++++++++++++++- 6 files changed, 69 insertions(+), 2 deletions(-) diff --git a/src/dialect/generic.rs b/src/dialect/generic.rs index d696861b..4021b575 100644 --- a/src/dialect/generic.rs +++ b/src/dialect/generic.rs @@ -139,4 +139,8 @@ impl Dialect for GenericDialect { fn supports_user_host_grantee(&self) -> bool { true } + + fn supports_string_escape_constant(&self) -> bool { + true + } } diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs index 119bb3cf..79260326 100644 --- a/src/dialect/mod.rs +++ b/src/dialect/mod.rs @@ -840,6 +840,13 @@ pub trait Dialect: Debug + Any { fn supports_timestamp_versioning(&self) -> bool { false } + + /// Returns true if this dialect supports the E'...' 
syntax for string literals + /// + /// Postgres: <https://www.postgresql.org/docs/current/sql-syntax-lexical.html#SQL-SYNTAX-STRINGS-ESCAPE> + fn supports_string_escape_constant(&self) -> bool { + false + } } /// This represents the operators for which precedence must be defined diff --git a/src/dialect/postgresql.rs b/src/dialect/postgresql.rs index 170b0a7c..d4f2a032 100644 --- a/src/dialect/postgresql.rs +++ b/src/dialect/postgresql.rs @@ -245,6 +245,10 @@ impl Dialect for PostgreSqlDialect { fn supports_nested_comments(&self) -> bool { true } + + fn supports_string_escape_constant(&self) -> bool { + true + } } pub fn parse_create(parser: &mut Parser) -> Option<Result<Statement, ParserError>> { diff --git a/src/dialect/redshift.rs b/src/dialect/redshift.rs index 55405ba5..a4522bbf 100644 --- a/src/dialect/redshift.rs +++ b/src/dialect/redshift.rs @@ -109,4 +109,8 @@ impl Dialect for RedshiftSqlDialect { fn supports_partiql(&self) -> bool { true } + + fn supports_string_escape_constant(&self) -> bool { + true + } } diff --git a/src/test_utils.rs b/src/test_utils.rs index 51e4fd74..1c322f65 100644 --- a/src/test_utils.rs +++ b/src/test_utils.rs @@ -240,13 +240,17 @@ impl TestedDialects { /// Check that the tokenizer returns the expected tokens for the given SQL. pub fn tokenizes_to(&self, sql: &str, expected: Vec<Token>) { + if self.dialects.is_empty() { + panic!("No dialects to test"); + } + self.dialects.iter().for_each(|dialect| { let mut tokenizer = Tokenizer::new(&**dialect, sql); if let Some(options) = &self.options { tokenizer = tokenizer.with_unescape(options.unescape); } let tokens = tokenizer.tokenize().unwrap(); - assert_eq!(expected, tokens); + assert_eq!(expected, tokens, "Tokenized differently for {:?}", dialect); }); } } diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 08e233b6..309f09d8 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -985,7 +985,7 @@ impl<'a> Tokenizer<'a> { } } // PostgreSQL accepts "escape" string constants, which are an extension to the SQL standard. 
- x @ 'e' | x @ 'E' => { + x @ 'e' | x @ 'E' if self.dialect.supports_string_escape_constant() => { let starting_loc = chars.location(); chars.next(); // consume, to check the next char match chars.peek() { @@ -3573,4 +3573,48 @@ mod tests { ], ); } + + #[test] + fn test_string_escape_constant_not_supported() { + all_dialects_where(|dialect| !dialect.supports_string_escape_constant()).tokenizes_to( + "select e'...'", + vec![ + Token::make_keyword("select"), + Token::Whitespace(Whitespace::Space), + Token::make_word("e", None), + Token::SingleQuotedString("...".to_string()), + ], + ); + + all_dialects_where(|dialect| !dialect.supports_string_escape_constant()).tokenizes_to( + "select E'...'", + vec![ + Token::make_keyword("select"), + Token::Whitespace(Whitespace::Space), + Token::make_word("E", None), + Token::SingleQuotedString("...".to_string()), + ], + ); + } + + #[test] + fn test_string_escape_constant_supported() { + all_dialects_where(|dialect| dialect.supports_string_escape_constant()).tokenizes_to( + "select e'\\''", + vec![ + Token::make_keyword("select"), + Token::Whitespace(Whitespace::Space), + Token::EscapedStringLiteral("'".to_string()), + ], + ); + + all_dialects_where(|dialect| dialect.supports_string_escape_constant()).tokenizes_to( + "select E'\\''", + vec![ + Token::make_keyword("select"), + Token::Whitespace(Whitespace::Space), + Token::EscapedStringLiteral("'".to_string()), + ], + ); + } }