Only support escape literals for Postgres, Redshift and generic dialect (#1674)

This commit is contained in:
Hans Ott 2025-01-24 09:02:53 +01:00 committed by GitHub
parent 4f7154288e
commit ef072be9e1
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 69 additions and 2 deletions

View file

@ -139,4 +139,8 @@ impl Dialect for GenericDialect {
/// Reports `true` for the `supports_user_host_grantee` capability on this dialect.
// NOTE(review): the exact syntax this flag gates is defined on the `Dialect`
// trait, which is not visible here — confirm there before relying on it.
fn supports_user_host_grantee(&self) -> bool {
true
}
/// Opts this dialect in to escape string constants: the tokenizer treats a
/// leading `e`/`E` directly followed by a quoted string (`E'...'`) as an
/// escaped string literal rather than as a word followed by a plain string.
fn supports_string_escape_constant(&self) -> bool {
true
}
}

View file

@ -840,6 +840,13 @@ pub trait Dialect: Debug + Any {
// Default implementation: reports `false` unless a dialect overrides it.
// NOTE(review): the syntax gated by this flag is not visible here — confirm
// against the item's own documentation.
fn supports_timestamp_versioning(&self) -> bool {
false
}
/// Returns true if this dialect supports the E'...' syntax for string literals
///
/// The prefix is accepted in either case (`e'...'` or `E'...'`). The default
/// is `false`; dialects that accept escape string constants override this to
/// return `true`. When this returns `false`, the tokenizer does not
/// special-case the prefix, so `e'...'` is tokenized as the word `e`
/// followed by an ordinary single-quoted string.
///
/// Postgres: <https://www.postgresql.org/docs/current/sql-syntax-lexical.html#SQL-SYNTAX-STRINGS-ESCAPE>
fn supports_string_escape_constant(&self) -> bool {
false
}
}
/// This represents the operators for which precedence must be defined

View file

@ -245,6 +245,10 @@ impl Dialect for PostgreSqlDialect {
/// Reports `true` for the `supports_nested_comments` capability on this dialect.
// NOTE(review): the precise comment syntax this flag enables is defined on the
// `Dialect` trait, which is not visible here — confirm there.
fn supports_nested_comments(&self) -> bool {
true
}
/// Opts this dialect in to escape string constants: the tokenizer treats a
/// leading `e`/`E` directly followed by a quoted string (`E'...'`) as an
/// escaped string literal rather than as a word followed by a plain string.
fn supports_string_escape_constant(&self) -> bool {
true
}
}
pub fn parse_create(parser: &mut Parser) -> Option<Result<Statement, ParserError>> {

View file

@ -109,4 +109,8 @@ impl Dialect for RedshiftSqlDialect {
/// Reports `true` for the `supports_partiql` capability on this dialect.
// NOTE(review): presumably gates PartiQL syntax; the exact constructs are
// defined on the `Dialect` trait, which is not visible here — confirm there.
fn supports_partiql(&self) -> bool {
true
}
/// Opts this dialect in to escape string constants: the tokenizer treats a
/// leading `e`/`E` directly followed by a quoted string (`E'...'`) as an
/// escaped string literal rather than as a word followed by a plain string.
fn supports_string_escape_constant(&self) -> bool {
true
}
}

View file

@ -240,13 +240,17 @@ impl TestedDialects {
/// Check that the tokenizer returns the expected tokens for the given SQL.
///
/// Tokenizes `sql` once per dialect in `self.dialects`, applying the
/// `unescape` setting from `self.options` when options are present, and
/// asserts that every dialect produces exactly `expected`.
///
/// # Panics
///
/// Panics if there are no dialects to test, if tokenizing fails for any
/// dialect, or if the resulting tokens differ from `expected`. A mismatch
/// panic names the dialect that produced the differing tokens.
pub fn tokenizes_to(&self, sql: &str, expected: Vec<Token>) {
    // An empty dialect set would make the check below vacuously pass.
    if self.dialects.is_empty() {
        panic!("No dialects to test");
    }

    for dialect in self.dialects.iter() {
        let mut tokenizer = Tokenizer::new(&**dialect, sql);
        if let Some(options) = &self.options {
            tokenizer = tokenizer.with_unescape(options.unescape);
        }
        let tokens = tokenizer.tokenize().unwrap();

        // Exactly one assertion, and it carries the dialect: a message-less
        // `assert_eq!` ahead of this one would always fire first on a mismatch
        // and hide which dialect tokenized differently.
        assert_eq!(expected, tokens, "Tokenized differently for {:?}", dialect);
    }
}
}

View file

@ -985,7 +985,7 @@ impl<'a> Tokenizer<'a> {
}
}
// PostgreSQL accepts "escape" string constants, which are an extension to the SQL standard.
x @ 'e' | x @ 'E' => {
x @ 'e' | x @ 'E' if self.dialect.supports_string_escape_constant() => {
let starting_loc = chars.location();
chars.next(); // consume, to check the next char
match chars.peek() {
@ -3573,4 +3573,48 @@ mod tests {
],
);
}
/// Dialects without escape-string support must tokenize `e'...'` / `E'...'`
/// as a plain word followed by an ordinary single-quoted string.
#[test]
fn test_string_escape_constant_not_supported() {
    let dialects = all_dialects_where(|dialect| !dialect.supports_string_escape_constant());

    // Each case pairs the SQL input with the word its prefix letter becomes.
    let cases = [("select e'...'", "e"), ("select E'...'", "E")];
    for (sql, prefix) in cases {
        let expected = vec![
            Token::make_keyword("select"),
            Token::Whitespace(Whitespace::Space),
            Token::make_word(prefix, None),
            Token::SingleQuotedString(String::from("...")),
        ];
        dialects.tokenizes_to(sql, expected);
    }
}
/// Dialects with escape-string support must tokenize `e'\''` / `E'\''` as a
/// single escaped string literal whose value is one quote character.
#[test]
fn test_string_escape_constant_supported() {
    let dialects = all_dialects_where(|dialect| dialect.supports_string_escape_constant());

    // Lower- and upper-case prefixes must yield the same token stream.
    for sql in ["select e'\\''", "select E'\\''"] {
        let expected = vec![
            Token::make_keyword("select"),
            Token::Whitespace(Whitespace::Space),
            Token::EscapedStringLiteral(String::from("'")),
        ];
        dialects.tokenizes_to(sql, expected);
    }
}
}