National strings: check if dialect supports backslash escape (#1672)

This commit is contained in:
Hans Ott 2025-01-23 17:16:53 +01:00 committed by GitHub
parent c7c0de6551
commit 4f7154288e
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 44 additions and 2 deletions

View file

@ -33,7 +33,7 @@ use core::fmt::Debug;
use crate::dialect::*;
use crate::parser::{Parser, ParserError};
use crate::tokenizer::Tokenizer;
use crate::tokenizer::{Token, Tokenizer};
use crate::{ast::*, parser::ParserOptions};
#[cfg(test)]
@ -237,6 +237,18 @@ impl TestedDialects {
pub fn verified_expr(&self, sql: &str) -> Expr {
// Passes `sql` as both arguments to `expr_parses_to`; presumably the second is the
// expected canonical form, making this a round-trip check — confirm against `expr_parses_to`.
self.expr_parses_to(sql, sql)
}
/// Asserts that tokenizing `sql` yields exactly `expected` for every dialect under test.
///
/// If parser options are configured on `self`, their `unescape` setting is applied to the
/// tokenizer before it runs.
///
/// # Panics
///
/// Panics if tokenization fails for any dialect, or if the produced tokens differ from
/// `expected`.
pub fn tokenizes_to(&self, sql: &str, expected: Vec<Token>) {
    for dialect in self.dialects.iter() {
        let base = Tokenizer::new(&**dialect, sql);
        // Only override the unescape behaviour when options were explicitly provided.
        let mut tokenizer = match &self.options {
            Some(opts) => base.with_unescape(opts.unescape),
            None => base,
        };
        let actual = tokenizer.tokenize().unwrap();
        assert_eq!(expected, actual);
    }
}
}
/// Returns all available dialects.

View file

@ -971,7 +971,10 @@ impl<'a> Tokenizer<'a> {
match chars.peek() {
Some('\'') => {
// N'...' - a <national character string literal>
let s = self.tokenize_single_quoted_string(chars, '\'', true)?;
let backslash_escape =
self.dialect.supports_string_literal_backslash_escape();
let s =
self.tokenize_single_quoted_string(chars, '\'', backslash_escape)?;
Ok(Some(Token::NationalStringLiteral(s)))
}
_ => {
@ -2155,6 +2158,7 @@ mod tests {
use crate::dialect::{
BigQueryDialect, ClickHouseDialect, HiveDialect, MsSqlDialect, MySqlDialect, SQLiteDialect,
};
use crate::test_utils::all_dialects_where;
use core::fmt::Debug;
#[test]
@ -3543,4 +3547,30 @@ mod tests {
];
compare(expected, tokens);
}
#[test]
fn test_national_strings_backslash_escape_not_supported() {
    // The SQL text is `select n'''''\'`. Without backslash escaping, each `''` pair is a
    // quoted single quote, the backslash is an ordinary character, and the final `'`
    // terminates the literal, leaving the value `''\`.
    let sql = "select n'''''\\'";
    let expected = vec![
        Token::make_keyword("select"),
        Token::Whitespace(Whitespace::Space),
        Token::NationalStringLiteral("''\\".to_string()),
    ];

    let dialects = all_dialects_where(|d| !d.supports_string_literal_backslash_escape());
    dialects.tokenizes_to(sql, expected);
}
#[test]
fn test_national_strings_backslash_escape_supported() {
    // The SQL text is `select n'''''\''`. With backslash escaping, the two `''` pairs each
    // yield a single quote and `\'` yields a third, so the literal value is `'''`.
    let sql = "select n'''''\\''";
    let expected = vec![
        Token::make_keyword("select"),
        Token::Whitespace(Whitespace::Space),
        Token::NationalStringLiteral("'''".to_string()),
    ];

    let dialects = all_dialects_where(|d| d.supports_string_literal_backslash_escape());
    dialects.tokenizes_to(sql, expected);
}
}