feat: support raw string literal of BigQuery (#812)

* add tests

* feat: parse raw literal of bq

* merge double quoted & single quoted to raw string literal

* Update src/ast/value.rs

---------

Co-authored-by: Andrew Lamb <andrew@nerdnetworks.org>
This commit is contained in:
Y Togami 2023-03-02 04:11:42 +09:00 committed by GitHub
parent 70917a59ed
commit 58de3c1222
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 60 additions and 0 deletions

View file

@ -62,6 +62,8 @@ pub enum Token {
SingleQuotedByteStringLiteral(String),
/// Byte string literal: i.e: b"string" or B"string"
DoubleQuotedByteStringLiteral(String),
/// Raw string literal: i.e: r'string' or R'string' or r"string" or R"string"
RawStringLiteral(String),
/// "National" string literal: i.e: N'string'
NationalStringLiteral(String),
/// "escaped" string literal, which is an extension to the SQL standard: i.e: e'first \n second' or E'first \n second'
@ -195,6 +197,7 @@ impl fmt::Display for Token {
Token::HexStringLiteral(ref s) => write!(f, "X'{s}'"),
Token::SingleQuotedByteStringLiteral(ref s) => write!(f, "B'{s}'"),
Token::DoubleQuotedByteStringLiteral(ref s) => write!(f, "B\"{s}\""),
Token::RawStringLiteral(ref s) => write!(f, "R'{s}'"),
Token::Comma => f.write_str(","),
Token::Whitespace(ws) => write!(f, "{ws}"),
Token::DoubleEq => f.write_str("=="),
@ -518,6 +521,25 @@ impl<'a> Tokenizer<'a> {
}
}
}
// BigQuery uses r or R for raw string literal
// This arm is only taken when the active dialect is BigQueryDialect or
// GenericDialect; for other dialects a leading 'r'/'R' falls through to the
// later match arms.
b @ 'R' | b @ 'r' if dialect_of!(self is BigQueryDialect | GenericDialect) => {
chars.next(); // consume the prefix so the following char can be peeked
match chars.peek() {
// r'...' / R'...': single-quoted raw string.
// NOTE(review): how escapes inside the body are treated depends on
// tokenize_quoted_string, which is not visible here; confirm it
// matches BigQuery's raw-string rules.
Some('\'') => {
let s = self.tokenize_quoted_string(chars, '\'')?;
Ok(Some(Token::RawStringLiteral(s)))
}
// r"..." / R"...": double-quoted raw string. It yields the same token
// variant as the single-quoted form; the token carries only the string
// contents, so which quote character was used is not recorded.
Some('\"') => {
let s = self.tokenize_quoted_string(chars, '\"')?;
Ok(Some(Token::RawStringLiteral(s)))
}
_ => {
// regular identifier starting with an "r" or "R"
// The already-consumed prefix char `b` is handed to tokenize_word
// together with the remaining input.
let s = self.tokenize_word(b, chars);
Ok(Some(Token::make_word(&s, None)))
}
}
}
// Redshift uses lower case n for national string literal
n @ 'N' | n @ 'n' => {
chars.next(); // consume, to check the next char