Support national string literals (N'...')

National string literals are widely used in MS SQL Server and are specified in the ANSI SQL standard.
This commit is contained in:
Nickolay Ponomarev 2019-02-03 05:49:15 +03:00
parent b9f4b503b6
commit 35dd9342e2
4 changed files with 76 additions and 33 deletions

View file

@ -13,6 +13,8 @@ pub enum Value {
Uuid(Uuid),
/// 'string value'
SingleQuotedString(String),
/// N'string value'
NationalStringLiteral(String),
/// Boolean value true or false,
Boolean(bool),
/// Date value
@ -34,6 +36,7 @@ impl ToString for Value {
Value::Double(v) => v.to_string(),
Value::Uuid(v) => v.to_string(),
Value::SingleQuotedString(v) => format!("'{}'", v),
Value::NationalStringLiteral(v) => format!("N'{}'", v),
Value::Boolean(v) => v.to_string(),
Value::Date(v) => v.to_string(),
Value::Time(v) => v.to_string(),

View file

@ -191,7 +191,9 @@ impl Parser {
},
},
Token::Mult => Ok(ASTNode::SQLWildcard),
Token::Number(_) | Token::SingleQuotedString(_) => {
Token::Number(_)
| Token::SingleQuotedString(_)
| Token::NationalStringLiteral(_) => {
self.prev_token();
self.parse_sql_value()
}
@ -205,7 +207,7 @@ impl Parser {
Ok(expr)
}
_ => parser_err!(format!(
"Prefix parser expected a keyword but found {:?}",
"Did not expect {:?} at the beginning of an expression",
t
)),
},
@ -790,7 +792,10 @@ impl Parser {
Token::SingleQuotedString(ref s) => {
Ok(Value::SingleQuotedString(s.to_string()))
}
_ => parser_err!(format!("Unsupported value: {:?}", self.peek_token())),
Token::NationalStringLiteral(ref s) => {
Ok(Value::NationalStringLiteral(s.to_string()))
}
_ => parser_err!(format!("Unsupported value: {:?}", t)),
}
}
None => parser_err!("Expecting a value, but found EOF"),

View file

@ -35,6 +35,8 @@ pub enum Token {
Char(char),
/// Single quoted string, e.g. 'string'
SingleQuotedString(String),
/// "National" string literal, e.g. N'string'
NationalStringLiteral(String),
/// Comma
Comma,
/// Whitespace (space, tab, etc)
@ -94,6 +96,7 @@ impl ToString for Token {
Token::Number(ref n) => n.to_string(),
Token::Char(ref c) => c.to_string(),
Token::SingleQuotedString(ref s) => format!("'{}'", s),
Token::NationalStringLiteral(ref s) => format!("N'{}'", s),
Token::Comma => ",".to_string(),
Token::Whitespace(ws) => ws.to_string(),
Token::Eq => "=".to_string(),
@ -265,40 +268,30 @@ impl<'a> Tokenizer<'a> {
chars.next();
Ok(Some(Token::Whitespace(Whitespace::Newline)))
}
// identifier or keyword
ch if self.dialect.is_identifier_start(ch) => {
let mut s = String::new();
chars.next(); // consume
s.push(ch);
while let Some(&ch) = chars.peek() {
if self.dialect.is_identifier_part(ch) {
chars.next(); // consume
s.push(ch);
} else {
break;
'N' => {
chars.next(); // consume, to check the next char
match chars.peek() {
Some('\'') => {
// N'...' - a <national character string literal>
let s = self.tokenize_single_quoted_string(chars);
Ok(Some(Token::NationalStringLiteral(s)))
}
_ => {
// regular identifier starting with an "N"
let s = self.tokenize_word('N', chars);
Ok(Some(Token::make_word(&s, None)))
}
}
}
// identifier or keyword
ch if self.dialect.is_identifier_start(ch) => {
chars.next(); // consume the first char
let s = self.tokenize_word(ch, chars);
Ok(Some(Token::make_word(&s, None)))
}
// string
'\'' => {
//TODO: handle escaped quotes in string
//TODO: handle newlines in string
//TODO: handle EOF before terminating quote
let mut s = String::new();
chars.next(); // consume
while let Some(&ch) = chars.peek() {
match ch {
'\'' => {
chars.next(); // consume
break;
}
_ => {
chars.next(); // consume
s.push(ch);
}
}
}
let s = self.tokenize_single_quoted_string(chars);
Ok(Some(Token::SingleQuotedString(s)))
}
// delimited (quoted) identifier
@ -403,6 +396,44 @@ impl<'a> Tokenizer<'a> {
}
}
/// Collects an identifier or keyword whose first character has already been
/// taken from `chars`.
///
/// `first_char` becomes the first character of the result. Further characters
/// are consumed from `chars` for as long as the dialect accepts them as
/// identifier parts; the first character that is rejected is left unconsumed.
fn tokenize_word(&self, first_char: char, chars: &mut Peekable<Chars>) -> String {
    let mut word = first_char.to_string();
    loop {
        match chars.peek() {
            Some(&next) if self.dialect.is_identifier_part(next) => {
                word.push(next);
                chars.next(); // advance past the character just appended
            }
            // End of input, or a character that cannot continue the word.
            _ => break,
        }
    }
    word
}
/// Reads a single-quoted string and returns its contents without the quotes.
///
/// `chars` is expected to be positioned at the opening quote; that first
/// character is skipped without being checked. Everything up to the next `'`
/// is returned, and that closing quote is consumed as well. If the input ends
/// before a closing quote is seen, the text read so far is returned.
fn tokenize_single_quoted_string(&self, chars: &mut Peekable<Chars>) -> String {
    // Known gaps (TODO):
    //  - escaped quotes inside the string are not handled
    //  - newlines inside the string get no special treatment
    //  - EOF before the terminating quote is not reported
    //  - 'string' <white space> 'string continuation' is not handled
    chars.next(); // skip the opening quote
    // `take_while` also consumes the first character that fails the predicate,
    // which here is the closing quote.
    chars.by_ref().take_while(|&ch| ch != '\'').collect()
}
fn consume_and_return(
&self,
chars: &mut Peekable<Chars>,

View file

@ -368,13 +368,17 @@ fn parse_aggregate_with_group_by() {
// Checks that string literals in a SELECT list come back as the matching
// `Value` variants: 'one' as `SingleQuotedString` and N'national string' as
// `NationalStringLiteral`.
#[test]
fn parse_literal_string() {
// NOTE(review): this statement and the next look like the before/after
// versions of one line; the second `let` shadows the first, so only the
// two-literal query is actually parsed. Confirm which one belongs here.
let sql = "SELECT 'one'";
let sql = "SELECT 'one', N'national string'";
let select = verified_only_select(sql);
// NOTE(review): the next two assertions expect different projection counts
// (1 vs 2) for the same `select`; the first is presumably the pre-change
// line. Confirm and keep only the one matching the query above.
assert_eq!(1, select.projection.len());
assert_eq!(2, select.projection.len());
// First projected expression: the plain single-quoted literal.
assert_eq!(
&ASTNode::SQLValue(Value::SingleQuotedString("one".to_string())),
expr_from_projection(&select.projection[0])
);
// Second projected expression: the national (N'...') literal.
assert_eq!(
&ASTNode::SQLValue(Value::NationalStringLiteral("national string".to_string())),
expr_from_projection(&select.projection[1])
);
}
#[test]