From 35dd9342e2fa318be60b2dc5fb29d43eeb07680d Mon Sep 17 00:00:00 2001 From: Nickolay Ponomarev Date: Sun, 3 Feb 2019 05:49:15 +0300 Subject: [PATCH] Support national string literals (N'...') Widely used in MS SQL and specified in ANSI. --- src/sqlast/value.rs | 3 ++ src/sqlparser.rs | 11 +++-- src/sqltokenizer.rs | 87 ++++++++++++++++++++++++++------------ tests/sqlparser_generic.rs | 8 +++- 4 files changed, 76 insertions(+), 33 deletions(-) diff --git a/src/sqlast/value.rs b/src/sqlast/value.rs index ec11b17e..a061080a 100644 --- a/src/sqlast/value.rs +++ b/src/sqlast/value.rs @@ -13,6 +13,8 @@ pub enum Value { Uuid(Uuid), /// 'string value' SingleQuotedString(String), + /// N'string value' + NationalStringLiteral(String), /// Boolean value true or false, Boolean(bool), /// Date value @@ -34,6 +36,7 @@ impl ToString for Value { Value::Double(v) => v.to_string(), Value::Uuid(v) => v.to_string(), Value::SingleQuotedString(v) => format!("'{}'", v), + Value::NationalStringLiteral(v) => format!("N'{}'", v), Value::Boolean(v) => v.to_string(), Value::Date(v) => v.to_string(), Value::Time(v) => v.to_string(), diff --git a/src/sqlparser.rs b/src/sqlparser.rs index b6325b10..a6784aa1 100644 --- a/src/sqlparser.rs +++ b/src/sqlparser.rs @@ -191,7 +191,9 @@ impl Parser { }, }, Token::Mult => Ok(ASTNode::SQLWildcard), - Token::Number(_) | Token::SingleQuotedString(_) => { + Token::Number(_) + | Token::SingleQuotedString(_) + | Token::NationalStringLiteral(_) => { self.prev_token(); self.parse_sql_value() } @@ -205,7 +207,7 @@ impl Parser { Ok(expr) } _ => parser_err!(format!( - "Prefix parser expected a keyword but found {:?}", + "Did not expect {:?} at the beginning of an expression", t )), }, @@ -790,7 +792,10 @@ impl Parser { Token::SingleQuotedString(ref s) => { Ok(Value::SingleQuotedString(s.to_string())) } - _ => parser_err!(format!("Unsupported value: {:?}", self.peek_token())), + Token::NationalStringLiteral(ref s) => { + 
Ok(Value::NationalStringLiteral(s.to_string())) + } + _ => parser_err!(format!("Unsupported value: {:?}", t)), } } None => parser_err!("Expecting a value, but found EOF"), diff --git a/src/sqltokenizer.rs b/src/sqltokenizer.rs index 602db561..aec177d7 100644 --- a/src/sqltokenizer.rs +++ b/src/sqltokenizer.rs @@ -35,6 +35,8 @@ pub enum Token { Char(char), /// Single quoted string: i.e: 'string' SingleQuotedString(String), + /// "National" string literal: i.e: N'string' + NationalStringLiteral(String), /// Comma Comma, /// Whitespace (space, tab, etc) @@ -94,6 +96,7 @@ impl ToString for Token { Token::Number(ref n) => n.to_string(), Token::Char(ref c) => c.to_string(), Token::SingleQuotedString(ref s) => format!("'{}'", s), + Token::NationalStringLiteral(ref s) => format!("N'{}'", s), Token::Comma => ",".to_string(), Token::Whitespace(ws) => ws.to_string(), Token::Eq => "=".to_string(), @@ -265,40 +268,30 @@ impl<'a> Tokenizer<'a> { chars.next(); Ok(Some(Token::Whitespace(Whitespace::Newline))) } - // identifier or keyword - ch if self.dialect.is_identifier_start(ch) => { - let mut s = String::new(); - chars.next(); // consume - s.push(ch); - while let Some(&ch) = chars.peek() { - if self.dialect.is_identifier_part(ch) { - chars.next(); // consume - s.push(ch); - } else { - break; + 'N' => { + chars.next(); // consume, to check the next char + match chars.peek() { + Some('\'') => { + // N'...' 
- a <national character string literal> + let s = self.tokenize_single_quoted_string(chars); + Ok(Some(Token::NationalStringLiteral(s))) + } + _ => { + // regular identifier starting with an "N" + let s = self.tokenize_word('N', chars); + Ok(Some(Token::make_word(&s, None))) } } + } + // identifier or keyword + ch if self.dialect.is_identifier_start(ch) => { + chars.next(); // consume the first char + let s = self.tokenize_word(ch, chars); Ok(Some(Token::make_word(&s, None))) } // string '\'' => { - //TODO: handle escaped quotes in string - //TODO: handle newlines in string - //TODO: handle EOF before terminating quote - let mut s = String::new(); - chars.next(); // consume - while let Some(&ch) = chars.peek() { - match ch { - '\'' => { - chars.next(); // consume - break; - } - _ => { - chars.next(); // consume - s.push(ch); - } - } - } + let s = self.tokenize_single_quoted_string(chars); Ok(Some(Token::SingleQuotedString(s))) } // delimited (quoted) identifier @@ -403,6 +396,44 @@ impl<'a> Tokenizer<'a> { } } + /// Tokenize an identifier or keyword, after the first char is already consumed. + fn tokenize_word(&self, first_char: char, chars: &mut Peekable) -> String { + let mut s = String::new(); + s.push(first_char); + while let Some(&ch) = chars.peek() { + if self.dialect.is_identifier_part(ch) { + chars.next(); // consume + s.push(ch); + } else { + break; + } + } + s + } + + /// Read a single quoted string, starting with the opening quote.
+ fn tokenize_single_quoted_string(&self, chars: &mut Peekable) -> String { + //TODO: handle escaped quotes in string + //TODO: handle newlines in string + //TODO: handle EOF before terminating quote + //TODO: handle 'string' 'string continuation' + let mut s = String::new(); + chars.next(); // consume the opening quote + while let Some(&ch) = chars.peek() { + match ch { + '\'' => { + chars.next(); // consume + break; + } + _ => { + chars.next(); // consume + s.push(ch); + } + } + } + s + } + fn consume_and_return( &self, chars: &mut Peekable, diff --git a/tests/sqlparser_generic.rs b/tests/sqlparser_generic.rs index bb2778aa..9784eed7 100644 --- a/tests/sqlparser_generic.rs +++ b/tests/sqlparser_generic.rs @@ -368,13 +368,17 @@ fn parse_aggregate_with_group_by() { #[test] fn parse_literal_string() { - let sql = "SELECT 'one'"; + let sql = "SELECT 'one', N'national string'"; let select = verified_only_select(sql); - assert_eq!(1, select.projection.len()); + assert_eq!(2, select.projection.len()); assert_eq!( &ASTNode::SQLValue(Value::SingleQuotedString("one".to_string())), expr_from_projection(&select.projection[0]) ); + assert_eq!( + &ASTNode::SQLValue(Value::NationalStringLiteral("national string".to_string())), + expr_from_projection(&select.projection[1]) + ); } #[test]