From d0a65ffd053b68dd6841815ae5069f8563e32900 Mon Sep 17 00:00:00 2001 From: Nickolay Ponomarev Date: Sun, 13 Jan 2019 01:33:41 +0300 Subject: [PATCH 1/4] Remove Token::String, as it's never emitted Indeed, given that there is Token::SingleQuotedString and Token::Identifier, there's no other "string" that would make sense... --- src/sqlparser.rs | 3 --- src/sqltokenizer.rs | 5 +---- 2 files changed, 1 insertion(+), 7 deletions(-) diff --git a/src/sqlparser.rs b/src/sqlparser.rs index 25043087..0633500f 100644 --- a/src/sqlparser.rs +++ b/src/sqlparser.rs @@ -158,7 +158,6 @@ impl Parser { } } Token::Number(_) - | Token::String(_) | Token::SingleQuotedString(_) | Token::DoubleQuotedString(_) => { self.prev_token(); @@ -755,7 +754,6 @@ impl Parser { Err(e) => parser_err!(format!("Could not parse '{}' as i64: {}", n, e)), }, Token::Identifier(id) => Ok(Value::String(id.to_string())), - Token::String(ref s) => Ok(Value::String(s.to_string())), Token::SingleQuotedString(ref s) => { Ok(Value::SingleQuotedString(s.to_string())) } @@ -792,7 +790,6 @@ impl Parser { /// Parse a literal string pub fn parse_literal_string(&mut self) -> Result { match self.next_token() { - Some(Token::String(ref s)) => Ok(s.clone()), Some(Token::SingleQuotedString(ref s)) => Ok(s.clone()), Some(Token::DoubleQuotedString(ref s)) => Ok(s.clone()), other => parser_err!(format!("Expected literal string, found {:?}", other)), diff --git a/src/sqltokenizer.rs b/src/sqltokenizer.rs index 8f3e15d7..50408822 100644 --- a/src/sqltokenizer.rs +++ b/src/sqltokenizer.rs @@ -32,8 +32,7 @@ pub enum Token { Keyword(String), /// Numeric literal Number(String), - /// String literal - String(String), + /// A character that could not be tokenized Char(char), /// Single quoted string: i.e: 'string' SingleQuotedString(String), @@ -97,7 +96,6 @@ impl ToString for Token { Token::Identifier(ref id) => id.to_string(), Token::Keyword(ref k) => k.to_string(), Token::Number(ref n) => n.to_string(), - Token::String(ref s) => 
s.to_string(), Token::Char(ref c) => c.to_string(), Token::SingleQuotedString(ref s) => format!("'{}'", s), Token::DoubleQuotedString(ref s) => format!("\"{}\"", s), @@ -194,7 +192,6 @@ impl<'a> Tokenizer<'a> { Token::Identifier(s) => self.col += s.len() as u64, Token::Keyword(s) => self.col += s.len() as u64, Token::Number(s) => self.col += s.len() as u64, - Token::String(s) => self.col += s.len() as u64, Token::SingleQuotedString(s) => self.col += s.len() as u64, Token::DoubleQuotedString(s) => self.col += s.len() as u64, _ => self.col += 1, From efdbf0f9dc0560e749bab93a560ba2a2117a2a20 Mon Sep 17 00:00:00 2001 From: Nickolay Ponomarev Date: Sun, 13 Jan 2019 01:43:28 +0300 Subject: [PATCH 2/4] Remove Token::Identifier match arm from parse_value An identifier is not a literal value, and parse_value is not called on such a token anyway. --- src/sqlparser.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sqlparser.rs b/src/sqlparser.rs index 0633500f..3ef4c095 100644 --- a/src/sqlparser.rs +++ b/src/sqlparser.rs @@ -734,6 +734,7 @@ impl Parser { Ok(values) } + /// Parse a literal value (numbers, strings, date/time, booleans) fn parse_value(&mut self) -> Result { match self.next_token() { Some(t) => { @@ -753,7 +754,6 @@ impl Parser { Ok(n) => Ok(Value::Long(n)), Err(e) => parser_err!(format!("Could not parse '{}' as i64: {}", n, e)), }, - Token::Identifier(id) => Ok(Value::String(id.to_string())), Token::SingleQuotedString(ref s) => { Ok(Value::SingleQuotedString(s.to_string())) } From 56884dc7005ddd370d896f31c0c85094e05c550d Mon Sep 17 00:00:00 2001 From: Nickolay Ponomarev Date: Sun, 13 Jan 2019 01:51:07 +0300 Subject: [PATCH 3/4] Remove Value::DoubleQuotedString ...and parser support for the corresponding token, as "..." in SQL[*] is not a literal string like we parse it - but a quoted identifier (which I intend to implement later). 
[*] in all the RDBMSes I know, except for sqlite which has complex rules in the name of "compatibility": https://www.sqlite.org/lang_keywords.html --- src/sqlast/value.rs | 3 --- src/sqlparser.rs | 8 +------- 2 files changed, 1 insertion(+), 10 deletions(-) diff --git a/src/sqlast/value.rs b/src/sqlast/value.rs index 5bfb5299..a441987a 100644 --- a/src/sqlast/value.rs +++ b/src/sqlast/value.rs @@ -15,8 +15,6 @@ pub enum Value { Uuid(Uuid), /// 'string value' SingleQuotedString(String), - /// "string value" - DoubleQuotedString(String), /// Boolean value true or false, Boolean(bool), /// Date value @@ -39,7 +37,6 @@ impl ToString for Value { Value::String(v) => v.to_string(), Value::Uuid(v) => v.to_string(), Value::SingleQuotedString(v) => format!("'{}'", v), - Value::DoubleQuotedString(v) => format!("\"{}\"", v), Value::Boolean(v) => v.to_string(), Value::Date(v) => v.to_string(), Value::Time(v) => v.to_string(), diff --git a/src/sqlparser.rs b/src/sqlparser.rs index 3ef4c095..54652560 100644 --- a/src/sqlparser.rs +++ b/src/sqlparser.rs @@ -157,9 +157,7 @@ impl Parser { } } } - Token::Number(_) - | Token::SingleQuotedString(_) - | Token::DoubleQuotedString(_) => { + Token::Number(_) | Token::SingleQuotedString(_) => { self.prev_token(); self.parse_sql_value() } @@ -757,9 +755,6 @@ impl Parser { Token::SingleQuotedString(ref s) => { Ok(Value::SingleQuotedString(s.to_string())) } - Token::DoubleQuotedString(ref s) => { - Ok(Value::DoubleQuotedString(s.to_string())) - } _ => parser_err!(format!("Unsupported value: {:?}", self.peek_token())), } } @@ -791,7 +786,6 @@ impl Parser { pub fn parse_literal_string(&mut self) -> Result { match self.next_token() { Some(Token::SingleQuotedString(ref s)) => Ok(s.clone()), - Some(Token::DoubleQuotedString(ref s)) => Ok(s.clone()), other => parser_err!(format!("Expected literal string, found {:?}", other)), } } From 078eb677a1f713cd01b73719864b47fcdc2c9c1c Mon Sep 17 00:00:00 2001 From: Nickolay Ponomarev Date: Sun, 13 Jan 2019 
02:08:44 +0300 Subject: [PATCH 4/4] Remove Value::String Its existence alongside SingleQuotedString simply doesn't make sense: `'a string'` is a string literal, while `a string` is not a "value". It's only used in postgresql-specific tab-separated-values parser to store the string representation of a field's value. For that use-case Option<String> looks like a more appropriate choice than Value. --- src/sqlast/mod.rs | 4 ++-- src/sqlast/value.rs | 5 +---- src/sqlparser.rs | 10 +++++----- 3 files changed, 8 insertions(+), 11 deletions(-) diff --git a/src/sqlast/mod.rs b/src/sqlast/mod.rs index 66040ea5..54b650a8 100644 --- a/src/sqlast/mod.rs +++ b/src/sqlast/mod.rs @@ -103,7 +103,7 @@ pub enum ASTNode { /// COLUMNS columns: Vec, /// VALUES a vector of values to be copied - values: Vec, + values: Vec>, }, /// UPDATE SQLUpdate { @@ -290,7 +290,7 @@ impl ToString for ASTNode { "\n{}", values .iter() - .map(|v| v.to_string()) + .map(|v| v.clone().unwrap_or("\\N".to_string())) .collect::>() .join("\t") ); diff --git a/src/sqlast/value.rs b/src/sqlast/value.rs index a441987a..ec11b17e 100644 --- a/src/sqlast/value.rs +++ b/src/sqlast/value.rs @@ -2,15 +2,13 @@ use chrono::{offset::FixedOffset, DateTime, NaiveDate, NaiveDateTime, NaiveTime} use uuid::Uuid; -/// SQL values such as int, double, string timestamp +/// SQL values such as int, double, string, timestamp #[derive(Debug, Clone, PartialEq)] pub enum Value { /// Literal signed long Long(i64), /// Literal floating point value Double(f64), - /// Unquoted string - String(String), /// Uuid value Uuid(Uuid), /// 'string value' @@ -34,7 +32,6 @@ impl ToString for Value { match self { Value::Long(v) => v.to_string(), Value::Double(v) => v.to_string(), - Value::String(v) => v.to_string(), Value::Uuid(v) => v.to_string(), Value::SingleQuotedString(v) => format!("'{}'", v), Value::Boolean(v) => v.to_string(), diff --git a/src/sqlparser.rs b/src/sqlparser.rs index 54652560..42a39b01 100644 --- a/src/sqlparser.rs +++ b/src/sqlparser.rs 
@@ -690,7 +690,7 @@ impl Parser { /// Parse a tab separated values in /// COPY payload - fn parse_tsv(&mut self) -> Result, ParserError> { + fn parse_tsv(&mut self) -> Result>, ParserError> { let values = self.parse_tab_value()?; Ok(values) } @@ -699,17 +699,17 @@ impl Parser { Ok(ASTNode::SQLValue(self.parse_value()?)) } - fn parse_tab_value(&mut self) -> Result, ParserError> { + fn parse_tab_value(&mut self) -> Result>, ParserError> { let mut values = vec![]; let mut content = String::from(""); while let Some(t) = self.next_token_no_skip() { match t { Token::Whitespace(Whitespace::Tab) => { - values.push(Value::String(content.to_string())); + values.push(Some(content.to_string())); content.clear(); } Token::Whitespace(Whitespace::Newline) => { - values.push(Value::String(content.to_string())); + values.push(Some(content.to_string())); content.clear(); } Token::Backslash => { @@ -718,7 +718,7 @@ impl Parser { } if let Some(token) = self.next_token() { if token == Token::Identifier("N".to_string()) { - values.push(Value::Null); + values.push(None); } } else { continue;