Merge pull request #165 from nickolay/pr/unterminated-string-literal

Report an error on unterminated string literals (and more)
This commit is contained in:
Nickolay Ponomarev 2020-05-26 06:42:35 +03:00 committed by GitHub
commit f614481133
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 61 additions and 15 deletions

View file

@ -6,10 +6,21 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
Given that the parser produces a typed AST, any changes to the AST will technically be breaking and thus will result in a `0.(N+1)` version. We document changes that break via addition as "Added".
## [Unreleased]
Nothing here yet! Check https://github.com/andygrove/sqlparser-rs/commits/master for undocumented changes.
Check https://github.com/andygrove/sqlparser-rs/commits/master for undocumented changes.
### Changed
- Change `Ident` (previously a simple `String`) to store the parsed (unquoted) `value` of the identifier and the `quote_style` separately (#143) - thanks @apparebit!
- Support Snowflake's `FROM (table_name)` (#155) - thanks @eyalleshem!
## [0.5.0] - 2019-10-10
### Added
- Support MSSQL `TOP (<N>) [ PERCENT ] [ WITH TIES ]` (#150) - thanks @alexkyllo!
- Support MySQL `LIMIT row_count OFFSET offset` (not followed by `ROW` or `ROWS`) and remember which variant was parsed (#158) - thanks @mjibson!
- Support PostgreSQL `CREATE TABLE IF NOT EXISTS table_name` (#163) - thanks @alex-dukhno!
### Fixed
- Report an error for unterminated string literals (#165)
## [0.5.0] - 2019-10-10
### Changed
- Replace the `Value::Long(u64)` and `Value::Double(f64)` variants with `Value::Number(String)` to avoid losing precision when parsing decimal literals (#130) - thanks @benesch!

View file

@ -12,8 +12,6 @@
#![warn(clippy::all)]
use simple_logger;
///! A small command-line app to run the parser.
/// Run with `cargo run --example cli`
use std::fs;

View file

@ -1324,6 +1324,16 @@ impl Parser {
}
// MSSQL supports single-quoted strings as aliases for columns
// We accept them as table aliases too, although MSSQL does not.
//
// Note, that this conflicts with an obscure rule from the SQL
// standard, which we don't implement:
// https://crate.io/docs/sql-99/en/latest/chapters/07.html#character-string-literal-s
// "[Obscure Rule] SQL allows you to break a long <character
// string literal> up into two or more smaller <character string
// literal>s, split by a <separator> that includes a newline
// character. When it sees such a <literal>, your DBMS will
// ignore the <separator> and treat the multiple strings as
// a single <literal>."
Some(Token::SingleQuotedString(ref s)) => Ok(Some(Ident::with_quote('\'', s.clone()))),
not_an_ident => {
if after_as {

View file

@ -278,7 +278,7 @@ impl<'a> Tokenizer<'a> {
match chars.peek() {
Some('\'') => {
// N'...' - a <national character string literal>
let s = self.tokenize_single_quoted_string(chars);
let s = self.tokenize_single_quoted_string(chars)?;
Ok(Some(Token::NationalStringLiteral(s)))
}
_ => {
@ -295,7 +295,7 @@ impl<'a> Tokenizer<'a> {
match chars.peek() {
Some('\'') => {
// X'...' - a <binary string literal>
let s = self.tokenize_single_quoted_string(chars);
let s = self.tokenize_single_quoted_string(chars)?;
Ok(Some(Token::HexStringLiteral(s)))
}
_ => {
@ -313,7 +313,7 @@ impl<'a> Tokenizer<'a> {
}
// string
'\'' => {
let s = self.tokenize_single_quoted_string(chars);
let s = self.tokenize_single_quoted_string(chars)?;
Ok(Some(Token::SingleQuotedString(s)))
}
// delimited (quoted) identifier
@ -431,11 +431,10 @@ impl<'a> Tokenizer<'a> {
}
/// Read a single quoted string, starting with the opening quote.
fn tokenize_single_quoted_string(&self, chars: &mut Peekable<Chars<'_>>) -> String {
//TODO: handle escaped quotes in string
//TODO: handle newlines in string
//TODO: handle EOF before terminating quote
//TODO: handle 'string' <white space> 'string continuation'
fn tokenize_single_quoted_string(
&self,
chars: &mut Peekable<Chars<'_>>,
) -> Result<String, TokenizerError> {
let mut s = String::new();
chars.next(); // consume the opening quote
while let Some(&ch) = chars.peek() {
@ -447,7 +446,7 @@ impl<'a> Tokenizer<'a> {
s.push('\'');
chars.next();
} else {
break;
return Ok(s);
}
}
_ => {
@ -456,7 +455,10 @@ impl<'a> Tokenizer<'a> {
}
}
}
s
Err(TokenizerError(format!(
"Unterminated string literal at Line: {}, Col: {}",
self.line, self.col
)))
}
fn tokenize_multiline_comment(
@ -640,6 +642,31 @@ mod tests {
compare(expected, tokens);
}
#[test]
fn tokenize_newline_in_string_literal() {
    // Newlines (both \r\n and bare \n) inside a single-quoted literal are
    // legal and must be carried through to the token verbatim.
    let dialect = GenericDialect {};
    let input = "'foo\r\nbar\nbaz'".to_string();
    let mut tokenizer = Tokenizer::new(&dialect, &input);
    let actual = tokenizer.tokenize().unwrap();
    compare(
        vec![Token::SingleQuotedString("foo\r\nbar\nbaz".to_string())],
        actual,
    );
}
#[test]
fn tokenize_unterminated_string_literal() {
    // An opening quote with no matching close must produce a TokenizerError
    // whose message points at the opening quote (line 1, column 8 here).
    let dialect = GenericDialect {};
    let input = "select 'foo".to_string();
    let mut tokenizer = Tokenizer::new(&dialect, &input);
    let result = tokenizer.tokenize();
    assert_eq!(
        result,
        Err(TokenizerError(
            "Unterminated string literal at Line: 1, Col: 8".to_string()
        ))
    );
}
#[test]
fn tokenize_invalid_string_cols() {
let sql = String::from("\n\nSELECT * FROM table\tمصطفىh");

View file

@ -1031,7 +1031,7 @@ fn parse_create_external_table() {
name VARCHAR(100) NOT NULL,\
lat DOUBLE NULL,\
lng DOUBLE)\
STORED AS TEXTFILE LOCATION '/tmp/example.csv";
STORED AS TEXTFILE LOCATION '/tmp/example.csv'";
let ast = one_statement_parses_to(
sql,
"CREATE EXTERNAL TABLE uk_cities (\