mirror of
https://github.com/apache/datafusion-sqlparser-rs.git
synced 2025-08-23 15:34:09 +00:00
Merge pull request #165 from nickolay/pr/unterminated-string-literal
Report an error on unterminated string literals (and more)
This commit is contained in:
commit
f614481133
5 changed files with 61 additions and 15 deletions
13
CHANGELOG.md
13
CHANGELOG.md
|
@ -6,8 +6,19 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
|||
Given that the parser produces a typed AST, any changes to the AST will technically be breaking and thus will result in a `0.(N+1)` version. We document changes that break via addition as "Added".
|
||||
|
||||
## [Unreleased]
|
||||
Nothing here yet! Check https://github.com/andygrove/sqlparser-rs/commits/master for undocumented changes.
|
||||
Check https://github.com/andygrove/sqlparser-rs/commits/master for undocumented changes.
|
||||
|
||||
### Changed
|
||||
- Change `Ident` (previously a simple `String`) to store the parsed (unquoted) `value` of the identifier and the `quote_style` separately (#143) - thanks @apparebit!
|
||||
- Support Snowflake's `FROM (table_name)` (#155) - thanks @eyalleshem!
|
||||
|
||||
### Added
|
||||
- Support MSSQL `TOP (<N>) [ PERCENT ] [ WITH TIES ]` (#150) - thanks @alexkyllo!
|
||||
- Support MySQL `LIMIT row_count OFFSET offset` (not followed by `ROW` or `ROWS`) and remember which variant was parsed (#158) - thanks @mjibson!
|
||||
- Support PostgreSQL `CREATE TABLE IF NOT EXISTS table_name` (#163) - thanks @alex-dukhno!
|
||||
|
||||
### Fixed
|
||||
- Report an error for unterminated string literals (#165)
|
||||
|
||||
## [0.5.0] - 2019-10-10
|
||||
|
||||
|
|
|
@ -12,8 +12,6 @@
|
|||
|
||||
#![warn(clippy::all)]
|
||||
|
||||
use simple_logger;
|
||||
|
||||
//! A small command-line app to run the parser.
|
||||
//! Run with `cargo run --example cli`
|
||||
use std::fs;
|
||||
|
|
|
@ -1324,6 +1324,16 @@ impl Parser {
|
|||
}
|
||||
// MSSQL supports single-quoted strings as aliases for columns
|
||||
// We accept them as table aliases too, although MSSQL does not.
|
||||
//
|
||||
// Note that this conflicts with an obscure rule from the SQL
|
||||
// standard, which we don't implement:
|
||||
// https://crate.io/docs/sql-99/en/latest/chapters/07.html#character-string-literal-s
|
||||
// "[Obscure Rule] SQL allows you to break a long <character
|
||||
// string literal> up into two or more smaller <character string
|
||||
// literal>s, split by a <separator> that includes a newline
|
||||
// character. When it sees such a <literal>, your DBMS will
|
||||
// ignore the <separator> and treat the multiple strings as
|
||||
// a single <literal>."
|
||||
Some(Token::SingleQuotedString(ref s)) => Ok(Some(Ident::with_quote('\'', s.clone()))),
|
||||
not_an_ident => {
|
||||
if after_as {
|
||||
|
|
|
@ -278,7 +278,7 @@ impl<'a> Tokenizer<'a> {
|
|||
match chars.peek() {
|
||||
Some('\'') => {
|
||||
// N'...' - a <national character string literal>
|
||||
let s = self.tokenize_single_quoted_string(chars);
|
||||
let s = self.tokenize_single_quoted_string(chars)?;
|
||||
Ok(Some(Token::NationalStringLiteral(s)))
|
||||
}
|
||||
_ => {
|
||||
|
@ -295,7 +295,7 @@ impl<'a> Tokenizer<'a> {
|
|||
match chars.peek() {
|
||||
Some('\'') => {
|
||||
// X'...' - a <binary string literal>
|
||||
let s = self.tokenize_single_quoted_string(chars);
|
||||
let s = self.tokenize_single_quoted_string(chars)?;
|
||||
Ok(Some(Token::HexStringLiteral(s)))
|
||||
}
|
||||
_ => {
|
||||
|
@ -313,7 +313,7 @@ impl<'a> Tokenizer<'a> {
|
|||
}
|
||||
// string
|
||||
'\'' => {
|
||||
let s = self.tokenize_single_quoted_string(chars);
|
||||
let s = self.tokenize_single_quoted_string(chars)?;
|
||||
Ok(Some(Token::SingleQuotedString(s)))
|
||||
}
|
||||
// delimited (quoted) identifier
|
||||
|
@ -431,11 +431,10 @@ impl<'a> Tokenizer<'a> {
|
|||
}
|
||||
|
||||
/// Read a single quoted string, starting with the opening quote.
|
||||
fn tokenize_single_quoted_string(&self, chars: &mut Peekable<Chars<'_>>) -> String {
|
||||
//TODO: handle escaped quotes in string
|
||||
//TODO: handle newlines in string
|
||||
//TODO: handle EOF before terminating quote
|
||||
//TODO: handle 'string' <white space> 'string continuation'
|
||||
fn tokenize_single_quoted_string(
|
||||
&self,
|
||||
chars: &mut Peekable<Chars<'_>>,
|
||||
) -> Result<String, TokenizerError> {
|
||||
let mut s = String::new();
|
||||
chars.next(); // consume the opening quote
|
||||
while let Some(&ch) = chars.peek() {
|
||||
|
@ -447,7 +446,7 @@ impl<'a> Tokenizer<'a> {
|
|||
s.push('\'');
|
||||
chars.next();
|
||||
} else {
|
||||
break;
|
||||
return Ok(s);
|
||||
}
|
||||
}
|
||||
_ => {
|
||||
|
@ -456,7 +455,10 @@ impl<'a> Tokenizer<'a> {
|
|||
}
|
||||
}
|
||||
}
|
||||
s
|
||||
Err(TokenizerError(format!(
|
||||
"Unterminated string literal at Line: {}, Col: {}",
|
||||
self.line, self.col
|
||||
)))
|
||||
}
|
||||
|
||||
fn tokenize_multiline_comment(
|
||||
|
@ -640,6 +642,31 @@ mod tests {
|
|||
compare(expected, tokens);
|
||||
}
|
||||
|
||||
// Checks that CRLF and LF line breaks inside a single-quoted literal are
// kept verbatim in the resulting `SingleQuotedString` token value.
#[test]
|
||||
fn tokenize_newline_in_string_literal() {
|
||||
let sql = String::from("'foo\r\nbar\nbaz'");
|
||||
|
||||
let dialect = GenericDialect {};
|
||||
let mut tokenizer = Tokenizer::new(&dialect, &sql);
|
||||
let tokens = tokenizer.tokenize().unwrap();
|
||||
let expected = vec![Token::SingleQuotedString("foo\r\nbar\nbaz".to_string())];
|
||||
compare(expected, tokens);
|
||||
}
|
||||
|
||||
// Checks that input ending before the closing quote makes `tokenize()`
// return a `TokenizerError` carrying the "Unterminated string literal"
// message with the line and column shown in the expected string below.
#[test]
|
||||
fn tokenize_unterminated_string_literal() {
|
||||
let sql = String::from("select 'foo");
|
||||
|
||||
let dialect = GenericDialect {};
|
||||
let mut tokenizer = Tokenizer::new(&dialect, &sql);
|
||||
assert_eq!(
|
||||
tokenizer.tokenize(),
|
||||
Err(TokenizerError(
|
||||
"Unterminated string literal at Line: 1, Col: 8".to_string()
|
||||
))
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn tokenize_invalid_string_cols() {
|
||||
let sql = String::from("\n\nSELECT * FROM table\tمصطفىh");
|
||||
|
|
|
@ -1031,7 +1031,7 @@ fn parse_create_external_table() {
|
|||
name VARCHAR(100) NOT NULL,\
|
||||
lat DOUBLE NULL,\
|
||||
lng DOUBLE)\
|
||||
STORED AS TEXTFILE LOCATION '/tmp/example.csv";
|
||||
STORED AS TEXTFILE LOCATION '/tmp/example.csv'";
|
||||
let ast = one_statement_parses_to(
|
||||
sql,
|
||||
"CREATE EXTERNAL TABLE uk_cities (\
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue