Mirror of https://github.com/apache/datafusion-sqlparser-rs.git (synced 2025-08-22 23:14:07 +00:00)
Support general "typed string" literals (#187)
Fixes #168 by enabling `DATE` and other keywords to be used as identifiers when not followed by a string literal. A "typed string" is our term for the generalized form of the `DATE '...'` / `TIME '...'` / `TIMESTAMP '...'` literals, represented as `TypedString { data_type, value }` in the AST. Unlike the DATE/TIME/TIMESTAMP literals, the generalized form is a non-standard extension, supported at least by PostgreSQL. This is a port of MaterializeInc/materialize#3146.

Co-authored-by: Nikhil Benesch <nikhil.benesch@gmail.com>
Co-authored-by: Nickolay Ponomarev <asqueella@gmail.com>
parent 34548e890b
commit 6cdd4a146d

5 changed files with 116 additions and 43 deletions
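As context for the diff below, here is a minimal sketch (not part of the commit) of how code using the crate can construct and consume the new `Expr::TypedString` variant; it assumes only the `sqlparser::ast` types shown in this diff:

    use sqlparser::ast::{DataType, Expr};

    fn main() {
        // `DATE '2020-01-01'` now parses to this generalized node instead of a
        // dedicated `Value::Date` variant (which this commit removes).
        let expr = Expr::TypedString {
            data_type: DataType::Date,
            value: "2020-01-01".to_string(),
        };

        // Downstream code matches on the data type to interpret the value.
        match expr {
            Expr::TypedString { data_type: DataType::Date, value } => {
                println!("date literal: {}", value);
            }
            other => println!("some other expression: {:?}", other),
        }
    }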
@@ -13,7 +13,9 @@ Check https://github.com/andygrove/sqlparser-rs/commits/master for undocumented
 - Change `Ident` (previously a simple `String`) to store the parsed (unquoted) `value` of the identifier and the `quote_style` separately (#143) - thanks @apparebit!
 - Support Snowflake's `FROM (table_name)` (#155) - thanks @eyalleshem!
 - Add line and column number to TokenizerError (#194) - thanks @Dandandan!
+- Use Token::EOF instead of Option<Token> (#195)
 - Make the units keyword following `INTERVAL '...'` optional (#184) - thanks @maxcountryman!
+- Generalize `DATE`/`TIME`/`TIMESTAMP` literals representation in the AST (`TypedString { data_type, value }`) and allow `DATE` and other keywords to be used as identifiers when not followed by a string (#187) - thanks @maxcountryman!
 
 ### Added
 - Support MSSQL `TOP (<N>) [ PERCENT ] [ WITH TIES ]` (#150) - thanks @alexkyllo!

@@ -26,6 +28,7 @@ Check https://github.com/andygrove/sqlparser-rs/commits/master for undocumented
 - Support `LISTAGG()` (#174) - thanks @maxcountryman!
 - Support the string concatenation operator `||` (#178) - thanks @Dandandan!
 - Support bitwise AND (`&`), OR (`|`), XOR (`^`) (#181) - thanks @Dandandan!
+- Add serde support to AST structs and enums (#196) - thanks @panarch!
 
 ### Fixed
 - Report an error for unterminated string literals (#165)
@@ -210,6 +210,10 @@ pub enum Expr {
     Nested(Box<Expr>),
     /// A literal value, such as string, number, date or NULL
     Value(Value),
+    /// A constant of form `<data_type> 'value'`.
+    /// This can represent ANSI SQL `DATE`, `TIME`, and `TIMESTAMP` literals (such as `DATE '2020-01-01'`),
+    /// as well as constants of other types (a non-standard PostgreSQL extension).
+    TypedString { data_type: DataType, value: String },
     /// Scalar function call e.g. `LEFT(foo, 5)`
     Function(Function),
     /// `CASE [<operand>] WHEN <condition> THEN <result> ... [ELSE <result>] END`

@@ -284,6 +288,10 @@ impl fmt::Display for Expr {
             Expr::Collate { expr, collation } => write!(f, "{} COLLATE {}", expr, collation),
             Expr::Nested(ast) => write!(f, "({})", ast),
             Expr::Value(v) => write!(f, "{}", v),
+            Expr::TypedString { data_type, value } => {
+                write!(f, "{}", data_type)?;
+                write!(f, " '{}'", &value::escape_single_quote_string(value))
+            }
             Expr::Function(fun) => write!(f, "{}", fun),
             Expr::Case {
                 operand,
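The new `Display` arm delegates quoting to `value::escape_single_quote_string`, which is not shown in this diff. As a hedged illustration of what such a helper presumably does (doubling embedded single quotes, the usual SQL convention; the crate's real helper may differ, e.g. by returning a wrapper type rather than a `String`):

    // Sketch only, not the crate's actual helper.
    fn escape_single_quote_string(s: &str) -> String {
        s.replace('\'', "''")
    }

    fn main() {
        assert_eq!(escape_single_quote_string("it's"), "it''s");
    }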
@@ -33,12 +33,6 @@ pub enum Value {
     HexStringLiteral(String),
     /// Boolean value true or false
     Boolean(bool),
-    /// `DATE '...'` literals
-    Date(String),
-    /// `TIME '...'` literals
-    Time(String),
-    /// `TIMESTAMP '...'` literals
-    Timestamp(String),
     /// INTERVAL literals, roughly in the following format:
     /// `INTERVAL '<value>' [ <leading_field> [ (<leading_precision>) ] ]
     /// [ TO <last_field> [ (<fractional_seconds_precision>) ] ]`,

@@ -70,9 +64,6 @@ impl fmt::Display for Value {
             Value::NationalStringLiteral(v) => write!(f, "N'{}'", v),
             Value::HexStringLiteral(v) => write!(f, "X'{}'", v),
             Value::Boolean(v) => write!(f, "{}", v),
-            Value::Date(v) => write!(f, "DATE '{}'", escape_single_quote_string(v)),
-            Value::Time(v) => write!(f, "TIME '{}'", escape_single_quote_string(v)),
-            Value::Timestamp(v) => write!(f, "TIMESTAMP '{}'", escape_single_quote_string(v)),
             Value::Interval {
                 value,
                 leading_field: Some(DateTimeField::Second),
src/parser.rs | 105
@@ -35,6 +35,15 @@ macro_rules! parser_err {
     };
 }
 
+// Returns a successful result if the optional expression is some
+macro_rules! return_ok_if_some {
+    ($e:expr) => {{
+        if let Some(v) = $e {
+            return Ok(v);
+        }
+    }};
+}
+
 #[derive(PartialEq)]
 pub enum IsOptional {
     Optional,
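For readers less familiar with `macro_rules!`, a call such as `return_ok_if_some!(expr)` expands, roughly, to an early return from the enclosing function (illustrative function and names invented here):

    fn example(maybe_value: Option<i32>) -> Result<i32, String> {
        // -- rough expansion of return_ok_if_some!(maybe_value) --
        if let Some(v) = maybe_value {
            return Ok(v);
        }
        // -- end expansion: otherwise fall through and keep going --
        Err("no value".to_string())
    }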
@@ -172,6 +181,40 @@ impl Parser {
 
     /// Parse an expression prefix
     pub fn parse_prefix(&mut self) -> Result<Expr, ParserError> {
+        // PostgreSQL allows any string literal to be preceded by a type name, indicating that the
+        // string literal represents a literal of that type. Some examples:
+        //
+        //     DATE '2020-05-20'
+        //     TIMESTAMP WITH TIME ZONE '2020-05-20 7:43:54'
+        //     BOOL 'true'
+        //
+        // The first two are standard SQL, while the latter is a PostgreSQL extension. Complicating
+        // matters is the fact that INTERVAL string literals may optionally be followed by special
+        // keywords, e.g.:
+        //
+        //     INTERVAL '7' DAY
+        //
+        // Note also that naively `SELECT date` looks like a syntax error because the `date` type
+        // name is not followed by a string literal, but in fact in PostgreSQL it is a valid
+        // expression that should parse as the column name "date".
+        return_ok_if_some!(self.maybe_parse(|parser| {
+            match parser.parse_data_type()? {
+                DataType::Interval => parser.parse_literal_interval(),
+                // PostgreSQL allows almost any identifier to be used as a custom data type name,
+                // and we support that in `parse_data_type()`. But unlike Postgres we don't
+                // have a list of globally reserved keywords (since they vary across dialects),
+                // so given `NOT 'a' LIKE 'b'`, we'd accept `NOT` as a possible custom data type
+                // name, resulting in `NOT 'a'` being recognized as a `TypedString` instead of
+                // a unary negation `NOT ('a' LIKE 'b')`. To solve this, we don't accept the
+                // `type 'string'` syntax for the custom data types at all.
+                DataType::Custom(..) => parser_err!("dummy"),
+                data_type => Ok(Expr::TypedString {
+                    data_type,
+                    value: parser.parse_literal_string()?,
+                }),
+            }
+        }));
+
         let expr = match self.next_token() {
             Token::Word(w) => match w.keyword {
                 Keyword::TRUE | Keyword::FALSE | Keyword::NULL => {
@@ -180,7 +223,6 @@ impl Parser {
                 }
                 Keyword::CASE => self.parse_case_expr(),
                 Keyword::CAST => self.parse_cast_expr(),
-                Keyword::DATE => Ok(Expr::Value(Value::Date(self.parse_literal_string()?))),
                 Keyword::EXISTS => self.parse_exists_expr(),
                 Keyword::EXTRACT => self.parse_extract_expr(),
                 Keyword::INTERVAL => self.parse_literal_interval(),

@@ -189,10 +231,6 @@ impl Parser {
                     op: UnaryOperator::Not,
                     expr: Box::new(self.parse_subexpr(Self::UNARY_NOT_PREC)?),
                 }),
-                Keyword::TIME => Ok(Expr::Value(Value::Time(self.parse_literal_string()?))),
-                Keyword::TIMESTAMP => {
-                    Ok(Expr::Value(Value::Timestamp(self.parse_literal_string()?)))
-                }
                 // Here `w` is a word, check if it's a part of a multi-part
                 // identifier, a function call, or a simple identifier:
                 _ => match self.peek_token() {
@@ -907,6 +945,22 @@ impl Parser {
         Ok(values)
     }
 
+    /// Run a parser method `f`, reverting back to the current position
+    /// if unsuccessful.
+    #[must_use]
+    fn maybe_parse<T, F>(&mut self, mut f: F) -> Option<T>
+    where
+        F: FnMut(&mut Parser) -> Result<T, ParserError>,
+    {
+        let index = self.index;
+        if let Ok(t) = f(self) {
+            Some(t)
+        } else {
+            self.index = index;
+            None
+        }
+    }
+
     /// Parse either `ALL` or `DISTINCT`. Returns `true` if `DISTINCT` is parsed and results in a
     /// `ParserError` if both `ALL` and `DISTINCT` are found.
     pub fn parse_all_or_distinct(&mut self) -> Result<bool, ParserError> {
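A self-contained model (invented for illustration, not taken from the crate) of the contract `maybe_parse` provides: a failed attempt rewinds the token index, so the caller can fall through to another parse path:

    // Hypothetical stand-in for the parser; only the cursor behavior is modeled.
    struct Cursor {
        index: usize,
    }

    impl Cursor {
        // Run `f`; if it fails, rewind to where we were before the attempt.
        fn maybe_parse<T>(&mut self, f: impl FnOnce(&mut Cursor) -> Result<T, String>) -> Option<T> {
            let index = self.index;
            match f(self) {
                Ok(t) => Some(t),
                Err(_) => {
                    self.index = index;
                    None
                }
            }
        }
    }

    fn main() {
        let mut c = Cursor { index: 0 };
        let failed: Option<()> = c.maybe_parse(|c| {
            c.index += 3; // pretend we consumed three tokens...
            Err("not what we expected".to_string()) // ...then give up
        });
        assert!(failed.is_none());
        assert_eq!(c.index, 0); // the failed attempt left no trace
    }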
@@ -1898,7 +1952,6 @@ impl Parser {
         }
 
         if self.consume_token(&Token::LParen) {
-            let index = self.index;
             // A left paren introduces either a derived table (i.e., a subquery)
             // or a nested join. It's nearly impossible to determine ahead of
             // time which it is... so we just try to parse both.
@@ -1915,30 +1968,26 @@
             // | (2) starts a nested join
             // (1) an additional set of parens around a nested join
             //
-            match self.parse_derived_table_factor(NotLateral) {
-                // The recently consumed '(' started a derived table, and we've
-                // parsed the subquery, followed by the closing ')', and the
-                // alias of the derived table. In the example above this is
-                // case (3), and the next token would be `NATURAL`.
-                Ok(table_factor) => Ok(table_factor),
-                Err(_) => {
-                    // A parsing error from `parse_derived_table_factor` indicates that
-                    // the '(' we've recently consumed does not start a derived table
-                    // (cases 1, 2, or 4). Ignore the error and back up to where we
-                    // were before - right after the opening '('.
-                    self.index = index;
-
-                    // Inside the parentheses we expect to find a table factor
-                    // followed by some joins or another level of nesting.
-                    let table_and_joins = self.parse_table_and_joins()?;
-                    self.expect_token(&Token::RParen)?;
-                    // The SQL spec prohibits derived and bare tables from appearing
-                    // alone in parentheses. We don't enforce this as some databases
-                    // (e.g. Snowflake) allow such syntax.
-                    Ok(TableFactor::NestedJoin(Box::new(table_and_joins)))
-                }
-            }
+            // If the recently consumed '(' starts a derived table, the call to
+            // `parse_derived_table_factor` below will return success after parsing the
+            // subquery, followed by the closing ')', and the alias of the derived table.
+            // In the example above this is case (3).
+            return_ok_if_some!(
+                self.maybe_parse(|parser| parser.parse_derived_table_factor(NotLateral))
+            );
+            // A parsing error from `parse_derived_table_factor` indicates that the '(' we've
+            // recently consumed does not start a derived table (cases 1, 2, or 4).
+            // `maybe_parse` will ignore such an error and rewind to be after the opening '('.
+
+            // Inside the parentheses we expect to find a table factor
+            // followed by some joins or another level of nesting.
+            let table_and_joins = self.parse_table_and_joins()?;
+            self.expect_token(&Token::RParen)?;
+            // The SQL spec prohibits derived and bare tables from appearing
+            // alone in parentheses. We don't enforce this as some databases
+            // (e.g. Snowflake) allow such syntax.
+            Ok(TableFactor::NestedJoin(Box::new(table_and_joins)))
         } else {
             let name = self.parse_object_name()?;
             // Postgres, MSSQL: table-valued functions:
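To make the cases discussed in the comments above concrete, here are two example FROM clauses (invented for illustration, not taken from the diff) showing the shapes the parser must tell apart after consuming the opening '(':

    fn main() {
        // Derived-table input: the '(' begins a subquery, so the speculative
        // parse_derived_table_factor succeeds and the function returns early.
        let derived_table = "SELECT * FROM (SELECT 1) AS t";
        // Nested-join input: the speculative parse fails, maybe_parse rewinds to
        // just after the '(', and a table factor followed by joins is parsed instead.
        let nested_join = "SELECT * FROM (a NATURAL JOIN b)";
        println!("{}\n{}", derived_table, nested_join);
    }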
@@ -413,6 +413,19 @@ fn parse_null_in_select() {
     );
 }
 
+#[test]
+fn parse_select_with_date_column_name() {
+    let sql = "SELECT date";
+    let select = verified_only_select(sql);
+    assert_eq!(
+        &Expr::Identifier(Ident {
+            value: "date".into(),
+            quote_style: None
+        }),
+        expr_from_projection(only(&select.projection)),
+    );
+}
+
 #[test]
 fn parse_escaped_single_quote_string_predicate() {
     use self::BinaryOperator::*;
@@ -1426,30 +1439,39 @@ fn parse_literal_string() {
 
 #[test]
 fn parse_literal_date() {
-    let sql = "SELECT DATE '1999-01-01'";
+    let sql = "SELECT date '1999-01-01'";
     let select = verified_only_select(sql);
     assert_eq!(
-        &Expr::Value(Value::Date("1999-01-01".into())),
+        &Expr::TypedString {
+            data_type: DataType::Date,
+            value: "1999-01-01".into()
+        },
         expr_from_projection(only(&select.projection)),
     );
 }
 
 #[test]
 fn parse_literal_time() {
-    let sql = "SELECT TIME '01:23:34'";
+    let sql = "SELECT time '01:23:34'";
     let select = verified_only_select(sql);
     assert_eq!(
-        &Expr::Value(Value::Time("01:23:34".into())),
+        &Expr::TypedString {
+            data_type: DataType::Time,
+            value: "01:23:34".into()
+        },
         expr_from_projection(only(&select.projection)),
     );
 }
 
 #[test]
 fn parse_literal_timestamp() {
-    let sql = "SELECT TIMESTAMP '1999-01-01 01:23:34'";
+    let sql = "SELECT timestamp '1999-01-01 01:23:34'";
     let select = verified_only_select(sql);
     assert_eq!(
-        &Expr::Value(Value::Timestamp("1999-01-01 01:23:34".into())),
+        &Expr::TypedString {
+            data_type: DataType::Timestamp,
+            value: "1999-01-01 01:23:34".into()
+        },
        expr_from_projection(only(&select.projection)),
     );
 }