Support general "typed string" literals (#187)

Fixes #168 by enabling `DATE` and other keywords to be used as
identifiers when not followed by a string literal.

A "typed string" is our term for generalized version of `DATE '...'`/`TIME '...'`/
`TIMESTAMP '...'` literals, represented as `TypedString { data_type, value }`
in the AST.

Unlike DATE/TIME/TIMESTAMP literals, this is a non-standard extension
supported by PostgreSQL at least.

This is a port of MaterializeInc/materialize#3146

Co-authored-by: Nikhil Benesch <nikhil.benesch@gmail.com>
Co-authored-by: Nickolay Ponomarev <asqueella@gmail.com>
This commit is contained in:
Max Countryman 2020-06-11 14:04:43 -07:00 committed by GitHub
parent 34548e890b
commit 6cdd4a146d
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 116 additions and 43 deletions

View file

@ -35,6 +35,15 @@ macro_rules! parser_err {
};
}
// Returns a successful result if the optional expression is some
macro_rules! return_ok_if_some {
($e:expr) => {{
if let Some(v) = $e {
return Ok(v);
}
}};
}
#[derive(PartialEq)]
pub enum IsOptional {
Optional,
@ -172,6 +181,40 @@ impl Parser {
/// Parse an expression prefix
pub fn parse_prefix(&mut self) -> Result<Expr, ParserError> {
// PostgreSQL allows any string literal to be preceded by a type name, indicating that the
// string literal represents a literal of that type. Some examples:
//
// DATE '2020-05-20'
// TIMESTAMP WITH TIME ZONE '2020-05-20 7:43:54'
// BOOL 'true'
//
// The first two are standard SQL, while the latter is a PostgreSQL extension. Complicating
// matters is the fact that INTERVAL string literals may optionally be followed by special
// keywords, e.g.:
//
// INTERVAL '7' DAY
//
// Note also that naively `SELECT date` looks like a syntax error because the `date` type
// name is not followed by a string literal, but in fact in PostgreSQL it is a valid
// expression that should parse as the column name "date".
return_ok_if_some!(self.maybe_parse(|parser| {
match parser.parse_data_type()? {
DataType::Interval => parser.parse_literal_interval(),
// PosgreSQL allows almost any identifier to be used as custom data type name,
// and we support that in `parse_data_type()`. But unlike Postgres we don't
// have a list of globally reserved keywords (since they vary across dialects),
// so given `NOT 'a' LIKE 'b'`, we'd accept `NOT` as a possible custom data type
// name, resulting in `NOT 'a'` being recognized as a `TypedString` instead of
// an unary negation `NOT ('a' LIKE 'b')`. To solve this, we don't accept the
// `type 'string'` syntax for the custom data types at all.
DataType::Custom(..) => parser_err!("dummy"),
data_type => Ok(Expr::TypedString {
data_type,
value: parser.parse_literal_string()?,
}),
}
}));
let expr = match self.next_token() {
Token::Word(w) => match w.keyword {
Keyword::TRUE | Keyword::FALSE | Keyword::NULL => {
@ -180,7 +223,6 @@ impl Parser {
}
Keyword::CASE => self.parse_case_expr(),
Keyword::CAST => self.parse_cast_expr(),
Keyword::DATE => Ok(Expr::Value(Value::Date(self.parse_literal_string()?))),
Keyword::EXISTS => self.parse_exists_expr(),
Keyword::EXTRACT => self.parse_extract_expr(),
Keyword::INTERVAL => self.parse_literal_interval(),
@ -189,10 +231,6 @@ impl Parser {
op: UnaryOperator::Not,
expr: Box::new(self.parse_subexpr(Self::UNARY_NOT_PREC)?),
}),
Keyword::TIME => Ok(Expr::Value(Value::Time(self.parse_literal_string()?))),
Keyword::TIMESTAMP => {
Ok(Expr::Value(Value::Timestamp(self.parse_literal_string()?)))
}
// Here `w` is a word, check if it's a part of a multi-part
// identifier, a function call, or a simple identifier:
_ => match self.peek_token() {
@ -907,6 +945,22 @@ impl Parser {
Ok(values)
}
/// Run a parser method `f`, reverting back to the current position
/// if unsuccessful.
#[must_use]
fn maybe_parse<T, F>(&mut self, mut f: F) -> Option<T>
where
F: FnMut(&mut Parser) -> Result<T, ParserError>,
{
let index = self.index;
if let Ok(t) = f(self) {
Some(t)
} else {
self.index = index;
None
}
}
/// Parse either `ALL` or `DISTINCT`. Returns `true` if `DISTINCT` is parsed and results in a
/// `ParserError` if both `ALL` and `DISTINCT` are fround.
pub fn parse_all_or_distinct(&mut self) -> Result<bool, ParserError> {
@ -1898,7 +1952,6 @@ impl Parser {
}
if self.consume_token(&Token::LParen) {
let index = self.index;
// A left paren introduces either a derived table (i.e., a subquery)
// or a nested join. It's nearly impossible to determine ahead of
// time which it is... so we just try to parse both.
@ -1915,30 +1968,26 @@ impl Parser {
// | (2) starts a nested join
// (1) an additional set of parens around a nested join
//
match self.parse_derived_table_factor(NotLateral) {
// The recently consumed '(' started a derived table, and we've
// parsed the subquery, followed by the closing ')', and the
// alias of the derived table. In the example above this is
// case (3), and the next token would be `NATURAL`.
Ok(table_factor) => Ok(table_factor),
Err(_) => {
// A parsing error from `parse_derived_table_factor` indicates that
// the '(' we've recently consumed does not start a derived table
// (cases 1, 2, or 4). Ignore the error and back up to where we
// were before - right after the opening '('.
self.index = index;
// Inside the parentheses we expect to find a table factor
// followed by some joins or another level of nesting.
let table_and_joins = self.parse_table_and_joins()?;
self.expect_token(&Token::RParen)?;
// The SQL spec prohibits derived and bare tables from appearing
// alone in parentheses. We don't enforce this as some databases
// (e.g. Snowflake) allow such syntax.
// If the recently consumed '(' starts a derived table, the call to
// `parse_derived_table_factor` below will return success after parsing the
// subquery, followed by the closing ')', and the alias of the derived table.
// In the example above this is case (3).
return_ok_if_some!(
self.maybe_parse(|parser| parser.parse_derived_table_factor(NotLateral))
);
// A parsing error from `parse_derived_table_factor` indicates that the '(' we've
// recently consumed does not start a derived table (cases 1, 2, or 4).
// `maybe_parse` will ignore such an error and rewind to be after the opening '('.
Ok(TableFactor::NestedJoin(Box::new(table_and_joins)))
}
}
// Inside the parentheses we expect to find a table factor
// followed by some joins or another level of nesting.
let table_and_joins = self.parse_table_and_joins()?;
self.expect_token(&Token::RParen)?;
// The SQL spec prohibits derived and bare tables from appearing
// alone in parentheses. We don't enforce this as some databases
// (e.g. Snowflake) allow such syntax.
Ok(TableFactor::NestedJoin(Box::new(table_and_joins)))
} else {
let name = self.parse_object_name()?;
// Postgres, MSSQL: table-valued functions: