Implement Spanned to retrieve source locations on AST nodes (#1435)

Co-authored-by: Ifeanyi Ubah <ify1992@yahoo.com>
Co-authored-by: Andrew Lamb <andrew@nerdnetworks.org>
This commit is contained in:
Mark-Oliver Junge 2024-11-26 17:22:30 +01:00 committed by GitHub
parent 0adec33b94
commit 3c8fd74804
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
18 changed files with 3092 additions and 399 deletions

View file

@ -24,6 +24,7 @@ use core::{
fmt::{self, Display},
str::FromStr,
};
use helpers::attached_token::AttachedToken;
use log::debug;
@ -371,7 +372,7 @@ impl<'a> Parser<'a> {
.into_iter()
.map(|token| TokenWithLocation {
token,
location: Location { line: 0, column: 0 },
span: Span::empty(),
})
.collect();
self.with_tokens_with_locations(tokens_with_locations)
@ -613,7 +614,7 @@ impl<'a> Parser<'a> {
let mut export = false;
if !dialect_of!(self is MySqlDialect | GenericDialect) {
return parser_err!("Unsupported statement FLUSH", self.peek_token().location);
return parser_err!("Unsupported statement FLUSH", self.peek_token().span.start);
}
let location = if self.parse_keyword(Keyword::NO_WRITE_TO_BINLOG) {
@ -914,7 +915,7 @@ impl<'a> Parser<'a> {
t @ (Token::Word(_) | Token::SingleQuotedString(_)) => {
if self.peek_token().token == Token::Period {
let mut id_parts: Vec<Ident> = vec![match t {
Token::Word(w) => w.to_ident(),
Token::Word(w) => w.to_ident(next_token.span),
Token::SingleQuotedString(s) => Ident::with_quote('\'', s),
_ => unreachable!(), // We matched above
}];
@ -922,13 +923,16 @@ impl<'a> Parser<'a> {
while self.consume_token(&Token::Period) {
let next_token = self.next_token();
match next_token.token {
Token::Word(w) => id_parts.push(w.to_ident()),
Token::Word(w) => id_parts.push(w.to_ident(next_token.span)),
Token::SingleQuotedString(s) => {
// SQLite has single-quoted identifiers
id_parts.push(Ident::with_quote('\'', s))
}
Token::Mul => {
return Ok(Expr::QualifiedWildcard(ObjectName(id_parts)));
return Ok(Expr::QualifiedWildcard(
ObjectName(id_parts),
AttachedToken(next_token),
));
}
_ => {
return self
@ -939,7 +943,7 @@ impl<'a> Parser<'a> {
}
}
Token::Mul => {
return Ok(Expr::Wildcard);
return Ok(Expr::Wildcard(AttachedToken(next_token)));
}
_ => (),
};
@ -1002,7 +1006,7 @@ impl<'a> Parser<'a> {
pub fn parse_unlisten(&mut self) -> Result<Statement, ParserError> {
let channel = if self.consume_token(&Token::Mul) {
Ident::new(Expr::Wildcard.to_string())
Ident::new(Expr::Wildcard(AttachedToken::empty()).to_string())
} else {
match self.parse_identifier(false) {
Ok(expr) => expr,
@ -1030,6 +1034,7 @@ impl<'a> Parser<'a> {
fn parse_expr_prefix_by_reserved_word(
&mut self,
w: &Word,
w_span: Span,
) -> Result<Option<Expr>, ParserError> {
match w.keyword {
Keyword::TRUE | Keyword::FALSE if self.dialect.supports_boolean_literals() => {
@ -1047,7 +1052,7 @@ impl<'a> Parser<'a> {
if dialect_of!(self is PostgreSqlDialect | GenericDialect) =>
{
Ok(Some(Expr::Function(Function {
name: ObjectName(vec![w.to_ident()]),
name: ObjectName(vec![w.to_ident(w_span)]),
parameters: FunctionArguments::None,
args: FunctionArguments::None,
null_treatment: None,
@ -1061,7 +1066,7 @@ impl<'a> Parser<'a> {
| Keyword::CURRENT_DATE
| Keyword::LOCALTIME
| Keyword::LOCALTIMESTAMP => {
Ok(Some(self.parse_time_functions(ObjectName(vec![w.to_ident()]))?))
Ok(Some(self.parse_time_functions(ObjectName(vec![w.to_ident(w_span)]))?))
}
Keyword::CASE => Ok(Some(self.parse_case_expr()?)),
Keyword::CONVERT => Ok(Some(self.parse_convert_expr(false)?)),
@ -1086,7 +1091,7 @@ impl<'a> Parser<'a> {
Keyword::CEIL => Ok(Some(self.parse_ceil_floor_expr(true)?)),
Keyword::FLOOR => Ok(Some(self.parse_ceil_floor_expr(false)?)),
Keyword::POSITION if self.peek_token().token == Token::LParen => {
Ok(Some(self.parse_position_expr(w.to_ident())?))
Ok(Some(self.parse_position_expr(w.to_ident(w_span))?))
}
Keyword::SUBSTRING => Ok(Some(self.parse_substring_expr()?)),
Keyword::OVERLAY => Ok(Some(self.parse_overlay_expr()?)),
@ -1105,7 +1110,7 @@ impl<'a> Parser<'a> {
let query = self.parse_query()?;
self.expect_token(&Token::RParen)?;
Ok(Some(Expr::Function(Function {
name: ObjectName(vec![w.to_ident()]),
name: ObjectName(vec![w.to_ident(w_span)]),
parameters: FunctionArguments::None,
args: FunctionArguments::Subquery(query),
filter: None,
@ -1134,20 +1139,24 @@ impl<'a> Parser<'a> {
}
// Tries to parse an expression by a word that is not known to have a special meaning in the dialect.
fn parse_expr_prefix_by_unreserved_word(&mut self, w: &Word) -> Result<Expr, ParserError> {
fn parse_expr_prefix_by_unreserved_word(
&mut self,
w: &Word,
w_span: Span,
) -> Result<Expr, ParserError> {
match self.peek_token().token {
Token::LParen | Token::Period => {
let mut id_parts: Vec<Ident> = vec![w.to_ident()];
let mut ends_with_wildcard = false;
let mut id_parts: Vec<Ident> = vec![w.to_ident(w_span)];
let mut ending_wildcard: Option<TokenWithLocation> = None;
while self.consume_token(&Token::Period) {
let next_token = self.next_token();
match next_token.token {
Token::Word(w) => id_parts.push(w.to_ident()),
Token::Word(w) => id_parts.push(w.to_ident(next_token.span)),
Token::Mul => {
// Postgres explicitly allows funcnm(tablenm.*) and the
// function array_agg traverses this control flow
if dialect_of!(self is PostgreSqlDialect) {
ends_with_wildcard = true;
ending_wildcard = Some(next_token);
break;
} else {
return self.expected("an identifier after '.'", next_token);
@ -1160,8 +1169,11 @@ impl<'a> Parser<'a> {
}
}
if ends_with_wildcard {
Ok(Expr::QualifiedWildcard(ObjectName(id_parts)))
if let Some(wildcard_token) = ending_wildcard {
Ok(Expr::QualifiedWildcard(
ObjectName(id_parts),
AttachedToken(wildcard_token),
))
} else if self.consume_token(&Token::LParen) {
if dialect_of!(self is SnowflakeDialect | MsSqlDialect)
&& self.consume_tokens(&[Token::Plus, Token::RParen])
@ -1194,11 +1206,11 @@ impl<'a> Parser<'a> {
Token::Arrow if self.dialect.supports_lambda_functions() => {
self.expect_token(&Token::Arrow)?;
Ok(Expr::Lambda(LambdaFunction {
params: OneOrManyWithParens::One(w.to_ident()),
params: OneOrManyWithParens::One(w.to_ident(w_span)),
body: Box::new(self.parse_expr()?),
}))
}
_ => Ok(Expr::Identifier(w.to_ident())),
_ => Ok(Expr::Identifier(w.to_ident(w_span))),
}
}
@ -1225,7 +1237,7 @@ impl<'a> Parser<'a> {
// Note also that naively `SELECT date` looks like a syntax error because the `date` type
// name is not followed by a string literal, but in fact in PostgreSQL it is a valid
// expression that should parse as the column name "date".
let loc = self.peek_token().location;
let loc = self.peek_token().span.start;
let opt_expr = self.maybe_parse(|parser| {
match parser.parse_data_type()? {
DataType::Interval => parser.parse_interval(),
@ -1259,12 +1271,14 @@ impl<'a> Parser<'a> {
//
// We first try to parse the word and following tokens as a special expression, and if that fails,
// we rollback and try to parse it as an identifier.
match self.try_parse(|parser| parser.parse_expr_prefix_by_reserved_word(&w)) {
match self.try_parse(|parser| {
parser.parse_expr_prefix_by_reserved_word(&w, next_token.span)
}) {
// This word indicated an expression prefix and parsing was successful
Ok(Some(expr)) => Ok(expr),
// No expression prefix associated with this word
Ok(None) => Ok(self.parse_expr_prefix_by_unreserved_word(&w)?),
Ok(None) => Ok(self.parse_expr_prefix_by_unreserved_word(&w, next_token.span)?),
// If parsing of the word as a special expression failed, we are facing two options:
// 1. The statement is malformed, e.g. `SELECT INTERVAL '1 DAI` (`DAI` instead of `DAY`)
@ -1275,7 +1289,7 @@ impl<'a> Parser<'a> {
Err(e) => {
if !self.dialect.is_reserved_for_identifier(w.keyword) {
if let Ok(Some(expr)) = self.maybe_parse(|parser| {
parser.parse_expr_prefix_by_unreserved_word(&w)
parser.parse_expr_prefix_by_unreserved_word(&w, next_token.span)
}) {
return Ok(expr);
}
@ -1377,11 +1391,11 @@ impl<'a> Parser<'a> {
} else {
let tok = self.next_token();
let key = match tok.token {
Token::Word(word) => word.to_ident(),
Token::Word(word) => word.to_ident(tok.span),
_ => {
return parser_err!(
format!("Expected identifier, found: {tok}"),
tok.location
tok.span.start
)
}
};
@ -1471,7 +1485,7 @@ impl<'a> Parser<'a> {
while p.consume_token(&Token::Period) {
let tok = p.next_token();
let name = match tok.token {
Token::Word(word) => word.to_ident(),
Token::Word(word) => word.to_ident(tok.span),
_ => return p.expected("identifier", tok),
};
let func = match p.parse_function(ObjectName(vec![name]))? {
@ -2290,7 +2304,7 @@ impl<'a> Parser<'a> {
} else if self.dialect.require_interval_qualifier() {
return parser_err!(
"INTERVAL requires a unit after the literal value",
self.peek_token().location
self.peek_token().span.start
);
} else {
None
@ -2381,7 +2395,10 @@ impl<'a> Parser<'a> {
let (fields, trailing_bracket) =
self.parse_struct_type_def(Self::parse_struct_field_def)?;
if trailing_bracket.0 {
return parser_err!("unmatched > in STRUCT literal", self.peek_token().location);
return parser_err!(
"unmatched > in STRUCT literal",
self.peek_token().span.start
);
}
self.expect_token(&Token::LParen)?;
@ -2411,7 +2428,7 @@ impl<'a> Parser<'a> {
if typed_syntax {
return parser_err!("Typed syntax does not allow AS", {
self.prev_token();
self.peek_token().location
self.peek_token().span.start
});
}
let field_name = self.parse_identifier(false)?;
@ -2464,7 +2481,7 @@ impl<'a> Parser<'a> {
// we've matched all field types for the current struct.
// e.g. this is invalid syntax `STRUCT<STRUCT<INT>>>, INT>(NULL)`
if trailing_bracket.0 {
return parser_err!("unmatched > in STRUCT definition", start_token.location);
return parser_err!("unmatched > in STRUCT definition", start_token.span.start);
}
};
@ -2833,7 +2850,7 @@ impl<'a> Parser<'a> {
format!(
"Expected one of [=, >, <, =>, =<, !=] as comparison operator, found: {op}"
),
tok.location
tok.span.start
);
};
@ -2959,7 +2976,7 @@ impl<'a> Parser<'a> {
// Can only happen if `get_next_precedence` got out of sync with this function
_ => parser_err!(
format!("No infix parser for token {:?}", tok.token),
tok.location
tok.span.start
),
}
} else if Token::DoubleColon == tok {
@ -2990,7 +3007,7 @@ impl<'a> Parser<'a> {
// Can only happen if `get_next_precedence` got out of sync with this function
parser_err!(
format!("No infix parser for token {:?}", tok.token),
tok.location
tok.span.start
)
}
}
@ -3298,14 +3315,14 @@ impl<'a> Parser<'a> {
index += 1;
if let Some(TokenWithLocation {
token: Token::Whitespace(_),
location: _,
span: _,
}) = token
{
continue;
}
break token.cloned().unwrap_or(TokenWithLocation {
token: Token::EOF,
location: Location { line: 0, column: 0 },
span: Span::empty(),
});
})
}
@ -3318,13 +3335,13 @@ impl<'a> Parser<'a> {
match self.tokens.get(index - 1) {
Some(TokenWithLocation {
token: Token::Whitespace(_),
location: _,
span: _,
}) => continue,
non_whitespace => {
if n == 0 {
return non_whitespace.cloned().unwrap_or(TokenWithLocation {
token: Token::EOF,
location: Location { line: 0, column: 0 },
span: Span::empty(),
});
}
n -= 1;
@ -3346,18 +3363,10 @@ impl<'a> Parser<'a> {
.cloned()
.unwrap_or(TokenWithLocation {
token: Token::EOF,
location: Location { line: 0, column: 0 },
span: Span::empty(),
})
}
/// Look for the `expected` keyword at the current position, without consuming it.
/// Saves and restores `self.index` around a trial `parse_keyword`, so the token
/// stream is left unchanged regardless of whether the keyword matched.
/// (NOTE(review): the original comment said "all of the expected keywords in
/// sequence" — that text belongs to `peek_keywords`, which takes a slice; this
/// function peeks a single keyword.)
fn peek_keyword(&mut self, expected: Keyword) -> bool {
let index = self.index;
let matched = self.parse_keyword(expected);
self.index = index;
matched
}
/// Look for all of the expected keywords in sequence, without consuming them
fn peek_keywords(&mut self, expected: &[Keyword]) -> bool {
let index = self.index;
@ -3375,7 +3384,7 @@ impl<'a> Parser<'a> {
match self.tokens.get(self.index - 1) {
Some(TokenWithLocation {
token: Token::Whitespace(_),
location: _,
span: _,
}) => continue,
token => {
return token
@ -3401,7 +3410,7 @@ impl<'a> Parser<'a> {
self.index -= 1;
if let Some(TokenWithLocation {
token: Token::Whitespace(_),
location: _,
span: _,
}) = self.tokens.get(self.index)
{
continue;
@ -3414,7 +3423,7 @@ impl<'a> Parser<'a> {
pub fn expected<T>(&self, expected: &str, found: TokenWithLocation) -> Result<T, ParserError> {
parser_err!(
format!("Expected: {expected}, found: {found}"),
found.location
found.span.start
)
}
@ -3422,15 +3431,22 @@ impl<'a> Parser<'a> {
/// true. Otherwise, no tokens are consumed and returns false.
/// If the current token is the `expected` keyword, consume it and return
/// `true`; otherwise consume nothing and return `false`.
///
/// Thin wrapper over [`Self::parse_keyword_token`], discarding the returned
/// `TokenWithLocation` when only a yes/no answer is needed.
#[must_use]
pub fn parse_keyword(&mut self, expected: Keyword) -> bool {
self.parse_keyword_token(expected).is_some()
}
#[must_use]
pub fn parse_keyword_token(&mut self, expected: Keyword) -> Option<TokenWithLocation> {
match self.peek_token().token {
Token::Word(w) if expected == w.keyword => {
self.next_token();
true
}
_ => false,
Token::Word(w) if expected == w.keyword => Some(self.next_token()),
_ => None,
}
}
/// Return `true` if the next significant token is the `expected` keyword.
/// Purely a lookahead: no tokens are consumed in either case.
#[must_use]
pub fn peek_keyword(&mut self, expected: Keyword) -> bool {
    // Explicit match instead of the `matches!` macro; any non-word token
    // (numbers, punctuation, EOF, ...) trivially fails the check.
    match self.peek_token().token {
        Token::Word(word) => word.keyword == expected,
        _ => false,
    }
}
/// If the current token is the `expected` keyword followed by
/// specified tokens, consume them and returns true.
/// Otherwise, no tokens are consumed and returns false.
@ -3508,9 +3524,9 @@ impl<'a> Parser<'a> {
/// If the current token is the `expected` keyword, consume the token.
/// Otherwise, return an error.
pub fn expect_keyword(&mut self, expected: Keyword) -> Result<(), ParserError> {
if self.parse_keyword(expected) {
Ok(())
pub fn expect_keyword(&mut self, expected: Keyword) -> Result<TokenWithLocation, ParserError> {
if let Some(token) = self.parse_keyword_token(expected) {
Ok(token)
} else {
self.expected(format!("{:?}", &expected).as_str(), self.peek_token())
}
@ -3552,9 +3568,9 @@ impl<'a> Parser<'a> {
}
/// Bail out if the current token is not an expected keyword, or consume it if it is
pub fn expect_token(&mut self, expected: &Token) -> Result<(), ParserError> {
if self.consume_token(expected) {
Ok(())
pub fn expect_token(&mut self, expected: &Token) -> Result<TokenWithLocation, ParserError> {
if self.peek_token() == *expected {
Ok(self.next_token())
} else {
self.expected(&expected.to_string(), self.peek_token())
}
@ -3749,7 +3765,7 @@ impl<'a> Parser<'a> {
/// Parse either `ALL`, `DISTINCT` or `DISTINCT ON (...)`. Returns [`None`] if `ALL` is parsed
/// and results in a [`ParserError`] if both `ALL` and `DISTINCT` are found.
pub fn parse_all_or_distinct(&mut self) -> Result<Option<Distinct>, ParserError> {
let loc = self.peek_token().location;
let loc = self.peek_token().span.start;
let all = self.parse_keyword(Keyword::ALL);
let distinct = self.parse_keyword(Keyword::DISTINCT);
if !distinct {
@ -4828,7 +4844,7 @@ impl<'a> Parser<'a> {
let loc = self
.tokens
.get(self.index - 1)
.map_or(Location { line: 0, column: 0 }, |t| t.location);
.map_or(Location { line: 0, column: 0 }, |t| t.span.start);
match keyword {
Keyword::AUTHORIZATION => {
if authorization_owner.is_some() {
@ -5138,7 +5154,7 @@ impl<'a> Parser<'a> {
let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]);
let names = self.parse_comma_separated(|p| p.parse_object_name(false))?;
let loc = self.peek_token().location;
let loc = self.peek_token().span.start;
let cascade = self.parse_keyword(Keyword::CASCADE);
let restrict = self.parse_keyword(Keyword::RESTRICT);
let purge = self.parse_keyword(Keyword::PURGE);
@ -6029,7 +6045,7 @@ impl<'a> Parser<'a> {
let _ = self.consume_token(&Token::Eq);
let next_token = self.next_token();
match next_token.token {
Token::Number(s, _) => Some(Self::parse::<u32>(s, next_token.location)?),
Token::Number(s, _) => Some(Self::parse::<u32>(s, next_token.span.start)?),
_ => self.expected("literal int", next_token)?,
}
} else {
@ -6818,7 +6834,7 @@ impl<'a> Parser<'a> {
"FULLTEXT or SPATIAL option without constraint name",
TokenWithLocation {
token: Token::make_keyword(&name.to_string()),
location: next_token.location,
span: next_token.span,
},
);
}
@ -7527,7 +7543,7 @@ impl<'a> Parser<'a> {
Expr::Function(f) => Ok(Statement::Call(f)),
other => parser_err!(
format!("Expected a simple procedure call but found: {other}"),
self.peek_token().location
self.peek_token().span.start
),
}
} else {
@ -7731,7 +7747,7 @@ impl<'a> Parser<'a> {
let loc = self
.tokens
.get(self.index - 1)
.map_or(Location { line: 0, column: 0 }, |t| t.location);
.map_or(Location { line: 0, column: 0 }, |t| t.span.start);
return parser_err!(format!("Expect a char, found {s:?}"), loc);
}
Ok(s.chars().next().unwrap())
@ -7777,7 +7793,7 @@ impl<'a> Parser<'a> {
/// Parse a literal value (numbers, strings, date/time, booleans)
pub fn parse_value(&mut self) -> Result<Value, ParserError> {
let next_token = self.next_token();
let location = next_token.location;
let span = next_token.span;
match next_token.token {
Token::Word(w) => match w.keyword {
Keyword::TRUE if self.dialect.supports_boolean_literals() => {
@ -7794,7 +7810,7 @@ impl<'a> Parser<'a> {
"A value?",
TokenWithLocation {
token: Token::Word(w),
location,
span,
},
)?,
},
@ -7802,14 +7818,14 @@ impl<'a> Parser<'a> {
"a concrete value",
TokenWithLocation {
token: Token::Word(w),
location,
span,
},
),
},
// The call to n.parse() returns a bigdecimal when the
// bigdecimal feature is enabled, and is otherwise a no-op
// (i.e., it returns the input string).
Token::Number(n, l) => Ok(Value::Number(Self::parse(n, location)?, l)),
Token::Number(n, l) => Ok(Value::Number(Self::parse(n, span.start)?, l)),
Token::SingleQuotedString(ref s) => Ok(Value::SingleQuotedString(s.to_string())),
Token::DoubleQuotedString(ref s) => Ok(Value::DoubleQuotedString(s.to_string())),
Token::TripleSingleQuotedString(ref s) => {
@ -7853,7 +7869,7 @@ impl<'a> Parser<'a> {
// This because snowflake allows numbers as placeholders
let next_token = self.next_token();
let ident = match next_token.token {
Token::Word(w) => Ok(w.to_ident()),
Token::Word(w) => Ok(w.to_ident(next_token.span)),
Token::Number(w, false) => Ok(Ident::new(w)),
_ => self.expected("placeholder", next_token),
}?;
@ -7864,7 +7880,7 @@ impl<'a> Parser<'a> {
"a value",
TokenWithLocation {
token: unexpected,
location,
span,
},
),
}
@ -7904,7 +7920,7 @@ impl<'a> Parser<'a> {
fn parse_introduced_string_value(&mut self) -> Result<Value, ParserError> {
let next_token = self.next_token();
let location = next_token.location;
let span = next_token.span;
match next_token.token {
Token::SingleQuotedString(ref s) => Ok(Value::SingleQuotedString(s.to_string())),
Token::DoubleQuotedString(ref s) => Ok(Value::DoubleQuotedString(s.to_string())),
@ -7913,7 +7929,7 @@ impl<'a> Parser<'a> {
"a string value",
TokenWithLocation {
token: unexpected,
location,
span,
},
),
}
@ -7923,7 +7939,7 @@ impl<'a> Parser<'a> {
pub fn parse_literal_uint(&mut self) -> Result<u64, ParserError> {
let next_token = self.next_token();
match next_token.token {
Token::Number(s, _) => Self::parse::<u64>(s, next_token.location),
Token::Number(s, _) => Self::parse::<u64>(s, next_token.span.start),
_ => self.expected("literal int", next_token),
}
}
@ -8322,7 +8338,7 @@ impl<'a> Parser<'a> {
// (For example, in `FROM t1 JOIN` the `JOIN` will always be parsed as a keyword,
// not an alias.)
Token::Word(w) if after_as || !reserved_kwds.contains(&w.keyword) => {
Ok(Some(w.to_ident()))
Ok(Some(w.to_ident(next_token.span)))
}
// MSSQL supports single-quoted strings as aliases for columns
// We accept them as table aliases too, although MSSQL does not.
@ -8392,7 +8408,7 @@ impl<'a> Parser<'a> {
_ => {
return parser_err!(
"BUG: expected to match GroupBy modifier keyword",
self.peek_token().location
self.peek_token().span.start
)
}
});
@ -8455,6 +8471,7 @@ impl<'a> Parser<'a> {
.map(|value| Ident {
value: value.into(),
quote_style: ident.quote_style,
span: ident.span,
})
.collect::<Vec<_>>()
})
@ -8470,7 +8487,7 @@ impl<'a> Parser<'a> {
loop {
match self.peek_token().token {
Token::Word(w) => {
idents.push(w.to_ident());
idents.push(w.to_ident(self.peek_token().span));
}
Token::EOF | Token::Eq => break,
_ => {}
@ -8523,8 +8540,9 @@ impl<'a> Parser<'a> {
let mut idents = vec![];
// expecting at least one word for identifier
match self.next_token().token {
Token::Word(w) => idents.push(w.to_ident()),
let next_token = self.next_token();
match next_token.token {
Token::Word(w) => idents.push(w.to_ident(next_token.span)),
Token::EOF => {
return Err(ParserError::ParserError(
"Empty input when parsing identifier".to_string(),
@ -8541,19 +8559,22 @@ impl<'a> Parser<'a> {
loop {
match self.next_token().token {
// ensure that optional period is succeeded by another identifier
Token::Period => match self.next_token().token {
Token::Word(w) => idents.push(w.to_ident()),
Token::EOF => {
return Err(ParserError::ParserError(
"Trailing period in identifier".to_string(),
))?
Token::Period => {
let next_token = self.next_token();
match next_token.token {
Token::Word(w) => idents.push(w.to_ident(next_token.span)),
Token::EOF => {
return Err(ParserError::ParserError(
"Trailing period in identifier".to_string(),
))?
}
token => {
return Err(ParserError::ParserError(format!(
"Unexpected token following period in identifier: {token}"
)))?
}
}
token => {
return Err(ParserError::ParserError(format!(
"Unexpected token following period in identifier: {token}"
)))?
}
},
}
Token::EOF => break,
token => {
return Err(ParserError::ParserError(format!(
@ -8575,7 +8596,7 @@ impl<'a> Parser<'a> {
let next_token = self.next_token();
match next_token.token {
Token::Word(w) => {
let mut ident = w.to_ident();
let mut ident = w.to_ident(next_token.span);
// On BigQuery, hyphens are permitted in unquoted identifiers inside of a FROM or
// TABLE clause [0].
@ -9006,8 +9027,9 @@ impl<'a> Parser<'a> {
/// expect the initial keyword to be already consumed
pub fn parse_query(&mut self) -> Result<Box<Query>, ParserError> {
let _guard = self.recursion_counter.try_decrease()?;
let with = if self.parse_keyword(Keyword::WITH) {
let with = if let Some(with_token) = self.parse_keyword_token(Keyword::WITH) {
Some(With {
with_token: with_token.into(),
recursive: self.parse_keyword(Keyword::RECURSIVE),
cte_tables: self.parse_comma_separated(Parser::parse_cte)?,
})
@ -9265,8 +9287,10 @@ impl<'a> Parser<'a> {
}
}
self.expect_token(&Token::LParen)?;
let query = self.parse_query()?;
self.expect_token(&Token::RParen)?;
let closing_paren_token = self.expect_token(&Token::RParen)?;
let alias = TableAlias {
name,
columns: vec![],
@ -9276,6 +9300,7 @@ impl<'a> Parser<'a> {
query,
from: None,
materialized: is_materialized,
closing_paren_token: closing_paren_token.into(),
}
} else {
let columns = self.parse_table_alias_column_defs()?;
@ -9289,14 +9314,17 @@ impl<'a> Parser<'a> {
}
}
self.expect_token(&Token::LParen)?;
let query = self.parse_query()?;
self.expect_token(&Token::RParen)?;
let closing_paren_token = self.expect_token(&Token::RParen)?;
let alias = TableAlias { name, columns };
Cte {
alias,
query,
from: None,
materialized: is_materialized,
closing_paren_token: closing_paren_token.into(),
}
};
if self.parse_keyword(Keyword::FROM) {
@ -9316,7 +9344,7 @@ impl<'a> Parser<'a> {
pub fn parse_query_body(&mut self, precedence: u8) -> Result<Box<SetExpr>, ParserError> {
// We parse the expression using a Pratt parser, as in `parse_expr()`.
// Start by parsing a restricted SELECT or a `(subquery)`:
let expr = if self.parse_keyword(Keyword::SELECT) {
let expr = if self.peek_keyword(Keyword::SELECT) {
SetExpr::Select(self.parse_select().map(Box::new)?)
} else if self.consume_token(&Token::LParen) {
// CTEs are not allowed here, but the parser currently accepts them
@ -9405,9 +9433,9 @@ impl<'a> Parser<'a> {
}
}
/// Parse a restricted `SELECT` statement (no CTEs / `UNION` / `ORDER BY`),
/// assuming the initial `SELECT` was already consumed
/// Parse a restricted `SELECT` statement (no CTEs / `UNION` / `ORDER BY`)
pub fn parse_select(&mut self) -> Result<Select, ParserError> {
let select_token = self.expect_keyword(Keyword::SELECT)?;
let value_table_mode =
if dialect_of!(self is BigQueryDialect) && self.parse_keyword(Keyword::AS) {
if self.parse_keyword(Keyword::VALUE) {
@ -9571,6 +9599,7 @@ impl<'a> Parser<'a> {
};
Ok(Select {
select_token: AttachedToken(select_token),
distinct,
top,
top_before_distinct,
@ -10656,7 +10685,7 @@ impl<'a> Parser<'a> {
return self.expected("literal number", next_token);
};
self.expect_token(&Token::RBrace)?;
RepetitionQuantifier::AtMost(Self::parse(n, token.location)?)
RepetitionQuantifier::AtMost(Self::parse(n, token.span.start)?)
}
Token::Number(n, _) if self.consume_token(&Token::Comma) => {
let next_token = self.next_token();
@ -10664,12 +10693,12 @@ impl<'a> Parser<'a> {
Token::Number(m, _) => {
self.expect_token(&Token::RBrace)?;
RepetitionQuantifier::Range(
Self::parse(n, token.location)?,
Self::parse(m, token.location)?,
Self::parse(n, token.span.start)?,
Self::parse(m, token.span.start)?,
)
}
Token::RBrace => {
RepetitionQuantifier::AtLeast(Self::parse(n, token.location)?)
RepetitionQuantifier::AtLeast(Self::parse(n, token.span.start)?)
}
_ => {
return self.expected("} or upper bound", next_token);
@ -10678,7 +10707,7 @@ impl<'a> Parser<'a> {
}
Token::Number(n, _) => {
self.expect_token(&Token::RBrace)?;
RepetitionQuantifier::Exactly(Self::parse(n, token.location)?)
RepetitionQuantifier::Exactly(Self::parse(n, token.span.start)?)
}
_ => return self.expected("quantifier range", token),
}
@ -11113,7 +11142,7 @@ impl<'a> Parser<'a> {
.parse_keywords(&[Keyword::GRANTED, Keyword::BY])
.then(|| self.parse_identifier(false).unwrap());
let loc = self.peek_token().location;
let loc = self.peek_token().span.start;
let cascade = self.parse_keyword(Keyword::CASCADE);
let restrict = self.parse_keyword(Keyword::RESTRICT);
if cascade && restrict {
@ -11132,7 +11161,10 @@ impl<'a> Parser<'a> {
/// Parse an REPLACE statement
pub fn parse_replace(&mut self) -> Result<Statement, ParserError> {
if !dialect_of!(self is MySqlDialect | GenericDialect) {
return parser_err!("Unsupported statement REPLACE", self.peek_token().location);
return parser_err!(
"Unsupported statement REPLACE",
self.peek_token().span.start
);
}
let mut insert = self.parse_insert()?;
@ -11593,7 +11625,7 @@ impl<'a> Parser<'a> {
}
fn parse_duplicate_treatment(&mut self) -> Result<Option<DuplicateTreatment>, ParserError> {
let loc = self.peek_token().location;
let loc = self.peek_token().span.start;
match (
self.parse_keyword(Keyword::ALL),
self.parse_keyword(Keyword::DISTINCT),
@ -11608,17 +11640,17 @@ impl<'a> Parser<'a> {
/// Parse a comma-delimited list of projections after SELECT
pub fn parse_select_item(&mut self) -> Result<SelectItem, ParserError> {
match self.parse_wildcard_expr()? {
Expr::QualifiedWildcard(prefix) => Ok(SelectItem::QualifiedWildcard(
Expr::QualifiedWildcard(prefix, token) => Ok(SelectItem::QualifiedWildcard(
prefix,
self.parse_wildcard_additional_options()?,
self.parse_wildcard_additional_options(token.0)?,
)),
Expr::Wildcard => Ok(SelectItem::Wildcard(
self.parse_wildcard_additional_options()?,
Expr::Wildcard(token) => Ok(SelectItem::Wildcard(
self.parse_wildcard_additional_options(token.0)?,
)),
Expr::Identifier(v) if v.value.to_lowercase() == "from" && v.quote_style.is_none() => {
parser_err!(
format!("Expected an expression, found: {}", v),
self.peek_token().location
self.peek_token().span.start
)
}
Expr::BinaryOp {
@ -11631,7 +11663,7 @@ impl<'a> Parser<'a> {
let Expr::Identifier(alias) = *left else {
return parser_err!(
"BUG: expected identifier expression as alias",
self.peek_token().location
self.peek_token().span.start
);
};
Ok(SelectItem::ExprWithAlias {
@ -11653,6 +11685,7 @@ impl<'a> Parser<'a> {
/// If it is not possible to parse it, will return an option.
pub fn parse_wildcard_additional_options(
&mut self,
wildcard_token: TokenWithLocation,
) -> Result<WildcardAdditionalOptions, ParserError> {
let opt_ilike = if dialect_of!(self is GenericDialect | SnowflakeDialect) {
self.parse_optional_select_item_ilike()?
@ -11684,6 +11717,7 @@ impl<'a> Parser<'a> {
};
Ok(WildcardAdditionalOptions {
wildcard_token: wildcard_token.into(),
opt_ilike,
opt_exclude,
opt_except,
@ -11931,7 +11965,7 @@ impl<'a> Parser<'a> {
} else {
let next_token = self.next_token();
let quantity = match next_token.token {
Token::Number(s, _) => Self::parse::<u64>(s, next_token.location)?,
Token::Number(s, _) => Self::parse::<u64>(s, next_token.span.start)?,
_ => self.expected("literal int", next_token)?,
};
Some(TopQuantity::Constant(quantity))
@ -12812,10 +12846,11 @@ impl<'a> Parser<'a> {
}
impl Word {
pub fn to_ident(&self) -> Ident {
pub fn to_ident(&self, span: Span) -> Ident {
Ident {
value: self.value.clone(),
quote_style: self.quote_style,
span,
}
}
}
@ -13389,14 +13424,17 @@ mod tests {
Ident {
value: "CATALOG".to_string(),
quote_style: None,
span: Span::empty(),
},
Ident {
value: "F(o)o. \"bar".to_string(),
quote_style: Some('"'),
span: Span::empty(),
},
Ident {
value: "table".to_string(),
quote_style: None,
span: Span::empty(),
},
];
dialect.run_parser_method(r#"CATALOG."F(o)o. ""bar".table"#, |parser| {
@ -13409,10 +13447,12 @@ mod tests {
Ident {
value: "CATALOG".to_string(),
quote_style: None,
span: Span::empty(),
},
Ident {
value: "table".to_string(),
quote_style: None,
span: Span::empty(),
},
];
dialect.run_parser_method("CATALOG . table", |parser| {