feat: show location info in parse errors (#958)

This commit is contained in:
dawg 2023-09-07 22:23:09 +02:00 committed by GitHub
parent 4c3a4ad5a8
commit b02c3f87ec
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 116 additions and 71 deletions

View file

@ -44,8 +44,8 @@ pub enum ParserError {
// Use `Parser::expected` instead, if possible
macro_rules! parser_err {
($MSG:expr) => {
Err(ParserError::ParserError($MSG.to_string()))
($MSG:expr, $loc:expr) => {
Err(ParserError::ParserError(format!("{}{}", $MSG, $loc)))
};
}
@ -368,8 +368,8 @@ impl<'a> Parser<'a> {
debug!("Parsing sql '{}'...", sql);
let tokens = Tokenizer::new(self.dialect, sql)
.with_unescape(self.options.unescape)
.tokenize()?;
Ok(self.with_tokens(tokens))
.tokenize_with_location()?;
Ok(self.with_tokens_with_locations(tokens))
}
/// Parse potentially multiple statements
@ -724,6 +724,7 @@ impl<'a> Parser<'a> {
// Note also that naively `SELECT date` looks like a syntax error because the `date` type
// name is not followed by a string literal, but in fact in PostgreSQL it is a valid
// expression that should parse as the column name "date".
let loc = self.peek_token().location;
return_ok_if_some!(self.maybe_parse(|parser| {
match parser.parse_data_type()? {
DataType::Interval => parser.parse_interval(),
@ -734,7 +735,7 @@ impl<'a> Parser<'a> {
// name, resulting in `NOT 'a'` being recognized as a `TypedString` instead of
// an unary negation `NOT ('a' LIKE 'b')`. To solve this, we don't accept the
// `type 'string'` syntax for the custom data types at all.
DataType::Custom(..) => parser_err!("dummy"),
DataType::Custom(..) => parser_err!("dummy", loc),
data_type => Ok(Expr::TypedString {
data_type,
value: parser.parse_literal_string()?,
@ -909,7 +910,12 @@ impl<'a> Parser<'a> {
let tok = self.next_token();
let key = match tok.token {
Token::Word(word) => word.to_ident(),
_ => return parser_err!(format!("Expected identifier, found: {tok}")),
_ => {
return parser_err!(
format!("Expected identifier, found: {tok}"),
tok.location
)
}
};
Ok(Expr::CompositeAccess {
expr: Box::new(expr),
@ -1220,7 +1226,10 @@ impl<'a> Parser<'a> {
r#in: Box::new(from),
})
} else {
parser_err!("Position function must include IN keyword".to_string())
parser_err!(
"Position function must include IN keyword".to_string(),
self.peek_token().location
)
}
}
@ -1884,7 +1893,10 @@ impl<'a> Parser<'a> {
}
}
// Can only happen if `get_next_precedence` got out of sync with this function
_ => parser_err!(format!("No infix parser for token {:?}", tok.token)),
_ => parser_err!(
format!("No infix parser for token {:?}", tok.token),
tok.location
),
}
} else if Token::DoubleColon == tok {
self.parse_pg_cast(expr)
@ -1935,7 +1947,10 @@ impl<'a> Parser<'a> {
})
} else {
// Can only happen if `get_next_precedence` got out of sync with this function
parser_err!(format!("No infix parser for token {:?}", tok.token))
parser_err!(
format!("No infix parser for token {:?}", tok.token),
tok.location
)
}
}
@ -2213,7 +2228,10 @@ impl<'a> Parser<'a> {
/// Report unexpected token
pub fn expected<T>(&self, expected: &str, found: TokenWithLocation) -> Result<T, ParserError> {
parser_err!(format!("Expected {expected}, found: {found}"))
parser_err!(
format!("Expected {expected}, found: {found}"),
found.location
)
}
/// Look for an expected keyword and consume it if it exists
@ -2378,13 +2396,14 @@ impl<'a> Parser<'a> {
/// Parse either `ALL`, `DISTINCT` or `DISTINCT ON (...)`. Returns `None` if `ALL` is parsed
/// and results in a `ParserError` if both `ALL` and `DISTINCT` are found.
pub fn parse_all_or_distinct(&mut self) -> Result<Option<Distinct>, ParserError> {
let loc = self.peek_token().location;
let all = self.parse_keyword(Keyword::ALL);
let distinct = self.parse_keyword(Keyword::DISTINCT);
if !distinct {
return Ok(None);
}
if all {
return parser_err!("Cannot specify both ALL and DISTINCT".to_string());
return parser_err!("Cannot specify both ALL and DISTINCT".to_string(), loc);
}
let on = self.parse_keyword(Keyword::ON);
if !on {
@ -2986,10 +3005,14 @@ impl<'a> Parser<'a> {
let mut admin = vec![];
while let Some(keyword) = self.parse_one_of_keywords(&optional_keywords) {
let loc = self
.tokens
.get(self.index - 1)
.map_or(Location { line: 0, column: 0 }, |t| t.location);
match keyword {
Keyword::AUTHORIZATION => {
if authorization_owner.is_some() {
parser_err!("Found multiple AUTHORIZATION")
parser_err!("Found multiple AUTHORIZATION", loc)
} else {
authorization_owner = Some(self.parse_object_name()?);
Ok(())
@ -2997,7 +3020,7 @@ impl<'a> Parser<'a> {
}
Keyword::LOGIN | Keyword::NOLOGIN => {
if login.is_some() {
parser_err!("Found multiple LOGIN or NOLOGIN")
parser_err!("Found multiple LOGIN or NOLOGIN", loc)
} else {
login = Some(keyword == Keyword::LOGIN);
Ok(())
@ -3005,7 +3028,7 @@ impl<'a> Parser<'a> {
}
Keyword::INHERIT | Keyword::NOINHERIT => {
if inherit.is_some() {
parser_err!("Found multiple INHERIT or NOINHERIT")
parser_err!("Found multiple INHERIT or NOINHERIT", loc)
} else {
inherit = Some(keyword == Keyword::INHERIT);
Ok(())
@ -3013,7 +3036,7 @@ impl<'a> Parser<'a> {
}
Keyword::BYPASSRLS | Keyword::NOBYPASSRLS => {
if bypassrls.is_some() {
parser_err!("Found multiple BYPASSRLS or NOBYPASSRLS")
parser_err!("Found multiple BYPASSRLS or NOBYPASSRLS", loc)
} else {
bypassrls = Some(keyword == Keyword::BYPASSRLS);
Ok(())
@ -3021,7 +3044,7 @@ impl<'a> Parser<'a> {
}
Keyword::CREATEDB | Keyword::NOCREATEDB => {
if create_db.is_some() {
parser_err!("Found multiple CREATEDB or NOCREATEDB")
parser_err!("Found multiple CREATEDB or NOCREATEDB", loc)
} else {
create_db = Some(keyword == Keyword::CREATEDB);
Ok(())
@ -3029,7 +3052,7 @@ impl<'a> Parser<'a> {
}
Keyword::CREATEROLE | Keyword::NOCREATEROLE => {
if create_role.is_some() {
parser_err!("Found multiple CREATEROLE or NOCREATEROLE")
parser_err!("Found multiple CREATEROLE or NOCREATEROLE", loc)
} else {
create_role = Some(keyword == Keyword::CREATEROLE);
Ok(())
@ -3037,7 +3060,7 @@ impl<'a> Parser<'a> {
}
Keyword::SUPERUSER | Keyword::NOSUPERUSER => {
if superuser.is_some() {
parser_err!("Found multiple SUPERUSER or NOSUPERUSER")
parser_err!("Found multiple SUPERUSER or NOSUPERUSER", loc)
} else {
superuser = Some(keyword == Keyword::SUPERUSER);
Ok(())
@ -3045,7 +3068,7 @@ impl<'a> Parser<'a> {
}
Keyword::REPLICATION | Keyword::NOREPLICATION => {
if replication.is_some() {
parser_err!("Found multiple REPLICATION or NOREPLICATION")
parser_err!("Found multiple REPLICATION or NOREPLICATION", loc)
} else {
replication = Some(keyword == Keyword::REPLICATION);
Ok(())
@ -3053,7 +3076,7 @@ impl<'a> Parser<'a> {
}
Keyword::PASSWORD => {
if password.is_some() {
parser_err!("Found multiple PASSWORD")
parser_err!("Found multiple PASSWORD", loc)
} else {
password = if self.parse_keyword(Keyword::NULL) {
Some(Password::NullPassword)
@ -3066,7 +3089,7 @@ impl<'a> Parser<'a> {
Keyword::CONNECTION => {
self.expect_keyword(Keyword::LIMIT)?;
if connection_limit.is_some() {
parser_err!("Found multiple CONNECTION LIMIT")
parser_err!("Found multiple CONNECTION LIMIT", loc)
} else {
connection_limit = Some(Expr::Value(self.parse_number_value()?));
Ok(())
@ -3075,7 +3098,7 @@ impl<'a> Parser<'a> {
Keyword::VALID => {
self.expect_keyword(Keyword::UNTIL)?;
if valid_until.is_some() {
parser_err!("Found multiple VALID UNTIL")
parser_err!("Found multiple VALID UNTIL", loc)
} else {
valid_until = Some(Expr::Value(self.parse_value()?));
Ok(())
@ -3084,14 +3107,14 @@ impl<'a> Parser<'a> {
Keyword::IN => {
if self.parse_keyword(Keyword::ROLE) {
if !in_role.is_empty() {
parser_err!("Found multiple IN ROLE")
parser_err!("Found multiple IN ROLE", loc)
} else {
in_role = self.parse_comma_separated(Parser::parse_identifier)?;
Ok(())
}
} else if self.parse_keyword(Keyword::GROUP) {
if !in_group.is_empty() {
parser_err!("Found multiple IN GROUP")
parser_err!("Found multiple IN GROUP", loc)
} else {
in_group = self.parse_comma_separated(Parser::parse_identifier)?;
Ok(())
@ -3102,7 +3125,7 @@ impl<'a> Parser<'a> {
}
Keyword::ROLE => {
if !role.is_empty() {
parser_err!("Found multiple ROLE")
parser_err!("Found multiple ROLE", loc)
} else {
role = self.parse_comma_separated(Parser::parse_identifier)?;
Ok(())
@ -3110,7 +3133,7 @@ impl<'a> Parser<'a> {
}
Keyword::USER => {
if !user.is_empty() {
parser_err!("Found multiple USER")
parser_err!("Found multiple USER", loc)
} else {
user = self.parse_comma_separated(Parser::parse_identifier)?;
Ok(())
@ -3118,7 +3141,7 @@ impl<'a> Parser<'a> {
}
Keyword::ADMIN => {
if !admin.is_empty() {
parser_err!("Found multiple ADMIN")
parser_err!("Found multiple ADMIN", loc)
} else {
admin = self.parse_comma_separated(Parser::parse_identifier)?;
Ok(())
@ -3181,14 +3204,19 @@ impl<'a> Parser<'a> {
// specifying multiple objects to delete in a single statement
let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]);
let names = self.parse_comma_separated(Parser::parse_object_name)?;
let loc = self.peek_token().location;
let cascade = self.parse_keyword(Keyword::CASCADE);
let restrict = self.parse_keyword(Keyword::RESTRICT);
let purge = self.parse_keyword(Keyword::PURGE);
if cascade && restrict {
return parser_err!("Cannot specify both CASCADE and RESTRICT in DROP");
return parser_err!("Cannot specify both CASCADE and RESTRICT in DROP", loc);
}
if object_type == ObjectType::Role && (cascade || restrict || purge) {
return parser_err!("Cannot specify CASCADE, RESTRICT, or PURGE in DROP ROLE");
return parser_err!(
"Cannot specify CASCADE, RESTRICT, or PURGE in DROP ROLE",
loc
);
}
Ok(Statement::Drop {
object_type,
@ -4446,7 +4474,11 @@ impl<'a> Parser<'a> {
fn parse_literal_char(&mut self) -> Result<char, ParserError> {
let s = self.parse_literal_string()?;
if s.len() != 1 {
return parser_err!(format!("Expect a char, found {s:?}"));
let loc = self
.tokens
.get(self.index - 1)
.map_or(Location { line: 0, column: 0 }, |t| t.location);
return parser_err!(format!("Expect a char, found {s:?}"), loc);
}
Ok(s.chars().next().unwrap())
}
@ -4525,7 +4557,7 @@ impl<'a> Parser<'a> {
// (i.e., it returns the input string).
Token::Number(ref n, l) => match n.parse() {
Ok(n) => Ok(Value::Number(n, l)),
Err(e) => parser_err!(format!("Could not parse '{n}' as number: {e}")),
Err(e) => parser_err!(format!("Could not parse '{n}' as number: {e}"), location),
},
Token::SingleQuotedString(ref s) => Ok(Value::SingleQuotedString(s.to_string())),
Token::DoubleQuotedString(ref s) => Ok(Value::DoubleQuotedString(s.to_string())),
@ -6465,10 +6497,11 @@ impl<'a> Parser<'a> {
.parse_keywords(&[Keyword::GRANTED, Keyword::BY])
.then(|| self.parse_identifier().unwrap());
let loc = self.peek_token().location;
let cascade = self.parse_keyword(Keyword::CASCADE);
let restrict = self.parse_keyword(Keyword::RESTRICT);
if cascade && restrict {
return parser_err!("Cannot specify both CASCADE and RESTRICT in REVOKE");
return parser_err!("Cannot specify both CASCADE and RESTRICT in REVOKE", loc);
}
Ok(Statement::Revoke {
@ -7929,14 +7962,12 @@ mod tests {
#[test]
fn test_parser_error_loc() {
// TODO: Once we thread token locations through the parser, we should update this
// test to assert the locations of the referenced token
let sql = "SELECT this is a syntax error";
let ast = Parser::parse_sql(&GenericDialect, sql);
assert_eq!(
ast,
Err(ParserError::ParserError(
"Expected [NOT] NULL or TRUE|FALSE or [NOT] DISTINCT FROM after IS, found: a"
"Expected [NOT] NULL or TRUE|FALSE or [NOT] DISTINCT FROM after IS, found: a at Line: 1, Column 16"
.to_string()
))
);

View file

@ -28,6 +28,7 @@ use core::fmt::Debug;
use crate::dialect::*;
use crate::parser::{Parser, ParserError};
use crate::tokenizer::Tokenizer;
use crate::{ast::*, parser::ParserOptions};
/// Tests use the methods on this struct to invoke the parser on one or
@ -82,8 +83,13 @@ impl TestedDialects {
/// the result is the same for all tested dialects.
pub fn parse_sql_statements(&self, sql: &str) -> Result<Vec<Statement>, ParserError> {
self.one_of_identical_results(|dialect| {
let mut tokenizer = Tokenizer::new(dialect, sql);
if let Some(options) = &self.options {
tokenizer = tokenizer.with_unescape(options.unescape);
}
let tokens = tokenizer.tokenize()?;
self.new_parser(dialect)
.try_with_sql(sql)?
.with_tokens(tokens)
.parse_statements()
})
// To fail the `ensure_multiple_dialects_are_tested` test:

View file

@ -350,7 +350,7 @@ impl fmt::Display for Whitespace {
}
/// Location in input string
#[derive(Debug, Eq, PartialEq, Clone)]
#[derive(Debug, Eq, PartialEq, Clone, Copy)]
pub struct Location {
/// Line number, starting from 1
pub line: u64,
@ -358,6 +358,20 @@ pub struct Location {
pub column: u64,
}
impl fmt::Display for Location {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
if self.line == 0 {
return Ok(());
}
write!(
f,
// TODO: use standard compiler location syntax (<path>:<line>:<col>)
" at Line: {}, Column {}",
self.line, self.column,
)
}
}
/// A [Token] with [Location] attached to it
#[derive(Debug, Eq, PartialEq, Clone)]
pub struct TokenWithLocation {
@ -400,17 +414,12 @@ impl fmt::Display for TokenWithLocation {
#[derive(Debug, PartialEq, Eq)]
pub struct TokenizerError {
pub message: String,
pub line: u64,
pub col: u64,
pub location: Location,
}
impl fmt::Display for TokenizerError {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(
f,
"{} at Line: {}, Column {}",
self.message, self.line, self.col
)
write!(f, "{}{}", self.message, self.location,)
}
}
@ -546,10 +555,7 @@ impl<'a> Tokenizer<'a> {
let mut location = state.location();
while let Some(token) = self.next_token(&mut state)? {
tokens.push(TokenWithLocation {
token,
location: location.clone(),
});
tokens.push(TokenWithLocation { token, location });
location = state.location();
}
@ -1122,8 +1128,7 @@ impl<'a> Tokenizer<'a> {
) -> Result<R, TokenizerError> {
Err(TokenizerError {
message: message.into(),
col: loc.column,
line: loc.line,
location: loc,
})
}
@ -1368,8 +1373,7 @@ mod tests {
fn tokenizer_error_impl() {
let err = TokenizerError {
message: "test".into(),
line: 1,
col: 1,
location: Location { line: 1, column: 1 },
};
#[cfg(feature = "std")]
{
@ -1694,8 +1698,7 @@ mod tests {
tokenizer.tokenize(),
Err(TokenizerError {
message: "Unterminated string literal".to_string(),
line: 1,
col: 8
location: Location { line: 1, column: 8 },
})
);
}
@ -1710,8 +1713,10 @@ mod tests {
tokenizer.tokenize(),
Err(TokenizerError {
message: "Unterminated string literal".to_string(),
line: 1,
col: 35
location: Location {
line: 1,
column: 35
}
})
);
}
@ -1873,8 +1878,7 @@ mod tests {
tokenizer.tokenize(),
Err(TokenizerError {
message: "Expected close delimiter '\"' before EOF.".to_string(),
line: 1,
col: 1
location: Location { line: 1, column: 1 },
})
);
}

View file

@ -836,7 +836,12 @@ fn test_eof_after_as() {
#[test]
fn test_no_infix_error() {
let res = Parser::parse_sql(&ClickHouseDialect {}, "ASSERT-URA<<");
let dialects = TestedDialects {
dialects: vec![Box::new(ClickHouseDialect {})],
options: None,
};
let res = dialects.parse_sql_statements("ASSERT-URA<<");
assert_eq!(
ParserError::ParserError("No infix parser for token ShiftLeft".to_string()),
res.unwrap_err()
@ -3238,19 +3243,21 @@ fn parse_alter_table_alter_column_type() {
_ => unreachable!(),
}
let res = Parser::parse_sql(
&GenericDialect {},
&format!("{alter_stmt} ALTER COLUMN is_active TYPE TEXT"),
);
let dialect = TestedDialects {
dialects: vec![Box::new(GenericDialect {})],
options: None,
};
let res =
dialect.parse_sql_statements(&format!("{alter_stmt} ALTER COLUMN is_active TYPE TEXT"));
assert_eq!(
ParserError::ParserError("Expected SET/DROP NOT NULL, SET DEFAULT, SET DATA TYPE after ALTER COLUMN, found: TYPE".to_string()),
res.unwrap_err()
);
let res = Parser::parse_sql(
&GenericDialect {},
&format!("{alter_stmt} ALTER COLUMN is_active SET DATA TYPE TEXT USING 'text'"),
);
let res = dialect.parse_sql_statements(&format!(
"{alter_stmt} ALTER COLUMN is_active SET DATA TYPE TEXT USING 'text'"
));
assert_eq!(
ParserError::ParserError("Expected end of statement, found: USING".to_string()),
res.unwrap_err()
@ -3295,10 +3302,7 @@ fn parse_alter_table_drop_constraint() {
_ => unreachable!(),
}
let res = Parser::parse_sql(
&GenericDialect {},
&format!("{alter_stmt} DROP CONSTRAINT is_active TEXT"),
);
let res = parse_sql_statements(&format!("{alter_stmt} DROP CONSTRAINT is_active TEXT"));
assert_eq!(
ParserError::ParserError("Expected end of statement, found: TEXT".to_string()),
res.unwrap_err()