Support parsing scientific notation (such as 10e5) (#768)

This commit is contained in:
Jeffrey 2022-12-29 00:28:53 +11:00 committed by GitHub
parent 2d801c9fb6
commit 2c20ec0be5
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 118 additions and 0 deletions

View file

@ -144,6 +144,7 @@ pub fn all_dialects() -> TestedDialects {
Box::new(RedshiftSqlDialect {}), Box::new(RedshiftSqlDialect {}),
Box::new(MySqlDialect {}), Box::new(MySqlDialect {}),
Box::new(BigQueryDialect {}), Box::new(BigQueryDialect {}),
Box::new(SQLiteDialect {}),
], ],
} }
} }

View file

@ -541,6 +541,7 @@ impl<'a> Tokenizer<'a> {
chars.next(); // consume the first char chars.next(); // consume the first char
let s = self.tokenize_word(ch, chars); let s = self.tokenize_word(ch, chars);
// TODO: implement parsing of exponent here
if s.chars().all(|x| ('0'..='9').contains(&x) || x == '.') { if s.chars().all(|x| ('0'..='9').contains(&x) || x == '.') {
let mut inner_state = State { let mut inner_state = State {
peekable: s.chars().peekable(), peekable: s.chars().peekable(),
@ -617,6 +618,36 @@ impl<'a> Tokenizer<'a> {
return Ok(Some(Token::Period)); return Ok(Some(Token::Period));
} }
// Parse exponent as number
// Scientific notation: an `e`/`E`, an optional `+`/`-` sign, then at least one
// digit. The check is done on a cloned iterator first, so nothing is consumed
// from `chars` unless the whole sequence really is an exponent.
if chars.peek() == Some(&'e') || chars.peek() == Some(&'E') {
// Look-ahead copy; advancing it leaves `chars` untouched.
let mut char_clone = chars.peekable.clone();
let mut exponent_part = String::new();
// Take the `e`/`E` seen by the peek above.
// NOTE(review): the unwrap assumes `chars.peek()` reads from `chars.peekable` — confirm.
exponent_part.push(char_clone.next().unwrap());
// Optional sign
match char_clone.peek() {
Some(&c) if matches!(c, '+' | '-') => {
exponent_part.push(c);
char_clone.next();
}
_ => (),
}
match char_clone.peek() {
// Definitely an exponent, get original iterator up to speed and use it
Some(&c) if matches!(c, '0'..='9') => {
// Skip the `e`/`E` and the optional sign on the real iterator. Everything
// pushed so far is a single-byte ASCII char, so the byte length of
// `exponent_part` equals the number of characters to skip.
for _ in 0..exponent_part.len() {
chars.next();
}
// Append the run of ASCII digits that follows, taken from the real iterator.
exponent_part +=
&peeking_take_while(chars, |ch| matches!(ch, '0'..='9'));
// `s` is declared above this fragment; presumably it holds the number
// text collected so far — the exponent is appended to it.
s += exponent_part.as_str();
}
// Not an exponent, discard the work done
// (`chars` was never advanced, so the `e`/`E` is still the next character.)
_ => (),
}
}
let long = if chars.peek() == Some(&'L') { let long = if chars.peek() == Some(&'L') {
chars.next(); chars.next();
true true
@ -1091,6 +1122,41 @@ mod tests {
compare(expected, tokens); compare(expected, tokens);
} }
#[test]
fn tokenize_select_exponent() {
    // Small constructors that keep the expected token stream readable.
    let num = |text: &str| Token::Number(text.to_string(), false);
    let space = || Token::Whitespace(Whitespace::Space);

    let dialect = GenericDialect {};
    let sql = String::from("SELECT 1e10, 1e-10, 1e+10, 1ea, 1e-10a, 1e-10-10");
    let actual = Tokenizer::new(&dialect, &sql).tokenize().unwrap();

    let expected = vec![
        Token::make_keyword("SELECT"),
        space(),
        // `1e10`: exponent without a sign
        num("1e10"),
        Token::Comma,
        space(),
        // `1e-10`: negative exponent
        num("1e-10"),
        Token::Comma,
        space(),
        // `1e+10`: explicitly positive exponent
        num("1e+10"),
        Token::Comma,
        space(),
        // `1ea`: no digit after the `e`, so it is a number followed by a word
        num("1"),
        Token::make_word("ea", None),
        Token::Comma,
        space(),
        // `1e-10a`: the exponent ends at the last digit, `a` is a separate word
        num("1e-10"),
        Token::make_word("a", None),
        Token::Comma,
        space(),
        // `1e-10-10`: only the first sign belongs to the exponent
        num("1e-10"),
        Token::Minus,
        num("10"),
    ];

    compare(expected, actual);
}
#[test] #[test]
fn tokenize_scalar_function() { fn tokenize_scalar_function() {
let sql = String::from("SELECT sqrt(1)"); let sql = String::from("SELECT sqrt(1)");

View file

@ -775,6 +775,57 @@ fn parse_null_in_select() {
); );
} }
#[test]
fn parse_exponent_in_select() -> Result<(), ParserError> {
    // Every dialect except HiveDialect, which is left out on purpose because
    // it allows numbers to start an identifier.
    let dialects = TestedDialects {
        dialects: vec![
            Box::new(AnsiDialect {}),
            Box::new(BigQueryDialect {}),
            Box::new(ClickHouseDialect {}),
            Box::new(GenericDialect {}),
            Box::new(MsSqlDialect {}),
            Box::new(MySqlDialect {}),
            Box::new(PostgreSqlDialect {}),
            Box::new(RedshiftSqlDialect {}),
            Box::new(SnowflakeDialect {}),
            Box::new(SQLiteDialect {}),
        ],
    };

    let sql = "SELECT 10e-20, 1e3, 1e+3, 1e3a, 1e, 0.5e2";
    let mut statements = dialects.parse_sql_statements(sql)?;

    // Unwrap Statement -> Query -> Select to reach the projection list.
    let query = match statements.pop().unwrap() {
        Statement::Query(inner) => *inner,
        _ => panic!("Expected Query"),
    };
    let select = match *query.body {
        SetExpr::Select(inner) => *inner,
        _ => panic!("Expected SetExpr::Select"),
    };

    // Builders for the two shapes of select item this query produces.
    let plain = |text: &'static str| SelectItem::UnnamedExpr(Expr::Value(number(text)));
    let aliased = |text: &'static str, alias: &str| SelectItem::ExprWithAlias {
        expr: Expr::Value(number(text)),
        alias: Ident::new(alias),
    };

    let expected = vec![
        plain("10e-20"),
        plain("1e3"),
        plain("1e+3"),
        // `1e3a`: the trailing word is an implicit alias
        aliased("1e3", "a"),
        // `1e`: no digits after the `e`, so `e` is an implicit alias
        aliased("1", "e"),
        plain("0.5e2"),
    ];
    assert_eq!(&expected, &select.projection);

    Ok(())
}
#[test] #[test]
fn parse_select_with_date_column_name() { fn parse_select_with_date_column_name() {
let sql = "SELECT date"; let sql = "SELECT date";