mirror of
https://github.com/apache/datafusion-sqlparser-rs.git
synced 2025-08-17 04:30:16 +00:00
Support parsing scientific notation (such as `10e5`) (#768)
This commit is contained in:
parent
2d801c9fb6
commit
2c20ec0be5
3 changed files with 118 additions and 0 deletions
|
@ -144,6 +144,7 @@ pub fn all_dialects() -> TestedDialects {
|
|||
Box::new(RedshiftSqlDialect {}),
|
||||
Box::new(MySqlDialect {}),
|
||||
Box::new(BigQueryDialect {}),
|
||||
Box::new(SQLiteDialect {}),
|
||||
],
|
||||
}
|
||||
}
|
||||
|
|
|
@ -541,6 +541,7 @@ impl<'a> Tokenizer<'a> {
|
|||
chars.next(); // consume the first char
|
||||
let s = self.tokenize_word(ch, chars);
|
||||
|
||||
// TODO: implement parsing of exponent here
|
||||
if s.chars().all(|x| ('0'..='9').contains(&x) || x == '.') {
|
||||
let mut inner_state = State {
|
||||
peekable: s.chars().peekable(),
|
||||
|
@ -617,6 +618,36 @@ impl<'a> Tokenizer<'a> {
|
|||
return Ok(Some(Token::Period));
|
||||
}
|
||||
|
||||
// Parse exponent as number
|
||||
if chars.peek() == Some(&'e') || chars.peek() == Some(&'E') {
|
||||
let mut char_clone = chars.peekable.clone();
|
||||
let mut exponent_part = String::new();
|
||||
exponent_part.push(char_clone.next().unwrap());
|
||||
|
||||
// Optional sign
|
||||
match char_clone.peek() {
|
||||
Some(&c) if matches!(c, '+' | '-') => {
|
||||
exponent_part.push(c);
|
||||
char_clone.next();
|
||||
}
|
||||
_ => (),
|
||||
}
|
||||
|
||||
match char_clone.peek() {
|
||||
// Definitely an exponent, get original iterator up to speed and use it
|
||||
Some(&c) if matches!(c, '0'..='9') => {
|
||||
for _ in 0..exponent_part.len() {
|
||||
chars.next();
|
||||
}
|
||||
exponent_part +=
|
||||
&peeking_take_while(chars, |ch| matches!(ch, '0'..='9'));
|
||||
s += exponent_part.as_str();
|
||||
}
|
||||
// Not an exponent, discard the work done
|
||||
_ => (),
|
||||
}
|
||||
}
|
||||
|
||||
let long = if chars.peek() == Some(&'L') {
|
||||
chars.next();
|
||||
true
|
||||
|
@ -1091,6 +1122,41 @@ mod tests {
|
|||
compare(expected, tokens);
|
||||
}
|
||||
|
||||
// Checks how the tokenizer splits numeric literals that carry an exponent.
// Per the expected stream below: `1e10`, `1e-10` and `1e+10` each become one
// number token; `1ea` becomes the number `1` followed by the word `ea`;
// `1e-10a` becomes `1e-10` followed by the word `a`; and `1e-10-10` becomes
// `1e-10`, a minus sign, and `10`.
#[test]
fn tokenize_select_exponent() {
    // Shorthand constructors for the two token shapes repeated throughout.
    let number = |text: &str| Token::Number(text.to_string(), false);
    let space = || Token::Whitespace(Whitespace::Space);

    let expected = vec![
        Token::make_keyword("SELECT"),
        space(),
        number("1e10"),
        Token::Comma,
        space(),
        number("1e-10"),
        Token::Comma,
        space(),
        number("1e+10"),
        Token::Comma,
        space(),
        // `e` with no digits after it is not an exponent.
        number("1"),
        Token::make_word("ea", None),
        Token::Comma,
        space(),
        // The exponent ends at the last digit; the trailing `a` is a word.
        number("1e-10"),
        Token::make_word("a", None),
        Token::Comma,
        space(),
        // Only the sign directly after `e` belongs to the exponent.
        number("1e-10"),
        Token::Minus,
        number("10"),
    ];

    let query = "SELECT 1e10, 1e-10, 1e+10, 1ea, 1e-10a, 1e-10-10";
    let dialect = GenericDialect {};
    let mut tokenizer = Tokenizer::new(&dialect, query);
    let actual = tokenizer.tokenize().unwrap();

    compare(expected, actual);
}
|
||||
|
||||
#[test]
|
||||
fn tokenize_scalar_function() {
|
||||
let sql = String::from("SELECT sqrt(1)");
|
||||
|
|
|
@ -775,6 +775,57 @@ fn parse_null_in_select() {
|
|||
);
|
||||
}
|
||||
|
||||
// Parses a SELECT list of exponent-style literals and checks the resulting
// projection. Per the expected items below: `1e3a` parses as the number `1e3`
// aliased `a`, and `1e` parses as the number `1` aliased `e`.
#[test]
fn parse_exponent_in_select() -> Result<(), ParserError> {
    // A bare numeric literal in the projection.
    fn unnamed(literal: &'static str) -> SelectItem {
        SelectItem::UnnamedExpr(Expr::Value(number(literal)))
    }

    // A numeric literal followed by an implicit alias.
    fn aliased(literal: &'static str, alias: &str) -> SelectItem {
        SelectItem::ExprWithAlias {
            expr: Expr::Value(number(literal)),
            alias: Ident::new(alias),
        }
    }

    // Every dialect except HiveDialect, which is deliberately left out because
    // it allows numbers to start an identifier.
    let dialects = TestedDialects {
        dialects: vec![
            Box::new(AnsiDialect {}),
            Box::new(BigQueryDialect {}),
            Box::new(ClickHouseDialect {}),
            Box::new(GenericDialect {}),
            Box::new(MsSqlDialect {}),
            Box::new(MySqlDialect {}),
            Box::new(PostgreSqlDialect {}),
            Box::new(RedshiftSqlDialect {}),
            Box::new(SnowflakeDialect {}),
            Box::new(SQLiteDialect {}),
        ],
    };

    let mut statements =
        dialects.parse_sql_statements("SELECT 10e-20, 1e3, 1e+3, 1e3a, 1e, 0.5e2")?;

    // Unwrap the last parsed statement down to its SELECT body.
    let query = if let Statement::Query(query) = statements.pop().unwrap() {
        *query
    } else {
        panic!("Expected Query")
    };
    let select = if let SetExpr::Select(select) = *query.body {
        *select
    } else {
        panic!("Expected SetExpr::Select")
    };

    let expected = vec![
        unnamed("10e-20"),
        unnamed("1e3"),
        unnamed("1e+3"),
        aliased("1e3", "a"),
        aliased("1", "e"),
        unnamed("0.5e2"),
    ];
    assert_eq!(&expected, &select.projection);

    Ok(())
}
|
||||
|
||||
#[test]
|
||||
fn parse_select_with_date_column_name() {
|
||||
let sql = "SELECT date";
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue