mirror of
https://github.com/apache/datafusion-sqlparser-rs.git
synced 2025-08-17 12:40:17 +00:00
Support parsing scientific notation (such as `10e5`) (#768)
This commit is contained in:
parent
2d801c9fb6
commit
2c20ec0be5
3 changed files with 118 additions and 0 deletions
|
@ -144,6 +144,7 @@ pub fn all_dialects() -> TestedDialects {
|
||||||
Box::new(RedshiftSqlDialect {}),
|
Box::new(RedshiftSqlDialect {}),
|
||||||
Box::new(MySqlDialect {}),
|
Box::new(MySqlDialect {}),
|
||||||
Box::new(BigQueryDialect {}),
|
Box::new(BigQueryDialect {}),
|
||||||
|
Box::new(SQLiteDialect {}),
|
||||||
],
|
],
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -541,6 +541,7 @@ impl<'a> Tokenizer<'a> {
|
||||||
chars.next(); // consume the first char
|
chars.next(); // consume the first char
|
||||||
let s = self.tokenize_word(ch, chars);
|
let s = self.tokenize_word(ch, chars);
|
||||||
|
|
||||||
|
// TODO: implement parsing of exponent here
|
||||||
if s.chars().all(|x| ('0'..='9').contains(&x) || x == '.') {
|
if s.chars().all(|x| ('0'..='9').contains(&x) || x == '.') {
|
||||||
let mut inner_state = State {
|
let mut inner_state = State {
|
||||||
peekable: s.chars().peekable(),
|
peekable: s.chars().peekable(),
|
||||||
|
@ -617,6 +618,36 @@ impl<'a> Tokenizer<'a> {
|
||||||
return Ok(Some(Token::Period));
|
return Ok(Some(Token::Period));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Parse exponent as number
|
||||||
|
if chars.peek() == Some(&'e') || chars.peek() == Some(&'E') {
|
||||||
|
let mut char_clone = chars.peekable.clone();
|
||||||
|
let mut exponent_part = String::new();
|
||||||
|
exponent_part.push(char_clone.next().unwrap());
|
||||||
|
|
||||||
|
// Optional sign
|
||||||
|
match char_clone.peek() {
|
||||||
|
Some(&c) if matches!(c, '+' | '-') => {
|
||||||
|
exponent_part.push(c);
|
||||||
|
char_clone.next();
|
||||||
|
}
|
||||||
|
_ => (),
|
||||||
|
}
|
||||||
|
|
||||||
|
match char_clone.peek() {
|
||||||
|
// Definitely an exponent, get original iterator up to speed and use it
|
||||||
|
Some(&c) if matches!(c, '0'..='9') => {
|
||||||
|
for _ in 0..exponent_part.len() {
|
||||||
|
chars.next();
|
||||||
|
}
|
||||||
|
exponent_part +=
|
||||||
|
&peeking_take_while(chars, |ch| matches!(ch, '0'..='9'));
|
||||||
|
s += exponent_part.as_str();
|
||||||
|
}
|
||||||
|
// Not an exponent, discard the work done
|
||||||
|
_ => (),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
let long = if chars.peek() == Some(&'L') {
|
let long = if chars.peek() == Some(&'L') {
|
||||||
chars.next();
|
chars.next();
|
||||||
true
|
true
|
||||||
|
@ -1091,6 +1122,41 @@ mod tests {
|
||||||
compare(expected, tokens);
|
compare(expected, tokens);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn tokenize_select_exponent() {
|
||||||
|
let sql = String::from("SELECT 1e10, 1e-10, 1e+10, 1ea, 1e-10a, 1e-10-10");
|
||||||
|
let dialect = GenericDialect {};
|
||||||
|
let mut tokenizer = Tokenizer::new(&dialect, &sql);
|
||||||
|
let tokens = tokenizer.tokenize().unwrap();
|
||||||
|
|
||||||
|
let expected = vec![
|
||||||
|
Token::make_keyword("SELECT"),
|
||||||
|
Token::Whitespace(Whitespace::Space),
|
||||||
|
Token::Number(String::from("1e10"), false),
|
||||||
|
Token::Comma,
|
||||||
|
Token::Whitespace(Whitespace::Space),
|
||||||
|
Token::Number(String::from("1e-10"), false),
|
||||||
|
Token::Comma,
|
||||||
|
Token::Whitespace(Whitespace::Space),
|
||||||
|
Token::Number(String::from("1e+10"), false),
|
||||||
|
Token::Comma,
|
||||||
|
Token::Whitespace(Whitespace::Space),
|
||||||
|
Token::Number(String::from("1"), false),
|
||||||
|
Token::make_word("ea", None),
|
||||||
|
Token::Comma,
|
||||||
|
Token::Whitespace(Whitespace::Space),
|
||||||
|
Token::Number(String::from("1e-10"), false),
|
||||||
|
Token::make_word("a", None),
|
||||||
|
Token::Comma,
|
||||||
|
Token::Whitespace(Whitespace::Space),
|
||||||
|
Token::Number(String::from("1e-10"), false),
|
||||||
|
Token::Minus,
|
||||||
|
Token::Number(String::from("10"), false),
|
||||||
|
];
|
||||||
|
|
||||||
|
compare(expected, tokens);
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn tokenize_scalar_function() {
|
fn tokenize_scalar_function() {
|
||||||
let sql = String::from("SELECT sqrt(1)");
|
let sql = String::from("SELECT sqrt(1)");
|
||||||
|
|
|
@ -775,6 +775,57 @@ fn parse_null_in_select() {
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Parser test for scientific-notation literals in a `SELECT` projection.
///
/// Parses one statement with the dialects listed below, unwraps it down to
/// the inner select, and compares its projection against the expected
/// items. Returns `Err(ParserError)` if `parse_sql_statements` fails, since
/// that call is propagated with `?`.
#[test]
|
||||||
|
fn parse_exponent_in_select() -> Result<(), ParserError> {
|
||||||
|
// all except Hive, as it allows numbers to start an identifier
|
||||||
|
let dialects = TestedDialects {
|
||||||
|
dialects: vec![
|
||||||
|
Box::new(AnsiDialect {}),
|
||||||
|
Box::new(BigQueryDialect {}),
|
||||||
|
Box::new(ClickHouseDialect {}),
|
||||||
|
Box::new(GenericDialect {}),
|
||||||
|
// Box::new(HiveDialect {}),
|
||||||
|
Box::new(MsSqlDialect {}),
|
||||||
|
Box::new(MySqlDialect {}),
|
||||||
|
Box::new(PostgreSqlDialect {}),
|
||||||
|
Box::new(RedshiftSqlDialect {}),
|
||||||
|
Box::new(SnowflakeDialect {}),
|
||||||
|
Box::new(SQLiteDialect {}),
|
||||||
|
],
|
||||||
|
};
|
||||||
|
let sql = "SELECT 10e-20, 1e3, 1e+3, 1e3a, 1e, 0.5e2";
|
||||||
|
let mut select = dialects.parse_sql_statements(sql)?;
|
||||||
|
|
||||||||
|
// Take the last parsed statement; it must be a query.
let select = match select.pop().unwrap() {
|
||||||
|
Statement::Query(inner) => *inner,
|
||||||
|
_ => panic!("Expected Query"),
|
||||||
|
};
|
||||||
|
// The query body must be a plain SELECT.
let select = match *select.body {
|
||||||
|
SetExpr::Select(inner) => *inner,
|
||||||
|
_ => panic!("Expected SetExpr::Select"),
|
||||||
|
};
|
||||||
|
|
||||||||
|
assert_eq!(
|
||||||
|
&vec![
|
||||||
|
SelectItem::UnnamedExpr(Expr::Value(number("10e-20"))),
|
||||||
|
SelectItem::UnnamedExpr(Expr::Value(number("1e3"))),
|
||||||
|
SelectItem::UnnamedExpr(Expr::Value(number("1e+3"))),
|
||||||
|
// `1e3a` is expected as the number `1e3` with the alias `a`.
SelectItem::ExprWithAlias {
|
||||||
|
expr: Expr::Value(number("1e3")),
|
||||||
|
alias: Ident::new("a")
|
||||||
|
},
|
||||||
|
// `1e` has no exponent digits: expected as the number `1` with the alias `e`.
SelectItem::ExprWithAlias {
|
||||||
|
expr: Expr::Value(number("1")),
|
||||||
|
alias: Ident::new("e")
|
||||||
|
},
|
||||||
|
SelectItem::UnnamedExpr(Expr::Value(number("0.5e2"))),
|
||||||
|
],
|
||||||
|
&select.projection
|
||||||
|
);
|
||||||
|
|
||||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn parse_select_with_date_column_name() {
|
fn parse_select_with_date_column_name() {
|
||||||
let sql = "SELECT date";
|
let sql = "SELECT date";
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue