mirror of
https://github.com/apache/datafusion-sqlparser-rs.git
synced 2025-09-03 04:37:21 +00:00
Merge branch 'master' into not
This commit is contained in:
commit
777fd4c2ee
8 changed files with 587 additions and 132 deletions
302
src/sqlparser.rs
302
src/sqlparser.rs
|
@ -95,15 +95,7 @@ impl Parser {
|
|||
"INSERT" => Ok(self.parse_insert()?),
|
||||
"ALTER" => Ok(self.parse_alter()?),
|
||||
"COPY" => Ok(self.parse_copy()?),
|
||||
"TRUE" => {
|
||||
self.prev_token();
|
||||
self.parse_sql_value()
|
||||
}
|
||||
"FALSE" => {
|
||||
self.prev_token();
|
||||
self.parse_sql_value()
|
||||
}
|
||||
"NULL" => {
|
||||
"TRUE" | "FALSE" | "NULL" => {
|
||||
self.prev_token();
|
||||
self.parse_sql_value()
|
||||
}
|
||||
|
@ -120,7 +112,7 @@ impl Parser {
|
|||
self.parse_cast_expression()
|
||||
} else {
|
||||
match self.peek_token() {
|
||||
Some(Token::LParen) => self.parse_function_or_pg_cast(&id),
|
||||
Some(Token::LParen) => self.parse_function(&id),
|
||||
Some(Token::Period) => {
|
||||
let mut id_parts: Vec<String> = vec![id];
|
||||
while self.peek_token() == Some(Token::Period) {
|
||||
|
@ -140,19 +132,10 @@ impl Parser {
|
|||
}
|
||||
}
|
||||
}
|
||||
Token::Number(_) => {
|
||||
self.prev_token();
|
||||
self.parse_sql_value()
|
||||
}
|
||||
Token::String(_) => {
|
||||
self.prev_token();
|
||||
self.parse_sql_value()
|
||||
}
|
||||
Token::SingleQuotedString(_) => {
|
||||
self.prev_token();
|
||||
self.parse_sql_value()
|
||||
}
|
||||
Token::DoubleQuotedString(_) => {
|
||||
Token::Number(_)
|
||||
| Token::String(_)
|
||||
| Token::SingleQuotedString(_)
|
||||
| Token::DoubleQuotedString(_) => {
|
||||
self.prev_token();
|
||||
self.parse_sql_value()
|
||||
}
|
||||
|
@ -172,15 +155,6 @@ impl Parser {
|
|||
}
|
||||
}
|
||||
|
||||
pub fn parse_function_or_pg_cast(&mut self, id: &str) -> Result<ASTNode, ParserError> {
|
||||
let func = self.parse_function(&id)?;
|
||||
if let Some(Token::DoubleColon) = self.peek_token() {
|
||||
self.parse_pg_cast(func)
|
||||
} else {
|
||||
Ok(func)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn parse_function(&mut self, id: &str) -> Result<ASTNode, ParserError> {
|
||||
self.consume_token(&Token::LParen)?;
|
||||
if let Ok(true) = self.consume_token(&Token::RParen) {
|
||||
|
@ -245,25 +219,13 @@ impl Parser {
|
|||
})
|
||||
}
|
||||
|
||||
/// Parse a postgresql casting style which is in the form or expr::datatype
|
||||
/// Parse a postgresql casting style which is in the form of `expr::datatype`
|
||||
pub fn parse_pg_cast(&mut self, expr: ASTNode) -> Result<ASTNode, ParserError> {
|
||||
let _ = self.consume_token(&Token::DoubleColon)?;
|
||||
let datatype = if let Ok(data_type) = self.parse_data_type() {
|
||||
Ok(data_type)
|
||||
} else if let Ok(table_name) = self.parse_tablename() {
|
||||
Ok(SQLType::Custom(table_name))
|
||||
} else {
|
||||
parser_err!("Expecting datatype or identifier")
|
||||
};
|
||||
let pg_cast = ASTNode::SQLCast {
|
||||
Ok(ASTNode::SQLCast {
|
||||
expr: Box::new(expr),
|
||||
data_type: datatype?,
|
||||
};
|
||||
if let Some(Token::DoubleColon) = self.peek_token() {
|
||||
self.parse_pg_cast(pg_cast)
|
||||
} else {
|
||||
Ok(pg_cast)
|
||||
}
|
||||
data_type: self.parse_data_type()?,
|
||||
})
|
||||
}
|
||||
|
||||
/// Parse an expression infix (typically an operator)
|
||||
|
@ -376,11 +338,17 @@ impl Parser {
|
|||
}
|
||||
}
|
||||
|
||||
/// Return first non-whitespace token that has not yet been processed
|
||||
pub fn peek_token(&self) -> Option<Token> {
|
||||
self.peek_token_skip_whitespace()
|
||||
if let Some(n) = self.til_non_whitespace() {
|
||||
self.token_at(n)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
pub fn skip_whitespace(&mut self) -> Option<Token> {
|
||||
/// Get the next token skipping whitespace and increment the token index
|
||||
pub fn next_token(&mut self) -> Option<Token> {
|
||||
loop {
|
||||
match self.next_token_no_skip() {
|
||||
Some(Token::Whitespace(_)) => {
|
||||
|
@ -420,19 +388,6 @@ impl Parser {
|
|||
}
|
||||
}
|
||||
|
||||
pub fn peek_token_skip_whitespace(&self) -> Option<Token> {
|
||||
if let Some(n) = self.til_non_whitespace() {
|
||||
self.token_at(n)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the next token skipping whitespace and increment the token index
|
||||
pub fn next_token(&mut self) -> Option<Token> {
|
||||
self.skip_whitespace()
|
||||
}
|
||||
|
||||
pub fn next_token_no_skip(&mut self) -> Option<Token> {
|
||||
if self.index < self.tokens.len() {
|
||||
self.index = self.index + 1;
|
||||
|
@ -442,9 +397,9 @@ impl Parser {
|
|||
}
|
||||
}
|
||||
|
||||
/// if prev token is whitespace skip it
|
||||
/// if prev token is not whitespace skipt it as well
|
||||
pub fn prev_token_skip_whitespace(&mut self) -> Option<Token> {
|
||||
/// Push back the last one non-whitespace token
|
||||
pub fn prev_token(&mut self) -> Option<Token> {
|
||||
// TODO: returned value is unused (available via peek_token)
|
||||
loop {
|
||||
match self.prev_token_no_skip() {
|
||||
Some(Token::Whitespace(_)) => {
|
||||
|
@ -457,12 +412,8 @@ impl Parser {
|
|||
}
|
||||
}
|
||||
|
||||
pub fn prev_token(&mut self) -> Option<Token> {
|
||||
self.prev_token_skip_whitespace()
|
||||
}
|
||||
|
||||
/// Get the previous token and decrement the token index
|
||||
pub fn prev_token_no_skip(&mut self) -> Option<Token> {
|
||||
fn prev_token_no_skip(&mut self) -> Option<Token> {
|
||||
if self.index > 0 {
|
||||
self.index = self.index - 1;
|
||||
Some(self.tokens[self.index].clone())
|
||||
|
@ -500,6 +451,18 @@ impl Parser {
|
|||
true
|
||||
}
|
||||
|
||||
pub fn expect_keyword(&mut self, expected: &'static str) -> Result<(), ParserError> {
|
||||
if self.parse_keyword(expected) {
|
||||
Ok(())
|
||||
} else {
|
||||
parser_err!(format!(
|
||||
"Expected keyword {}, found {:?}",
|
||||
expected,
|
||||
self.peek_token()
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
//TODO: this function is inconsistent and sometimes returns bool and sometimes fails
|
||||
|
||||
/// Consume the next token if it matches the expected token, otherwise return an error
|
||||
|
@ -745,30 +708,13 @@ impl Parser {
|
|||
"NULL" => Ok(Value::Null),
|
||||
_ => return parser_err!(format!("No value parser for keyword {}", k)),
|
||||
},
|
||||
//TODO: parse the timestamp here
|
||||
//TODO: parse the timestamp here (see parse_timestamp_value())
|
||||
Token::Number(ref n) if n.contains(".") => match n.parse::<f64>() {
|
||||
Ok(n) => Ok(Value::Double(n)),
|
||||
Err(e) => {
|
||||
let index = self.index;
|
||||
self.prev_token();
|
||||
if let Ok(timestamp) = self.parse_timestamp_value() {
|
||||
println!("timstamp: {:?}", timestamp);
|
||||
Ok(timestamp)
|
||||
} else {
|
||||
self.index = index;
|
||||
parser_err!(format!("Could not parse '{}' as i64: {}", n, e))
|
||||
}
|
||||
}
|
||||
Err(e) => parser_err!(format!("Could not parse '{}' as i64: {}", n, e)),
|
||||
},
|
||||
Token::Number(ref n) => match n.parse::<i64>() {
|
||||
Ok(n) => {
|
||||
// if let Some(Token::Minus) = self.peek_token() {
|
||||
// self.prev_token();
|
||||
// self.parse_timestamp_value()
|
||||
// } else {
|
||||
Ok(Value::Long(n))
|
||||
// }
|
||||
}
|
||||
Ok(n) => Ok(Value::Long(n)),
|
||||
Err(e) => parser_err!(format!("Could not parse '{}' as i64: {}", n, e)),
|
||||
},
|
||||
Token::Identifier(id) => Ok(Value::String(id.to_string())),
|
||||
|
@ -796,13 +742,13 @@ impl Parser {
|
|||
}
|
||||
}
|
||||
|
||||
/// Parse a literal integer/long
|
||||
/// Parse a literal double
|
||||
pub fn parse_literal_double(&mut self) -> Result<f64, ParserError> {
|
||||
match self.next_token() {
|
||||
Some(Token::Number(s)) => s.parse::<f64>().map_err(|e| {
|
||||
ParserError::ParserError(format!("Could not parse '{}' as i64: {}", s, e))
|
||||
ParserError::ParserError(format!("Could not parse '{}' as f64: {}", s, e))
|
||||
}),
|
||||
other => parser_err!(format!("Expected literal int, found {:?}", other)),
|
||||
other => parser_err!(format!("Expected literal number, found {:?}", other)),
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -883,19 +829,17 @@ impl Parser {
|
|||
self.consume_token(&Token::Colon)?;
|
||||
let min = self.parse_literal_int()?;
|
||||
self.consume_token(&Token::Colon)?;
|
||||
// On one hand, the SQL specs defines <seconds fraction> ::= <unsigned integer>,
|
||||
// so it would be more correct to parse it as such
|
||||
let sec = self.parse_literal_double()?;
|
||||
let _ = (sec.fract() * 1000.0).round();
|
||||
if let Ok(true) = self.consume_token(&Token::Period) {
|
||||
let ms = self.parse_literal_int()?;
|
||||
Ok(NaiveTime::from_hms_milli(
|
||||
hour as u32,
|
||||
min as u32,
|
||||
sec as u32,
|
||||
ms as u32,
|
||||
))
|
||||
} else {
|
||||
Ok(NaiveTime::from_hms(hour as u32, min as u32, sec as u32))
|
||||
}
|
||||
// On the other, chrono only supports nanoseconds, which should(?) fit in seconds-as-f64...
|
||||
let nanos = (sec.fract() * 1_000_000_000.0).round();
|
||||
Ok(NaiveTime::from_hms_nano(
|
||||
hour as u32,
|
||||
min as u32,
|
||||
sec as u32,
|
||||
nanos as u32,
|
||||
))
|
||||
}
|
||||
|
||||
/// Parse a SQL datatype (in the context of a CREATE TABLE statement for example)
|
||||
|
@ -987,13 +931,10 @@ impl Parser {
|
|||
}
|
||||
_ => parser_err!(format!("Invalid data type '{:?}'", k)),
|
||||
},
|
||||
Some(Token::Identifier(id)) => {
|
||||
if let Ok(true) = self.consume_token(&Token::Period) {
|
||||
let ids = self.parse_tablename()?;
|
||||
Ok(SQLType::Custom(format!("{}.{}", id, ids)))
|
||||
} else {
|
||||
Ok(SQLType::Custom(id))
|
||||
}
|
||||
Some(Token::Identifier(_)) => {
|
||||
self.prev_token();
|
||||
let type_name = self.parse_tablename()?; // TODO: this actually reads a possibly schema-qualified name of a (custom) type
|
||||
Ok(SQLType::Custom(type_name))
|
||||
}
|
||||
other => parser_err!(format!("Invalid data type: '{:?}'", other)),
|
||||
}
|
||||
|
@ -1119,11 +1060,12 @@ impl Parser {
|
|||
pub fn parse_select(&mut self) -> Result<ASTNode, ParserError> {
|
||||
let projection = self.parse_expr_list()?;
|
||||
|
||||
let relation: Option<Box<ASTNode>> = if self.parse_keyword("FROM") {
|
||||
//TODO: add support for JOIN
|
||||
Some(Box::new(self.parse_expr(0)?))
|
||||
let (relation, joins): (Option<Box<ASTNode>>, Vec<Join>) = if self.parse_keyword("FROM") {
|
||||
let relation = Some(Box::new(self.parse_expr(0)?));
|
||||
let joins = self.parse_joins()?;
|
||||
(relation, joins)
|
||||
} else {
|
||||
None
|
||||
(None, vec![])
|
||||
};
|
||||
|
||||
let selection = if self.parse_keyword("WHERE") {
|
||||
|
@ -1169,6 +1111,7 @@ impl Parser {
|
|||
projection,
|
||||
selection,
|
||||
relation,
|
||||
joins,
|
||||
limit,
|
||||
order_by,
|
||||
group_by,
|
||||
|
@ -1177,6 +1120,131 @@ impl Parser {
|
|||
}
|
||||
}
|
||||
|
||||
fn parse_join_constraint(&mut self, natural: bool) -> Result<JoinConstraint, ParserError> {
|
||||
if natural {
|
||||
Ok(JoinConstraint::Natural)
|
||||
} else if self.parse_keyword("ON") {
|
||||
let constraint = self.parse_expr(0)?;
|
||||
Ok(JoinConstraint::On(constraint))
|
||||
} else if self.parse_keyword("USING") {
|
||||
if self.consume_token(&Token::LParen)? {
|
||||
let attributes = self
|
||||
.parse_expr_list()?
|
||||
.into_iter()
|
||||
.map(|ast_node| match ast_node {
|
||||
ASTNode::SQLIdentifier(ident) => Ok(ident),
|
||||
unexpected => {
|
||||
parser_err!(format!("Expected identifier, found {:?}", unexpected))
|
||||
}
|
||||
})
|
||||
.collect::<Result<Vec<String>, ParserError>>()?;
|
||||
|
||||
if self.consume_token(&Token::RParen)? {
|
||||
Ok(JoinConstraint::Using(attributes))
|
||||
} else {
|
||||
parser_err!(format!("Expected token ')', found {:?}", self.peek_token()))
|
||||
}
|
||||
} else {
|
||||
parser_err!(format!("Expected token '(', found {:?}", self.peek_token()))
|
||||
}
|
||||
} else {
|
||||
parser_err!(format!(
|
||||
"Unexpected token after JOIN: {:?}",
|
||||
self.peek_token()
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_joins(&mut self) -> Result<Vec<Join>, ParserError> {
|
||||
let mut joins = vec![];
|
||||
loop {
|
||||
let natural = match &self.peek_token() {
|
||||
Some(Token::Comma) => {
|
||||
self.next_token();
|
||||
let relation = self.parse_expr(0)?;
|
||||
let join = Join {
|
||||
relation,
|
||||
join_operator: JoinOperator::Implicit,
|
||||
};
|
||||
joins.push(join);
|
||||
continue;
|
||||
}
|
||||
Some(Token::Keyword(kw)) if kw == "CROSS" => {
|
||||
self.next_token();
|
||||
self.expect_keyword("JOIN")?;
|
||||
let relation = self.parse_expr(0)?;
|
||||
let join = Join {
|
||||
relation,
|
||||
join_operator: JoinOperator::Cross,
|
||||
};
|
||||
joins.push(join);
|
||||
continue;
|
||||
}
|
||||
Some(Token::Keyword(kw)) if kw == "NATURAL" => {
|
||||
self.next_token();
|
||||
true
|
||||
}
|
||||
Some(_) => false,
|
||||
None => return Ok(joins),
|
||||
};
|
||||
|
||||
let join = match &self.peek_token() {
|
||||
Some(Token::Keyword(kw)) if kw == "INNER" => {
|
||||
self.next_token();
|
||||
self.expect_keyword("JOIN")?;
|
||||
Join {
|
||||
relation: self.parse_expr(0)?,
|
||||
join_operator: JoinOperator::Inner(self.parse_join_constraint(natural)?),
|
||||
}
|
||||
}
|
||||
Some(Token::Keyword(kw)) if kw == "JOIN" => {
|
||||
self.next_token();
|
||||
Join {
|
||||
relation: self.parse_expr(0)?,
|
||||
join_operator: JoinOperator::Inner(self.parse_join_constraint(natural)?),
|
||||
}
|
||||
}
|
||||
Some(Token::Keyword(kw)) if kw == "LEFT" => {
|
||||
self.next_token();
|
||||
self.parse_keyword("OUTER");
|
||||
self.expect_keyword("JOIN")?;
|
||||
Join {
|
||||
relation: self.parse_expr(0)?,
|
||||
join_operator: JoinOperator::LeftOuter(
|
||||
self.parse_join_constraint(natural)?,
|
||||
),
|
||||
}
|
||||
}
|
||||
Some(Token::Keyword(kw)) if kw == "RIGHT" => {
|
||||
self.next_token();
|
||||
self.parse_keyword("OUTER");
|
||||
self.expect_keyword("JOIN")?;
|
||||
Join {
|
||||
relation: self.parse_expr(0)?,
|
||||
join_operator: JoinOperator::RightOuter(
|
||||
self.parse_join_constraint(natural)?,
|
||||
),
|
||||
}
|
||||
}
|
||||
Some(Token::Keyword(kw)) if kw == "FULL" => {
|
||||
self.next_token();
|
||||
self.parse_keyword("OUTER");
|
||||
self.expect_keyword("JOIN")?;
|
||||
Join {
|
||||
relation: self.parse_expr(0)?,
|
||||
join_operator: JoinOperator::FullOuter(
|
||||
self.parse_join_constraint(natural)?,
|
||||
),
|
||||
}
|
||||
}
|
||||
_ => break,
|
||||
};
|
||||
joins.push(join);
|
||||
}
|
||||
|
||||
Ok(joins)
|
||||
}
|
||||
|
||||
/// Parse an INSERT statement
|
||||
pub fn parse_insert(&mut self) -> Result<ASTNode, ParserError> {
|
||||
self.parse_keyword("INTO");
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue