mirror of
https://github.com/apache/datafusion-sqlparser-rs.git
synced 2025-08-04 06:18:17 +00:00
Support bigquery CAST AS x [STRING|DATE] FORMAT
syntax (#978)
This commit is contained in:
parent
83cb734b3c
commit
c68e9775a2
6 changed files with 130 additions and 6 deletions
|
@ -322,6 +322,16 @@ impl fmt::Display for JsonOperator {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Options for `CAST` / `TRY_CAST`
|
||||||
|
/// BigQuery: <https://cloud.google.com/bigquery/docs/reference/standard-sql/format-elements#formatting_syntax>
|
||||||
|
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
|
||||||
|
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||||
|
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
|
||||||
|
pub enum CastFormat {
|
||||||
|
Value(Value),
|
||||||
|
ValueAtTimeZone(Value, Value),
|
||||||
|
}
|
||||||
|
|
||||||
/// An SQL expression of any type.
|
/// An SQL expression of any type.
|
||||||
///
|
///
|
||||||
/// The parser does not distinguish between expressions of different types
|
/// The parser does not distinguish between expressions of different types
|
||||||
|
@ -437,12 +447,18 @@ pub enum Expr {
|
||||||
Cast {
|
Cast {
|
||||||
expr: Box<Expr>,
|
expr: Box<Expr>,
|
||||||
data_type: DataType,
|
data_type: DataType,
|
||||||
|
// Optional CAST(string_expression AS type FORMAT format_string_expression) as used by BigQuery
|
||||||
|
// https://cloud.google.com/bigquery/docs/reference/standard-sql/format-elements#formatting_syntax
|
||||||
|
format: Option<CastFormat>,
|
||||||
},
|
},
|
||||||
/// TRY_CAST an expression to a different data type e.g. `TRY_CAST(foo AS VARCHAR(123))`
|
/// TRY_CAST an expression to a different data type e.g. `TRY_CAST(foo AS VARCHAR(123))`
|
||||||
// this differs from CAST in the choice of how to implement invalid conversions
|
// this differs from CAST in the choice of how to implement invalid conversions
|
||||||
TryCast {
|
TryCast {
|
||||||
expr: Box<Expr>,
|
expr: Box<Expr>,
|
||||||
data_type: DataType,
|
data_type: DataType,
|
||||||
|
// Optional `FORMAT` clause: TRY_CAST(string_expression AS type FORMAT format_string_expression), following the BigQuery CAST syntax
|
||||||
|
// https://cloud.google.com/bigquery/docs/reference/standard-sql/format-elements#formatting_syntax
|
||||||
|
format: Option<CastFormat>,
|
||||||
},
|
},
|
||||||
/// SAFE_CAST an expression to a different data type e.g. `SAFE_CAST(foo AS FLOAT64)`
|
/// SAFE_CAST an expression to a different data type e.g. `SAFE_CAST(foo AS FLOAT64)`
|
||||||
// only available for BigQuery: https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-and-operators#safe_casting
|
// only available for BigQuery: https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-and-operators#safe_casting
|
||||||
|
@ -450,6 +466,9 @@ pub enum Expr {
|
||||||
SafeCast {
|
SafeCast {
|
||||||
expr: Box<Expr>,
|
expr: Box<Expr>,
|
||||||
data_type: DataType,
|
data_type: DataType,
|
||||||
|
// Optional `FORMAT` clause: SAFE_CAST(string_expression AS type FORMAT format_string_expression), following the BigQuery CAST syntax
|
||||||
|
// https://cloud.google.com/bigquery/docs/reference/standard-sql/format-elements#formatting_syntax
|
||||||
|
format: Option<CastFormat>,
|
||||||
},
|
},
|
||||||
/// AT a timestamp to a different timezone e.g. `FROM_UNIXTIME(0) AT TIME ZONE 'UTC-06:00'`
|
/// AT a timestamp to a different timezone e.g. `FROM_UNIXTIME(0) AT TIME ZONE 'UTC-06:00'`
|
||||||
AtTimeZone {
|
AtTimeZone {
|
||||||
|
@ -597,6 +616,15 @@ pub enum Expr {
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl fmt::Display for CastFormat {
|
||||||
|
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||||
|
match self {
|
||||||
|
CastFormat::Value(v) => write!(f, "{v}"),
|
||||||
|
CastFormat::ValueAtTimeZone(v, tz) => write!(f, "{v} AT TIME ZONE {tz}"),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
impl fmt::Display for Expr {
|
impl fmt::Display for Expr {
|
||||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||||
match self {
|
match self {
|
||||||
|
@ -753,9 +781,39 @@ impl fmt::Display for Expr {
|
||||||
write!(f, "{op}{expr}")
|
write!(f, "{op}{expr}")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Expr::Cast { expr, data_type } => write!(f, "CAST({expr} AS {data_type})"),
|
Expr::Cast {
|
||||||
Expr::TryCast { expr, data_type } => write!(f, "TRY_CAST({expr} AS {data_type})"),
|
expr,
|
||||||
Expr::SafeCast { expr, data_type } => write!(f, "SAFE_CAST({expr} AS {data_type})"),
|
data_type,
|
||||||
|
format,
|
||||||
|
} => {
|
||||||
|
if let Some(format) = format {
|
||||||
|
write!(f, "CAST({expr} AS {data_type} FORMAT {format})")
|
||||||
|
} else {
|
||||||
|
write!(f, "CAST({expr} AS {data_type})")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Expr::TryCast {
|
||||||
|
expr,
|
||||||
|
data_type,
|
||||||
|
format,
|
||||||
|
} => {
|
||||||
|
if let Some(format) = format {
|
||||||
|
write!(f, "TRY_CAST({expr} AS {data_type} FORMAT {format})")
|
||||||
|
} else {
|
||||||
|
write!(f, "TRY_CAST({expr} AS {data_type})")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Expr::SafeCast {
|
||||||
|
expr,
|
||||||
|
data_type,
|
||||||
|
format,
|
||||||
|
} => {
|
||||||
|
if let Some(format) = format {
|
||||||
|
write!(f, "SAFE_CAST({expr} AS {data_type} FORMAT {format})")
|
||||||
|
} else {
|
||||||
|
write!(f, "SAFE_CAST({expr} AS {data_type})")
|
||||||
|
}
|
||||||
|
}
|
||||||
Expr::Extract { field, expr } => write!(f, "EXTRACT({field} FROM {expr})"),
|
Expr::Extract { field, expr } => write!(f, "EXTRACT({field} FROM {expr})"),
|
||||||
Expr::Ceil { expr, field } => {
|
Expr::Ceil { expr, field } => {
|
||||||
if field == &DateTimeField::NoDateTime {
|
if field == &DateTimeField::NoDateTime {
|
||||||
|
|
|
@ -1139,16 +1139,34 @@ impl<'a> Parser<'a> {
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn parse_optional_cast_format(&mut self) -> Result<Option<CastFormat>, ParserError> {
|
||||||
|
if self.parse_keyword(Keyword::FORMAT) {
|
||||||
|
let value = self.parse_value()?;
|
||||||
|
if self.parse_keywords(&[Keyword::AT, Keyword::TIME, Keyword::ZONE]) {
|
||||||
|
Ok(Some(CastFormat::ValueAtTimeZone(
|
||||||
|
value,
|
||||||
|
self.parse_value()?,
|
||||||
|
)))
|
||||||
|
} else {
|
||||||
|
Ok(Some(CastFormat::Value(value)))
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
Ok(None)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// Parse a SQL CAST function e.g. `CAST(expr AS FLOAT)`
|
/// Parse a SQL CAST function e.g. `CAST(expr AS FLOAT)`
|
||||||
pub fn parse_cast_expr(&mut self) -> Result<Expr, ParserError> {
|
pub fn parse_cast_expr(&mut self) -> Result<Expr, ParserError> {
|
||||||
self.expect_token(&Token::LParen)?;
|
self.expect_token(&Token::LParen)?;
|
||||||
let expr = self.parse_expr()?;
|
let expr = self.parse_expr()?;
|
||||||
self.expect_keyword(Keyword::AS)?;
|
self.expect_keyword(Keyword::AS)?;
|
||||||
let data_type = self.parse_data_type()?;
|
let data_type = self.parse_data_type()?;
|
||||||
|
let format = self.parse_optional_cast_format()?;
|
||||||
self.expect_token(&Token::RParen)?;
|
self.expect_token(&Token::RParen)?;
|
||||||
Ok(Expr::Cast {
|
Ok(Expr::Cast {
|
||||||
expr: Box::new(expr),
|
expr: Box::new(expr),
|
||||||
data_type,
|
data_type,
|
||||||
|
format,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1158,10 +1176,12 @@ impl<'a> Parser<'a> {
|
||||||
let expr = self.parse_expr()?;
|
let expr = self.parse_expr()?;
|
||||||
self.expect_keyword(Keyword::AS)?;
|
self.expect_keyword(Keyword::AS)?;
|
||||||
let data_type = self.parse_data_type()?;
|
let data_type = self.parse_data_type()?;
|
||||||
|
let format = self.parse_optional_cast_format()?;
|
||||||
self.expect_token(&Token::RParen)?;
|
self.expect_token(&Token::RParen)?;
|
||||||
Ok(Expr::TryCast {
|
Ok(Expr::TryCast {
|
||||||
expr: Box::new(expr),
|
expr: Box::new(expr),
|
||||||
data_type,
|
data_type,
|
||||||
|
format,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1171,10 +1191,12 @@ impl<'a> Parser<'a> {
|
||||||
let expr = self.parse_expr()?;
|
let expr = self.parse_expr()?;
|
||||||
self.expect_keyword(Keyword::AS)?;
|
self.expect_keyword(Keyword::AS)?;
|
||||||
let data_type = self.parse_data_type()?;
|
let data_type = self.parse_data_type()?;
|
||||||
|
let format = self.parse_optional_cast_format()?;
|
||||||
self.expect_token(&Token::RParen)?;
|
self.expect_token(&Token::RParen)?;
|
||||||
Ok(Expr::SafeCast {
|
Ok(Expr::SafeCast {
|
||||||
expr: Box::new(expr),
|
expr: Box::new(expr),
|
||||||
data_type,
|
data_type,
|
||||||
|
format,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2101,6 +2123,7 @@ impl<'a> Parser<'a> {
|
||||||
Ok(Expr::Cast {
|
Ok(Expr::Cast {
|
||||||
expr: Box::new(expr),
|
expr: Box::new(expr),
|
||||||
data_type: self.parse_data_type()?,
|
data_type: self.parse_data_type()?,
|
||||||
|
format: None,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -304,8 +304,39 @@ fn parse_trailing_comma() {
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn parse_cast_type() {
|
fn parse_cast_type() {
|
||||||
let sql = r#"SELECT SAFE_CAST(1 AS INT64)"#;
|
let sql = r"SELECT SAFE_CAST(1 AS INT64)";
|
||||||
bigquery().verified_only_select(sql);
|
bigquery_and_generic().verified_only_select(sql);
|
||||||
|
}
|
||||||
|
|
||||||
|
// `CAST(... AS DATE FORMAT '<pattern>')` is checked against both the BigQuery and generic dialects.
// NOTE(review): `verified_only_select` lives in the test utilities (not shown here); confirm its exact checks there.
#[test]
fn parse_cast_date_format() {
    bigquery_and_generic().verified_only_select(
        r"SELECT CAST(date_valid_from AS DATE FORMAT 'YYYY-MM-DD') AS date_valid_from FROM foo",
    );
}
|
||||||
|
|
||||||
|
// Casting a TIME literal to STRING with a `FORMAT` clause.
#[test]
fn parse_cast_time_format() {
    let query = r"SELECT CAST(TIME '21:30:00' AS STRING FORMAT 'PM') AS date_time_to_string";
    let dialects = bigquery_and_generic();
    dialects.verified_only_select(query);
}
|
||||||
|
|
||||||
|
// Exercises the `FORMAT <value> AT TIME ZONE <value>` form of the clause.
#[test]
fn parse_cast_timestamp_format_tz() {
    bigquery_and_generic().verified_only_select(
        r"SELECT CAST(TIMESTAMP '2008-12-25 00:00:00+00:00' AS STRING FORMAT 'TZH' AT TIME ZONE 'Asia/Kolkata') AS date_time_to_string",
    );
}
|
||||||
|
|
||||||
|
// Casting a string literal to BYTES with a `FORMAT` clause.
#[test]
fn parse_cast_string_to_bytes_format() {
    let query = r"SELECT CAST('Hello' AS BYTES FORMAT 'ASCII') AS string_to_bytes";
    let dialects = bigquery_and_generic();
    dialects.verified_only_select(query);
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn parse_cast_bytes_to_string_format() {
|
||||||
|
let sql = r"SELECT CAST(B'\x48\x65\x6c\x6c\x6f' AS STRING FORMAT 'ASCII') AS bytes_to_string";
|
||||||
|
bigquery_and_generic().verified_only_select(sql);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
|
|
@ -1934,6 +1934,7 @@ fn parse_cast() {
|
||||||
&Expr::Cast {
|
&Expr::Cast {
|
||||||
expr: Box::new(Expr::Identifier(Ident::new("id"))),
|
expr: Box::new(Expr::Identifier(Ident::new("id"))),
|
||||||
data_type: DataType::BigInt(None),
|
data_type: DataType::BigInt(None),
|
||||||
|
format: None,
|
||||||
},
|
},
|
||||||
expr_from_projection(only(&select.projection))
|
expr_from_projection(only(&select.projection))
|
||||||
);
|
);
|
||||||
|
@ -1944,6 +1945,7 @@ fn parse_cast() {
|
||||||
&Expr::Cast {
|
&Expr::Cast {
|
||||||
expr: Box::new(Expr::Identifier(Ident::new("id"))),
|
expr: Box::new(Expr::Identifier(Ident::new("id"))),
|
||||||
data_type: DataType::TinyInt(None),
|
data_type: DataType::TinyInt(None),
|
||||||
|
format: None,
|
||||||
},
|
},
|
||||||
expr_from_projection(only(&select.projection))
|
expr_from_projection(only(&select.projection))
|
||||||
);
|
);
|
||||||
|
@ -1970,6 +1972,7 @@ fn parse_cast() {
|
||||||
&Expr::Cast {
|
&Expr::Cast {
|
||||||
expr: Box::new(Expr::Identifier(Ident::new("id"))),
|
expr: Box::new(Expr::Identifier(Ident::new("id"))),
|
||||||
data_type: DataType::Nvarchar(Some(50)),
|
data_type: DataType::Nvarchar(Some(50)),
|
||||||
|
format: None,
|
||||||
},
|
},
|
||||||
expr_from_projection(only(&select.projection))
|
expr_from_projection(only(&select.projection))
|
||||||
);
|
);
|
||||||
|
@ -1980,6 +1983,7 @@ fn parse_cast() {
|
||||||
&Expr::Cast {
|
&Expr::Cast {
|
||||||
expr: Box::new(Expr::Identifier(Ident::new("id"))),
|
expr: Box::new(Expr::Identifier(Ident::new("id"))),
|
||||||
data_type: DataType::Clob(None),
|
data_type: DataType::Clob(None),
|
||||||
|
format: None,
|
||||||
},
|
},
|
||||||
expr_from_projection(only(&select.projection))
|
expr_from_projection(only(&select.projection))
|
||||||
);
|
);
|
||||||
|
@ -1990,6 +1994,7 @@ fn parse_cast() {
|
||||||
&Expr::Cast {
|
&Expr::Cast {
|
||||||
expr: Box::new(Expr::Identifier(Ident::new("id"))),
|
expr: Box::new(Expr::Identifier(Ident::new("id"))),
|
||||||
data_type: DataType::Clob(Some(50)),
|
data_type: DataType::Clob(Some(50)),
|
||||||
|
format: None,
|
||||||
},
|
},
|
||||||
expr_from_projection(only(&select.projection))
|
expr_from_projection(only(&select.projection))
|
||||||
);
|
);
|
||||||
|
@ -2000,6 +2005,7 @@ fn parse_cast() {
|
||||||
&Expr::Cast {
|
&Expr::Cast {
|
||||||
expr: Box::new(Expr::Identifier(Ident::new("id"))),
|
expr: Box::new(Expr::Identifier(Ident::new("id"))),
|
||||||
data_type: DataType::Binary(Some(50)),
|
data_type: DataType::Binary(Some(50)),
|
||||||
|
format: None,
|
||||||
},
|
},
|
||||||
expr_from_projection(only(&select.projection))
|
expr_from_projection(only(&select.projection))
|
||||||
);
|
);
|
||||||
|
@ -2010,6 +2016,7 @@ fn parse_cast() {
|
||||||
&Expr::Cast {
|
&Expr::Cast {
|
||||||
expr: Box::new(Expr::Identifier(Ident::new("id"))),
|
expr: Box::new(Expr::Identifier(Ident::new("id"))),
|
||||||
data_type: DataType::Varbinary(Some(50)),
|
data_type: DataType::Varbinary(Some(50)),
|
||||||
|
format: None,
|
||||||
},
|
},
|
||||||
expr_from_projection(only(&select.projection))
|
expr_from_projection(only(&select.projection))
|
||||||
);
|
);
|
||||||
|
@ -2020,6 +2027,7 @@ fn parse_cast() {
|
||||||
&Expr::Cast {
|
&Expr::Cast {
|
||||||
expr: Box::new(Expr::Identifier(Ident::new("id"))),
|
expr: Box::new(Expr::Identifier(Ident::new("id"))),
|
||||||
data_type: DataType::Blob(None),
|
data_type: DataType::Blob(None),
|
||||||
|
format: None,
|
||||||
},
|
},
|
||||||
expr_from_projection(only(&select.projection))
|
expr_from_projection(only(&select.projection))
|
||||||
);
|
);
|
||||||
|
@ -2030,6 +2038,7 @@ fn parse_cast() {
|
||||||
&Expr::Cast {
|
&Expr::Cast {
|
||||||
expr: Box::new(Expr::Identifier(Ident::new("id"))),
|
expr: Box::new(Expr::Identifier(Ident::new("id"))),
|
||||||
data_type: DataType::Blob(Some(50)),
|
data_type: DataType::Blob(Some(50)),
|
||||||
|
format: None,
|
||||||
},
|
},
|
||||||
expr_from_projection(only(&select.projection))
|
expr_from_projection(only(&select.projection))
|
||||||
);
|
);
|
||||||
|
@ -2043,6 +2052,7 @@ fn parse_try_cast() {
|
||||||
&Expr::TryCast {
|
&Expr::TryCast {
|
||||||
expr: Box::new(Expr::Identifier(Ident::new("id"))),
|
expr: Box::new(Expr::Identifier(Ident::new("id"))),
|
||||||
data_type: DataType::BigInt(None),
|
data_type: DataType::BigInt(None),
|
||||||
|
format: None,
|
||||||
},
|
},
|
||||||
expr_from_projection(only(&select.projection))
|
expr_from_projection(only(&select.projection))
|
||||||
);
|
);
|
||||||
|
|
|
@ -1782,7 +1782,8 @@ fn parse_array_index_expr() {
|
||||||
})),
|
})),
|
||||||
data_type: DataType::Array(Some(Box::new(DataType::Array(Some(Box::new(
|
data_type: DataType::Array(Some(Box::new(DataType::Array(Some(Box::new(
|
||||||
DataType::Int(None)
|
DataType::Int(None)
|
||||||
))))))
|
)))))),
|
||||||
|
format: None,
|
||||||
}))),
|
}))),
|
||||||
indexes: vec![num[1].clone(), num[2].clone()],
|
indexes: vec![num[1].clone(), num[2].clone()],
|
||||||
},
|
},
|
||||||
|
|
|
@ -167,6 +167,7 @@ fn parse_array() {
|
||||||
&Expr::Cast {
|
&Expr::Cast {
|
||||||
expr: Box::new(Expr::Identifier(Ident::new("a"))),
|
expr: Box::new(Expr::Identifier(Ident::new("a"))),
|
||||||
data_type: DataType::Array(None),
|
data_type: DataType::Array(None),
|
||||||
|
format: None,
|
||||||
},
|
},
|
||||||
expr_from_projection(only(&select.projection))
|
expr_from_projection(only(&select.projection))
|
||||||
);
|
);
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue