Support bigquery CAST AS x [STRING|DATE] FORMAT syntax (#978)

This commit is contained in:
Lukasz Stefaniak 2023-10-20 20:33:12 +02:00 committed by GitHub
parent 83cb734b3c
commit c68e9775a2
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 130 additions and 6 deletions

View file

@ -322,6 +322,16 @@ impl fmt::Display for JsonOperator {
} }
} }
/// Operand of the optional `FORMAT` clause accepted by `CAST` / `TRY_CAST` / `SAFE_CAST`,
/// e.g. `CAST(x AS STRING FORMAT 'TZH' AT TIME ZONE 'Asia/Kolkata')`.
///
/// BigQuery: <https://cloud.google.com/bigquery/docs/reference/standard-sql/format-elements#formatting_syntax>
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
pub enum CastFormat {
/// `FORMAT <value>`: the format value on its own.
Value(Value),
/// `FORMAT <value> AT TIME ZONE <value>`: the format value followed by a time zone value.
ValueAtTimeZone(Value, Value),
}
/// An SQL expression of any type. /// An SQL expression of any type.
/// ///
/// The parser does not distinguish between expressions of different types /// The parser does not distinguish between expressions of different types
@ -437,12 +447,18 @@ pub enum Expr {
Cast { Cast {
expr: Box<Expr>, expr: Box<Expr>,
data_type: DataType, data_type: DataType,
// Optional CAST(string_expression AS type FORMAT format_string_expression) as used by BigQuery
// https://cloud.google.com/bigquery/docs/reference/standard-sql/format-elements#formatting_syntax
format: Option<CastFormat>,
}, },
/// TRY_CAST an expression to a different data type e.g. `TRY_CAST(foo AS VARCHAR(123))` /// TRY_CAST an expression to a different data type e.g. `TRY_CAST(foo AS VARCHAR(123))`
// this differs from CAST in the choice of how to implement invalid conversions // this differs from CAST in the choice of how to implement invalid conversions
TryCast { TryCast {
expr: Box<Expr>, expr: Box<Expr>,
data_type: DataType, data_type: DataType,
// Optional FORMAT clause: TRY_CAST(string_expression AS type FORMAT format_string_expression), following BigQuery's CAST syntax
// https://cloud.google.com/bigquery/docs/reference/standard-sql/format-elements#formatting_syntax
format: Option<CastFormat>,
}, },
/// SAFE_CAST an expression to a different data type e.g. `SAFE_CAST(foo AS FLOAT64)` /// SAFE_CAST an expression to a different data type e.g. `SAFE_CAST(foo AS FLOAT64)`
// only available for BigQuery: https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-and-operators#safe_casting // only available for BigQuery: https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-and-operators#safe_casting
@ -450,6 +466,9 @@ pub enum Expr {
SafeCast { SafeCast {
expr: Box<Expr>, expr: Box<Expr>,
data_type: DataType, data_type: DataType,
// Optional FORMAT clause: SAFE_CAST(string_expression AS type FORMAT format_string_expression) as used by BigQuery
// https://cloud.google.com/bigquery/docs/reference/standard-sql/format-elements#formatting_syntax
format: Option<CastFormat>,
}, },
/// AT a timestamp to a different timezone e.g. `FROM_UNIXTIME(0) AT TIME ZONE 'UTC-06:00'` /// AT a timestamp to a different timezone e.g. `FROM_UNIXTIME(0) AT TIME ZONE 'UTC-06:00'`
AtTimeZone { AtTimeZone {
@ -597,6 +616,15 @@ pub enum Expr {
}, },
} }
impl fmt::Display for CastFormat {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
CastFormat::Value(v) => write!(f, "{v}"),
CastFormat::ValueAtTimeZone(v, tz) => write!(f, "{v} AT TIME ZONE {tz}"),
}
}
}
impl fmt::Display for Expr { impl fmt::Display for Expr {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self { match self {
@ -753,9 +781,39 @@ impl fmt::Display for Expr {
write!(f, "{op}{expr}") write!(f, "{op}{expr}")
} }
} }
Expr::Cast { expr, data_type } => write!(f, "CAST({expr} AS {data_type})"), Expr::Cast {
Expr::TryCast { expr, data_type } => write!(f, "TRY_CAST({expr} AS {data_type})"), expr,
Expr::SafeCast { expr, data_type } => write!(f, "SAFE_CAST({expr} AS {data_type})"), data_type,
format,
} => {
if let Some(format) = format {
write!(f, "CAST({expr} AS {data_type} FORMAT {format})")
} else {
write!(f, "CAST({expr} AS {data_type})")
}
}
Expr::TryCast {
expr,
data_type,
format,
} => {
if let Some(format) = format {
write!(f, "TRY_CAST({expr} AS {data_type} FORMAT {format})")
} else {
write!(f, "TRY_CAST({expr} AS {data_type})")
}
}
Expr::SafeCast {
expr,
data_type,
format,
} => {
if let Some(format) = format {
write!(f, "SAFE_CAST({expr} AS {data_type} FORMAT {format})")
} else {
write!(f, "SAFE_CAST({expr} AS {data_type})")
}
}
Expr::Extract { field, expr } => write!(f, "EXTRACT({field} FROM {expr})"), Expr::Extract { field, expr } => write!(f, "EXTRACT({field} FROM {expr})"),
Expr::Ceil { expr, field } => { Expr::Ceil { expr, field } => {
if field == &DateTimeField::NoDateTime { if field == &DateTimeField::NoDateTime {

View file

@ -1139,16 +1139,34 @@ impl<'a> Parser<'a> {
}) })
} }
/// Parse an optional `FORMAT <value> [AT TIME ZONE <value>]` clause.
///
/// Returns `Ok(None)` when the next keyword is not `FORMAT`. Otherwise returns
/// the parsed [`CastFormat`], propagating any error from parsing either value.
pub fn parse_optional_cast_format(&mut self) -> Result<Option<CastFormat>, ParserError> {
    // No FORMAT keyword: the clause is simply absent.
    if !self.parse_keyword(Keyword::FORMAT) {
        return Ok(None);
    }

    let pattern = self.parse_value()?;
    let cast_format = if self.parse_keywords(&[Keyword::AT, Keyword::TIME, Keyword::ZONE]) {
        let time_zone = self.parse_value()?;
        CastFormat::ValueAtTimeZone(pattern, time_zone)
    } else {
        CastFormat::Value(pattern)
    };
    Ok(Some(cast_format))
}
/// Parse a SQL CAST function e.g. `CAST(expr AS FLOAT)` /// Parse a SQL CAST function e.g. `CAST(expr AS FLOAT)`
pub fn parse_cast_expr(&mut self) -> Result<Expr, ParserError> { pub fn parse_cast_expr(&mut self) -> Result<Expr, ParserError> {
self.expect_token(&Token::LParen)?; self.expect_token(&Token::LParen)?;
let expr = self.parse_expr()?; let expr = self.parse_expr()?;
self.expect_keyword(Keyword::AS)?; self.expect_keyword(Keyword::AS)?;
let data_type = self.parse_data_type()?; let data_type = self.parse_data_type()?;
let format = self.parse_optional_cast_format()?;
self.expect_token(&Token::RParen)?; self.expect_token(&Token::RParen)?;
Ok(Expr::Cast { Ok(Expr::Cast {
expr: Box::new(expr), expr: Box::new(expr),
data_type, data_type,
format,
}) })
} }
@ -1158,10 +1176,12 @@ impl<'a> Parser<'a> {
let expr = self.parse_expr()?; let expr = self.parse_expr()?;
self.expect_keyword(Keyword::AS)?; self.expect_keyword(Keyword::AS)?;
let data_type = self.parse_data_type()?; let data_type = self.parse_data_type()?;
let format = self.parse_optional_cast_format()?;
self.expect_token(&Token::RParen)?; self.expect_token(&Token::RParen)?;
Ok(Expr::TryCast { Ok(Expr::TryCast {
expr: Box::new(expr), expr: Box::new(expr),
data_type, data_type,
format,
}) })
} }
@ -1171,10 +1191,12 @@ impl<'a> Parser<'a> {
let expr = self.parse_expr()?; let expr = self.parse_expr()?;
self.expect_keyword(Keyword::AS)?; self.expect_keyword(Keyword::AS)?;
let data_type = self.parse_data_type()?; let data_type = self.parse_data_type()?;
let format = self.parse_optional_cast_format()?;
self.expect_token(&Token::RParen)?; self.expect_token(&Token::RParen)?;
Ok(Expr::SafeCast { Ok(Expr::SafeCast {
expr: Box::new(expr), expr: Box::new(expr),
data_type, data_type,
format,
}) })
} }
@ -2101,6 +2123,7 @@ impl<'a> Parser<'a> {
Ok(Expr::Cast { Ok(Expr::Cast {
expr: Box::new(expr), expr: Box::new(expr),
data_type: self.parse_data_type()?, data_type: self.parse_data_type()?,
format: None,
}) })
} }

View file

@ -304,8 +304,39 @@ fn parse_trailing_comma() {
#[test] #[test]
fn parse_cast_type() { fn parse_cast_type() {
let sql = r#"SELECT SAFE_CAST(1 AS INT64)"#; let sql = r"SELECT SAFE_CAST(1 AS INT64)";
bigquery().verified_only_select(sql); bigquery_and_generic().verified_only_select(sql);
}
// CAST to DATE with a FORMAT clause.
#[test]
fn parse_cast_date_format() {
    bigquery_and_generic().verified_only_select(
        r"SELECT CAST(date_valid_from AS DATE FORMAT 'YYYY-MM-DD') AS date_valid_from FROM foo",
    );
}
// CAST of a TIME literal to STRING with a FORMAT clause.
#[test]
fn parse_cast_time_format() {
    bigquery_and_generic().verified_only_select(
        r"SELECT CAST(TIME '21:30:00' AS STRING FORMAT 'PM') AS date_time_to_string",
    );
}
// CAST of a TIMESTAMP literal to STRING with FORMAT ... AT TIME ZONE ...
#[test]
fn parse_cast_timestamp_format_tz() {
    bigquery_and_generic().verified_only_select(
        r"SELECT CAST(TIMESTAMP '2008-12-25 00:00:00+00:00' AS STRING FORMAT 'TZH' AT TIME ZONE 'Asia/Kolkata') AS date_time_to_string",
    );
}
// CAST of a string literal to BYTES with a FORMAT clause.
#[test]
fn parse_cast_string_to_bytes_format() {
    bigquery_and_generic().verified_only_select(
        r"SELECT CAST('Hello' AS BYTES FORMAT 'ASCII') AS string_to_bytes",
    );
}
#[test]
fn parse_cast_bytes_to_string_format() {
let sql = r"SELECT CAST(B'\x48\x65\x6c\x6c\x6f' AS STRING FORMAT 'ASCII') AS bytes_to_string";
bigquery_and_generic().verified_only_select(sql);
} }
#[test] #[test]

View file

@ -1934,6 +1934,7 @@ fn parse_cast() {
&Expr::Cast { &Expr::Cast {
expr: Box::new(Expr::Identifier(Ident::new("id"))), expr: Box::new(Expr::Identifier(Ident::new("id"))),
data_type: DataType::BigInt(None), data_type: DataType::BigInt(None),
format: None,
}, },
expr_from_projection(only(&select.projection)) expr_from_projection(only(&select.projection))
); );
@ -1944,6 +1945,7 @@ fn parse_cast() {
&Expr::Cast { &Expr::Cast {
expr: Box::new(Expr::Identifier(Ident::new("id"))), expr: Box::new(Expr::Identifier(Ident::new("id"))),
data_type: DataType::TinyInt(None), data_type: DataType::TinyInt(None),
format: None,
}, },
expr_from_projection(only(&select.projection)) expr_from_projection(only(&select.projection))
); );
@ -1970,6 +1972,7 @@ fn parse_cast() {
&Expr::Cast { &Expr::Cast {
expr: Box::new(Expr::Identifier(Ident::new("id"))), expr: Box::new(Expr::Identifier(Ident::new("id"))),
data_type: DataType::Nvarchar(Some(50)), data_type: DataType::Nvarchar(Some(50)),
format: None,
}, },
expr_from_projection(only(&select.projection)) expr_from_projection(only(&select.projection))
); );
@ -1980,6 +1983,7 @@ fn parse_cast() {
&Expr::Cast { &Expr::Cast {
expr: Box::new(Expr::Identifier(Ident::new("id"))), expr: Box::new(Expr::Identifier(Ident::new("id"))),
data_type: DataType::Clob(None), data_type: DataType::Clob(None),
format: None,
}, },
expr_from_projection(only(&select.projection)) expr_from_projection(only(&select.projection))
); );
@ -1990,6 +1994,7 @@ fn parse_cast() {
&Expr::Cast { &Expr::Cast {
expr: Box::new(Expr::Identifier(Ident::new("id"))), expr: Box::new(Expr::Identifier(Ident::new("id"))),
data_type: DataType::Clob(Some(50)), data_type: DataType::Clob(Some(50)),
format: None,
}, },
expr_from_projection(only(&select.projection)) expr_from_projection(only(&select.projection))
); );
@ -2000,6 +2005,7 @@ fn parse_cast() {
&Expr::Cast { &Expr::Cast {
expr: Box::new(Expr::Identifier(Ident::new("id"))), expr: Box::new(Expr::Identifier(Ident::new("id"))),
data_type: DataType::Binary(Some(50)), data_type: DataType::Binary(Some(50)),
format: None,
}, },
expr_from_projection(only(&select.projection)) expr_from_projection(only(&select.projection))
); );
@ -2010,6 +2016,7 @@ fn parse_cast() {
&Expr::Cast { &Expr::Cast {
expr: Box::new(Expr::Identifier(Ident::new("id"))), expr: Box::new(Expr::Identifier(Ident::new("id"))),
data_type: DataType::Varbinary(Some(50)), data_type: DataType::Varbinary(Some(50)),
format: None,
}, },
expr_from_projection(only(&select.projection)) expr_from_projection(only(&select.projection))
); );
@ -2020,6 +2027,7 @@ fn parse_cast() {
&Expr::Cast { &Expr::Cast {
expr: Box::new(Expr::Identifier(Ident::new("id"))), expr: Box::new(Expr::Identifier(Ident::new("id"))),
data_type: DataType::Blob(None), data_type: DataType::Blob(None),
format: None,
}, },
expr_from_projection(only(&select.projection)) expr_from_projection(only(&select.projection))
); );
@ -2030,6 +2038,7 @@ fn parse_cast() {
&Expr::Cast { &Expr::Cast {
expr: Box::new(Expr::Identifier(Ident::new("id"))), expr: Box::new(Expr::Identifier(Ident::new("id"))),
data_type: DataType::Blob(Some(50)), data_type: DataType::Blob(Some(50)),
format: None,
}, },
expr_from_projection(only(&select.projection)) expr_from_projection(only(&select.projection))
); );
@ -2043,6 +2052,7 @@ fn parse_try_cast() {
&Expr::TryCast { &Expr::TryCast {
expr: Box::new(Expr::Identifier(Ident::new("id"))), expr: Box::new(Expr::Identifier(Ident::new("id"))),
data_type: DataType::BigInt(None), data_type: DataType::BigInt(None),
format: None,
}, },
expr_from_projection(only(&select.projection)) expr_from_projection(only(&select.projection))
); );

View file

@ -1782,7 +1782,8 @@ fn parse_array_index_expr() {
})), })),
data_type: DataType::Array(Some(Box::new(DataType::Array(Some(Box::new( data_type: DataType::Array(Some(Box::new(DataType::Array(Some(Box::new(
DataType::Int(None) DataType::Int(None)
)))))) )))))),
format: None,
}))), }))),
indexes: vec![num[1].clone(), num[2].clone()], indexes: vec![num[1].clone(), num[2].clone()],
}, },

View file

@ -167,6 +167,7 @@ fn parse_array() {
&Expr::Cast { &Expr::Cast {
expr: Box::new(Expr::Identifier(Ident::new("a"))), expr: Box::new(Expr::Identifier(Ident::new("a"))),
data_type: DataType::Array(None), data_type: DataType::Array(None),
format: None,
}, },
expr_from_projection(only(&select.projection)) expr_from_projection(only(&select.projection))
); );