mirror of
https://github.com/apache/datafusion-sqlparser-rs.git
synced 2025-07-07 17:04:59 +00:00
Add support for IS [NOT] [form] NORMALIZED
(#1655)
Co-authored-by: Alexander Beedie <alexander.beedie@adia.ae>
This commit is contained in:
parent
3eeb9160ea
commit
e9498d538a
8 changed files with 185 additions and 17 deletions
|
@ -83,7 +83,7 @@ pub use self::trigger::{
|
|||
|
||||
pub use self::value::{
|
||||
escape_double_quote_string, escape_quoted_string, DateTimeField, DollarQuotedString,
|
||||
TrimWhereField, Value,
|
||||
NormalizationForm, TrimWhereField, Value,
|
||||
};
|
||||
|
||||
use crate::ast::helpers::stmt_data_loading::{
|
||||
|
@ -653,6 +653,12 @@ pub enum Expr {
|
|||
IsDistinctFrom(Box<Expr>, Box<Expr>),
|
||||
/// `IS NOT DISTINCT FROM` operator
|
||||
IsNotDistinctFrom(Box<Expr>, Box<Expr>),
|
||||
/// `<expr> IS [ NOT ] [ form ] NORMALIZED`
|
||||
IsNormalized {
|
||||
expr: Box<Expr>,
|
||||
form: Option<NormalizationForm>,
|
||||
negated: bool,
|
||||
},
|
||||
/// `[ NOT ] IN (val1, val2, ...)`
|
||||
InList {
|
||||
expr: Box<Expr>,
|
||||
|
@ -1118,7 +1124,7 @@ impl fmt::Display for LambdaFunction {
|
|||
/// `OneOrManyWithParens` implements `Deref<Target = [T]>` and `IntoIterator`,
|
||||
/// so you can call slice methods on it and iterate over items
|
||||
/// # Examples
|
||||
/// Acessing as a slice:
|
||||
/// Accessing as a slice:
|
||||
/// ```
|
||||
/// # use sqlparser::ast::OneOrManyWithParens;
|
||||
/// let one = OneOrManyWithParens::One("a");
|
||||
|
@ -1419,6 +1425,24 @@ impl fmt::Display for Expr {
|
|||
if *regexp { "REGEXP" } else { "RLIKE" },
|
||||
pattern
|
||||
),
|
||||
Expr::IsNormalized {
|
||||
expr,
|
||||
form,
|
||||
negated,
|
||||
} => {
|
||||
let not_ = if *negated { "NOT " } else { "" };
|
||||
if form.is_none() {
|
||||
write!(f, "{} IS {}NORMALIZED", expr, not_)
|
||||
} else {
|
||||
write!(
|
||||
f,
|
||||
"{} IS {}{} NORMALIZED",
|
||||
expr,
|
||||
not_,
|
||||
form.as_ref().unwrap()
|
||||
)
|
||||
}
|
||||
}
|
||||
Expr::SimilarTo {
|
||||
negated,
|
||||
expr,
|
||||
|
@ -7799,7 +7823,7 @@ where
|
|||
/// ```sql
|
||||
/// EXPLAIN (ANALYZE, VERBOSE TRUE, FORMAT TEXT) SELECT * FROM my_table;
|
||||
///
|
||||
/// VACCUM (VERBOSE, ANALYZE ON, PARALLEL 10) my_table;
|
||||
/// VACUUM (VERBOSE, ANALYZE ON, PARALLEL 10) my_table;
|
||||
/// ```
|
||||
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
|
|
|
@ -2821,10 +2821,10 @@ impl fmt::Display for ValueTableMode {
|
|||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
|
||||
pub enum UpdateTableFromKind {
|
||||
/// Update Statment where the 'FROM' clause is before the 'SET' keyword (Supported by Snowflake)
|
||||
/// Update Statement where the 'FROM' clause is before the 'SET' keyword (Supported by Snowflake)
|
||||
/// For Example: `UPDATE FROM t1 SET t1.name='aaa'`
|
||||
BeforeSet(TableWithJoins),
|
||||
/// Update Statment where the 'FROM' clause is after the 'SET' keyword (Which is the standard way)
|
||||
/// Update Statement where the 'FROM' clause is after the 'SET' keyword (Which is the standard way)
|
||||
/// For Example: `UPDATE SET t1.name='aaa' FROM t1`
|
||||
AfterSet(TableWithJoins),
|
||||
}
|
||||
|
|
|
@ -1325,6 +1325,12 @@ impl Spanned for Expr {
|
|||
escape_char: _,
|
||||
any: _,
|
||||
} => expr.span().union(&pattern.span()),
|
||||
Expr::RLike { .. } => Span::empty(),
|
||||
Expr::IsNormalized {
|
||||
expr,
|
||||
form: _,
|
||||
negated: _,
|
||||
} => expr.span(),
|
||||
Expr::SimilarTo {
|
||||
negated: _,
|
||||
expr,
|
||||
|
@ -1360,7 +1366,6 @@ impl Spanned for Expr {
|
|||
Expr::Array(array) => array.span(),
|
||||
Expr::MatchAgainst { .. } => Span::empty(),
|
||||
Expr::JsonAccess { value, path } => value.span().union(&path.span()),
|
||||
Expr::RLike { .. } => Span::empty(),
|
||||
Expr::AnyOp {
|
||||
left,
|
||||
compare_op: _,
|
||||
|
|
|
@ -270,6 +270,35 @@ impl fmt::Display for DateTimeField {
|
|||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Ord, PartialOrd, Hash)]
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
|
||||
/// The Unicode Standard defines four normalization forms, which are intended to eliminate
|
||||
/// certain distinctions between visually or functionally identical characters.
|
||||
///
|
||||
/// See [Unicode Normalization Forms](https://unicode.org/reports/tr15/) for details.
|
||||
pub enum NormalizationForm {
|
||||
/// Canonical Decomposition, followed by Canonical Composition.
|
||||
NFC,
|
||||
/// Canonical Decomposition.
|
||||
NFD,
|
||||
/// Compatibility Decomposition, followed by Canonical Composition.
|
||||
NFKC,
|
||||
/// Compatibility Decomposition.
|
||||
NFKD,
|
||||
}
|
||||
|
||||
impl fmt::Display for NormalizationForm {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
match self {
|
||||
NormalizationForm::NFC => write!(f, "NFC"),
|
||||
NormalizationForm::NFD => write!(f, "NFD"),
|
||||
NormalizationForm::NFKC => write!(f, "NFKC"),
|
||||
NormalizationForm::NFKD => write!(f, "NFKD"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub struct EscapeQuotedString<'a> {
|
||||
string: &'a str,
|
||||
quote: char,
|
||||
|
|
|
@ -530,6 +530,10 @@ define_keywords!(
|
|||
NESTED,
|
||||
NEW,
|
||||
NEXT,
|
||||
NFC,
|
||||
NFD,
|
||||
NFKC,
|
||||
NFKD,
|
||||
NO,
|
||||
NOBYPASSRLS,
|
||||
NOCREATEDB,
|
||||
|
@ -540,6 +544,7 @@ define_keywords!(
|
|||
NOORDER,
|
||||
NOREPLICATION,
|
||||
NORMALIZE,
|
||||
NORMALIZED,
|
||||
NOSCAN,
|
||||
NOSUPERUSER,
|
||||
NOT,
|
||||
|
|
|
@ -3184,9 +3184,11 @@ impl<'a> Parser<'a> {
|
|||
{
|
||||
let expr2 = self.parse_expr()?;
|
||||
Ok(Expr::IsNotDistinctFrom(Box::new(expr), Box::new(expr2)))
|
||||
} else if let Ok(is_normalized) = self.parse_unicode_is_normalized(expr) {
|
||||
Ok(is_normalized)
|
||||
} else {
|
||||
self.expected(
|
||||
"[NOT] NULL or TRUE|FALSE or [NOT] DISTINCT FROM after IS",
|
||||
"[NOT] NULL | TRUE | FALSE | DISTINCT | [form] NORMALIZED FROM after IS",
|
||||
self.peek_token(),
|
||||
)
|
||||
}
|
||||
|
@ -3851,7 +3853,7 @@ impl<'a> Parser<'a> {
|
|||
/// If the current token is the `expected` keyword, consume the token.
|
||||
/// Otherwise, return an error.
|
||||
///
|
||||
// todo deprecate infavor of expected_keyword_is
|
||||
// todo deprecate in favor of expected_keyword_is
|
||||
pub fn expect_keyword(&mut self, expected: Keyword) -> Result<TokenWithSpan, ParserError> {
|
||||
if self.parse_keyword(expected) {
|
||||
Ok(self.get_current_token().clone())
|
||||
|
@ -8453,6 +8455,33 @@ impl<'a> Parser<'a> {
|
|||
}
|
||||
}
|
||||
|
||||
/// Parse a literal unicode normalization clause
|
||||
pub fn parse_unicode_is_normalized(&mut self, expr: Expr) -> Result<Expr, ParserError> {
|
||||
let neg = self.parse_keyword(Keyword::NOT);
|
||||
let normalized_form = self.maybe_parse(|parser| {
|
||||
match parser.parse_one_of_keywords(&[
|
||||
Keyword::NFC,
|
||||
Keyword::NFD,
|
||||
Keyword::NFKC,
|
||||
Keyword::NFKD,
|
||||
]) {
|
||||
Some(Keyword::NFC) => Ok(NormalizationForm::NFC),
|
||||
Some(Keyword::NFD) => Ok(NormalizationForm::NFD),
|
||||
Some(Keyword::NFKC) => Ok(NormalizationForm::NFKC),
|
||||
Some(Keyword::NFKD) => Ok(NormalizationForm::NFKD),
|
||||
_ => parser.expected("unicode normalization form", parser.peek_token()),
|
||||
}
|
||||
})?;
|
||||
if self.parse_keyword(Keyword::NORMALIZED) {
|
||||
return Ok(Expr::IsNormalized {
|
||||
expr: Box::new(expr),
|
||||
form: normalized_form,
|
||||
negated: neg,
|
||||
});
|
||||
}
|
||||
self.expected("unicode normalization form", self.peek_token())
|
||||
}
|
||||
|
||||
pub fn parse_enum_values(&mut self) -> Result<Vec<EnumMember>, ParserError> {
|
||||
self.expect_token(&Token::LParen)?;
|
||||
let values = self.parse_comma_separated(|parser| {
|
||||
|
@ -8979,7 +9008,7 @@ impl<'a> Parser<'a> {
|
|||
}
|
||||
}
|
||||
|
||||
/// Parse a table object for insetion
|
||||
/// Parse a table object for insertion
|
||||
/// e.g. `some_database.some_table` or `FUNCTION some_table_func(...)`
|
||||
pub fn parse_table_object(&mut self) -> Result<TableObject, ParserError> {
|
||||
if self.dialect.supports_insert_table_function() && self.parse_keyword(Keyword::FUNCTION) {
|
||||
|
@ -11887,7 +11916,7 @@ impl<'a> Parser<'a> {
|
|||
} else {
|
||||
let mut name = self.parse_grantee_name()?;
|
||||
if self.consume_token(&Token::Colon) {
|
||||
// Redshift supports namespace prefix for extenrnal users and groups:
|
||||
// Redshift supports namespace prefix for external users and groups:
|
||||
// <Namespace>:<GroupName> or <Namespace>:<UserName>
|
||||
// https://docs.aws.amazon.com/redshift/latest/mgmt/redshift-iam-access-control-native-idp.html
|
||||
let ident = self.parse_identifier()?;
|
||||
|
@ -12883,7 +12912,7 @@ impl<'a> Parser<'a> {
|
|||
Ok(WithFill { from, to, step })
|
||||
}
|
||||
|
||||
// Parse a set of comma seperated INTERPOLATE expressions (ClickHouse dialect)
|
||||
// Parse a set of comma separated INTERPOLATE expressions (ClickHouse dialect)
|
||||
// that follow the INTERPOLATE keyword in an ORDER BY clause with the WITH FILL modifier
|
||||
pub fn parse_interpolations(&mut self) -> Result<Option<Interpolate>, ParserError> {
|
||||
if !self.parse_keyword(Keyword::INTERPOLATE) {
|
||||
|
@ -14432,7 +14461,7 @@ mod tests {
|
|||
assert_eq!(
|
||||
ast,
|
||||
Err(ParserError::ParserError(
|
||||
"Expected: [NOT] NULL or TRUE|FALSE or [NOT] DISTINCT FROM after IS, found: a at Line: 1, Column: 16"
|
||||
"Expected: [NOT] NULL | TRUE | FALSE | DISTINCT | [form] NORMALIZED FROM after IS, found: a at Line: 1, Column: 16"
|
||||
.to_string()
|
||||
))
|
||||
);
|
||||
|
|
|
@ -4600,7 +4600,7 @@ fn run_explain_analyze(
|
|||
expected_verbose: bool,
|
||||
expected_analyze: bool,
|
||||
expected_format: Option<AnalyzeFormat>,
|
||||
exepcted_options: Option<Vec<UtilityOption>>,
|
||||
expected_options: Option<Vec<UtilityOption>>,
|
||||
) {
|
||||
match dialect.verified_stmt(query) {
|
||||
Statement::Explain {
|
||||
|
@ -4616,7 +4616,7 @@ fn run_explain_analyze(
|
|||
assert_eq!(verbose, expected_verbose);
|
||||
assert_eq!(analyze, expected_analyze);
|
||||
assert_eq!(format, expected_format);
|
||||
assert_eq!(options, exepcted_options);
|
||||
assert_eq!(options, expected_options);
|
||||
assert!(!query_plan);
|
||||
assert!(!estimate);
|
||||
assert_eq!("SELECT sqrt(id) FROM foo", statement.to_string());
|
||||
|
@ -9317,6 +9317,46 @@ fn parse_is_boolean() {
|
|||
verified_expr(sql)
|
||||
);
|
||||
|
||||
let sql = "a IS NORMALIZED";
|
||||
assert_eq!(
|
||||
IsNormalized {
|
||||
expr: Box::new(Identifier(Ident::new("a"))),
|
||||
form: None,
|
||||
negated: false,
|
||||
},
|
||||
verified_expr(sql)
|
||||
);
|
||||
|
||||
let sql = "a IS NOT NORMALIZED";
|
||||
assert_eq!(
|
||||
IsNormalized {
|
||||
expr: Box::new(Identifier(Ident::new("a"))),
|
||||
form: None,
|
||||
negated: true,
|
||||
},
|
||||
verified_expr(sql)
|
||||
);
|
||||
|
||||
let sql = "a IS NFKC NORMALIZED";
|
||||
assert_eq!(
|
||||
IsNormalized {
|
||||
expr: Box::new(Identifier(Ident::new("a"))),
|
||||
form: Some(NormalizationForm::NFKC),
|
||||
negated: false,
|
||||
},
|
||||
verified_expr(sql)
|
||||
);
|
||||
|
||||
let sql = "a IS NOT NFKD NORMALIZED";
|
||||
assert_eq!(
|
||||
IsNormalized {
|
||||
expr: Box::new(Identifier(Ident::new("a"))),
|
||||
form: Some(NormalizationForm::NFKD),
|
||||
negated: true,
|
||||
},
|
||||
verified_expr(sql)
|
||||
);
|
||||
|
||||
let sql = "a IS UNKNOWN";
|
||||
assert_eq!(
|
||||
IsUnknown(Box::new(Identifier(Ident::new("a")))),
|
||||
|
@ -9335,6 +9375,12 @@ fn parse_is_boolean() {
|
|||
verified_stmt("SELECT f FROM foo WHERE field IS FALSE");
|
||||
verified_stmt("SELECT f FROM foo WHERE field IS NOT FALSE");
|
||||
|
||||
verified_stmt("SELECT f FROM foo WHERE field IS NORMALIZED");
|
||||
verified_stmt("SELECT f FROM foo WHERE field IS NFC NORMALIZED");
|
||||
verified_stmt("SELECT f FROM foo WHERE field IS NFD NORMALIZED");
|
||||
verified_stmt("SELECT f FROM foo WHERE field IS NOT NORMALIZED");
|
||||
verified_stmt("SELECT f FROM foo WHERE field IS NOT NFKC NORMALIZED");
|
||||
|
||||
verified_stmt("SELECT f FROM foo WHERE field IS UNKNOWN");
|
||||
verified_stmt("SELECT f FROM foo WHERE field IS NOT UNKNOWN");
|
||||
|
||||
|
@ -9342,7 +9388,37 @@ fn parse_is_boolean() {
|
|||
let res = parse_sql_statements(sql);
|
||||
assert_eq!(
|
||||
ParserError::ParserError(
|
||||
"Expected: [NOT] NULL or TRUE|FALSE or [NOT] DISTINCT FROM after IS, found: 0"
|
||||
"Expected: [NOT] NULL | TRUE | FALSE | DISTINCT | [form] NORMALIZED FROM after IS, found: 0"
|
||||
.to_string()
|
||||
),
|
||||
res.unwrap_err()
|
||||
);
|
||||
|
||||
let sql = "SELECT s, s IS XYZ NORMALIZED FROM foo";
|
||||
let res = parse_sql_statements(sql);
|
||||
assert_eq!(
|
||||
ParserError::ParserError(
|
||||
"Expected: [NOT] NULL | TRUE | FALSE | DISTINCT | [form] NORMALIZED FROM after IS, found: XYZ"
|
||||
.to_string()
|
||||
),
|
||||
res.unwrap_err()
|
||||
);
|
||||
|
||||
let sql = "SELECT s, s IS NFKC FROM foo";
|
||||
let res = parse_sql_statements(sql);
|
||||
assert_eq!(
|
||||
ParserError::ParserError(
|
||||
"Expected: [NOT] NULL | TRUE | FALSE | DISTINCT | [form] NORMALIZED FROM after IS, found: FROM"
|
||||
.to_string()
|
||||
),
|
||||
res.unwrap_err()
|
||||
);
|
||||
|
||||
let sql = "SELECT s, s IS TRIM(' NFKC ') FROM foo";
|
||||
let res = parse_sql_statements(sql);
|
||||
assert_eq!(
|
||||
ParserError::ParserError(
|
||||
"Expected: [NOT] NULL | TRUE | FALSE | DISTINCT | [form] NORMALIZED FROM after IS, found: TRIM"
|
||||
.to_string()
|
||||
),
|
||||
res.unwrap_err()
|
||||
|
@ -13003,7 +13079,7 @@ fn test_trailing_commas_in_from() {
|
|||
let sql = "SELECT a FROM b, WHERE c = 1";
|
||||
let _ = dialects.parse_sql_statements(sql).unwrap();
|
||||
|
||||
// nasted
|
||||
// nested
|
||||
let sql = "SELECT 1, 2 FROM (SELECT * FROM t,),";
|
||||
let _ = dialects.parse_sql_statements(sql).unwrap();
|
||||
|
||||
|
|
|
@ -2572,7 +2572,7 @@ fn parse_kill() {
|
|||
}
|
||||
|
||||
#[test]
|
||||
fn parse_table_colum_option_on_update() {
|
||||
fn parse_table_column_option_on_update() {
|
||||
let sql1 = "CREATE TABLE foo (`modification_time` DATETIME ON UPDATE CURRENT_TIMESTAMP())";
|
||||
match mysql().verified_stmt(sql1) {
|
||||
Statement::CreateTable(CreateTable { name, columns, .. }) => {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue