From 6f090e5547b6bc7e75004bfbaafe3b7c660c112d Mon Sep 17 00:00:00 2001 From: Jonathan Lehto Date: Fri, 1 Mar 2024 13:55:50 -0500 Subject: [PATCH] adding delimited (#1155) --- src/ast/mod.rs | 50 ++++++++++++++++++++++- src/keywords.rs | 8 ++++ src/parser/mod.rs | 87 ++++++++++++++++++++++++++++++++++++++++- tests/sqlparser_hive.rs | 6 +++ 4 files changed, 148 insertions(+), 3 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 922f022e..145f0448 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -3214,7 +3214,12 @@ impl fmt::Display for Statement { Some(HiveRowFormat::SERDE { class }) => { write!(f, " ROW FORMAT SERDE '{class}'")? } - Some(HiveRowFormat::DELIMITED) => write!(f, " ROW FORMAT DELIMITED")?, + Some(HiveRowFormat::DELIMITED { delimiters }) => { + write!(f, " ROW FORMAT DELIMITED")?; + if !delimiters.is_empty() { + write!(f, " {}", display_separated(delimiters, " "))?; + } + } None => (), } match storage { @@ -4872,7 +4877,48 @@ pub enum HiveDistributionStyle { #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum HiveRowFormat { SERDE { class: String }, - DELIMITED, + DELIMITED { delimiters: Vec<HiveRowDelimiter> }, +} + +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct HiveRowDelimiter { + pub delimiter: HiveDelimiter, + pub char: Ident, +} + +impl fmt::Display for HiveRowDelimiter { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{} ", self.delimiter)?; + write!(f, "{}", self.char) + } +} + +#[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum HiveDelimiter { + FieldsTerminatedBy, + FieldsEscapedBy, + CollectionItemsTerminatedBy, + MapKeysTerminatedBy, + LinesTerminatedBy, + NullDefinedAs, +} + +impl fmt::Display for 
HiveDelimiter { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + use HiveDelimiter::*; + f.write_str(match self { + FieldsTerminatedBy => "FIELDS TERMINATED BY", + FieldsEscapedBy => "ESCAPED BY", + CollectionItemsTerminatedBy => "COLLECTION ITEMS TERMINATED BY", + MapKeysTerminatedBy => "MAP KEYS TERMINATED BY", + LinesTerminatedBy => "LINES TERMINATED BY", + NullDefinedAs => "NULL DEFINED AS", + }) + } } #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] diff --git a/src/keywords.rs b/src/keywords.rs index 10a13627..dee5eb5c 100644 --- a/src/keywords.rs +++ b/src/keywords.rs @@ -153,6 +153,7 @@ define_keywords!( COLLATE, COLLATION, COLLECT, + COLLECTION, COLUMN, COLUMNS, COMMENT, @@ -212,6 +213,7 @@ define_keywords!( DEFAULT, DEFERRABLE, DEFERRED, + DEFINED, DELAYED, DELETE, DELIMITED, @@ -258,6 +260,7 @@ define_keywords!( EQUALS, ERROR, ESCAPE, + ESCAPED, EVENT, EVERY, EXCEPT, @@ -368,6 +371,7 @@ define_keywords!( ISOLATION, ISOWEEK, ISOYEAR, + ITEMS, JAR, JOIN, JSON, @@ -376,6 +380,7 @@ define_keywords!( JSON_TABLE, JULIAN, KEY, + KEYS, KILL, LAG, LANGUAGE, @@ -390,6 +395,7 @@ define_keywords!( LIKE, LIKE_REGEX, LIMIT, + LINES, LISTAGG, LN, LOAD, @@ -405,6 +411,7 @@ define_keywords!( LOW_PRIORITY, MACRO, MANAGEDLOCATION, + MAP, MATCH, MATCHED, MATERIALIZED, @@ -653,6 +660,7 @@ define_keywords!( TBLPROPERTIES, TEMP, TEMPORARY, + TERMINATED, TEXT, TEXTFILE, THEN, diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 436782fd..579130ba 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -4405,7 +4405,92 @@ impl<'a> Parser<'a> { let class = self.parse_literal_string()?; Ok(HiveRowFormat::SERDE { class }) } - _ => Ok(HiveRowFormat::DELIMITED), + _ => { + let mut row_delimiters = vec![]; + + loop { + match self.parse_one_of_keywords(&[ + Keyword::FIELDS, + Keyword::COLLECTION, + Keyword::MAP, + Keyword::LINES, + Keyword::NULL, + ]) { + Some(Keyword::FIELDS) => { + if self.parse_keywords(&[Keyword::TERMINATED, Keyword::BY]) { + 
row_delimiters.push(HiveRowDelimiter { + delimiter: HiveDelimiter::FieldsTerminatedBy, + char: self.parse_identifier(false)?, + }); + + if self.parse_keywords(&[Keyword::ESCAPED, Keyword::BY]) { + row_delimiters.push(HiveRowDelimiter { + delimiter: HiveDelimiter::FieldsEscapedBy, + char: self.parse_identifier(false)?, + }); + } + } else { + break; + } + } + Some(Keyword::COLLECTION) => { + if self.parse_keywords(&[ + Keyword::ITEMS, + Keyword::TERMINATED, + Keyword::BY, + ]) { + row_delimiters.push(HiveRowDelimiter { + delimiter: HiveDelimiter::CollectionItemsTerminatedBy, + char: self.parse_identifier(false)?, + }); + } else { + break; + } + } + Some(Keyword::MAP) => { + if self.parse_keywords(&[ + Keyword::KEYS, + Keyword::TERMINATED, + Keyword::BY, + ]) { + row_delimiters.push(HiveRowDelimiter { + delimiter: HiveDelimiter::MapKeysTerminatedBy, + char: self.parse_identifier(false)?, + }); + } else { + break; + } + } + Some(Keyword::LINES) => { + if self.parse_keywords(&[Keyword::TERMINATED, Keyword::BY]) { + row_delimiters.push(HiveRowDelimiter { + delimiter: HiveDelimiter::LinesTerminatedBy, + char: self.parse_identifier(false)?, + }); + } else { + break; + } + } + Some(Keyword::NULL) => { + if self.parse_keywords(&[Keyword::DEFINED, Keyword::AS]) { + row_delimiters.push(HiveRowDelimiter { + delimiter: HiveDelimiter::NullDefinedAs, + char: self.parse_identifier(false)?, + }); + } else { + break; + } + } + _ => { + break; + } + } + } + + Ok(HiveRowFormat::DELIMITED { + delimiters: row_delimiters, + }) + } } } diff --git a/tests/sqlparser_hive.rs b/tests/sqlparser_hive.rs index acf6c582..f363f984 100644 --- a/tests/sqlparser_hive.rs +++ b/tests/sqlparser_hive.rs @@ -193,6 +193,12 @@ fn create_temp_table() { hive().one_statement_parses_to(query2, query); } +#[test] +fn create_delimited_table() { + let query = "CREATE TABLE tab (cola STRING, colb BIGINT) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' ESCAPED BY '\"' MAP KEYS TERMINATED BY '\"'"; + 
hive().verified_stmt(query); +} + #[test] fn create_local_directory() { let query =