adding delimited (#1155)

This commit is contained in:
Jonathan Lehto 2024-03-01 13:55:50 -05:00 committed by GitHub
parent fb7d4d40cc
commit 6f090e5547
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 148 additions and 3 deletions

View file

@ -3214,7 +3214,12 @@ impl fmt::Display for Statement {
Some(HiveRowFormat::SERDE { class }) => { Some(HiveRowFormat::SERDE { class }) => {
write!(f, " ROW FORMAT SERDE '{class}'")? write!(f, " ROW FORMAT SERDE '{class}'")?
} }
Some(HiveRowFormat::DELIMITED) => write!(f, " ROW FORMAT DELIMITED")?, Some(HiveRowFormat::DELIMITED { delimiters }) => {
write!(f, " ROW FORMAT DELIMITED")?;
if !delimiters.is_empty() {
write!(f, " {}", display_separated(delimiters, " "))?;
}
}
None => (), None => (),
} }
match storage { match storage {
@ -4872,7 +4877,48 @@ pub enum HiveDistributionStyle {
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
pub enum HiveRowFormat { pub enum HiveRowFormat {
SERDE { class: String }, SERDE { class: String },
DELIMITED, DELIMITED { delimiters: Vec<HiveRowDelimiter> },
}
/// One delimiter clause of a Hive `ROW FORMAT DELIMITED` specification,
/// e.g. `FIELDS TERMINATED BY ','`.
///
/// Displayed as the clause keywords, a single space, then the delimiter value.
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
pub struct HiveRowDelimiter {
/// Which clause this is (selects the keyword sequence that is printed).
pub delimiter: HiveDelimiter,
/// The delimiter value that follows the clause keywords. It is held as an
/// `Ident` because the parser reads it with `parse_identifier`.
pub char: Ident,
}
/// Renders one delimiter clause as `<clause keywords> <delimiter value>`.
impl fmt::Display for HiveRowDelimiter {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Clause keywords and value are always separated by exactly one space.
        write!(f, "{} {}", self.delimiter, self.char)
    }
}
/// The kinds of delimiter clause that may follow Hive's `ROW FORMAT DELIMITED`.
///
/// Each variant's `Display` output is the keyword sequence shown on it below.
#[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
pub enum HiveDelimiter {
/// `FIELDS TERMINATED BY`
FieldsTerminatedBy,
/// `ESCAPED BY` (printed without a leading `FIELDS`; the parser accepts it
/// only directly after a `FIELDS TERMINATED BY` clause).
FieldsEscapedBy,
/// `COLLECTION ITEMS TERMINATED BY`
CollectionItemsTerminatedBy,
/// `MAP KEYS TERMINATED BY`
MapKeysTerminatedBy,
/// `LINES TERMINATED BY`
LinesTerminatedBy,
/// `NULL DEFINED AS`
NullDefinedAs,
}
impl fmt::Display for HiveDelimiter {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
use HiveDelimiter::*;
f.write_str(match self {
FieldsTerminatedBy => "FIELDS TERMINATED BY",
FieldsEscapedBy => "ESCAPED BY",
CollectionItemsTerminatedBy => "COLLECTION ITEMS TERMINATED BY",
MapKeysTerminatedBy => "MAP KEYS TERMINATED BY",
LinesTerminatedBy => "LINES TERMINATED BY",
NullDefinedAs => "NULL DEFINED AS",
})
}
} }
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]

View file

@ -153,6 +153,7 @@ define_keywords!(
COLLATE, COLLATE,
COLLATION, COLLATION,
COLLECT, COLLECT,
COLLECTION,
COLUMN, COLUMN,
COLUMNS, COLUMNS,
COMMENT, COMMENT,
@ -212,6 +213,7 @@ define_keywords!(
DEFAULT, DEFAULT,
DEFERRABLE, DEFERRABLE,
DEFERRED, DEFERRED,
DEFINED,
DELAYED, DELAYED,
DELETE, DELETE,
DELIMITED, DELIMITED,
@ -258,6 +260,7 @@ define_keywords!(
EQUALS, EQUALS,
ERROR, ERROR,
ESCAPE, ESCAPE,
ESCAPED,
EVENT, EVENT,
EVERY, EVERY,
EXCEPT, EXCEPT,
@ -368,6 +371,7 @@ define_keywords!(
ISOLATION, ISOLATION,
ISOWEEK, ISOWEEK,
ISOYEAR, ISOYEAR,
ITEMS,
JAR, JAR,
JOIN, JOIN,
JSON, JSON,
@ -376,6 +380,7 @@ define_keywords!(
JSON_TABLE, JSON_TABLE,
JULIAN, JULIAN,
KEY, KEY,
KEYS,
KILL, KILL,
LAG, LAG,
LANGUAGE, LANGUAGE,
@ -390,6 +395,7 @@ define_keywords!(
LIKE, LIKE,
LIKE_REGEX, LIKE_REGEX,
LIMIT, LIMIT,
LINES,
LISTAGG, LISTAGG,
LN, LN,
LOAD, LOAD,
@ -405,6 +411,7 @@ define_keywords!(
LOW_PRIORITY, LOW_PRIORITY,
MACRO, MACRO,
MANAGEDLOCATION, MANAGEDLOCATION,
MAP,
MATCH, MATCH,
MATCHED, MATCHED,
MATERIALIZED, MATERIALIZED,
@ -653,6 +660,7 @@ define_keywords!(
TBLPROPERTIES, TBLPROPERTIES,
TEMP, TEMP,
TEMPORARY, TEMPORARY,
TERMINATED,
TEXT, TEXT,
TEXTFILE, TEXTFILE,
THEN, THEN,

View file

@ -4405,7 +4405,92 @@ impl<'a> Parser<'a> {
let class = self.parse_literal_string()?; let class = self.parse_literal_string()?;
Ok(HiveRowFormat::SERDE { class }) Ok(HiveRowFormat::SERDE { class })
} }
_ => Ok(HiveRowFormat::DELIMITED), _ => {
let mut row_delimiters = vec![];
loop {
match self.parse_one_of_keywords(&[
Keyword::FIELDS,
Keyword::COLLECTION,
Keyword::MAP,
Keyword::LINES,
Keyword::NULL,
]) {
Some(Keyword::FIELDS) => {
if self.parse_keywords(&[Keyword::TERMINATED, Keyword::BY]) {
row_delimiters.push(HiveRowDelimiter {
delimiter: HiveDelimiter::FieldsTerminatedBy,
char: self.parse_identifier(false)?,
});
if self.parse_keywords(&[Keyword::ESCAPED, Keyword::BY]) {
row_delimiters.push(HiveRowDelimiter {
delimiter: HiveDelimiter::FieldsEscapedBy,
char: self.parse_identifier(false)?,
});
}
} else {
break;
}
}
Some(Keyword::COLLECTION) => {
if self.parse_keywords(&[
Keyword::ITEMS,
Keyword::TERMINATED,
Keyword::BY,
]) {
row_delimiters.push(HiveRowDelimiter {
delimiter: HiveDelimiter::CollectionItemsTerminatedBy,
char: self.parse_identifier(false)?,
});
} else {
break;
}
}
Some(Keyword::MAP) => {
if self.parse_keywords(&[
Keyword::KEYS,
Keyword::TERMINATED,
Keyword::BY,
]) {
row_delimiters.push(HiveRowDelimiter {
delimiter: HiveDelimiter::MapKeysTerminatedBy,
char: self.parse_identifier(false)?,
});
} else {
break;
}
}
Some(Keyword::LINES) => {
if self.parse_keywords(&[Keyword::TERMINATED, Keyword::BY]) {
row_delimiters.push(HiveRowDelimiter {
delimiter: HiveDelimiter::LinesTerminatedBy,
char: self.parse_identifier(false)?,
});
} else {
break;
}
}
Some(Keyword::NULL) => {
if self.parse_keywords(&[Keyword::DEFINED, Keyword::AS]) {
row_delimiters.push(HiveRowDelimiter {
delimiter: HiveDelimiter::NullDefinedAs,
char: self.parse_identifier(false)?,
});
} else {
break;
}
}
_ => {
break;
}
}
}
Ok(HiveRowFormat::DELIMITED {
delimiters: row_delimiters,
})
}
} }
} }

View file

@ -193,6 +193,12 @@ fn create_temp_table() {
hive().one_statement_parses_to(query2, query); hive().one_statement_parses_to(query2, query);
} }
// Exercises `ROW FORMAT DELIMITED` with a FIELDS clause (including its optional
// ESCAPED BY tail) followed by a MAP KEYS clause.
// NOTE(review): presumably `verified_stmt` checks that the statement parses and
// re-serializes to the same text — confirm against the test helper.
#[test]
fn create_delimited_table() {
    hive().verified_stmt(
        "CREATE TABLE tab (cola STRING, colb BIGINT) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' ESCAPED BY '\"' MAP KEYS TERMINATED BY '\"'",
    );
}
#[test] #[test]
fn create_local_directory() { fn create_local_directory() {
let query = let query =