adding delimited (#1155)

This commit is contained in:
Jonathan Lehto 2024-03-01 13:55:50 -05:00 committed by GitHub
parent fb7d4d40cc
commit 6f090e5547
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 148 additions and 3 deletions

View file

@ -3214,7 +3214,12 @@ impl fmt::Display for Statement {
Some(HiveRowFormat::SERDE { class }) => {
write!(f, " ROW FORMAT SERDE '{class}'")?
}
Some(HiveRowFormat::DELIMITED) => write!(f, " ROW FORMAT DELIMITED")?,
Some(HiveRowFormat::DELIMITED { delimiters }) => {
write!(f, " ROW FORMAT DELIMITED")?;
if !delimiters.is_empty() {
write!(f, " {}", display_separated(delimiters, " "))?;
}
}
None => (),
}
match storage {
@ -4872,7 +4877,48 @@ pub enum HiveDistributionStyle {
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
pub enum HiveRowFormat {
SERDE { class: String },
DELIMITED,
DELIMITED { delimiters: Vec<HiveRowDelimiter> },
}
/// One delimiter clause of a Hive `ROW FORMAT DELIMITED` specification:
/// the kind of delimiter paired with the value supplied for it.
///
/// Its `Display` impl writes the delimiter's keywords, a single space, and
/// then `char`.
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
pub struct HiveRowDelimiter {
/// Which delimiter clause this entry represents (e.g. `FIELDS TERMINATED BY`).
pub delimiter: HiveDelimiter,
/// The delimiter value, stored as an `Ident`; the parser obtains it with
/// `parse_identifier`.
pub char: Ident,
}
/// Formats the clause as `<delimiter keywords> <char>`, with exactly one
/// space between the two parts.
impl fmt::Display for HiveRowDelimiter {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self { delimiter, char } = self;
        write!(f, "{delimiter} {char}")
    }
}
/// The kinds of delimiter clause that can follow Hive's `ROW FORMAT DELIMITED`.
///
/// Each variant displays as the SQL keyword sequence that introduces its value.
#[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
pub enum HiveDelimiter {
    /// `FIELDS TERMINATED BY`
    FieldsTerminatedBy,
    /// `ESCAPED BY`
    FieldsEscapedBy,
    /// `COLLECTION ITEMS TERMINATED BY`
    CollectionItemsTerminatedBy,
    /// `MAP KEYS TERMINATED BY`
    MapKeysTerminatedBy,
    /// `LINES TERMINATED BY`
    LinesTerminatedBy,
    /// `NULL DEFINED AS`
    NullDefinedAs,
}

impl fmt::Display for HiveDelimiter {
    /// Writes the keyword sequence for this delimiter kind, with no
    /// leading or trailing whitespace.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let keywords = match self {
            Self::FieldsTerminatedBy => "FIELDS TERMINATED BY",
            Self::FieldsEscapedBy => "ESCAPED BY",
            Self::CollectionItemsTerminatedBy => "COLLECTION ITEMS TERMINATED BY",
            Self::MapKeysTerminatedBy => "MAP KEYS TERMINATED BY",
            Self::LinesTerminatedBy => "LINES TERMINATED BY",
            Self::NullDefinedAs => "NULL DEFINED AS",
        };
        f.write_str(keywords)
    }
}
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]

View file

@ -153,6 +153,7 @@ define_keywords!(
COLLATE,
COLLATION,
COLLECT,
COLLECTION,
COLUMN,
COLUMNS,
COMMENT,
@ -212,6 +213,7 @@ define_keywords!(
DEFAULT,
DEFERRABLE,
DEFERRED,
DEFINED,
DELAYED,
DELETE,
DELIMITED,
@ -258,6 +260,7 @@ define_keywords!(
EQUALS,
ERROR,
ESCAPE,
ESCAPED,
EVENT,
EVERY,
EXCEPT,
@ -368,6 +371,7 @@ define_keywords!(
ISOLATION,
ISOWEEK,
ISOYEAR,
ITEMS,
JAR,
JOIN,
JSON,
@ -376,6 +380,7 @@ define_keywords!(
JSON_TABLE,
JULIAN,
KEY,
KEYS,
KILL,
LAG,
LANGUAGE,
@ -390,6 +395,7 @@ define_keywords!(
LIKE,
LIKE_REGEX,
LIMIT,
LINES,
LISTAGG,
LN,
LOAD,
@ -405,6 +411,7 @@ define_keywords!(
LOW_PRIORITY,
MACRO,
MANAGEDLOCATION,
MAP,
MATCH,
MATCHED,
MATERIALIZED,
@ -653,6 +660,7 @@ define_keywords!(
TBLPROPERTIES,
TEMP,
TEMPORARY,
TERMINATED,
TEXT,
TEXTFILE,
THEN,

View file

@ -4405,7 +4405,92 @@ impl<'a> Parser<'a> {
let class = self.parse_literal_string()?;
Ok(HiveRowFormat::SERDE { class })
}
_ => Ok(HiveRowFormat::DELIMITED),
_ => {
let mut row_delimiters = vec![];
loop {
match self.parse_one_of_keywords(&[
Keyword::FIELDS,
Keyword::COLLECTION,
Keyword::MAP,
Keyword::LINES,
Keyword::NULL,
]) {
Some(Keyword::FIELDS) => {
if self.parse_keywords(&[Keyword::TERMINATED, Keyword::BY]) {
row_delimiters.push(HiveRowDelimiter {
delimiter: HiveDelimiter::FieldsTerminatedBy,
char: self.parse_identifier(false)?,
});
if self.parse_keywords(&[Keyword::ESCAPED, Keyword::BY]) {
row_delimiters.push(HiveRowDelimiter {
delimiter: HiveDelimiter::FieldsEscapedBy,
char: self.parse_identifier(false)?,
});
}
} else {
break;
}
}
Some(Keyword::COLLECTION) => {
if self.parse_keywords(&[
Keyword::ITEMS,
Keyword::TERMINATED,
Keyword::BY,
]) {
row_delimiters.push(HiveRowDelimiter {
delimiter: HiveDelimiter::CollectionItemsTerminatedBy,
char: self.parse_identifier(false)?,
});
} else {
break;
}
}
Some(Keyword::MAP) => {
if self.parse_keywords(&[
Keyword::KEYS,
Keyword::TERMINATED,
Keyword::BY,
]) {
row_delimiters.push(HiveRowDelimiter {
delimiter: HiveDelimiter::MapKeysTerminatedBy,
char: self.parse_identifier(false)?,
});
} else {
break;
}
}
Some(Keyword::LINES) => {
if self.parse_keywords(&[Keyword::TERMINATED, Keyword::BY]) {
row_delimiters.push(HiveRowDelimiter {
delimiter: HiveDelimiter::LinesTerminatedBy,
char: self.parse_identifier(false)?,
});
} else {
break;
}
}
Some(Keyword::NULL) => {
if self.parse_keywords(&[Keyword::DEFINED, Keyword::AS]) {
row_delimiters.push(HiveRowDelimiter {
delimiter: HiveDelimiter::NullDefinedAs,
char: self.parse_identifier(false)?,
});
} else {
break;
}
}
_ => {
break;
}
}
}
Ok(HiveRowFormat::DELIMITED {
delimiters: row_delimiters,
})
}
}
}

View file

@ -193,6 +193,12 @@ fn create_temp_table() {
hive().one_statement_parses_to(query2, query);
}
#[test]
fn create_delimited_table() {
    // Exercises ROW FORMAT DELIMITED with FIELDS TERMINATED BY / ESCAPED BY
    // and MAP KEYS TERMINATED BY clauses through the Hive dialect's
    // `verified_stmt` helper.
    let sql = "CREATE TABLE tab (cola STRING, colb BIGINT) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' ESCAPED BY '\"' MAP KEYS TERMINATED BY '\"'";
    hive().verified_stmt(sql);
}
#[test]
fn create_local_directory() {
let query =