From 5d5c90c77fd5fd0e25250c043ef91632cba4ca8d Mon Sep 17 00:00:00 2001 From: Yoav Cohen <59807311+yoavcloud@users.noreply.github.com> Date: Thu, 21 Aug 2025 19:45:32 +0300 Subject: [PATCH] Redshift: Add more copy options (#2008) --- src/ast/mod.rs | 54 ++++++++++++++++++++++++++++----- src/keywords.rs | 7 +++++ src/parser/mod.rs | 63 ++++++++++++++++++++++++++++++++------- tests/sqlparser_common.rs | 32 ++++++++++++++++++++ 4 files changed, 139 insertions(+), 17 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index cb6058a0..cd937857 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -8774,32 +8774,54 @@ impl fmt::Display for CopyOption { /// An option in `COPY` statement before PostgreSQL version 9.0. /// -/// +/// [PostgreSQL](https://www.postgresql.org/docs/8.4/sql-copy.html) +/// [Redshift](https://docs.aws.amazon.com/redshift/latest/dg/r_COPY-alphabetical-parm-list.html) #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum CopyLegacyOption { + /// ACCEPTANYDATE + AcceptAnyDate, + /// ACCEPTINVCHARS + AcceptInvChars(Option), /// BINARY Binary, - /// DELIMITER \[ AS \] 'delimiter_character' - Delimiter(char), - /// NULL \[ AS \] 'null_string' - Null(String), + /// BLANKSASNULL + BlankAsNull, /// CSV ... Csv(Vec), + /// DATEFORMAT \[ AS \] {'dateformat_string' | 'auto' } + DateFormat(Option), + /// DELIMITER \[ AS \] 'delimiter_character' + Delimiter(char), + /// EMPTYASNULL + EmptyAsNull, /// IAM_ROLE { DEFAULT | 'arn:aws:iam::123456789:role/role1' } IamRole(IamRoleKind), /// IGNOREHEADER \[ AS \] number_rows IgnoreHeader(u64), + /// NULL \[ AS \] 'null_string' + Null(String), + /// TIMEFORMAT \[ AS \] {'timeformat_string' | 'auto' | 'epochsecs' | 'epochmillisecs' } + TimeFormat(Option), + /// TRUNCATECOLUMNS + TruncateColumns, } impl fmt::Display for CopyLegacyOption { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { use CopyLegacyOption::*; match self { + AcceptAnyDate => write!(f, "ACCEPTANYDATE"), + AcceptInvChars(ch) => { + write!(f, "ACCEPTINVCHARS")?; + if let Some(ch) = ch { + write!(f, " '{}'", value::escape_single_quote_string(ch))?; + } + Ok(()) + } Binary => write!(f, "BINARY"), - Delimiter(char) => write!(f, "DELIMITER '{char}'"), - Null(string) => write!(f, "NULL '{}'", value::escape_single_quote_string(string)), + BlankAsNull => write!(f, "BLANKSASNULL"), Csv(opts) => { write!(f, "CSV")?; if !opts.is_empty() { @@ -8807,8 +8829,26 @@ impl fmt::Display for CopyLegacyOption { } Ok(()) } + DateFormat(fmt) => { + write!(f, "DATEFORMAT")?; + if let Some(fmt) = fmt { + write!(f, " '{}'", value::escape_single_quote_string(fmt))?; + } + Ok(()) + } + Delimiter(char) => write!(f, "DELIMITER '{char}'"), + EmptyAsNull => write!(f, "EMPTYASNULL"), IamRole(role) => write!(f, "IAM_ROLE {role}"), IgnoreHeader(num_rows) => write!(f, "IGNOREHEADER {num_rows}"), + Null(string) => write!(f, "NULL '{}'", value::escape_single_quote_string(string)), + TimeFormat(fmt) => { + write!(f, "TIMEFORMAT")?; + if let Some(fmt) = fmt { + write!(f, " '{}'", value::escape_single_quote_string(fmt))?; + } + Ok(()) + } + TruncateColumns => write!(f, "TRUNCATECOLUMNS"), } } } diff --git a/src/keywords.rs b/src/keywords.rs index 988f375c..d78a6c17 100644 --- a/src/keywords.rs +++ b/src/keywords.rs @@ -76,6 +76,8 @@ define_keywords!( ABS, ABSENT, ABSOLUTE, + ACCEPTANYDATE, + ACCEPTINVCHARS, ACCESS, ACCOUNT, ACTION, @@ -138,6 +140,7 @@ define_keywords!( BIND, BINDING, BIT, + BLANKSASNULL, BLOB, BLOCK, BLOOM, @@ -255,6 +258,7 @@ define_keywords!( DATA_RETENTION_TIME_IN_DAYS, DATE, DATE32, + DATEFORMAT, DATETIME, DATETIME64, DAY, @@ -314,6 +318,7 @@ define_keywords!( ELSE, ELSEIF, EMPTY, + EMPTYASNULL, ENABLE, ENABLE_SCHEMA_EVOLUTION, ENCODING, @@ -933,6 +938,7 @@ define_keywords!( THEN, TIES, TIME, + TIMEFORMAT, TIMESTAMP, TIMESTAMPTZ, TIMESTAMP_NTZ, @@ -961,6 +967,7 @@ define_keywords!( TRIM_ARRAY, TRUE, TRUNCATE, + TRUNCATECOLUMNS, TRY, TRY_CAST, TRY_CONVERT, diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 6179b834..c4c72e9c 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -9602,23 +9602,38 @@ impl<'a> Parser<'a> { } fn parse_copy_legacy_option(&mut self) -> Result { + // FORMAT \[ AS \] is optional + if self.parse_keyword(Keyword::FORMAT) { + let _ = self.parse_keyword(Keyword::AS); + } + let ret = match self.parse_one_of_keywords(&[ + Keyword::ACCEPTANYDATE, + Keyword::ACCEPTINVCHARS, Keyword::BINARY, - Keyword::DELIMITER, - Keyword::NULL, + Keyword::BLANKSASNULL, Keyword::CSV, + Keyword::DATEFORMAT, + Keyword::DELIMITER, + Keyword::EMPTYASNULL, Keyword::IAM_ROLE, Keyword::IGNOREHEADER, + Keyword::NULL, + Keyword::TIMEFORMAT, + Keyword::TRUNCATECOLUMNS, ]) { + Some(Keyword::ACCEPTANYDATE) => CopyLegacyOption::AcceptAnyDate, + Some(Keyword::ACCEPTINVCHARS) => { + let _ = self.parse_keyword(Keyword::AS); // [ AS ] + let ch = if matches!(self.peek_token().token, Token::SingleQuotedString(_)) { + Some(self.parse_literal_string()?) + } else { + None + }; + CopyLegacyOption::AcceptInvChars(ch) + } Some(Keyword::BINARY) => CopyLegacyOption::Binary, - Some(Keyword::DELIMITER) => { - let _ = self.parse_keyword(Keyword::AS); // [ AS ] - CopyLegacyOption::Delimiter(self.parse_literal_char()?) - } - Some(Keyword::NULL) => { - let _ = self.parse_keyword(Keyword::AS); // [ AS ] - CopyLegacyOption::Null(self.parse_literal_string()?) - } + Some(Keyword::BLANKSASNULL) => CopyLegacyOption::BlankAsNull, Some(Keyword::CSV) => CopyLegacyOption::Csv({ let mut opts = vec![]; while let Some(opt) = @@ -9628,12 +9643,40 @@ impl<'a> Parser<'a> { } opts }), + Some(Keyword::DATEFORMAT) => { + let _ = self.parse_keyword(Keyword::AS); + let fmt = if matches!(self.peek_token().token, Token::SingleQuotedString(_)) { + Some(self.parse_literal_string()?) + } else { + None + }; + CopyLegacyOption::DateFormat(fmt) + } + Some(Keyword::DELIMITER) => { + let _ = self.parse_keyword(Keyword::AS); + CopyLegacyOption::Delimiter(self.parse_literal_char()?) + } + Some(Keyword::EMPTYASNULL) => CopyLegacyOption::EmptyAsNull, Some(Keyword::IAM_ROLE) => CopyLegacyOption::IamRole(self.parse_iam_role_kind()?), Some(Keyword::IGNOREHEADER) => { let _ = self.parse_keyword(Keyword::AS); let num_rows = self.parse_literal_uint()?; CopyLegacyOption::IgnoreHeader(num_rows) } + Some(Keyword::NULL) => { + let _ = self.parse_keyword(Keyword::AS); + CopyLegacyOption::Null(self.parse_literal_string()?) + } + Some(Keyword::TIMEFORMAT) => { + let _ = self.parse_keyword(Keyword::AS); + let fmt = if matches!(self.peek_token().token, Token::SingleQuotedString(_)) { + Some(self.parse_literal_string()?) + } else { + None + }; + CopyLegacyOption::TimeFormat(fmt) + } + Some(Keyword::TRUNCATECOLUMNS) => CopyLegacyOption::TruncateColumns, _ => self.expected("option", self.peek_token())?, }; Ok(ret) diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index f7a0b1d1..54ad1732 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -16840,6 +16840,38 @@ fn parse_copy_options() { } _ => unreachable!(), } + one_statement_parses_to( + concat!( + "COPY dst (c1, c2, c3) FROM 's3://redshift-downloads/tickit/category_pipe.txt' ", + "ACCEPTANYDATE ", + "ACCEPTINVCHARS AS '*' ", + "BLANKSASNULL ", + "CSV ", + "DATEFORMAT AS 'DD-MM-YYYY' ", + "EMPTYASNULL ", + "IAM_ROLE DEFAULT ", + "IGNOREHEADER AS 1 ", + "TIMEFORMAT AS 'auto' ", + "TRUNCATECOLUMNS", + ), + concat!( + "COPY dst (c1, c2, c3) FROM 's3://redshift-downloads/tickit/category_pipe.txt' ", + "ACCEPTANYDATE ", + "ACCEPTINVCHARS '*' ", + "BLANKSASNULL ", + "CSV ", + "DATEFORMAT 'DD-MM-YYYY' ", + "EMPTYASNULL ", + "IAM_ROLE DEFAULT ", + "IGNOREHEADER 1 ", + "TIMEFORMAT 'auto' ", + "TRUNCATECOLUMNS", + ), + ); + one_statement_parses_to( + "COPY dst (c1, c2, c3) FROM 's3://redshift-downloads/tickit/category_pipe.txt' FORMAT AS CSV", + "COPY dst (c1, c2, c3) FROM 's3://redshift-downloads/tickit/category_pipe.txt' CSV", + ); } #[test]