Parse Snowflake COPY INTO <location> (#1669)

This commit is contained in:
Yoav Cohen 2025-02-05 20:23:27 +01:00 committed by GitHub
parent 486b29ffab
commit 751dc5afce
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 381 additions and 185 deletions

View file

@ -58,6 +58,7 @@ pub enum DataLoadingOptionType {
STRING, STRING,
BOOLEAN, BOOLEAN,
ENUM, ENUM,
NUMBER,
} }
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
@ -128,12 +129,9 @@ impl fmt::Display for DataLoadingOption {
DataLoadingOptionType::STRING => { DataLoadingOptionType::STRING => {
write!(f, "{}='{}'", self.option_name, self.value)?; write!(f, "{}='{}'", self.option_name, self.value)?;
} }
DataLoadingOptionType::ENUM => { DataLoadingOptionType::ENUM
// single quote is omitted | DataLoadingOptionType::BOOLEAN
write!(f, "{}={}", self.option_name, self.value)?; | DataLoadingOptionType::NUMBER => {
}
DataLoadingOptionType::BOOLEAN => {
// single quote is omitted
write!(f, "{}={}", self.option_name, self.value)?; write!(f, "{}={}", self.option_name, self.value)?;
} }
} }

View file

@ -2498,24 +2498,30 @@ pub enum Statement {
values: Vec<Option<String>>, values: Vec<Option<String>>,
}, },
/// ```sql /// ```sql
/// COPY INTO /// COPY INTO <table> | <location>
/// ``` /// ```
/// See <https://docs.snowflake.com/en/sql-reference/sql/copy-into-table> /// See:
/// <https://docs.snowflake.com/en/sql-reference/sql/copy-into-table>
/// <https://docs.snowflake.com/en/sql-reference/sql/copy-into-location>
///
/// Copy Into syntax available for Snowflake is different than the one implemented in /// Copy Into syntax available for Snowflake is different than the one implemented in
/// Postgres. Although they share common prefix, it is reasonable to implement them /// Postgres. Although they share common prefix, it is reasonable to implement them
/// in different enums. This can be refactored later once custom dialects /// in different enums. This can be refactored later once custom dialects
/// are allowed to have custom Statements. /// are allowed to have custom Statements.
CopyIntoSnowflake { CopyIntoSnowflake {
kind: CopyIntoSnowflakeKind,
into: ObjectName, into: ObjectName,
from_stage: ObjectName, from_obj: Option<ObjectName>,
from_stage_alias: Option<Ident>, from_obj_alias: Option<Ident>,
stage_params: StageParamsObject, stage_params: StageParamsObject,
from_transformations: Option<Vec<StageLoadSelectItem>>, from_transformations: Option<Vec<StageLoadSelectItem>>,
from_query: Option<Box<Query>>,
files: Option<Vec<String>>, files: Option<Vec<String>>,
pattern: Option<String>, pattern: Option<String>,
file_format: DataLoadingOptions, file_format: DataLoadingOptions,
copy_options: DataLoadingOptions, copy_options: DataLoadingOptions,
validation_mode: Option<String>, validation_mode: Option<String>,
partition: Option<Box<Expr>>,
}, },
/// ```sql /// ```sql
/// CLOSE /// CLOSE
@ -5048,60 +5054,69 @@ impl fmt::Display for Statement {
Ok(()) Ok(())
} }
Statement::CopyIntoSnowflake { Statement::CopyIntoSnowflake {
kind,
into, into,
from_stage, from_obj,
from_stage_alias, from_obj_alias,
stage_params, stage_params,
from_transformations, from_transformations,
from_query,
files, files,
pattern, pattern,
file_format, file_format,
copy_options, copy_options,
validation_mode, validation_mode,
partition,
} => { } => {
write!(f, "COPY INTO {}", into)?; write!(f, "COPY INTO {}", into)?;
if from_transformations.is_none() { if let Some(from_transformations) = from_transformations {
// Standard data load
write!(f, " FROM {}{}", from_stage, stage_params)?;
if from_stage_alias.as_ref().is_some() {
write!(f, " AS {}", from_stage_alias.as_ref().unwrap())?;
}
} else {
// Data load with transformation // Data load with transformation
if let Some(from_stage) = from_obj {
write!( write!(
f, f,
" FROM (SELECT {} FROM {}{}", " FROM (SELECT {} FROM {}{}",
display_separated(from_transformations.as_ref().unwrap(), ", "), display_separated(from_transformations, ", "),
from_stage, from_stage,
stage_params, stage_params
)?; )?;
if from_stage_alias.as_ref().is_some() { }
write!(f, " AS {}", from_stage_alias.as_ref().unwrap())?; if let Some(from_obj_alias) = from_obj_alias {
write!(f, " AS {}", from_obj_alias)?;
} }
write!(f, ")")?; write!(f, ")")?;
} else if let Some(from_obj) = from_obj {
// Standard data load
write!(f, " FROM {}{}", from_obj, stage_params)?;
if let Some(from_obj_alias) = from_obj_alias {
write!(f, " AS {from_obj_alias}")?;
} }
if files.is_some() { } else if let Some(from_query) = from_query {
write!( // Data unload from query
f, write!(f, " FROM ({from_query})")?;
" FILES = ('{}')",
display_separated(files.as_ref().unwrap(), "', '")
)?;
} }
if pattern.is_some() {
write!(f, " PATTERN = '{}'", pattern.as_ref().unwrap())?; if let Some(files) = files {
write!(f, " FILES = ('{}')", display_separated(files, "', '"))?;
}
if let Some(pattern) = pattern {
write!(f, " PATTERN = '{}'", pattern)?;
}
if let Some(partition) = partition {
write!(f, " PARTITION BY {partition}")?;
} }
if !file_format.options.is_empty() { if !file_format.options.is_empty() {
write!(f, " FILE_FORMAT=({})", file_format)?; write!(f, " FILE_FORMAT=({})", file_format)?;
} }
if !copy_options.options.is_empty() { if !copy_options.options.is_empty() {
write!(f, " COPY_OPTIONS=({})", copy_options)?; match kind {
CopyIntoSnowflakeKind::Table => {
write!(f, " COPY_OPTIONS=({})", copy_options)?
} }
if validation_mode.is_some() { CopyIntoSnowflakeKind::Location => write!(f, " {copy_options}")?,
write!( }
f, }
" VALIDATION_MODE = {}", if let Some(validation_mode) = validation_mode {
validation_mode.as_ref().unwrap() write!(f, " VALIDATION_MODE = {}", validation_mode)?;
)?;
} }
Ok(()) Ok(())
} }
@ -8543,6 +8558,19 @@ impl Display for StorageSerializationPolicy {
} }
} }
/// Variants of the Snowflake `COPY INTO` statement
#[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
pub enum CopyIntoSnowflakeKind {
/// Loads data from files to a table
/// See: <https://docs.snowflake.com/en/sql-reference/sql/copy-into-table>
Table,
/// Unloads data from a table or query to external files
/// See: <https://docs.snowflake.com/en/sql-reference/sql/copy-into-location>
Location,
}
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::*; use super::*;

View file

@ -333,8 +333,8 @@ impl Spanned for Statement {
} => source.span(), } => source.span(),
Statement::CopyIntoSnowflake { Statement::CopyIntoSnowflake {
into: _, into: _,
from_stage: _, from_obj: _,
from_stage_alias: _, from_obj_alias: _,
stage_params: _, stage_params: _,
from_transformations: _, from_transformations: _,
files: _, files: _,
@ -342,6 +342,9 @@ impl Spanned for Statement {
file_format: _, file_format: _,
copy_options: _, copy_options: _,
validation_mode: _, validation_mode: _,
kind: _,
from_query: _,
partition: _,
} => Span::empty(), } => Span::empty(),
Statement::Close { cursor } => match cursor { Statement::Close { cursor } => match cursor {
CloseCursor::All => Span::empty(), CloseCursor::All => Span::empty(),

View file

@ -23,23 +23,26 @@ use crate::ast::helpers::stmt_data_loading::{
StageLoadSelectItem, StageParamsObject, StageLoadSelectItem, StageParamsObject,
}; };
use crate::ast::{ use crate::ast::{
ColumnOption, ColumnPolicy, ColumnPolicyProperty, Ident, IdentityParameters, IdentityProperty, ColumnOption, ColumnPolicy, ColumnPolicyProperty, CopyIntoSnowflakeKind, Ident,
IdentityPropertyFormatKind, IdentityPropertyKind, IdentityPropertyOrder, ObjectName, IdentityParameters, IdentityProperty, IdentityPropertyFormatKind, IdentityPropertyKind,
RowAccessPolicy, Statement, TagsColumnOption, WrappedCollection, IdentityPropertyOrder, ObjectName, RowAccessPolicy, Statement, TagsColumnOption,
WrappedCollection,
}; };
use crate::dialect::{Dialect, Precedence}; use crate::dialect::{Dialect, Precedence};
use crate::keywords::Keyword; use crate::keywords::Keyword;
use crate::parser::{Parser, ParserError}; use crate::parser::{Parser, ParserError};
use crate::tokenizer::Token; use crate::tokenizer::{Token, Word};
#[cfg(not(feature = "std"))]
use alloc::boxed::Box;
#[cfg(not(feature = "std"))] #[cfg(not(feature = "std"))]
use alloc::string::String; use alloc::string::String;
#[cfg(not(feature = "std"))] #[cfg(not(feature = "std"))]
use alloc::vec::Vec; use alloc::vec::Vec;
#[cfg(not(feature = "std"))] #[cfg(not(feature = "std"))]
use alloc::{format, vec}; use alloc::{format, vec};
use sqlparser::ast::StorageSerializationPolicy;
use super::keywords::RESERVED_FOR_IDENTIFIER; use super::keywords::RESERVED_FOR_IDENTIFIER;
use sqlparser::ast::StorageSerializationPolicy;
/// A [`Dialect`] for [Snowflake](https://www.snowflake.com/) /// A [`Dialect`] for [Snowflake](https://www.snowflake.com/)
#[derive(Debug, Default)] #[derive(Debug, Default)]
@ -665,24 +668,49 @@ pub fn parse_snowflake_stage_name(parser: &mut Parser) -> Result<ObjectName, Par
} }
} }
/// Parses a `COPY INTO` statement. Snowflake has two variants, `COPY INTO <table>`
/// and `COPY INTO <location>` which have different syntax.
pub fn parse_copy_into(parser: &mut Parser) -> Result<Statement, ParserError> { pub fn parse_copy_into(parser: &mut Parser) -> Result<Statement, ParserError> {
let into: ObjectName = parse_snowflake_stage_name(parser)?; let kind = match parser.peek_token().token {
// Indicates an internal stage
Token::AtSign => CopyIntoSnowflakeKind::Location,
// Indicates an external stage, i.e. s3://, gcs:// or azure://
Token::SingleQuotedString(s) if s.contains("://") => CopyIntoSnowflakeKind::Location,
_ => CopyIntoSnowflakeKind::Table,
};
let mut files: Vec<String> = vec![]; let mut files: Vec<String> = vec![];
let mut from_transformations: Option<Vec<StageLoadSelectItem>> = None; let mut from_transformations: Option<Vec<StageLoadSelectItem>> = None;
let from_stage_alias; let mut from_stage_alias = None;
let from_stage: ObjectName; let mut from_stage = None;
let stage_params: StageParamsObject; let mut stage_params = StageParamsObject {
url: None,
encryption: DataLoadingOptions { options: vec![] },
endpoint: None,
storage_integration: None,
credentials: DataLoadingOptions { options: vec![] },
};
let mut from_query = None;
let mut partition = None;
let mut file_format = Vec::new();
let mut pattern = None;
let mut validation_mode = None;
let mut copy_options = Vec::new();
let into: ObjectName = parse_snowflake_stage_name(parser)?;
if kind == CopyIntoSnowflakeKind::Location {
stage_params = parse_stage_params(parser)?;
}
parser.expect_keyword_is(Keyword::FROM)?; parser.expect_keyword_is(Keyword::FROM)?;
// check if data load transformations are present
match parser.next_token().token { match parser.next_token().token {
Token::LParen => { Token::LParen if kind == CopyIntoSnowflakeKind::Table => {
// data load with transformations // Data load with transformations
parser.expect_keyword_is(Keyword::SELECT)?; parser.expect_keyword_is(Keyword::SELECT)?;
from_transformations = parse_select_items_for_data_load(parser)?; from_transformations = parse_select_items_for_data_load(parser)?;
parser.expect_keyword_is(Keyword::FROM)?; parser.expect_keyword_is(Keyword::FROM)?;
from_stage = parse_snowflake_stage_name(parser)?; from_stage = Some(parse_snowflake_stage_name(parser)?);
stage_params = parse_stage_params(parser)?; stage_params = parse_stage_params(parser)?;
// as // as
@ -696,9 +724,14 @@ pub fn parse_copy_into(parser: &mut Parser) -> Result<Statement, ParserError> {
}; };
parser.expect_token(&Token::RParen)?; parser.expect_token(&Token::RParen)?;
} }
Token::LParen if kind == CopyIntoSnowflakeKind::Location => {
// Data unload with a query
from_query = Some(parser.parse_query()?);
parser.expect_token(&Token::RParen)?;
}
_ => { _ => {
parser.prev_token(); parser.prev_token();
from_stage = parse_snowflake_stage_name(parser)?; from_stage = Some(parse_snowflake_stage_name(parser)?);
stage_params = parse_stage_params(parser)?; stage_params = parse_stage_params(parser)?;
// as // as
@ -711,10 +744,18 @@ pub fn parse_copy_into(parser: &mut Parser) -> Result<Statement, ParserError> {
None None
}; };
} }
}; }
// [ files ] loop {
if parser.parse_keyword(Keyword::FILES) { // FILE_FORMAT
if parser.parse_keyword(Keyword::FILE_FORMAT) {
parser.expect_token(&Token::Eq)?;
file_format = parse_parentheses_options(parser)?;
// PARTITION BY
} else if parser.parse_keywords(&[Keyword::PARTITION, Keyword::BY]) {
partition = Some(Box::new(parser.parse_expr()?))
// FILES
} else if parser.parse_keyword(Keyword::FILES) {
parser.expect_token(&Token::Eq)?; parser.expect_token(&Token::Eq)?;
parser.expect_token(&Token::LParen)?; parser.expect_token(&Token::LParen)?;
let mut continue_loop = true; let mut continue_loop = true;
@ -732,46 +773,42 @@ pub fn parse_copy_into(parser: &mut Parser) -> Result<Statement, ParserError> {
} }
} }
parser.expect_token(&Token::RParen)?; parser.expect_token(&Token::RParen)?;
} // PATTERN
} else if parser.parse_keyword(Keyword::PATTERN) {
// [ pattern ]
let mut pattern = None;
if parser.parse_keyword(Keyword::PATTERN) {
parser.expect_token(&Token::Eq)?; parser.expect_token(&Token::Eq)?;
let next_token = parser.next_token(); let next_token = parser.next_token();
pattern = Some(match next_token.token { pattern = Some(match next_token.token {
Token::SingleQuotedString(s) => s, Token::SingleQuotedString(s) => s,
_ => parser.expected("pattern", next_token)?, _ => parser.expected("pattern", next_token)?,
}); });
} // VALIDATION MODE
} else if parser.parse_keyword(Keyword::VALIDATION_MODE) {
// [ file_format]
let mut file_format = Vec::new();
if parser.parse_keyword(Keyword::FILE_FORMAT) {
parser.expect_token(&Token::Eq)?;
file_format = parse_parentheses_options(parser)?;
}
// [ copy_options ]
let mut copy_options = Vec::new();
if parser.parse_keyword(Keyword::COPY_OPTIONS) {
parser.expect_token(&Token::Eq)?;
copy_options = parse_parentheses_options(parser)?;
}
// [ VALIDATION_MODE ]
let mut validation_mode = None;
if parser.parse_keyword(Keyword::VALIDATION_MODE) {
parser.expect_token(&Token::Eq)?; parser.expect_token(&Token::Eq)?;
validation_mode = Some(parser.next_token().token.to_string()); validation_mode = Some(parser.next_token().token.to_string());
// COPY OPTIONS
} else if parser.parse_keyword(Keyword::COPY_OPTIONS) {
parser.expect_token(&Token::Eq)?;
copy_options = parse_parentheses_options(parser)?;
} else {
match parser.next_token().token {
Token::SemiColon | Token::EOF => break,
Token::Comma => continue,
// In `COPY INTO <location>` the copy options do not have a shared key
// like in `COPY INTO <table>`
Token::Word(key) => copy_options.push(parse_copy_option(parser, key)?),
_ => return parser.expected("another copy option, ; or EOF'", parser.peek_token()),
}
}
} }
Ok(Statement::CopyIntoSnowflake { Ok(Statement::CopyIntoSnowflake {
kind,
into, into,
from_stage, from_obj: from_stage,
from_stage_alias, from_obj_alias: from_stage_alias,
stage_params, stage_params,
from_transformations, from_transformations,
from_query,
files: if files.is_empty() { None } else { Some(files) }, files: if files.is_empty() { None } else { Some(files) },
pattern, pattern,
file_format: DataLoadingOptions { file_format: DataLoadingOptions {
@ -781,6 +818,7 @@ pub fn parse_copy_into(parser: &mut Parser) -> Result<Statement, ParserError> {
options: copy_options, options: copy_options,
}, },
validation_mode, validation_mode,
partition,
}) })
} }
@ -930,53 +968,53 @@ fn parse_stage_params(parser: &mut Parser) -> Result<StageParamsObject, ParserEr
/// ///
fn parse_parentheses_options(parser: &mut Parser) -> Result<Vec<DataLoadingOption>, ParserError> { fn parse_parentheses_options(parser: &mut Parser) -> Result<Vec<DataLoadingOption>, ParserError> {
let mut options: Vec<DataLoadingOption> = Vec::new(); let mut options: Vec<DataLoadingOption> = Vec::new();
parser.expect_token(&Token::LParen)?; parser.expect_token(&Token::LParen)?;
loop { loop {
match parser.next_token().token { match parser.next_token().token {
Token::RParen => break, Token::RParen => break,
Token::Word(key) => { Token::Comma => continue,
Token::Word(key) => options.push(parse_copy_option(parser, key)?),
_ => return parser.expected("another option or ')'", parser.peek_token()),
};
}
Ok(options)
}
/// Parses a `KEY = VALUE` construct based on the specified key
fn parse_copy_option(parser: &mut Parser, key: Word) -> Result<DataLoadingOption, ParserError> {
parser.expect_token(&Token::Eq)?; parser.expect_token(&Token::Eq)?;
if parser.parse_keyword(Keyword::TRUE) { if parser.parse_keyword(Keyword::TRUE) {
options.push(DataLoadingOption { Ok(DataLoadingOption {
option_name: key.value, option_name: key.value,
option_type: DataLoadingOptionType::BOOLEAN, option_type: DataLoadingOptionType::BOOLEAN,
value: "TRUE".to_string(), value: "TRUE".to_string(),
}); })
Ok(())
} else if parser.parse_keyword(Keyword::FALSE) { } else if parser.parse_keyword(Keyword::FALSE) {
options.push(DataLoadingOption { Ok(DataLoadingOption {
option_name: key.value, option_name: key.value,
option_type: DataLoadingOptionType::BOOLEAN, option_type: DataLoadingOptionType::BOOLEAN,
value: "FALSE".to_string(), value: "FALSE".to_string(),
}); })
Ok(())
} else { } else {
match parser.next_token().token { match parser.next_token().token {
Token::SingleQuotedString(value) => { Token::SingleQuotedString(value) => Ok(DataLoadingOption {
options.push(DataLoadingOption {
option_name: key.value, option_name: key.value,
option_type: DataLoadingOptionType::STRING, option_type: DataLoadingOptionType::STRING,
value, value,
}); }),
Ok(()) Token::Word(word) => Ok(DataLoadingOption {
}
Token::Word(word) => {
options.push(DataLoadingOption {
option_name: key.value, option_name: key.value,
option_type: DataLoadingOptionType::ENUM, option_type: DataLoadingOptionType::ENUM,
value: word.value, value: word.value,
}); }),
Ok(()) Token::Number(n, _) => Ok(DataLoadingOption {
} option_name: key.value,
option_type: DataLoadingOptionType::NUMBER,
value: n,
}),
_ => parser.expected("expected option value", parser.peek_token()), _ => parser.expected("expected option value", parser.peek_token()),
} }
} }
}
_ => parser.expected("another option or ')'", parser.peek_token()),
}?;
}
Ok(options)
} }
/// Parsing a property of identity or autoincrement column option /// Parsing a property of identity or autoincrement column option

View file

@ -2028,20 +2028,25 @@ fn test_copy_into() {
); );
match snowflake().verified_stmt(sql) { match snowflake().verified_stmt(sql) {
Statement::CopyIntoSnowflake { Statement::CopyIntoSnowflake {
kind,
into, into,
from_stage, from_obj,
files, files,
pattern, pattern,
validation_mode, validation_mode,
.. ..
} => { } => {
assert_eq!(kind, CopyIntoSnowflakeKind::Table);
assert_eq!( assert_eq!(
into, into,
ObjectName::from(vec![Ident::new("my_company"), Ident::new("emp_basic")]) ObjectName::from(vec![Ident::new("my_company"), Ident::new("emp_basic")])
); );
assert_eq!( assert_eq!(
from_stage, from_obj,
ObjectName::from(vec![Ident::with_quote('\'', "gcs://mybucket/./../a.csv")]) Some(ObjectName::from(vec![Ident::with_quote(
'\'',
"gcs://mybucket/./../a.csv"
)]))
); );
assert!(files.is_none()); assert!(files.is_none());
assert!(pattern.is_none()); assert!(pattern.is_none());
@ -2050,6 +2055,60 @@ fn test_copy_into() {
_ => unreachable!(), _ => unreachable!(),
}; };
assert_eq!(snowflake().verified_stmt(sql).to_string(), sql); assert_eq!(snowflake().verified_stmt(sql).to_string(), sql);
let sql = concat!("COPY INTO 's3://a/b/c/data.parquet' ", "FROM db.sc.tbl ", "PARTITION BY ('date=' || to_varchar(dt, 'YYYY-MM-DD') || '/hour=' || to_varchar(date_part(hour, ts)))");
match snowflake().verified_stmt(sql) {
Statement::CopyIntoSnowflake {
kind,
into,
from_obj,
from_query,
partition,
..
} => {
assert_eq!(kind, CopyIntoSnowflakeKind::Location);
assert_eq!(
into,
ObjectName::from(vec![Ident::with_quote('\'', "s3://a/b/c/data.parquet")])
);
assert_eq!(
from_obj,
Some(ObjectName::from(vec![
Ident::new("db"),
Ident::new("sc"),
Ident::new("tbl")
]))
);
assert!(from_query.is_none());
assert!(partition.is_some());
}
_ => unreachable!(),
};
assert_eq!(snowflake().verified_stmt(sql).to_string(), sql);
let sql = concat!(
"COPY INTO 's3://a/b/c/data.parquet' ",
"FROM (SELECT * FROM tbl)"
);
match snowflake().verified_stmt(sql) {
Statement::CopyIntoSnowflake {
kind,
into,
from_obj,
from_query,
..
} => {
assert_eq!(kind, CopyIntoSnowflakeKind::Location);
assert_eq!(
into,
ObjectName::from(vec![Ident::with_quote('\'', "s3://a/b/c/data.parquet")])
);
assert!(from_query.is_some());
assert!(from_obj.is_none());
}
_ => unreachable!(),
};
assert_eq!(snowflake().verified_stmt(sql).to_string(), sql);
} }
#[test] #[test]
@ -2065,14 +2124,17 @@ fn test_copy_into_with_stage_params() {
match snowflake().verified_stmt(sql) { match snowflake().verified_stmt(sql) {
Statement::CopyIntoSnowflake { Statement::CopyIntoSnowflake {
from_stage, from_obj,
stage_params, stage_params,
.. ..
} => { } => {
//assert_eq!("s3://load/files/", stage_params.url.unwrap()); //assert_eq!("s3://load/files/", stage_params.url.unwrap());
assert_eq!( assert_eq!(
from_stage, from_obj,
ObjectName::from(vec![Ident::with_quote('\'', "s3://load/files/")]) Some(ObjectName::from(vec![Ident::with_quote(
'\'',
"s3://load/files/"
)]))
); );
assert_eq!("myint", stage_params.storage_integration.unwrap()); assert_eq!("myint", stage_params.storage_integration.unwrap());
assert_eq!( assert_eq!(
@ -2125,13 +2187,16 @@ fn test_copy_into_with_stage_params() {
match snowflake().verified_stmt(sql) { match snowflake().verified_stmt(sql) {
Statement::CopyIntoSnowflake { Statement::CopyIntoSnowflake {
from_stage, from_obj,
stage_params, stage_params,
.. ..
} => { } => {
assert_eq!( assert_eq!(
from_stage, from_obj,
ObjectName::from(vec![Ident::with_quote('\'', "s3://load/files/")]) Some(ObjectName::from(vec![Ident::with_quote(
'\'',
"s3://load/files/"
)]))
); );
assert_eq!("myint", stage_params.storage_integration.unwrap()); assert_eq!("myint", stage_params.storage_integration.unwrap());
} }
@ -2154,13 +2219,13 @@ fn test_copy_into_with_files_and_pattern_and_verification() {
files, files,
pattern, pattern,
validation_mode, validation_mode,
from_stage_alias, from_obj_alias,
.. ..
} => { } => {
assert_eq!(files.unwrap(), vec!["file1.json", "file2.json"]); assert_eq!(files.unwrap(), vec!["file1.json", "file2.json"]);
assert_eq!(pattern.unwrap(), ".*employees0[1-5].csv.gz"); assert_eq!(pattern.unwrap(), ".*employees0[1-5].csv.gz");
assert_eq!(validation_mode.unwrap(), "RETURN_7_ROWS"); assert_eq!(validation_mode.unwrap(), "RETURN_7_ROWS");
assert_eq!(from_stage_alias.unwrap(), Ident::new("some_alias")); assert_eq!(from_obj_alias.unwrap(), Ident::new("some_alias"));
} }
_ => unreachable!(), _ => unreachable!(),
} }
@ -2179,13 +2244,16 @@ fn test_copy_into_with_transformations() {
match snowflake().verified_stmt(sql) { match snowflake().verified_stmt(sql) {
Statement::CopyIntoSnowflake { Statement::CopyIntoSnowflake {
from_stage, from_obj,
from_transformations, from_transformations,
.. ..
} => { } => {
assert_eq!( assert_eq!(
from_stage, from_obj,
ObjectName::from(vec![Ident::new("@schema"), Ident::new("general_finished")]) Some(ObjectName::from(vec![
Ident::new("@schema"),
Ident::new("general_finished")
]))
); );
assert_eq!( assert_eq!(
from_transformations.as_ref().unwrap()[0], from_transformations.as_ref().unwrap()[0],
@ -2254,6 +2322,41 @@ fn test_copy_into_file_format() {
snowflake_without_unescape().verified_stmt(sql).to_string(), snowflake_without_unescape().verified_stmt(sql).to_string(),
sql sql
); );
// Test commas in file format
let sql = concat!(
"COPY INTO my_company.emp_basic ",
"FROM 'gcs://mybucket/./../a.csv' ",
"FILES = ('file1.json', 'file2.json') ",
"PATTERN = '.*employees0[1-5].csv.gz' ",
r#"FILE_FORMAT=(COMPRESSION=AUTO, BINARY_FORMAT=HEX, ESCAPE='\\')"#
);
match snowflake_without_unescape()
.parse_sql_statements(sql)
.unwrap()
.first()
.unwrap()
{
Statement::CopyIntoSnowflake { file_format, .. } => {
assert!(file_format.options.contains(&DataLoadingOption {
option_name: "COMPRESSION".to_string(),
option_type: DataLoadingOptionType::ENUM,
value: "AUTO".to_string()
}));
assert!(file_format.options.contains(&DataLoadingOption {
option_name: "BINARY_FORMAT".to_string(),
option_type: DataLoadingOptionType::ENUM,
value: "HEX".to_string()
}));
assert!(file_format.options.contains(&DataLoadingOption {
option_name: "ESCAPE".to_string(),
option_type: DataLoadingOptionType::STRING,
value: r#"\\"#.to_string()
}));
}
_ => unreachable!(),
}
} }
#[test] #[test]
@ -2285,16 +2388,8 @@ fn test_copy_into_copy_options() {
} }
#[test] #[test]
fn test_snowflake_stage_object_names() { fn test_snowflake_stage_object_names_into_location() {
let allowed_formatted_names = [
"my_company.emp_basic",
"@namespace.%table_name",
"@namespace.%table_name/path",
"@namespace.stage_name/path",
"@~/path",
];
let mut allowed_object_names = [ let mut allowed_object_names = [
ObjectName::from(vec![Ident::new("my_company"), Ident::new("emp_basic")]),
ObjectName::from(vec![Ident::new("@namespace"), Ident::new("%table_name")]), ObjectName::from(vec![Ident::new("@namespace"), Ident::new("%table_name")]),
ObjectName::from(vec![ ObjectName::from(vec![
Ident::new("@namespace"), Ident::new("@namespace"),
@ -2307,7 +2402,39 @@ fn test_snowflake_stage_object_names() {
ObjectName::from(vec![Ident::new("@~/path")]), ObjectName::from(vec![Ident::new("@~/path")]),
]; ];
for it in allowed_formatted_names let allowed_names_into_location = [
"@namespace.%table_name",
"@namespace.%table_name/path",
"@namespace.stage_name/path",
"@~/path",
];
for it in allowed_names_into_location
.iter()
.zip(allowed_object_names.iter_mut())
{
let (formatted_name, object_name) = it;
let sql = format!(
"COPY INTO {} FROM 'gcs://mybucket/./../a.csv'",
formatted_name
);
match snowflake().verified_stmt(&sql) {
Statement::CopyIntoSnowflake { into, .. } => {
assert_eq!(into.0, object_name.0)
}
_ => unreachable!(),
}
}
}
#[test]
fn test_snowflake_stage_object_names_into_table() {
let mut allowed_object_names = [
ObjectName::from(vec![Ident::new("my_company"), Ident::new("emp_basic")]),
ObjectName::from(vec![Ident::new("emp_basic")]),
];
let allowed_names_into_table = ["my_company.emp_basic", "emp_basic"];
for it in allowed_names_into_table
.iter() .iter()
.zip(allowed_object_names.iter_mut()) .zip(allowed_object_names.iter_mut())
{ {
@ -2330,16 +2457,17 @@ fn test_snowflake_copy_into() {
let sql = "COPY INTO a.b FROM @namespace.stage_name"; let sql = "COPY INTO a.b FROM @namespace.stage_name";
assert_eq!(snowflake().verified_stmt(sql).to_string(), sql); assert_eq!(snowflake().verified_stmt(sql).to_string(), sql);
match snowflake().verified_stmt(sql) { match snowflake().verified_stmt(sql) {
Statement::CopyIntoSnowflake { Statement::CopyIntoSnowflake { into, from_obj, .. } => {
into, from_stage, ..
} => {
assert_eq!( assert_eq!(
into, into,
ObjectName::from(vec![Ident::new("a"), Ident::new("b")]) ObjectName::from(vec![Ident::new("a"), Ident::new("b")])
); );
assert_eq!( assert_eq!(
from_stage, from_obj,
ObjectName::from(vec![Ident::new("@namespace"), Ident::new("stage_name")]) Some(ObjectName::from(vec![
Ident::new("@namespace"),
Ident::new("stage_name")
]))
) )
} }
_ => unreachable!(), _ => unreachable!(),
@ -2351,9 +2479,7 @@ fn test_snowflake_copy_into_stage_name_ends_with_parens() {
let sql = "COPY INTO SCHEMA.SOME_MONITORING_SYSTEM FROM (SELECT t.$1:st AS st FROM @schema.general_finished)"; let sql = "COPY INTO SCHEMA.SOME_MONITORING_SYSTEM FROM (SELECT t.$1:st AS st FROM @schema.general_finished)";
assert_eq!(snowflake().verified_stmt(sql).to_string(), sql); assert_eq!(snowflake().verified_stmt(sql).to_string(), sql);
match snowflake().verified_stmt(sql) { match snowflake().verified_stmt(sql) {
Statement::CopyIntoSnowflake { Statement::CopyIntoSnowflake { into, from_obj, .. } => {
into, from_stage, ..
} => {
assert_eq!( assert_eq!(
into, into,
ObjectName::from(vec![ ObjectName::from(vec![
@ -2362,8 +2488,11 @@ fn test_snowflake_copy_into_stage_name_ends_with_parens() {
]) ])
); );
assert_eq!( assert_eq!(
from_stage, from_obj,
ObjectName::from(vec![Ident::new("@schema"), Ident::new("general_finished")]) Some(ObjectName::from(vec![
Ident::new("@schema"),
Ident::new("general_finished")
]))
) )
} }
_ => unreachable!(), _ => unreachable!(),