Parse Snowflake COPY INTO <location> (#1669)

This commit is contained in:
Yoav Cohen 2025-02-05 20:23:27 +01:00 committed by GitHub
parent 486b29ffab
commit 751dc5afce
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 381 additions and 185 deletions

View file

@ -58,6 +58,7 @@ pub enum DataLoadingOptionType {
STRING,
BOOLEAN,
ENUM,
NUMBER,
}
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
@ -128,12 +129,9 @@ impl fmt::Display for DataLoadingOption {
DataLoadingOptionType::STRING => {
write!(f, "{}='{}'", self.option_name, self.value)?;
}
DataLoadingOptionType::ENUM => {
// single quote is omitted
write!(f, "{}={}", self.option_name, self.value)?;
}
DataLoadingOptionType::BOOLEAN => {
// single quote is omitted
DataLoadingOptionType::ENUM
| DataLoadingOptionType::BOOLEAN
| DataLoadingOptionType::NUMBER => {
write!(f, "{}={}", self.option_name, self.value)?;
}
}

View file

@ -2498,24 +2498,30 @@ pub enum Statement {
values: Vec<Option<String>>,
},
/// ```sql
/// COPY INTO
/// COPY INTO <table> | <location>
/// ```
/// See <https://docs.snowflake.com/en/sql-reference/sql/copy-into-table>
/// See:
/// <https://docs.snowflake.com/en/sql-reference/sql/copy-into-table>
/// <https://docs.snowflake.com/en/sql-reference/sql/copy-into-location>
///
/// Copy Into syntax available for Snowflake is different than the one implemented in
/// Postgres. Although they share common prefix, it is reasonable to implement them
/// in different enums. This can be refactored later once custom dialects
/// are allowed to have custom Statements.
CopyIntoSnowflake {
kind: CopyIntoSnowflakeKind,
into: ObjectName,
from_stage: ObjectName,
from_stage_alias: Option<Ident>,
from_obj: Option<ObjectName>,
from_obj_alias: Option<Ident>,
stage_params: StageParamsObject,
from_transformations: Option<Vec<StageLoadSelectItem>>,
from_query: Option<Box<Query>>,
files: Option<Vec<String>>,
pattern: Option<String>,
file_format: DataLoadingOptions,
copy_options: DataLoadingOptions,
validation_mode: Option<String>,
partition: Option<Box<Expr>>,
},
/// ```sql
/// CLOSE
@ -5048,60 +5054,69 @@ impl fmt::Display for Statement {
Ok(())
}
Statement::CopyIntoSnowflake {
kind,
into,
from_stage,
from_stage_alias,
from_obj,
from_obj_alias,
stage_params,
from_transformations,
from_query,
files,
pattern,
file_format,
copy_options,
validation_mode,
partition,
} => {
write!(f, "COPY INTO {}", into)?;
if from_transformations.is_none() {
// Standard data load
write!(f, " FROM {}{}", from_stage, stage_params)?;
if from_stage_alias.as_ref().is_some() {
write!(f, " AS {}", from_stage_alias.as_ref().unwrap())?;
}
} else {
if let Some(from_transformations) = from_transformations {
// Data load with transformation
write!(
f,
" FROM (SELECT {} FROM {}{}",
display_separated(from_transformations.as_ref().unwrap(), ", "),
from_stage,
stage_params,
)?;
if from_stage_alias.as_ref().is_some() {
write!(f, " AS {}", from_stage_alias.as_ref().unwrap())?;
if let Some(from_stage) = from_obj {
write!(
f,
" FROM (SELECT {} FROM {}{}",
display_separated(from_transformations, ", "),
from_stage,
stage_params
)?;
}
if let Some(from_obj_alias) = from_obj_alias {
write!(f, " AS {}", from_obj_alias)?;
}
write!(f, ")")?;
} else if let Some(from_obj) = from_obj {
// Standard data load
write!(f, " FROM {}{}", from_obj, stage_params)?;
if let Some(from_obj_alias) = from_obj_alias {
write!(f, " AS {from_obj_alias}")?;
}
} else if let Some(from_query) = from_query {
// Data unload from query
write!(f, " FROM ({from_query})")?;
}
if files.is_some() {
write!(
f,
" FILES = ('{}')",
display_separated(files.as_ref().unwrap(), "', '")
)?;
if let Some(files) = files {
write!(f, " FILES = ('{}')", display_separated(files, "', '"))?;
}
if pattern.is_some() {
write!(f, " PATTERN = '{}'", pattern.as_ref().unwrap())?;
if let Some(pattern) = pattern {
write!(f, " PATTERN = '{}'", pattern)?;
}
if let Some(partition) = partition {
write!(f, " PARTITION BY {partition}")?;
}
if !file_format.options.is_empty() {
write!(f, " FILE_FORMAT=({})", file_format)?;
}
if !copy_options.options.is_empty() {
write!(f, " COPY_OPTIONS=({})", copy_options)?;
match kind {
CopyIntoSnowflakeKind::Table => {
write!(f, " COPY_OPTIONS=({})", copy_options)?
}
CopyIntoSnowflakeKind::Location => write!(f, " {copy_options}")?,
}
}
if validation_mode.is_some() {
write!(
f,
" VALIDATION_MODE = {}",
validation_mode.as_ref().unwrap()
)?;
if let Some(validation_mode) = validation_mode {
write!(f, " VALIDATION_MODE = {}", validation_mode)?;
}
Ok(())
}
@ -8543,6 +8558,19 @@ impl Display for StorageSerializationPolicy {
}
}
/// Variants of the Snowflake `COPY INTO` statement
#[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
pub enum CopyIntoSnowflakeKind {
/// Loads data from files to a table
/// See: <https://docs.snowflake.com/en/sql-reference/sql/copy-into-table>
Table,
/// Unloads data from a table or query to external files
/// See: <https://docs.snowflake.com/en/sql-reference/sql/copy-into-location>
Location,
}
#[cfg(test)]
mod tests {
use super::*;

View file

@ -333,8 +333,8 @@ impl Spanned for Statement {
} => source.span(),
Statement::CopyIntoSnowflake {
into: _,
from_stage: _,
from_stage_alias: _,
from_obj: _,
from_obj_alias: _,
stage_params: _,
from_transformations: _,
files: _,
@ -342,6 +342,9 @@ impl Spanned for Statement {
file_format: _,
copy_options: _,
validation_mode: _,
kind: _,
from_query: _,
partition: _,
} => Span::empty(),
Statement::Close { cursor } => match cursor {
CloseCursor::All => Span::empty(),

View file

@ -23,23 +23,26 @@ use crate::ast::helpers::stmt_data_loading::{
StageLoadSelectItem, StageParamsObject,
};
use crate::ast::{
ColumnOption, ColumnPolicy, ColumnPolicyProperty, Ident, IdentityParameters, IdentityProperty,
IdentityPropertyFormatKind, IdentityPropertyKind, IdentityPropertyOrder, ObjectName,
RowAccessPolicy, Statement, TagsColumnOption, WrappedCollection,
ColumnOption, ColumnPolicy, ColumnPolicyProperty, CopyIntoSnowflakeKind, Ident,
IdentityParameters, IdentityProperty, IdentityPropertyFormatKind, IdentityPropertyKind,
IdentityPropertyOrder, ObjectName, RowAccessPolicy, Statement, TagsColumnOption,
WrappedCollection,
};
use crate::dialect::{Dialect, Precedence};
use crate::keywords::Keyword;
use crate::parser::{Parser, ParserError};
use crate::tokenizer::Token;
use crate::tokenizer::{Token, Word};
#[cfg(not(feature = "std"))]
use alloc::boxed::Box;
#[cfg(not(feature = "std"))]
use alloc::string::String;
#[cfg(not(feature = "std"))]
use alloc::vec::Vec;
#[cfg(not(feature = "std"))]
use alloc::{format, vec};
use sqlparser::ast::StorageSerializationPolicy;
use super::keywords::RESERVED_FOR_IDENTIFIER;
use sqlparser::ast::StorageSerializationPolicy;
/// A [`Dialect`] for [Snowflake](https://www.snowflake.com/)
#[derive(Debug, Default)]
@ -665,24 +668,49 @@ pub fn parse_snowflake_stage_name(parser: &mut Parser) -> Result<ObjectName, Par
}
}
/// Parses a `COPY INTO` statement. Snowflake has two variants, `COPY INTO <table>`
/// and `COPY INTO <location>` which have different syntax.
pub fn parse_copy_into(parser: &mut Parser) -> Result<Statement, ParserError> {
let into: ObjectName = parse_snowflake_stage_name(parser)?;
let kind = match parser.peek_token().token {
// Indicates an internal stage
Token::AtSign => CopyIntoSnowflakeKind::Location,
// Indicates an external stage, i.e. s3://, gcs:// or azure://
Token::SingleQuotedString(s) if s.contains("://") => CopyIntoSnowflakeKind::Location,
_ => CopyIntoSnowflakeKind::Table,
};
let mut files: Vec<String> = vec![];
let mut from_transformations: Option<Vec<StageLoadSelectItem>> = None;
let from_stage_alias;
let from_stage: ObjectName;
let stage_params: StageParamsObject;
let mut from_stage_alias = None;
let mut from_stage = None;
let mut stage_params = StageParamsObject {
url: None,
encryption: DataLoadingOptions { options: vec![] },
endpoint: None,
storage_integration: None,
credentials: DataLoadingOptions { options: vec![] },
};
let mut from_query = None;
let mut partition = None;
let mut file_format = Vec::new();
let mut pattern = None;
let mut validation_mode = None;
let mut copy_options = Vec::new();
let into: ObjectName = parse_snowflake_stage_name(parser)?;
if kind == CopyIntoSnowflakeKind::Location {
stage_params = parse_stage_params(parser)?;
}
parser.expect_keyword_is(Keyword::FROM)?;
// check if data load transformations are present
match parser.next_token().token {
Token::LParen => {
// data load with transformations
Token::LParen if kind == CopyIntoSnowflakeKind::Table => {
// Data load with transformations
parser.expect_keyword_is(Keyword::SELECT)?;
from_transformations = parse_select_items_for_data_load(parser)?;
parser.expect_keyword_is(Keyword::FROM)?;
from_stage = parse_snowflake_stage_name(parser)?;
from_stage = Some(parse_snowflake_stage_name(parser)?);
stage_params = parse_stage_params(parser)?;
// as
@ -696,9 +724,14 @@ pub fn parse_copy_into(parser: &mut Parser) -> Result<Statement, ParserError> {
};
parser.expect_token(&Token::RParen)?;
}
Token::LParen if kind == CopyIntoSnowflakeKind::Location => {
// Data unload with a query
from_query = Some(parser.parse_query()?);
parser.expect_token(&Token::RParen)?;
}
_ => {
parser.prev_token();
from_stage = parse_snowflake_stage_name(parser)?;
from_stage = Some(parse_snowflake_stage_name(parser)?);
stage_params = parse_stage_params(parser)?;
// as
@ -711,67 +744,71 @@ pub fn parse_copy_into(parser: &mut Parser) -> Result<Statement, ParserError> {
None
};
}
};
}
// [ files ]
if parser.parse_keyword(Keyword::FILES) {
parser.expect_token(&Token::Eq)?;
parser.expect_token(&Token::LParen)?;
let mut continue_loop = true;
while continue_loop {
continue_loop = false;
loop {
// FILE_FORMAT
if parser.parse_keyword(Keyword::FILE_FORMAT) {
parser.expect_token(&Token::Eq)?;
file_format = parse_parentheses_options(parser)?;
// PARTITION BY
} else if parser.parse_keywords(&[Keyword::PARTITION, Keyword::BY]) {
partition = Some(Box::new(parser.parse_expr()?))
// FILES
} else if parser.parse_keyword(Keyword::FILES) {
parser.expect_token(&Token::Eq)?;
parser.expect_token(&Token::LParen)?;
let mut continue_loop = true;
while continue_loop {
continue_loop = false;
let next_token = parser.next_token();
match next_token.token {
Token::SingleQuotedString(s) => files.push(s),
_ => parser.expected("file token", next_token)?,
};
if parser.next_token().token.eq(&Token::Comma) {
continue_loop = true;
} else {
parser.prev_token(); // not a comma, need to go back
}
}
parser.expect_token(&Token::RParen)?;
// PATTERN
} else if parser.parse_keyword(Keyword::PATTERN) {
parser.expect_token(&Token::Eq)?;
let next_token = parser.next_token();
match next_token.token {
Token::SingleQuotedString(s) => files.push(s),
_ => parser.expected("file token", next_token)?,
};
if parser.next_token().token.eq(&Token::Comma) {
continue_loop = true;
} else {
parser.prev_token(); // not a comma, need to go back
pattern = Some(match next_token.token {
Token::SingleQuotedString(s) => s,
_ => parser.expected("pattern", next_token)?,
});
// VALIDATION MODE
} else if parser.parse_keyword(Keyword::VALIDATION_MODE) {
parser.expect_token(&Token::Eq)?;
validation_mode = Some(parser.next_token().token.to_string());
// COPY OPTIONS
} else if parser.parse_keyword(Keyword::COPY_OPTIONS) {
parser.expect_token(&Token::Eq)?;
copy_options = parse_parentheses_options(parser)?;
} else {
match parser.next_token().token {
Token::SemiColon | Token::EOF => break,
Token::Comma => continue,
// In `COPY INTO <location>` the copy options do not have a shared key
// like in `COPY INTO <table>`
Token::Word(key) => copy_options.push(parse_copy_option(parser, key)?),
_ => return parser.expected("another copy option, ; or EOF'", parser.peek_token()),
}
}
parser.expect_token(&Token::RParen)?;
}
// [ pattern ]
let mut pattern = None;
if parser.parse_keyword(Keyword::PATTERN) {
parser.expect_token(&Token::Eq)?;
let next_token = parser.next_token();
pattern = Some(match next_token.token {
Token::SingleQuotedString(s) => s,
_ => parser.expected("pattern", next_token)?,
});
}
// [ file_format]
let mut file_format = Vec::new();
if parser.parse_keyword(Keyword::FILE_FORMAT) {
parser.expect_token(&Token::Eq)?;
file_format = parse_parentheses_options(parser)?;
}
// [ copy_options ]
let mut copy_options = Vec::new();
if parser.parse_keyword(Keyword::COPY_OPTIONS) {
parser.expect_token(&Token::Eq)?;
copy_options = parse_parentheses_options(parser)?;
}
// [ VALIDATION_MODE ]
let mut validation_mode = None;
if parser.parse_keyword(Keyword::VALIDATION_MODE) {
parser.expect_token(&Token::Eq)?;
validation_mode = Some(parser.next_token().token.to_string());
}
Ok(Statement::CopyIntoSnowflake {
kind,
into,
from_stage,
from_stage_alias,
from_obj: from_stage,
from_obj_alias: from_stage_alias,
stage_params,
from_transformations,
from_query,
files: if files.is_empty() { None } else { Some(files) },
pattern,
file_format: DataLoadingOptions {
@ -781,6 +818,7 @@ pub fn parse_copy_into(parser: &mut Parser) -> Result<Statement, ParserError> {
options: copy_options,
},
validation_mode,
partition,
})
}
@ -930,55 +968,55 @@ fn parse_stage_params(parser: &mut Parser) -> Result<StageParamsObject, ParserEr
///
fn parse_parentheses_options(parser: &mut Parser) -> Result<Vec<DataLoadingOption>, ParserError> {
let mut options: Vec<DataLoadingOption> = Vec::new();
parser.expect_token(&Token::LParen)?;
loop {
match parser.next_token().token {
Token::RParen => break,
Token::Word(key) => {
parser.expect_token(&Token::Eq)?;
if parser.parse_keyword(Keyword::TRUE) {
options.push(DataLoadingOption {
option_name: key.value,
option_type: DataLoadingOptionType::BOOLEAN,
value: "TRUE".to_string(),
});
Ok(())
} else if parser.parse_keyword(Keyword::FALSE) {
options.push(DataLoadingOption {
option_name: key.value,
option_type: DataLoadingOptionType::BOOLEAN,
value: "FALSE".to_string(),
});
Ok(())
} else {
match parser.next_token().token {
Token::SingleQuotedString(value) => {
options.push(DataLoadingOption {
option_name: key.value,
option_type: DataLoadingOptionType::STRING,
value,
});
Ok(())
}
Token::Word(word) => {
options.push(DataLoadingOption {
option_name: key.value,
option_type: DataLoadingOptionType::ENUM,
value: word.value,
});
Ok(())
}
_ => parser.expected("expected option value", parser.peek_token()),
}
}
}
_ => parser.expected("another option or ')'", parser.peek_token()),
}?;
Token::Comma => continue,
Token::Word(key) => options.push(parse_copy_option(parser, key)?),
_ => return parser.expected("another option or ')'", parser.peek_token()),
};
}
Ok(options)
}
/// Parses a `KEY = VALUE` construct based on the specified key
fn parse_copy_option(parser: &mut Parser, key: Word) -> Result<DataLoadingOption, ParserError> {
parser.expect_token(&Token::Eq)?;
if parser.parse_keyword(Keyword::TRUE) {
Ok(DataLoadingOption {
option_name: key.value,
option_type: DataLoadingOptionType::BOOLEAN,
value: "TRUE".to_string(),
})
} else if parser.parse_keyword(Keyword::FALSE) {
Ok(DataLoadingOption {
option_name: key.value,
option_type: DataLoadingOptionType::BOOLEAN,
value: "FALSE".to_string(),
})
} else {
match parser.next_token().token {
Token::SingleQuotedString(value) => Ok(DataLoadingOption {
option_name: key.value,
option_type: DataLoadingOptionType::STRING,
value,
}),
Token::Word(word) => Ok(DataLoadingOption {
option_name: key.value,
option_type: DataLoadingOptionType::ENUM,
value: word.value,
}),
Token::Number(n, _) => Ok(DataLoadingOption {
option_name: key.value,
option_type: DataLoadingOptionType::NUMBER,
value: n,
}),
_ => parser.expected("expected option value", parser.peek_token()),
}
}
}
/// Parsing a property of identity or autoincrement column option
/// Syntax:
/// ```sql