Luca Cappelletti 2025-11-22 20:25:30 +01:00 committed by GitHub
commit 5c936a0036
12 changed files with 1195 additions and 1371 deletions

View file

@ -16,7 +16,12 @@
// under the License.
#[cfg(not(feature = "std"))]
use alloc::{boxed::Box, format, string::ToString, vec::Vec};
use alloc::{
boxed::Box,
format,
string::{String, ToString},
vec::Vec,
};
use core::fmt::{self, Display};
#[cfg(feature = "serde")]
@ -27,10 +32,10 @@ use sqlparser_derive::{Visit, VisitMut};
use crate::display_utils::{indented_list, Indent, SpaceOrNewline};
use super::{
display_comma_separated, helpers::attached_token::AttachedToken, query::InputFormatClause,
Assignment, Expr, FromTable, Ident, InsertAliases, MysqlInsertPriority, ObjectName, OnInsert,
OrderByExpr, Query, SelectItem, Setting, SqliteOnConflict, TableObject, TableWithJoins,
UpdateTableFromKind,
display_comma_separated, display_separated, helpers::attached_token::AttachedToken,
query::InputFormatClause, Assignment, CopyLegacyOption, CopyOption, CopySource, CopyTarget,
Expr, FromTable, Ident, InsertAliases, MysqlInsertPriority, ObjectName, OnInsert, OrderByExpr,
Query, SelectItem, Setting, SqliteOnConflict, TableObject, TableWithJoins, UpdateTableFromKind,
};
/// INSERT statement.
@ -310,3 +315,89 @@ impl Display for Update {
Ok(())
}
}
/// COPY statement.
///
/// Represents a PostgreSQL COPY statement for bulk data transfer between
/// a file and a table. The statement can copy data FROM a file to a table
/// or TO a file from a table or query.
///
/// # Syntax
///
/// ```sql
/// COPY table_name [(column_list)] FROM { 'filename' | STDIN | PROGRAM 'command' }
/// COPY { table_name [(column_list)] | (query) } TO { 'filename' | STDOUT | PROGRAM 'command' }
/// ```
///
/// # Examples
///
/// ```
/// # use sqlparser::ast::Statement;
/// # use sqlparser::dialect::PostgreSqlDialect;
/// # use sqlparser::parser::Parser;
/// let sql = "COPY users FROM 'data.csv'";
/// let dialect = PostgreSqlDialect {};
/// let ast = Parser::parse_sql(&dialect, sql).unwrap();
/// assert!(matches!(ast[0], Statement::Copy(_)));
/// ```
///
/// See [PostgreSQL documentation](https://www.postgresql.org/docs/current/sql-copy.html)
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
pub struct Copy {
/// The source of 'COPY TO', or the target of 'COPY FROM'.
/// Can be a table name with optional column list, or a query (for COPY TO only).
pub source: CopySource,
/// Direction of the copy operation.
/// - `true` for COPY TO (table/query to file)
/// - `false` for COPY FROM (file to table)
pub to: bool,
/// The target of 'COPY TO', or the source of 'COPY FROM'.
/// Can be a file, STDIN, STDOUT, or a PROGRAM command.
pub target: CopyTarget,
/// Modern COPY options (PostgreSQL 9.0+), specified within parentheses.
/// Examples: FORMAT, DELIMITER, NULL, HEADER, QUOTE, ESCAPE, etc.
pub options: Vec<CopyOption>,
/// Legacy COPY options (pre-PostgreSQL 9.0), specified without parentheses.
/// Also used by AWS Redshift extensions like IAM_ROLE, MANIFEST, etc.
pub legacy_options: Vec<CopyLegacyOption>,
/// Raw inline data for `COPY ... FROM STDIN` statements: the literal
/// text between the trailing `;` and the `\.` terminator, newlines and
/// delimiters included.
/// `None` unless copying from STDIN with inline data.
pub values: Option<String>,
}
impl Display for Copy {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "COPY")?;
match &self.source {
CopySource::Query(query) => write!(f, " ({query})")?,
CopySource::Table {
table_name,
columns,
} => {
write!(f, " {table_name}")?;
if !columns.is_empty() {
write!(f, " ({})", display_comma_separated(columns))?;
}
}
}
write!(
f,
" {} {}",
if self.to { "TO" } else { "FROM" },
self.target
)?;
if !self.options.is_empty() {
write!(f, " ({})", display_comma_separated(&self.options))?;
}
if !self.legacy_options.is_empty() {
write!(f, " {}", display_separated(&self.legacy_options, " "))?;
}
if let Some(values) = &self.values {
write!(f, ";{values}\\.")?;
}
Ok(())
}
}
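
Since `Statement::Copy` now wraps this struct (see the re-export in the `mod.rs` hunk below), parsed statements can be matched against it directly. A minimal round-trip sketch, using only types shown in this diff:

```rust
use sqlparser::ast::{Copy, CopySource, Statement};
use sqlparser::dialect::PostgreSqlDialect;
use sqlparser::parser::Parser;

fn main() {
    let sql = "COPY users FROM 'data.csv'";
    let stmt = Parser::parse_sql(&PostgreSqlDialect {}, sql)
        .unwrap()
        .remove(0);
    // The statement is now a tuple variant wrapping the `Copy` struct.
    if let Statement::Copy(Copy { source, to, .. }) = &stmt {
        assert!(!*to); // `to == false` means COPY ... FROM
        assert!(matches!(source, CopySource::Table { .. }));
    }
    // Display round-trips back to the original SQL.
    assert_eq!(stmt.to_string(), sql);
}
```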

View file

@ -76,7 +76,7 @@ pub use self::ddl::{
UserDefinedTypeInternalLength, UserDefinedTypeRangeOption, UserDefinedTypeRepresentation,
UserDefinedTypeSqlDefinitionOption, UserDefinedTypeStorage, ViewColumnDef,
};
pub use self::dml::{Delete, Insert, Update};
pub use self::dml::{Copy, Delete, Insert, Update};
pub use self::operator::{BinaryOperator, UnaryOperator};
pub use self::query::{
AfterMatchSkip, ConnectBy, Cte, CteAsMaterialized, Distinct, EmptyMatchesMode,
@ -3223,20 +3223,7 @@ pub enum Statement {
/// ```sql
/// COPY [TO | FROM] ...
/// ```
Copy {
/// The source of 'COPY TO', or the target of 'COPY FROM'
source: CopySource,
/// If true, is a 'COPY TO' statement. If false is a 'COPY FROM'
to: bool,
/// The target of 'COPY TO', or the source of 'COPY FROM'
target: CopyTarget,
/// WITH options (from PostgreSQL version 9.0)
options: Vec<CopyOption>,
/// WITH options (before PostgreSQL version 9.0)
legacy_options: Vec<CopyLegacyOption>,
/// VALUES a vector of values to be copied
values: Vec<Option<String>>,
},
Copy(Copy),
/// ```sql
/// COPY INTO <table> | <location>
/// ```
@ -4315,6 +4302,12 @@ impl From<ddl::Msck> for Statement {
}
}
impl From<Copy> for Statement {
fn from(copy: Copy) -> Self {
Statement::Copy(copy)
}
}
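
The `From` impl keeps construction sites terse: a `Copy` built by hand converts to a `Statement` with `.into()`. A sketch, with field values mirroring the PostgreSQL tests further down (the `CopyTarget::File` variant is assumed from those tests):

```rust
use sqlparser::ast::{Copy, CopySource, CopyTarget, ObjectName, Statement};

fn main() {
    let copy = Copy {
        source: CopySource::Table {
            table_name: ObjectName::from(vec!["users".into()]),
            columns: vec![],
        },
        to: false,
        target: CopyTarget::File {
            filename: "data.csv".to_string(),
        },
        options: vec![],
        legacy_options: vec![],
        values: None,
    };
    // Goes through the `From<Copy> for Statement` impl above.
    let stmt: Statement = copy.into();
    assert_eq!(stmt.to_string(), "COPY users FROM 'data.csv'");
}
```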
/// ```sql
/// {COPY | REVOKE} CURRENT GRANTS
/// ```
@ -4583,50 +4576,7 @@ impl fmt::Display for Statement {
Statement::Call(function) => write!(f, "CALL {function}"),
Statement::Copy {
source,
to,
target,
options,
legacy_options,
values,
} => {
write!(f, "COPY")?;
match source {
CopySource::Query(query) => write!(f, " ({query})")?,
CopySource::Table {
table_name,
columns,
} => {
write!(f, " {table_name}")?;
if !columns.is_empty() {
write!(f, " ({})", display_comma_separated(columns))?;
}
}
}
write!(f, " {} {}", if *to { "TO" } else { "FROM" }, target)?;
if !options.is_empty() {
write!(f, " ({})", display_comma_separated(options))?;
}
if !legacy_options.is_empty() {
write!(f, " {}", display_separated(legacy_options, " "))?;
}
if !values.is_empty() {
writeln!(f, ";")?;
let mut delim = "";
for v in values {
write!(f, "{delim}")?;
delim = "\t";
if let Some(v) = v {
write!(f, "{v}")?;
} else {
write!(f, "\\N")?;
}
}
write!(f, "\n\\.")?;
}
Ok(())
}
Statement::Copy(copy) => copy.fmt(f),
Statement::Update(update) => update.fmt(f),
Statement::Delete(delete) => delete.fmt(f),
Statement::Open(open) => open.fmt(f),

View file

@ -319,14 +319,7 @@ impl Spanned for Statement {
Statement::While(stmt) => stmt.span(),
Statement::Raise(stmt) => stmt.span(),
Statement::Call(function) => function.span(),
Statement::Copy {
source,
to: _,
target: _,
options: _,
legacy_options: _,
values: _,
} => source.span(),
Statement::Copy(copy) => copy.source.span(),
Statement::CopyIntoSnowflake {
into: _,
into_columns: _,

View file

@ -86,6 +86,10 @@ impl Dialect for BigQueryDialect {
ch.is_ascii_lowercase() || ch.is_ascii_uppercase() || ch.is_ascii_digit() || ch == '_'
}
fn supports_hyphenated_identifiers(&self) -> bool {
true
}
/// See [doc](https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#quoted_literals)
fn supports_triple_quoted_string(&self) -> bool {
true

View file

@ -178,6 +178,53 @@ pub trait Dialect: Debug + Any {
/// Determine if a character is a valid unquoted identifier character
fn is_identifier_part(&self, ch: char) -> bool;
/// Returns whether the dialect supports hyphenated identifiers.
///
/// Hyphenated identifiers contain hyphens within the name (e.g., `my-table`).
/// Supported by BigQuery for project, dataset, and table names.
///
/// ```rust
/// # use sqlparser::{dialect::BigQueryDialect, parser::Parser};
/// let sql = "SELECT * FROM my-project.my-dataset.my-table";
/// assert!(Parser::parse_sql(&BigQueryDialect, sql).is_ok());
/// ```
///
/// For dialects that do not support hyphenated identifiers,
/// the parser interprets the hyphen as a minus operator, which may
/// produce a syntax error if the surrounding context is not valid.
///
/// ```rust
/// # use sqlparser::{dialect::PostgreSqlDialect, parser::Parser};
/// let sql = "SELECT * FROM my-project.my-dataset.my-table";
/// assert!(Parser::parse_sql(&PostgreSqlDialect{}, sql).is_err());
/// ```
fn supports_hyphenated_identifiers(&self) -> bool {
false
}
/// Returns whether the dialect supports path-like identifiers.
///
/// Path-like identifiers contain forward slashes for hierarchical paths
/// (e.g., `@namespace.stage_name/path`). Used in Snowflake for stage locations.
///
/// ```rust
/// # use sqlparser::{dialect::SnowflakeDialect, parser::Parser};
/// let sql = "COPY INTO a.b FROM @namespace.stage_name/path";
/// assert!(Parser::parse_sql(&SnowflakeDialect, sql).is_ok());
/// ```
///
/// For dialects that do not support path-like identifiers,
/// the parser will raise a syntax error when encountering such identifiers.
///
/// ```rust
/// # use sqlparser::{dialect::PostgreSqlDialect, parser::Parser};
/// let sql = "COPY INTO a.b FROM @namespace.stage_name/path";
/// assert!(Parser::parse_sql(&PostgreSqlDialect{}, sql).is_err());
/// ```
fn supports_path_like_identifiers(&self) -> bool {
false
}
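Because both hooks are default trait methods, third-party dialects can opt in without forking. A hypothetical sketch (`MyDialect` is illustrative; whether hyphen handling fully takes effect outside `BigQueryDialect` depends on the tokenizer diff suppressed below):

```rust
use sqlparser::dialect::Dialect;

#[derive(Debug)]
struct MyDialect;

impl Dialect for MyDialect {
    // The only two required methods; everything else keeps its default.
    fn is_identifier_start(&self, ch: char) -> bool {
        ch.is_ascii_alphabetic() || ch == '_'
    }
    fn is_identifier_part(&self, ch: char) -> bool {
        ch.is_ascii_alphanumeric() || ch == '_'
    }
    // Opt in to the new behavior.
    fn supports_hyphenated_identifiers(&self) -> bool {
        true
    }
}
```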
/// Most dialects do not have custom operators. Override this method to provide custom operators.
fn is_custom_operator_part(&self, _ch: char) -> bool {
false

View file

@ -160,6 +160,10 @@ impl Dialect for SnowflakeDialect {
|| ch == '_'
}
fn supports_path_like_identifiers(&self) -> bool {
true
}
// See https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#escape_sequences
fn supports_string_literal_backslash_escape(&self) -> bool {
true
@ -1094,9 +1098,9 @@ pub fn parse_create_stage(
pub fn parse_stage_name_identifier(parser: &mut Parser) -> Result<Ident, ParserError> {
let mut ident = String::new();
while let Some(next_token) = parser.next_token_no_skip() {
match &next_token.token {
Token::Whitespace(_) | Token::SemiColon => break,
loop {
match &parser.next_token().token {
Token::SemiColon | Token::EOF => break,
Token::Period => {
parser.prev_token();
break;
@ -1112,7 +1116,14 @@ pub fn parse_stage_name_identifier(parser: &mut Parser) -> Result<Ident, ParserE
Token::Plus => ident.push('+'),
Token::Minus => ident.push('-'),
Token::Number(n, _) => ident.push_str(n),
Token::Word(w) => ident.push_str(&w.to_string()),
Token::Word(w) => {
if matches!(w.keyword, Keyword::NoKeyword) || ident.ends_with("@") {
ident.push_str(w.to_string().as_str());
} else {
parser.prev_token();
break;
}
}
_ => return parser.expected("stage name identifier", parser.peek_token()),
}
}
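
The tightened loop means a keyword now ends a stage-name segment unless it directly follows `@`. A sketch of what still parses (the `FILES` clause example assumes sqlparser's existing Snowflake `COPY INTO` support):

```rust
use sqlparser::dialect::SnowflakeDialect;
use sqlparser::parser::Parser;

fn main() {
    // Path-like stage identifiers still parse under Snowflake...
    let sql = "COPY INTO a.b FROM @namespace.stage_name/path";
    assert!(Parser::parse_sql(&SnowflakeDialect, sql).is_ok());
    // ...while a trailing keyword starts the next clause instead of
    // being absorbed into the stage name.
    let sql = "COPY INTO a.b FROM @namespace.stage_name FILES = ('a.csv')";
    assert!(Parser::parse_sql(&SnowflakeDialect, sql).is_ok());
}
```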

View file

@ -4034,20 +4034,13 @@ impl<'a> Parser<'a> {
/// See [`Self::peek_token`] for an example.
pub fn peek_tokens_with_location<const N: usize>(&self) -> [TokenWithSpan; N] {
let mut index = self.index;
core::array::from_fn(|_| loop {
core::array::from_fn(|_| {
let token = self.tokens.get(index);
index += 1;
if let Some(TokenWithSpan {
token: Token::Whitespace(_),
span: _,
}) = token
{
continue;
}
break token.cloned().unwrap_or(TokenWithSpan {
token.cloned().unwrap_or(TokenWithSpan {
token: Token::EOF,
span: Span::empty(),
});
})
})
}
@ -4057,17 +4050,10 @@ impl<'a> Parser<'a> {
/// See [`Self::peek_tokens`] for an example.
pub fn peek_tokens_ref<const N: usize>(&self) -> [&TokenWithSpan; N] {
let mut index = self.index;
core::array::from_fn(|_| loop {
core::array::from_fn(|_| {
let token = self.tokens.get(index);
index += 1;
if let Some(TokenWithSpan {
token: Token::Whitespace(_),
span: _,
}) = token
{
continue;
}
break token.unwrap_or(&EOF_TOKEN);
token.unwrap_or(&EOF_TOKEN)
})
}
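
With whitespace gone from the token stream, peeking reduces to plain indexing. A sketch of the public peek API (assuming the tokenizer now drops whitespace, per the suppressed tokenizer diff):

```rust
use sqlparser::dialect::GenericDialect;
use sqlparser::parser::Parser;
use sqlparser::tokenizer::Token;

fn main() {
    let dialect = GenericDialect {};
    let parser = Parser::new(&dialect).try_with_sql("SELECT 1").unwrap();
    // No skipping loop: the first two tokens are the keyword and the
    // number, despite the space between them in the source.
    let [first, second] = parser.peek_tokens_ref();
    assert!(matches!(first.token, Token::Word(_)));
    assert!(matches!(second.token, Token::Number(_, _)));
}
```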
@ -4081,38 +4067,13 @@ impl<'a> Parser<'a> {
let mut index = self.index;
loop {
index += 1;
match self.tokens.get(index - 1) {
Some(TokenWithSpan {
token: Token::Whitespace(_),
span: _,
}) => continue,
non_whitespace => {
if n == 0 {
return non_whitespace.unwrap_or(&EOF_TOKEN);
}
n -= 1;
}
if n == 0 {
return self.tokens.get(index - 1).unwrap_or(&EOF_TOKEN);
}
n -= 1;
}
}
/// Return the first token, possibly whitespace, that has not yet been processed
/// (or None if reached end-of-file).
pub fn peek_token_no_skip(&self) -> TokenWithSpan {
self.peek_nth_token_no_skip(0)
}
/// Return nth token, possibly whitespace, that has not yet been processed.
pub fn peek_nth_token_no_skip(&self, n: usize) -> TokenWithSpan {
self.tokens
.get(self.index + n)
.cloned()
.unwrap_or(TokenWithSpan {
token: Token::EOF,
span: Span::empty(),
})
}
/// Return true if the next tokens exactly `expected`
///
/// Does not advance the current token.
@ -4140,26 +4101,11 @@ impl<'a> Parser<'a> {
self.index.saturating_sub(1)
}
/// Return the next unprocessed token, possibly whitespace.
pub fn next_token_no_skip(&mut self) -> Option<&TokenWithSpan> {
self.index += 1;
self.tokens.get(self.index - 1)
}
/// Advances the current token to the next non-whitespace token
///
/// See [`Self::get_current_token`] to get the current token after advancing
pub fn advance_token(&mut self) {
loop {
self.index += 1;
match self.tokens.get(self.index - 1) {
Some(TokenWithSpan {
token: Token::Whitespace(_),
span: _,
}) => continue,
_ => break,
}
}
self.index += 1;
}
/// Returns a reference to the current token
@ -4190,18 +4136,8 @@ impl<'a> Parser<'a> {
///
// TODO rename to backup_token and deprecate prev_token?
pub fn prev_token(&mut self) {
loop {
assert!(self.index > 0);
self.index -= 1;
if let Some(TokenWithSpan {
token: Token::Whitespace(_),
span: _,
}) = self.tokens.get(self.index)
{
continue;
}
return;
}
assert!(self.index > 0);
self.index -= 1;
}
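
`prev_token` is now an exact inverse of `advance_token`: stepping the index back by one undoes one advance, with no whitespace tokens to re-skip. A sketch:

```rust
use sqlparser::dialect::GenericDialect;
use sqlparser::parser::Parser;

fn main() {
    let dialect = GenericDialect {};
    let mut parser = Parser::new(&dialect).try_with_sql("SELECT 1").unwrap();
    let first = parser.next_token();
    parser.prev_token();
    // One step back yields the same token again, even though the
    // source has whitespace between SELECT and 1.
    assert_eq!(parser.next_token(), first);
}
```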
/// Report `found` was encountered instead of `expected`
@ -8911,7 +8847,7 @@ impl<'a> Parser<'a> {
return self.expected(
"FULLTEXT or SPATIAL option without constraint name",
TokenWithSpan {
token: Token::make_keyword(&name.to_string()),
token: Token::make_keyword(name.to_string()),
span: next_token.span,
},
);
@ -9955,18 +9891,21 @@ impl<'a> Parser<'a> {
}
let values = if let CopyTarget::Stdin = target {
self.expect_token(&Token::SemiColon)?;
self.parse_tsv()
let Token::CopyFromStdin(body) = self.next_token().token else {
return self.expected("COPY ... FROM STDIN with CSV body", self.peek_token());
};
Some(body)
} else {
vec![]
None
};
Ok(Statement::Copy {
Ok(Statement::Copy(Copy {
source,
to,
target,
options,
legacy_options,
values,
})
}))
}
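
The inline data now survives as one raw string on `Copy::values` rather than a lossy `Vec<Option<String>>`. A sketch of pulling it back out (exactly which leading and trailing whitespace is captured follows the suppressed tokenizer change):

```rust
use sqlparser::ast::{Copy, Statement};
use sqlparser::dialect::PostgreSqlDialect;
use sqlparser::parser::Parser;

fn main() {
    let sql = "COPY t (a, b) FROM STDIN;\n1\tfoo\n2\tbar\n\\.";
    let stmt = Parser::parse_sql(&PostgreSqlDialect {}, sql)
        .unwrap()
        .remove(0);
    if let Statement::Copy(Copy { values: Some(body), .. }) = stmt {
        // The body is preserved verbatim, tabs and newlines included.
        assert!(body.contains("1\tfoo"));
    } else {
        panic!("expected COPY ... FROM STDIN with an inline body");
    }
}
```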
/// Parse [Statement::Open]
@ -10293,43 +10232,6 @@ impl<'a> Parser<'a> {
Ok(s.chars().next().unwrap())
}
/// Parse tab-separated values in a COPY payload
pub fn parse_tsv(&mut self) -> Vec<Option<String>> {
self.parse_tab_value()
}
pub fn parse_tab_value(&mut self) -> Vec<Option<String>> {
let mut values = vec![];
let mut content = String::from("");
while let Some(t) = self.next_token_no_skip().map(|t| &t.token) {
match t {
Token::Whitespace(Whitespace::Tab) => {
values.push(Some(content.to_string()));
content.clear();
}
Token::Whitespace(Whitespace::Newline) => {
values.push(Some(content.to_string()));
content.clear();
}
Token::Backslash => {
if self.consume_token(&Token::Period) {
return values;
}
if let Token::Word(w) = self.next_token().token {
if w.value == "N" {
values.push(None);
}
}
}
_ => {
content.push_str(&t.to_string());
}
}
}
values
}
/// Parse a literal value (numbers, strings, date/time, booleans)
pub fn parse_value(&mut self) -> Result<ValueWithSpan, ParserError> {
let next_token = self.next_token();
@ -10423,7 +10325,7 @@ impl<'a> Parser<'a> {
// 2. Not calling self.next_token() to enforce `tok`
// be followed immediately by a word/number, ie.
// without any whitespace in between
let next_token = self.next_token_no_skip().unwrap_or(&EOF_TOKEN).clone();
let next_token = self.next_token();
let ident = match next_token.token {
Token::Word(w) => Ok(w.into_ident(next_token.span)),
Token::Number(w, false) => Ok(Ident::with_span(next_token.span, w)),
@ -11440,9 +11342,9 @@ impl<'a> Parser<'a> {
let mut parts = vec![];
if dialect_of!(self is BigQueryDialect) && in_table_clause {
loop {
let (ident, end_with_period) = self.parse_unquoted_hyphenated_identifier()?;
let ident = self.parse_identifier()?;
parts.push(ObjectNamePart::Identifier(ident));
if !self.consume_token(&Token::Period) && !end_with_period {
if !self.consume_token(&Token::Period) {
break;
}
}
@ -11456,9 +11358,9 @@ impl<'a> Parser<'a> {
span,
}));
} else if dialect_of!(self is BigQueryDialect) && in_table_clause {
let (ident, end_with_period) = self.parse_unquoted_hyphenated_identifier()?;
let ident = self.parse_identifier()?;
parts.push(ObjectNamePart::Identifier(ident));
if !self.consume_token(&Token::Period) && !end_with_period {
if !self.consume_token(&Token::Period) {
break;
}
} else if self.dialect.supports_object_name_double_dot_notation()
@ -11637,84 +11539,6 @@ impl<'a> Parser<'a> {
}
}
/// On BigQuery, hyphens are permitted in unquoted identifiers inside of a FROM or
/// TABLE clause.
///
/// The first segment must be an ordinary unquoted identifier, e.g. it must not start
/// with a digit. Subsequent segments must be either valid identifiers or
/// integers, e.g. `foo-123` is allowed, but `foo-123a` is not.
///
/// [BigQuery-lexical](https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical)
///
/// Return a tuple of the identifier and a boolean indicating it ends with a period.
fn parse_unquoted_hyphenated_identifier(&mut self) -> Result<(Ident, bool), ParserError> {
match self.peek_token().token {
Token::Word(w) => {
let quote_style_is_none = w.quote_style.is_none();
let mut requires_whitespace = false;
let mut ident = w.into_ident(self.next_token().span);
if quote_style_is_none {
while matches!(self.peek_token_no_skip().token, Token::Minus) {
self.next_token();
ident.value.push('-');
let token = self
.next_token_no_skip()
.cloned()
.unwrap_or(TokenWithSpan::wrap(Token::EOF));
requires_whitespace = match token.token {
Token::Word(next_word) if next_word.quote_style.is_none() => {
ident.value.push_str(&next_word.value);
false
}
Token::Number(s, false) => {
// A number token can represent a decimal value ending with a period, e.g., `Number('123.')`.
// However, for an [ObjectName], it is part of a hyphenated identifier, e.g., `foo-123.bar`.
//
// If a number token is followed by a period, it is part of an [ObjectName].
// Return the identifier with `true` if the number token is followed by a period, indicating that
// parsing should continue for the next part of the hyphenated identifier.
if s.ends_with('.') {
let Some(s) = s.split('.').next().filter(|s| {
!s.is_empty() && s.chars().all(|c| c.is_ascii_digit())
}) else {
return self.expected(
"continuation of hyphenated identifier",
TokenWithSpan::new(Token::Number(s, false), token.span),
);
};
ident.value.push_str(s);
return Ok((ident, true));
} else {
ident.value.push_str(&s);
}
// If next token is period, then it is part of an ObjectName and we don't expect whitespace
// after the number.
!matches!(self.peek_token().token, Token::Period)
}
_ => {
return self
.expected("continuation of hyphenated identifier", token);
}
}
}
// If the last segment was a number, we must check that it's followed by whitespace,
// otherwise foo-123a will be parsed as `foo-123` with the alias `a`.
if requires_whitespace {
let token = self.next_token();
if !matches!(token.token, Token::EOF | Token::Whitespace(_)) {
return self
.expected("whitespace following hyphenated identifier", token);
}
}
}
Ok((ident, false))
}
_ => Ok((self.parse_identifier()?, false)),
}
}
/// Parses a parenthesized, comma-separated list of column definitions within a view.
fn parse_view_columns(&mut self) -> Result<Vec<ViewColumnDef>, ParserError> {
if self.consume_token(&Token::LParen) {
@ -19199,9 +19023,17 @@ mod tests {
#[test]
fn test_placeholder_invalid_whitespace() {
for w in [" ", "/*invalid*/"] {
for w in [
" ",
"/*invalid*/",
"\n",
"\t\t",
"\r\n",
"--comment\n",
"/* multi\nline\ncomment */",
] {
let sql = format!("\nSELECT\n :{w}fooBar");
assert!(Parser::parse_sql(&GenericDialect, &sql).is_err());
assert!(
Parser::parse_sql(&GenericDialect, &sql).is_err(),
"Expected an error when inserting the whitespace {w:?} into the placeholder SQL: `{sql}`"
);
}
}
}

File diff suppressed because it is too large

View file

@ -1567,7 +1567,11 @@ fn parse_table_identifiers() {
fn test_table_ident_err(ident: &str) {
let sql = format!("SELECT 1 FROM {ident}");
assert!(bigquery().parse_sql_statements(&sql).is_err());
let parsed = bigquery().parse_sql_statements(&sql);
assert!(
parsed.is_err(),
"Expected error parsing identifier: `{ident}`, within SQL: `{sql}` - but got success: {parsed:#?}"
);
}
test_table_ident("`spa ce`", None, vec![Ident::with_quote('`', "spa ce")]);

View file

@ -17328,7 +17328,7 @@ fn parse_copy_options() {
r#"COPY dst (c1, c2, c3) FROM 's3://redshift-downloads/tickit/category_pipe.txt' IAM_ROLE 'arn:aws:iam::123456789:role/role1' CSV IGNOREHEADER 1"#,
);
match copy {
Statement::Copy { legacy_options, .. } => {
Statement::Copy(Copy { legacy_options, .. }) => {
assert_eq!(
legacy_options,
vec![
@ -17348,7 +17348,7 @@ fn parse_copy_options() {
r#"COPY dst (c1, c2, c3) FROM 's3://redshift-downloads/tickit/category_pipe.txt' IAM_ROLE DEFAULT CSV IGNOREHEADER 1"#,
);
match copy {
Statement::Copy { legacy_options, .. } => {
Statement::Copy(Copy { legacy_options, .. }) => {
assert_eq!(
legacy_options,
vec![

View file

@ -1014,27 +1014,49 @@ fn parse_drop_schema_if_exists() {
#[test]
fn parse_copy_from_stdin() {
let sql = r#"COPY public.actor (actor_id, first_name, last_name, last_update, value) FROM stdin;
1 PENELOPE GUINESS 2006-02-15 09:34:33 0.11111
2 NICK WAHLBERG 2006-02-15 09:34:33 0.22222
3 ED CHASE 2006-02-15 09:34:33 0.312323
4 JENNIFER DAVIS 2006-02-15 09:34:33 0.3232
5 JOHNNY LOLLOBRIGIDA 2006-02-15 09:34:33 1.343
6 BETTE NICHOLSON 2006-02-15 09:34:33 5.0
7 GRACE MOSTEL 2006-02-15 09:34:33 6.0
8 MATTHEW JOHANSSON 2006-02-15 09:34:33 7.0
9 JOE SWANK 2006-02-15 09:34:33 8.0
10 CHRISTIAN GABLE 2006-02-15 09:34:33 9.1
11 ZERO CAGE 2006-02-15 09:34:33 10.001
12 KARL BERRY 2017-11-02 19:15:42.308637+08 11.001
A Fateful Reflection of a Waitress And a Boat who must Discover a Sumo Wrestler in Ancient China
Kwara & Kogi
{"Deleted Scenes","Behind the Scenes"}
'awe':5 'awe-inspir':4 'barbarella':1 'cat':13 'conquer':16 'dog':18 'feminist':10 'inspir':6 'monasteri':21 'must':15 'stori':7 'streetcar':2
PHP USD $
\N Some other value
\\."#;
pg_and_generic().one_statement_parses_to(sql, "");
let sql = r#"COPY public.actor (actor_id, first_name, last_name, last_update, value) FROM STDIN;
1 PENELOPE GUINESS 2006-02-15 09:34:33 0.11111
2 NICK WAHLBERG 2006-02-15 09:34:33 0.22222
3 ED CHASE 2006-02-15 09:34:33 0.312323
4 JENNIFER DAVIS 2006-02-15 09:34:33 0.3232
5 JOHNNY LOLLOBRIGIDA 2006-02-15 09:34:33 1.343
6 BETTE NICHOLSON 2006-02-15 09:34:33 5.0
7 GRACE MOSTEL 2006-02-15 09:34:33 6.0
8 MATTHEW JOHANSSON 2006-02-15 09:34:33 7.0
9 JOE SWANK 2006-02-15 09:34:33 8.0
10 CHRISTIAN GABLE 2006-02-15 09:34:33 9.1
11 ZERO CAGE 2006-02-15 09:34:33 10.001
12 KARL BERRY 2017-11-02 19:15:42.308637+08 11.001
\."#;
pg_and_generic().verified_stmt(sql);
let sql_comma_separated = r#"COPY public.actor (actor_id, first_name, last_name, last_update, value) FROM STDIN (FORMAT csv, DELIMITER ',');
1,PENELOPE,GUINESS,2006-02-15 09:34:33,0.11111
2,NICK,WAHLBERG,2006-02-15 09:34:33,0.22222
3,ED,CHASE,2006-02-15 09:34:33,0.312323
4,JENNIFER,DAVIS,2006-02-15 09:34:33,0.3232
5,JOHNNY,"LOLLO,BRIGIDA",2006-02-15 09:34:33,1.343
6,BETTE,NICHOLSON,2006-02-15 09:34:33,5.0
7,GRACE,MOSTEL,2006-02-15 09:34:33,6.0
8,MATTHEW,JOHANSSON,2006-02-15 09:34:33,7.0
9,JOE,SWANK,2006-02-15 09:34:33,8.0
10,CHRISTIAN,GABLE,2006-02-15 09:34:33,9.1
11,ZERO,CAGE,2006-02-15 09:34:33,10.001
12,KARL,BERRY,2017-11-02 19:15:42.308637+08,11.001
\."#;
pg_and_generic().verified_stmt(sql_comma_separated);
let incorrect_csv_sql = r#"COPY public.actor (actor_id, first_name, last_name, last_update, value) FROM STDIN (FORMAT csv, DELIMITER ',');
1,PENELOPE,GUINESS,2006-02-15 09:34:33,0.11111
2,NICK,WAHLBERG,2006-02-15 09:34:33
\."#;
pg_and_generic().verified_stmt(incorrect_csv_sql);
let mixed_incorrect_separators = r#"COPY public.actor (actor_id, first_name, last_name, last_update, value) FROM STDIN (FORMAT csv, DELIMITER ',');
1,PENELOPE,GUINESS,2006-02-15 09:34:33,0.11111
2 NICK WAHLBERG 2006-02-15 09:34:33,0.22222
\."#;
pg_and_generic().verified_stmt(mixed_incorrect_separators);
}
#[test]
@ -1042,7 +1064,7 @@ fn test_copy_from() {
let stmt = pg().verified_stmt("COPY users FROM 'data.csv'");
assert_eq!(
stmt,
Statement::Copy {
Statement::Copy(Copy {
source: CopySource::Table {
table_name: ObjectName::from(vec!["users".into()]),
columns: vec![],
@ -1053,14 +1075,14 @@ fn test_copy_from() {
},
options: vec![],
legacy_options: vec![],
values: vec![],
}
values: None,
})
);
let stmt = pg().verified_stmt("COPY users FROM 'data.csv' DELIMITER ','");
assert_eq!(
stmt,
Statement::Copy {
Statement::Copy(Copy {
source: CopySource::Table {
table_name: ObjectName::from(vec!["users".into()]),
columns: vec![],
@ -1071,14 +1093,14 @@ fn test_copy_from() {
},
options: vec![],
legacy_options: vec![CopyLegacyOption::Delimiter(',')],
values: vec![],
}
values: None,
})
);
let stmt = pg().verified_stmt("COPY users FROM 'data.csv' DELIMITER ',' CSV HEADER");
assert_eq!(
stmt,
Statement::Copy {
Statement::Copy(Copy {
source: CopySource::Table {
table_name: ObjectName::from(vec!["users".into()]),
columns: vec![],
@ -1092,8 +1114,8 @@ fn test_copy_from() {
CopyLegacyOption::Delimiter(','),
CopyLegacyOption::Csv(vec![CopyLegacyCsvOption::Header,])
],
values: vec![],
}
values: None,
})
);
}
@ -1102,7 +1124,7 @@ fn test_copy_to() {
let stmt = pg().verified_stmt("COPY users TO 'data.csv'");
assert_eq!(
stmt,
Statement::Copy {
Statement::Copy(Copy {
source: CopySource::Table {
table_name: ObjectName::from(vec!["users".into()]),
columns: vec![],
@ -1113,14 +1135,14 @@ fn test_copy_to() {
},
options: vec![],
legacy_options: vec![],
values: vec![],
}
values: None,
})
);
let stmt = pg().verified_stmt("COPY users TO 'data.csv' DELIMITER ','");
assert_eq!(
stmt,
Statement::Copy {
Statement::Copy(Copy {
source: CopySource::Table {
table_name: ObjectName::from(vec!["users".into()]),
columns: vec![],
@ -1131,14 +1153,14 @@ fn test_copy_to() {
},
options: vec![],
legacy_options: vec![CopyLegacyOption::Delimiter(',')],
values: vec![],
}
values: None,
})
);
let stmt = pg().verified_stmt("COPY users TO 'data.csv' DELIMITER ',' CSV HEADER");
assert_eq!(
stmt,
Statement::Copy {
Statement::Copy(Copy {
source: CopySource::Table {
table_name: ObjectName::from(vec!["users".into()]),
columns: vec![],
@ -1152,8 +1174,8 @@ fn test_copy_to() {
CopyLegacyOption::Delimiter(','),
CopyLegacyOption::Csv(vec![CopyLegacyCsvOption::Header,])
],
values: vec![],
}
values: None,
})
)
}
@ -1179,7 +1201,7 @@ fn parse_copy_from() {
)";
assert_eq!(
pg_and_generic().one_statement_parses_to(sql, ""),
Statement::Copy {
Statement::Copy(Copy {
source: CopySource::Table {
table_name: ObjectName::from(vec!["table".into()]),
columns: vec!["a".into(), "b".into()],
@ -1206,8 +1228,8 @@ fn parse_copy_from() {
CopyOption::Encoding("utf8".into()),
],
legacy_options: vec![],
values: vec![],
}
values: None,
})
);
}
@ -1225,7 +1247,7 @@ fn parse_copy_to() {
let stmt = pg().verified_stmt("COPY users TO 'data.csv'");
assert_eq!(
stmt,
Statement::Copy {
Statement::Copy(Copy {
source: CopySource::Table {
table_name: ObjectName::from(vec!["users".into()]),
columns: vec![],
@ -1236,14 +1258,14 @@ fn parse_copy_to() {
},
options: vec![],
legacy_options: vec![],
values: vec![],
}
values: None,
})
);
let stmt = pg().verified_stmt("COPY country TO STDOUT (DELIMITER '|')");
assert_eq!(
stmt,
Statement::Copy {
Statement::Copy(Copy {
source: CopySource::Table {
table_name: ObjectName::from(vec!["country".into()]),
columns: vec![],
@ -1252,15 +1274,15 @@ fn parse_copy_to() {
target: CopyTarget::Stdout,
options: vec![CopyOption::Delimiter('|')],
legacy_options: vec![],
values: vec![],
}
values: None,
})
);
let stmt =
pg().verified_stmt("COPY country TO PROGRAM 'gzip > /usr1/proj/bray/sql/country_data.gz'");
assert_eq!(
stmt,
Statement::Copy {
Statement::Copy(Copy {
source: CopySource::Table {
table_name: ObjectName::from(vec!["country".into()]),
columns: vec![],
@ -1271,14 +1293,14 @@ fn parse_copy_to() {
},
options: vec![],
legacy_options: vec![],
values: vec![],
}
values: None,
})
);
let stmt = pg().verified_stmt("COPY (SELECT 42 AS a, 'hello' AS b) TO 'query.csv'");
assert_eq!(
stmt,
Statement::Copy {
Statement::Copy(Copy {
source: CopySource::Query(Box::new(Query {
with: None,
body: Box::new(SetExpr::Select(Box::new(Select {
@ -1339,8 +1361,8 @@ fn parse_copy_to() {
},
options: vec![],
legacy_options: vec![],
values: vec![],
}
values: None,
})
)
}
@ -1349,7 +1371,7 @@ fn parse_copy_from_before_v9_0() {
let stmt = pg().verified_stmt("COPY users FROM 'data.csv' BINARY DELIMITER ',' NULL 'null' CSV HEADER QUOTE '\"' ESCAPE '\\' FORCE NOT NULL column");
assert_eq!(
stmt,
Statement::Copy {
Statement::Copy(Copy {
source: CopySource::Table {
table_name: ObjectName::from(vec!["users".into()]),
columns: vec![],
@ -1370,15 +1392,15 @@ fn parse_copy_from_before_v9_0() {
CopyLegacyCsvOption::ForceNotNull(vec!["column".into()]),
]),
],
values: vec![],
}
values: None,
})
);
// test 'AS' keyword
let sql = "COPY users FROM 'data.csv' DELIMITER AS ',' NULL AS 'null' CSV QUOTE AS '\"' ESCAPE AS '\\'";
assert_eq!(
pg_and_generic().one_statement_parses_to(sql, ""),
Statement::Copy {
Statement::Copy(Copy {
source: CopySource::Table {
table_name: ObjectName::from(vec!["users".into()]),
columns: vec![],
@ -1396,8 +1418,8 @@ fn parse_copy_from_before_v9_0() {
CopyLegacyCsvOption::Escape('\\'),
]),
],
values: vec![],
}
values: None,
})
);
}
@ -1406,7 +1428,7 @@ fn parse_copy_to_before_v9_0() {
let stmt = pg().verified_stmt("COPY users TO 'data.csv' BINARY DELIMITER ',' NULL 'null' CSV HEADER QUOTE '\"' ESCAPE '\\' FORCE QUOTE column");
assert_eq!(
stmt,
Statement::Copy {
Statement::Copy(Copy {
source: CopySource::Table {
table_name: ObjectName::from(vec!["users".into()]),
columns: vec![],
@ -1427,8 +1449,8 @@ fn parse_copy_to_before_v9_0() {
CopyLegacyCsvOption::ForceQuote(vec!["column".into()]),
]),
],
values: vec![],
}
values: None,
})
)
}

View file

@ -563,12 +563,7 @@ fn test_snowflake_single_line_tokenize() {
let expected = vec![
Token::make_keyword("CREATE"),
Token::Whitespace(Whitespace::Space),
Token::make_keyword("TABLE"),
Token::Whitespace(Whitespace::SingleLineComment {
prefix: "#".to_string(),
comment: " this is a comment \n".to_string(),
}),
Token::make_word("table_1", None),
];
@ -579,13 +574,7 @@ fn test_snowflake_single_line_tokenize() {
let expected = vec![
Token::make_keyword("CREATE"),
Token::Whitespace(Whitespace::Space),
Token::make_keyword("TABLE"),
Token::Whitespace(Whitespace::Space),
Token::Whitespace(Whitespace::SingleLineComment {
prefix: "//".to_string(),
comment: " this is a comment \n".to_string(),
}),
Token::make_word("table_1", None),
];