Luca Cappelletti 2025-11-22 20:25:30 +01:00 committed by GitHub
commit 5c936a0036
12 changed files with 1195 additions and 1371 deletions

View file

@ -16,7 +16,12 @@
// under the License.
#[cfg(not(feature = "std"))]
use alloc::{boxed::Box, format, string::ToString, vec::Vec};
use alloc::{
boxed::Box,
format,
string::{String, ToString},
vec::Vec,
};
use core::fmt::{self, Display};
#[cfg(feature = "serde")]
@ -27,10 +32,10 @@ use sqlparser_derive::{Visit, VisitMut};
use crate::display_utils::{indented_list, Indent, SpaceOrNewline};
use super::{
display_comma_separated, helpers::attached_token::AttachedToken, query::InputFormatClause,
Assignment, Expr, FromTable, Ident, InsertAliases, MysqlInsertPriority, ObjectName, OnInsert,
OrderByExpr, Query, SelectItem, Setting, SqliteOnConflict, TableObject, TableWithJoins,
UpdateTableFromKind,
display_comma_separated, display_separated, helpers::attached_token::AttachedToken,
query::InputFormatClause, Assignment, CopyLegacyOption, CopyOption, CopySource, CopyTarget,
Expr, FromTable, Ident, InsertAliases, MysqlInsertPriority, ObjectName, OnInsert, OrderByExpr,
Query, SelectItem, Setting, SqliteOnConflict, TableObject, TableWithJoins, UpdateTableFromKind,
};
/// INSERT statement.
@ -310,3 +315,89 @@ impl Display for Update {
Ok(())
}
}
/// COPY statement.
///
/// Represents a PostgreSQL COPY statement for bulk data transfer between
/// a file and a table. The statement can copy data FROM a file to a table
/// or TO a file from a table or query.
///
/// # Syntax
///
/// ```sql
/// COPY table_name [(column_list)] FROM { 'filename' | STDIN | PROGRAM 'command' }
/// COPY { table_name [(column_list)] | (query) } TO { 'filename' | STDOUT | PROGRAM 'command' }
/// ```
///
/// # Examples
///
/// ```
/// # use sqlparser::ast::Statement;
/// # use sqlparser::dialect::PostgreSqlDialect;
/// # use sqlparser::parser::Parser;
/// let sql = "COPY users FROM 'data.csv'";
/// let dialect = PostgreSqlDialect {};
/// let ast = Parser::parse_sql(&dialect, sql).unwrap();
/// assert!(matches!(ast[0], Statement::Copy(_)));
/// ```
///
/// See [PostgreSQL documentation](https://www.postgresql.org/docs/current/sql-copy.html)
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
pub struct Copy {
/// The source of 'COPY TO', or the target of 'COPY FROM'.
/// Can be a table name with optional column list, or a query (for COPY TO only).
pub source: CopySource,
/// Direction of the copy operation.
/// - `true` for COPY TO (table/query to file)
/// - `false` for COPY FROM (file to table)
pub to: bool,
/// The target of 'COPY TO', or the source of 'COPY FROM'.
/// Can be a file, STDIN, STDOUT, or a PROGRAM command.
pub target: CopyTarget,
/// Modern COPY options (PostgreSQL 9.0+), specified within parentheses.
/// Examples: FORMAT, DELIMITER, NULL, HEADER, QUOTE, ESCAPE, etc.
pub options: Vec<CopyOption>,
/// Legacy COPY options (pre-PostgreSQL 9.0), specified without parentheses.
/// Also used by AWS Redshift extensions like IAM_ROLE, MANIFEST, etc.
pub legacy_options: Vec<CopyLegacyOption>,
/// Raw inline data for `COPY ... FROM STDIN` statements: the literal
/// text between the trailing `;` and the `\.` terminator, newlines and
/// delimiters included.
/// `None` unless copying from STDIN with inline data.
pub values: Option<String>,
}
impl Display for Copy {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "COPY")?;
match &self.source {
CopySource::Query(query) => write!(f, " ({query})")?,
CopySource::Table {
table_name,
columns,
} => {
write!(f, " {table_name}")?;
if !columns.is_empty() {
write!(f, " ({})", display_comma_separated(columns))?;
}
}
}
write!(
f,
" {} {}",
if self.to { "TO" } else { "FROM" },
self.target
)?;
if !self.options.is_empty() {
write!(f, " ({})", display_comma_separated(&self.options))?;
}
if !self.legacy_options.is_empty() {
write!(f, " {}", display_separated(&self.legacy_options, " "))?;
}
if let Some(values) = &self.values {
write!(f, ";{values}\\.")?;
}
Ok(())
}
}
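
Since `Statement::Copy` now wraps this struct (see the re-export in the `mod.rs` hunk below), parsed statements can be matched against it directly. A minimal round-trip sketch, using only types shown in this diff:

```rust
use sqlparser::ast::{Copy, CopySource, Statement};
use sqlparser::dialect::PostgreSqlDialect;
use sqlparser::parser::Parser;

fn main() {
    let sql = "COPY users FROM 'data.csv'";
    let stmt = Parser::parse_sql(&PostgreSqlDialect {}, sql)
        .unwrap()
        .remove(0);
    // The statement is now a tuple variant wrapping the `Copy` struct.
    if let Statement::Copy(Copy { source, to, .. }) = &stmt {
        assert!(!*to); // `to == false` means COPY ... FROM
        assert!(matches!(source, CopySource::Table { .. }));
    }
    // Display round-trips back to the original SQL.
    assert_eq!(stmt.to_string(), sql);
}
```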

View file

@ -76,7 +76,7 @@ pub use self::ddl::{
UserDefinedTypeInternalLength, UserDefinedTypeRangeOption, UserDefinedTypeRepresentation,
UserDefinedTypeSqlDefinitionOption, UserDefinedTypeStorage, ViewColumnDef,
};
pub use self::dml::{Delete, Insert, Update};
pub use self::dml::{Copy, Delete, Insert, Update};
pub use self::operator::{BinaryOperator, UnaryOperator};
pub use self::query::{
AfterMatchSkip, ConnectBy, Cte, CteAsMaterialized, Distinct, EmptyMatchesMode,
@ -3223,20 +3223,7 @@ pub enum Statement {
/// ```sql
/// COPY [TO | FROM] ...
/// ```
Copy {
/// The source of 'COPY TO', or the target of 'COPY FROM'
source: CopySource,
/// If true, is a 'COPY TO' statement. If false is a 'COPY FROM'
to: bool,
/// The target of 'COPY TO', or the source of 'COPY FROM'
target: CopyTarget,
/// WITH options (from PostgreSQL version 9.0)
options: Vec<CopyOption>,
/// WITH options (before PostgreSQL version 9.0)
legacy_options: Vec<CopyLegacyOption>,
/// VALUES a vector of values to be copied
values: Vec<Option<String>>,
},
Copy(Copy),
/// ```sql
/// COPY INTO <table> | <location>
/// ```
@ -4315,6 +4302,12 @@ impl From<ddl::Msck> for Statement {
}
}
impl From<Copy> for Statement {
fn from(copy: Copy) -> Self {
Statement::Copy(copy)
}
}
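
The `From` impl keeps construction sites terse: a `Copy` built by hand converts to a `Statement` with `.into()`. A sketch, with field values mirroring the PostgreSQL tests further down (the `CopyTarget::File` variant is assumed from those tests):

```rust
use sqlparser::ast::{Copy, CopySource, CopyTarget, ObjectName, Statement};

fn main() {
    let copy = Copy {
        source: CopySource::Table {
            table_name: ObjectName::from(vec!["users".into()]),
            columns: vec![],
        },
        to: false,
        target: CopyTarget::File {
            filename: "data.csv".to_string(),
        },
        options: vec![],
        legacy_options: vec![],
        values: None,
    };
    // Goes through the `From<Copy> for Statement` impl above.
    let stmt: Statement = copy.into();
    assert_eq!(stmt.to_string(), "COPY users FROM 'data.csv'");
}
```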
/// ```sql
/// {COPY | REVOKE} CURRENT GRANTS
/// ```
@ -4583,50 +4576,7 @@ impl fmt::Display for Statement {
Statement::Call(function) => write!(f, "CALL {function}"),
Statement::Copy {
source,
to,
target,
options,
legacy_options,
values,
} => {
write!(f, "COPY")?;
match source {
CopySource::Query(query) => write!(f, " ({query})")?,
CopySource::Table {
table_name,
columns,
} => {
write!(f, " {table_name}")?;
if !columns.is_empty() {
write!(f, " ({})", display_comma_separated(columns))?;
}
}
}
write!(f, " {} {}", if *to { "TO" } else { "FROM" }, target)?;
if !options.is_empty() {
write!(f, " ({})", display_comma_separated(options))?;
}
if !legacy_options.is_empty() {
write!(f, " {}", display_separated(legacy_options, " "))?;
}
if !values.is_empty() {
writeln!(f, ";")?;
let mut delim = "";
for v in values {
write!(f, "{delim}")?;
delim = "\t";
if let Some(v) = v {
write!(f, "{v}")?;
} else {
write!(f, "\\N")?;
}
}
write!(f, "\n\\.")?;
}
Ok(())
}
Statement::Copy(copy) => copy.fmt(f),
Statement::Update(update) => update.fmt(f),
Statement::Delete(delete) => delete.fmt(f),
Statement::Open(open) => open.fmt(f),

View file

@ -319,14 +319,7 @@ impl Spanned for Statement {
Statement::While(stmt) => stmt.span(),
Statement::Raise(stmt) => stmt.span(),
Statement::Call(function) => function.span(),
Statement::Copy {
source,
to: _,
target: _,
options: _,
legacy_options: _,
values: _,
} => source.span(),
Statement::Copy(copy) => copy.source.span(),
Statement::CopyIntoSnowflake {
into: _,
into_columns: _,

View file

@ -86,6 +86,10 @@ impl Dialect for BigQueryDialect {
ch.is_ascii_lowercase() || ch.is_ascii_uppercase() || ch.is_ascii_digit() || ch == '_'
}
fn supports_hyphenated_identifiers(&self) -> bool {
true
}
/// See [doc](https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#quoted_literals)
fn supports_triple_quoted_string(&self) -> bool {
true

View file

@ -178,6 +178,53 @@ pub trait Dialect: Debug + Any {
/// Determine if a character is a valid unquoted identifier character
fn is_identifier_part(&self, ch: char) -> bool;
/// Returns whether the dialect supports hyphenated identifiers.
///
/// Hyphenated identifiers contain hyphens within the name (e.g., `my-table`).
/// Supported by BigQuery for project, dataset, and table names.
///
/// ```rust
/// # use sqlparser::{dialect::BigQueryDialect, parser::Parser};
/// let sql = "SELECT * FROM my-project.my-dataset.my-table";
/// assert!(Parser::parse_sql(&BigQueryDialect, sql).is_ok());
/// ```
///
/// For dialects that do not support hyphenated identifiers,
/// the parser interprets the hyphen as a minus operator, which may
/// produce a syntax error if the surrounding context is not valid.
///
/// ```rust
/// # use sqlparser::{dialect::PostgreSqlDialect, parser::Parser};
/// let sql = "SELECT * FROM my-project.my-dataset.my-table";
/// assert!(Parser::parse_sql(&PostgreSqlDialect{}, sql).is_err());
/// ```
fn supports_hyphenated_identifiers(&self) -> bool {
false
}
/// Returns whether the dialect supports path-like identifiers.
///
/// Path-like identifiers contain forward slashes for hierarchical paths
/// (e.g., `@namespace.stage_name/path`). Used in Snowflake for stage locations.
///
/// ```rust
/// # use sqlparser::{dialect::SnowflakeDialect, parser::Parser};
/// let sql = "COPY INTO a.b FROM @namespace.stage_name/path";
/// assert!(Parser::parse_sql(&SnowflakeDialect, sql).is_ok());
/// ```
///
/// For dialects that do not support path-like identifiers,
/// the parser will raise a syntax error when encountering such identifiers.
///
/// ```rust
/// # use sqlparser::{dialect::PostgreSqlDialect, parser::Parser};
/// let sql = "COPY INTO a.b FROM @namespace.stage_name/path";
/// assert!(Parser::parse_sql(&PostgreSqlDialect{}, sql).is_err());
/// ```
fn supports_path_like_identifiers(&self) -> bool {
false
}
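Because both hooks are default trait methods, third-party dialects can opt in without forking. A hypothetical sketch (`MyDialect` is illustrative; whether hyphen handling fully takes effect outside `BigQueryDialect` depends on the tokenizer diff suppressed below):

```rust
use sqlparser::dialect::Dialect;

#[derive(Debug)]
struct MyDialect;

impl Dialect for MyDialect {
    // The only two required methods; everything else keeps its default.
    fn is_identifier_start(&self, ch: char) -> bool {
        ch.is_ascii_alphabetic() || ch == '_'
    }
    fn is_identifier_part(&self, ch: char) -> bool {
        ch.is_ascii_alphanumeric() || ch == '_'
    }
    // Opt in to the new behavior.
    fn supports_hyphenated_identifiers(&self) -> bool {
        true
    }
}
```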
/// Most dialects do not have custom operators. Override this method to provide custom operators.
fn is_custom_operator_part(&self, _ch: char) -> bool {
false

View file

@ -160,6 +160,10 @@ impl Dialect for SnowflakeDialect {
|| ch == '_'
}
fn supports_path_like_identifiers(&self) -> bool {
true
}
// See https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#escape_sequences
fn supports_string_literal_backslash_escape(&self) -> bool {
true
@ -1094,9 +1098,9 @@ pub fn parse_create_stage(
pub fn parse_stage_name_identifier(parser: &mut Parser) -> Result<Ident, ParserError> {
let mut ident = String::new();
while let Some(next_token) = parser.next_token_no_skip() {
match &next_token.token {
Token::Whitespace(_) | Token::SemiColon => break,
loop {
match &parser.next_token().token {
Token::SemiColon | Token::EOF => break,
Token::Period => {
parser.prev_token();
break;
@ -1112,7 +1116,14 @@ pub fn parse_stage_name_identifier(parser: &mut Parser) -> Result<Ident, ParserE
Token::Plus => ident.push('+'),
Token::Minus => ident.push('-'),
Token::Number(n, _) => ident.push_str(n),
Token::Word(w) => ident.push_str(&w.to_string()),
Token::Word(w) => {
if matches!(w.keyword, Keyword::NoKeyword) || ident.ends_with("@") {
ident.push_str(w.to_string().as_str());
} else {
parser.prev_token();
break;
}
}
_ => return parser.expected("stage name identifier", parser.peek_token()),
}
}
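
The tightened loop means a keyword now ends a stage-name segment unless it directly follows `@`. A sketch of what still parses (the `FILES` clause example assumes sqlparser's existing Snowflake `COPY INTO` support):

```rust
use sqlparser::dialect::SnowflakeDialect;
use sqlparser::parser::Parser;

fn main() {
    // Path-like stage identifiers still parse under Snowflake...
    let sql = "COPY INTO a.b FROM @namespace.stage_name/path";
    assert!(Parser::parse_sql(&SnowflakeDialect, sql).is_ok());
    // ...while a trailing keyword starts the next clause instead of
    // being absorbed into the stage name.
    let sql = "COPY INTO a.b FROM @namespace.stage_name FILES = ('a.csv')";
    assert!(Parser::parse_sql(&SnowflakeDialect, sql).is_ok());
}
```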

View file

@ -4034,20 +4034,13 @@ impl<'a> Parser<'a> {
/// See [`Self::peek_token`] for an example.
pub fn peek_tokens_with_location<const N: usize>(&self) -> [TokenWithSpan; N] {
let mut index = self.index;
core::array::from_fn(|_| loop {
core::array::from_fn(|_| {
let token = self.tokens.get(index);
index += 1;
if let Some(TokenWithSpan {
token: Token::Whitespace(_),
span: _,
}) = token
{
continue;
}
break token.cloned().unwrap_or(TokenWithSpan {
token.cloned().unwrap_or(TokenWithSpan {
token: Token::EOF,
span: Span::empty(),
});
})
})
}
@ -4057,17 +4050,10 @@ impl<'a> Parser<'a> {
/// See [`Self::peek_tokens`] for an example.
pub fn peek_tokens_ref<const N: usize>(&self) -> [&TokenWithSpan; N] {
let mut index = self.index;
core::array::from_fn(|_| loop {
core::array::from_fn(|_| {
let token = self.tokens.get(index);
index += 1;
if let Some(TokenWithSpan {
token: Token::Whitespace(_),
span: _,
}) = token
{
continue;
}
break token.unwrap_or(&EOF_TOKEN);
token.unwrap_or(&EOF_TOKEN)
})
}
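
With whitespace gone from the token stream, peeking reduces to plain indexing. A sketch of the public peek API (assuming the tokenizer now drops whitespace, per the suppressed tokenizer diff):

```rust
use sqlparser::dialect::GenericDialect;
use sqlparser::parser::Parser;
use sqlparser::tokenizer::Token;

fn main() {
    let dialect = GenericDialect {};
    let parser = Parser::new(&dialect).try_with_sql("SELECT 1").unwrap();
    // No skipping loop: the first two tokens are the keyword and the
    // number, despite the space between them in the source.
    let [first, second] = parser.peek_tokens_ref();
    assert!(matches!(first.token, Token::Word(_)));
    assert!(matches!(second.token, Token::Number(_, _)));
}
```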
@ -4081,38 +4067,13 @@ impl<'a> Parser<'a> {
let mut index = self.index;
loop {
index += 1;
match self.tokens.get(index - 1) {
Some(TokenWithSpan {
token: Token::Whitespace(_),
span: _,
}) => continue,
non_whitespace => {
if n == 0 {
return non_whitespace.unwrap_or(&EOF_TOKEN);
}
n -= 1;
}
if n == 0 {
return self.tokens.get(index - 1).unwrap_or(&EOF_TOKEN);
}
n -= 1;
}
}
/// Return the first token, possibly whitespace, that has not yet been processed
/// (or None if reached end-of-file).
pub fn peek_token_no_skip(&self) -> TokenWithSpan {
self.peek_nth_token_no_skip(0)
}
/// Return nth token, possibly whitespace, that has not yet been processed.
pub fn peek_nth_token_no_skip(&self, n: usize) -> TokenWithSpan {
self.tokens
.get(self.index + n)
.cloned()
.unwrap_or(TokenWithSpan {
token: Token::EOF,
span: Span::empty(),
})
}
/// Return true if the next tokens exactly `expected`
///
/// Does not advance the current token.
@ -4140,26 +4101,11 @@ impl<'a> Parser<'a> {
self.index.saturating_sub(1)
}
/// Return the next unprocessed token, possibly whitespace.
pub fn next_token_no_skip(&mut self) -> Option<&TokenWithSpan> {
self.index += 1;
self.tokens.get(self.index - 1)
}
/// Advances the current token to the next non-whitespace token
///
/// See [`Self::get_current_token`] to get the current token after advancing
pub fn advance_token(&mut self) {
loop {
self.index += 1;
match self.tokens.get(self.index - 1) {
Some(TokenWithSpan {
token: Token::Whitespace(_),
span: _,
}) => continue,
_ => break,
}
}
self.index += 1;
}
/// Returns a reference to the current token
@ -4190,18 +4136,8 @@ impl<'a> Parser<'a> {
///
// TODO rename to backup_token and deprecate prev_token?
pub fn prev_token(&mut self) {
loop {
assert!(self.index > 0);
self.index -= 1;
if let Some(TokenWithSpan {
token: Token::Whitespace(_),
span: _,
}) = self.tokens.get(self.index)
{
continue;
}
return;
}
assert!(self.index > 0);
self.index -= 1;
}
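
`prev_token` is now an exact inverse of `advance_token`: stepping the index back by one undoes one advance, with no whitespace tokens to re-skip. A sketch:

```rust
use sqlparser::dialect::GenericDialect;
use sqlparser::parser::Parser;

fn main() {
    let dialect = GenericDialect {};
    let mut parser = Parser::new(&dialect).try_with_sql("SELECT 1").unwrap();
    let first = parser.next_token();
    parser.prev_token();
    // One step back yields the same token again, even though the
    // source has whitespace between SELECT and 1.
    assert_eq!(parser.next_token(), first);
}
```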
/// Report `found` was encountered instead of `expected`
@ -8911,7 +8847,7 @@ impl<'a> Parser<'a> {
return self.expected(
"FULLTEXT or SPATIAL option without constraint name",
TokenWithSpan {
token: Token::make_keyword(&name.to_string()),
token: Token::make_keyword(name.to_string()),
span: next_token.span,
},
);
@ -9955,18 +9891,21 @@ impl<'a> Parser<'a> {
}
let values = if let CopyTarget::Stdin = target {
self.expect_token(&Token::SemiColon)?;
self.parse_tsv()
let Token::CopyFromStdin(body) = self.next_token().token else {
return self.expected("COPY ... FROM STDIN with CSV body", self.peek_token());
};
Some(body)
} else {
vec![]
None
};
Ok(Statement::Copy {
Ok(Statement::Copy(Copy {
source,
to,
target,
options,
legacy_options,
values,
})
}))
}
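
The inline data now survives as one raw string on `Copy::values` rather than a lossy `Vec<Option<String>>`. A sketch of pulling it back out (exactly which leading and trailing whitespace is captured follows the suppressed tokenizer change):

```rust
use sqlparser::ast::{Copy, Statement};
use sqlparser::dialect::PostgreSqlDialect;
use sqlparser::parser::Parser;

fn main() {
    let sql = "COPY t (a, b) FROM STDIN;\n1\tfoo\n2\tbar\n\\.";
    let stmt = Parser::parse_sql(&PostgreSqlDialect {}, sql)
        .unwrap()
        .remove(0);
    if let Statement::Copy(Copy { values: Some(body), .. }) = stmt {
        // The body is preserved verbatim, tabs and newlines included.
        assert!(body.contains("1\tfoo"));
    } else {
        panic!("expected COPY ... FROM STDIN with an inline body");
    }
}
```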
/// Parse [Statement::Open]
@ -10293,43 +10232,6 @@ impl<'a> Parser<'a> {
Ok(s.chars().next().unwrap())
}
/// Parse tab-separated values in a COPY payload
pub fn parse_tsv(&mut self) -> Vec<Option<String>> {
self.parse_tab_value()
}
pub fn parse_tab_value(&mut self) -> Vec<Option<String>> {
let mut values = vec![];
let mut content = String::from("");
while let Some(t) = self.next_token_no_skip().map(|t| &t.token) {
match t {
Token::Whitespace(Whitespace::Tab) => {
values.push(Some(content.to_string()));
content.clear();
}
Token::Whitespace(Whitespace::Newline) => {
values.push(Some(content.to_string()));
content.clear();
}
Token::Backslash => {
if self.consume_token(&Token::Period) {
return values;
}
if let Token::Word(w) = self.next_token().token {
if w.value == "N" {
values.push(None);
}
}
}
_ => {
content.push_str(&t.to_string());
}
}
}
values
}
/// Parse a literal value (numbers, strings, date/time, booleans)
pub fn parse_value(&mut self) -> Result<ValueWithSpan, ParserError> {
let next_token = self.next_token();
@ -10423,7 +10325,7 @@ impl<'a> Parser<'a> {
// 2. Not calling self.next_token() to enforce `tok`
// be followed immediately by a word/number, ie.
// without any whitespace in between
let next_token = self.next_token_no_skip().unwrap_or(&EOF_TOKEN).clone();
let next_token = self.next_token();
let ident = match next_token.token {
Token::Word(w) => Ok(w.into_ident(next_token.span)),
Token::Number(w, false) => Ok(Ident::with_span(next_token.span, w)),
@ -11440,9 +11342,9 @@ impl<'a> Parser<'a> {
let mut parts = vec![];
if dialect_of!(self is BigQueryDialect) && in_table_clause {
loop {
let (ident, end_with_period) = self.parse_unquoted_hyphenated_identifier()?;
let ident = self.parse_identifier()?;
parts.push(ObjectNamePart::Identifier(ident));
if !self.consume_token(&Token::Period) && !end_with_period {
if !self.consume_token(&Token::Period) {
break;
}
}
@ -11456,9 +11358,9 @@ impl<'a> Parser<'a> {
span,
}));
} else if dialect_of!(self is BigQueryDialect) && in_table_clause {
let (ident, end_with_period) = self.parse_unquoted_hyphenated_identifier()?;
let ident = self.parse_identifier()?;
parts.push(ObjectNamePart::Identifier(ident));
if !self.consume_token(&Token::Period) && !end_with_period {
if !self.consume_token(&Token::Period) {
break;
}
} else if self.dialect.supports_object_name_double_dot_notation()
@ -11637,84 +11539,6 @@ impl<'a> Parser<'a> {
}
}
/// On BigQuery, hyphens are permitted in unquoted identifiers inside of a FROM or
/// TABLE clause.
///
/// The first segment must be an ordinary unquoted identifier, e.g. it must not start
/// with a digit. Subsequent segments must be either valid identifiers or
/// integers, e.g. `foo-123` is allowed, but `foo-123a` is not.
///
/// [BigQuery-lexical](https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical)
///
/// Return a tuple of the identifier and a boolean indicating it ends with a period.
fn parse_unquoted_hyphenated_identifier(&mut self) -> Result<(Ident, bool), ParserError> {
match self.peek_token().token {
Token::Word(w) => {
let quote_style_is_none = w.quote_style.is_none();
let mut requires_whitespace = false;
let mut ident = w.into_ident(self.next_token().span);
if quote_style_is_none {
while matches!(self.peek_token_no_skip().token, Token::Minus) {
self.next_token();
ident.value.push('-');
let token = self
.next_token_no_skip()
.cloned()
.unwrap_or(TokenWithSpan::wrap(Token::EOF));
requires_whitespace = match token.token {
Token::Word(next_word) if next_word.quote_style.is_none() => {
ident.value.push_str(&next_word.value);
false
}
Token::Number(s, false) => {
// A number token can represent a decimal value ending with a period, e.g., `Number('123.')`.
// However, for an [ObjectName], it is part of a hyphenated identifier, e.g., `foo-123.bar`.
//
// If a number token is followed by a period, it is part of an [ObjectName].
// Return the identifier with `true` if the number token is followed by a period, indicating that
// parsing should continue for the next part of the hyphenated identifier.
if s.ends_with('.') {
let Some(s) = s.split('.').next().filter(|s| {
!s.is_empty() && s.chars().all(|c| c.is_ascii_digit())
}) else {
return self.expected(
"continuation of hyphenated identifier",
TokenWithSpan::new(Token::Number(s, false), token.span),
);
};
ident.value.push_str(s);
return Ok((ident, true));
} else {
ident.value.push_str(&s);
}
// If next token is period, then it is part of an ObjectName and we don't expect whitespace
// after the number.
!matches!(self.peek_token().token, Token::Period)
}
_ => {
return self
.expected("continuation of hyphenated identifier", token);
}
}
}
// If the last segment was a number, we must check that it's followed by whitespace,
// otherwise foo-123a will be parsed as `foo-123` with the alias `a`.
if requires_whitespace {
let token = self.next_token();
if !matches!(token.token, Token::EOF | Token::Whitespace(_)) {
return self
.expected("whitespace following hyphenated identifier", token);
}
}
}
Ok((ident, false))
}
_ => Ok((self.parse_identifier()?, false)),
}
}
/// Parses a parenthesized, comma-separated list of column definitions within a view.
fn parse_view_columns(&mut self) -> Result<Vec<ViewColumnDef>, ParserError> {
if self.consume_token(&Token::LParen) {
@ -19199,9 +19023,17 @@ mod tests {
#[test]
fn test_placeholder_invalid_whitespace() {
for w in [" ", "/*invalid*/"] {
for w in [
" ",
"/*invalid*/",
"\n",
"\t\t",
"\r\n",
"--comment\n",
"/* multi\nline\ncomment */",
] {
let sql = format!("\nSELECT\n :{w}fooBar");
assert!(Parser::parse_sql(&GenericDialect, &sql).is_err());
assert!(
Parser::parse_sql(&GenericDialect, &sql).is_err(),
"Expected an error when inserting the whitespace {w:?} into the placeholder SQL: `{sql}`"
);
}
}
}

File diff suppressed because it is too large

View file

@ -1567,7 +1567,11 @@ fn parse_table_identifiers() {
fn test_table_ident_err(ident: &str) {
let sql = format!("SELECT 1 FROM {ident}");
assert!(bigquery().parse_sql_statements(&sql).is_err());
let parsed = bigquery().parse_sql_statements(&sql);
assert!(
parsed.is_err(),
"Expected error parsing identifier: `{ident}`, within SQL: `{sql}` - but got success: {parsed:#?}"
);
}
test_table_ident("`spa ce`", None, vec![Ident::with_quote('`', "spa ce")]);

View file

@ -17328,7 +17328,7 @@ fn parse_copy_options() {
r#"COPY dst (c1, c2, c3) FROM 's3://redshift-downloads/tickit/category_pipe.txt' IAM_ROLE 'arn:aws:iam::123456789:role/role1' CSV IGNOREHEADER 1"#,
);
match copy {
Statement::Copy { legacy_options, .. } => {
Statement::Copy(Copy { legacy_options, .. }) => {
assert_eq!(
legacy_options,
vec![
@ -17348,7 +17348,7 @@ fn parse_copy_options() {
r#"COPY dst (c1, c2, c3) FROM 's3://redshift-downloads/tickit/category_pipe.txt' IAM_ROLE DEFAULT CSV IGNOREHEADER 1"#,
);
match copy {
Statement::Copy { legacy_options, .. } => {
Statement::Copy(Copy { legacy_options, .. }) => {
assert_eq!(
legacy_options,
vec![

View file

@ -1014,27 +1014,49 @@ fn parse_drop_schema_if_exists() {
#[test]
fn parse_copy_from_stdin() {
let sql = r#"COPY public.actor (actor_id, first_name, last_name, last_update, value) FROM stdin;
1 PENELOPE GUINESS 2006-02-15 09:34:33 0.11111
2 NICK WAHLBERG 2006-02-15 09:34:33 0.22222
3 ED CHASE 2006-02-15 09:34:33 0.312323
4 JENNIFER DAVIS 2006-02-15 09:34:33 0.3232
5 JOHNNY LOLLOBRIGIDA 2006-02-15 09:34:33 1.343
6 BETTE NICHOLSON 2006-02-15 09:34:33 5.0
7 GRACE MOSTEL 2006-02-15 09:34:33 6.0
8 MATTHEW JOHANSSON 2006-02-15 09:34:33 7.0
9 JOE SWANK 2006-02-15 09:34:33 8.0
10 CHRISTIAN GABLE 2006-02-15 09:34:33 9.1
11 ZERO CAGE 2006-02-15 09:34:33 10.001
12 KARL BERRY 2017-11-02 19:15:42.308637+08 11.001
A Fateful Reflection of a Waitress And a Boat who must Discover a Sumo Wrestler in Ancient China
Kwara & Kogi
{"Deleted Scenes","Behind the Scenes"}
'awe':5 'awe-inspir':4 'barbarella':1 'cat':13 'conquer':16 'dog':18 'feminist':10 'inspir':6 'monasteri':21 'must':15 'stori':7 'streetcar':2
PHP USD $
\N Some other value
\\."#;
pg_and_generic().one_statement_parses_to(sql, "");
let sql = r#"COPY public.actor (actor_id, first_name, last_name, last_update, value) FROM STDIN;
1 PENELOPE GUINESS 2006-02-15 09:34:33 0.11111
2 NICK WAHLBERG 2006-02-15 09:34:33 0.22222
3 ED CHASE 2006-02-15 09:34:33 0.312323
4 JENNIFER DAVIS 2006-02-15 09:34:33 0.3232
5 JOHNNY LOLLOBRIGIDA 2006-02-15 09:34:33 1.343
6 BETTE NICHOLSON 2006-02-15 09:34:33 5.0
7 GRACE MOSTEL 2006-02-15 09:34:33 6.0
8 MATTHEW JOHANSSON 2006-02-15 09:34:33 7.0
9 JOE SWANK 2006-02-15 09:34:33 8.0
10 CHRISTIAN GABLE 2006-02-15 09:34:33 9.1
11 ZERO CAGE 2006-02-15 09:34:33 10.001
12 KARL BERRY 2017-11-02 19:15:42.308637+08 11.001
\."#;
pg_and_generic().verified_stmt(sql);
let sql_comma_separated = r#"COPY public.actor (actor_id, first_name, last_name, last_update, value) FROM STDIN (FORMAT csv, DELIMITER ',');
1,PENELOPE,GUINESS,2006-02-15 09:34:33,0.11111
2,NICK,WAHLBERG,2006-02-15 09:34:33,0.22222
3,ED,CHASE,2006-02-15 09:34:33,0.312323
4,JENNIFER,DAVIS,2006-02-15 09:34:33,0.3232
5,JOHNNY,"LOLLO,BRIGIDA",2006-02-15 09:34:33,1.343
6,BETTE,NICHOLSON,2006-02-15 09:34:33,5.0
7,GRACE,MOSTEL,2006-02-15 09:34:33,6.0
8,MATTHEW,JOHANSSON,2006-02-15 09:34:33,7.0
9,JOE,SWANK,2006-02-15 09:34:33,8.0
10,CHRISTIAN,GABLE,2006-02-15 09:34:33,9.1
11,ZERO,CAGE,2006-02-15 09:34:33,10.001
12,KARL,BERRY,2017-11-02 19:15:42.308637+08,11.001
\."#;
pg_and_generic().verified_stmt(sql_comma_separated);
let incorrect_csv_sql = r#"COPY public.actor (actor_id, first_name, last_name, last_update, value) FROM STDIN (FORMAT csv, DELIMITER ',');
1,PENELOPE,GUINESS,2006-02-15 09:34:33,0.11111
2,NICK,WAHLBERG,2006-02-15 09:34:33
\."#;
pg_and_generic().verified_stmt(incorrect_csv_sql);
let mixed_incorrect_separators = r#"COPY public.actor (actor_id, first_name, last_name, last_update, value) FROM STDIN (FORMAT csv, DELIMITER ',');
1,PENELOPE,GUINESS,2006-02-15 09:34:33,0.11111
2 NICK WAHLBERG 2006-02-15 09:34:33,0.22222
\."#;
pg_and_generic().verified_stmt(mixed_incorrect_separators);
}
#[test]
@ -1042,7 +1064,7 @@ fn test_copy_from() {
let stmt = pg().verified_stmt("COPY users FROM 'data.csv'");
assert_eq!(
stmt,
Statement::Copy {
Statement::Copy(Copy {
source: CopySource::Table {
table_name: ObjectName::from(vec!["users".into()]),
columns: vec![],
@ -1053,14 +1075,14 @@ fn test_copy_from() {
},
options: vec![],
legacy_options: vec![],
values: vec![],
}
values: None,
})
);
let stmt = pg().verified_stmt("COPY users FROM 'data.csv' DELIMITER ','");
assert_eq!(
stmt,
Statement::Copy {
Statement::Copy(Copy {
source: CopySource::Table {
table_name: ObjectName::from(vec!["users".into()]),
columns: vec![],
@ -1071,14 +1093,14 @@ fn test_copy_from() {
},
options: vec![],
legacy_options: vec![CopyLegacyOption::Delimiter(',')],
values: vec![],
}
values: None,
})
);
let stmt = pg().verified_stmt("COPY users FROM 'data.csv' DELIMITER ',' CSV HEADER");
assert_eq!(
stmt,
Statement::Copy {
Statement::Copy(Copy {
source: CopySource::Table {
table_name: ObjectName::from(vec!["users".into()]),
columns: vec![],
@ -1092,8 +1114,8 @@ fn test_copy_from() {
CopyLegacyOption::Delimiter(','),
CopyLegacyOption::Csv(vec![CopyLegacyCsvOption::Header,])
],
values: vec![],
}
values: None,
})
);
}
@ -1102,7 +1124,7 @@ fn test_copy_to() {
let stmt = pg().verified_stmt("COPY users TO 'data.csv'");
assert_eq!(
stmt,
Statement::Copy {
Statement::Copy(Copy {
source: CopySource::Table {
table_name: ObjectName::from(vec!["users".into()]),
columns: vec![],
@ -1113,14 +1135,14 @@ fn test_copy_to() {
},
options: vec![],
legacy_options: vec![],
values: vec![],
}
values: None,
})
);
let stmt = pg().verified_stmt("COPY users TO 'data.csv' DELIMITER ','");
assert_eq!(
stmt,
Statement::Copy {
Statement::Copy(Copy {
source: CopySource::Table {
table_name: ObjectName::from(vec!["users".into()]),
columns: vec![],
@ -1131,14 +1153,14 @@ fn test_copy_to() {
},
options: vec![],
legacy_options: vec![CopyLegacyOption::Delimiter(',')],
values: vec![],
}
values: None,
})
);
let stmt = pg().verified_stmt("COPY users TO 'data.csv' DELIMITER ',' CSV HEADER");
assert_eq!(
stmt,
Statement::Copy {
Statement::Copy(Copy {
source: CopySource::Table {
table_name: ObjectName::from(vec!["users".into()]),
columns: vec![],
@ -1152,8 +1174,8 @@ fn test_copy_to() {
CopyLegacyOption::Delimiter(','),
CopyLegacyOption::Csv(vec![CopyLegacyCsvOption::Header,])
],
values: vec![],
}
values: None,
})
)
}
@ -1179,7 +1201,7 @@ fn parse_copy_from() {
)";
assert_eq!(
pg_and_generic().one_statement_parses_to(sql, ""),
Statement::Copy {
Statement::Copy(Copy {
source: CopySource::Table {
table_name: ObjectName::from(vec!["table".into()]),
columns: vec!["a".into(), "b".into()],
@ -1206,8 +1228,8 @@ fn parse_copy_from() {
CopyOption::Encoding("utf8".into()),
],
legacy_options: vec![],
values: vec![],
}
values: None,
})
);
}
@ -1225,7 +1247,7 @@ fn parse_copy_to() {
let stmt = pg().verified_stmt("COPY users TO 'data.csv'");
assert_eq!(
stmt,
Statement::Copy {
Statement::Copy(Copy {
source: CopySource::Table {
table_name: ObjectName::from(vec!["users".into()]),
columns: vec![],
@ -1236,14 +1258,14 @@ fn parse_copy_to() {
},
options: vec![],
legacy_options: vec![],
values: vec![],
}
values: None,
})
);
let stmt = pg().verified_stmt("COPY country TO STDOUT (DELIMITER '|')");
assert_eq!(
stmt,
Statement::Copy {
Statement::Copy(Copy {
source: CopySource::Table {
table_name: ObjectName::from(vec!["country".into()]),
columns: vec![],
@ -1252,15 +1274,15 @@ fn parse_copy_to() {
target: CopyTarget::Stdout,
options: vec![CopyOption::Delimiter('|')],
legacy_options: vec![],
values: vec![],
}
values: None,
})
);
let stmt =
pg().verified_stmt("COPY country TO PROGRAM 'gzip > /usr1/proj/bray/sql/country_data.gz'");
assert_eq!(
stmt,
Statement::Copy {
Statement::Copy(Copy {
source: CopySource::Table {
table_name: ObjectName::from(vec!["country".into()]),
columns: vec![],
@ -1271,14 +1293,14 @@ fn parse_copy_to() {
},
options: vec![],
legacy_options: vec![],
values: vec![],
}
values: None,
})
);
let stmt = pg().verified_stmt("COPY (SELECT 42 AS a, 'hello' AS b) TO 'query.csv'");
assert_eq!(
stmt,
Statement::Copy {
Statement::Copy(Copy {
source: CopySource::Query(Box::new(Query {
with: None,
body: Box::new(SetExpr::Select(Box::new(Select {
@ -1339,8 +1361,8 @@ fn parse_copy_to() {
},
options: vec![],
legacy_options: vec![],
values: vec![],
}
values: None,
})
)
}
@ -1349,7 +1371,7 @@ fn parse_copy_from_before_v9_0() {
let stmt = pg().verified_stmt("COPY users FROM 'data.csv' BINARY DELIMITER ',' NULL 'null' CSV HEADER QUOTE '\"' ESCAPE '\\' FORCE NOT NULL column");
assert_eq!(
stmt,
Statement::Copy {
Statement::Copy(Copy {
source: CopySource::Table {
table_name: ObjectName::from(vec!["users".into()]),
columns: vec![],
@ -1370,15 +1392,15 @@ fn parse_copy_from_before_v9_0() {
CopyLegacyCsvOption::ForceNotNull(vec!["column".into()]),
]),
],
values: vec![],
}
values: None,
})
);
// test 'AS' keyword
let sql = "COPY users FROM 'data.csv' DELIMITER AS ',' NULL AS 'null' CSV QUOTE AS '\"' ESCAPE AS '\\'";
assert_eq!(
pg_and_generic().one_statement_parses_to(sql, ""),
Statement::Copy {
Statement::Copy(Copy {
source: CopySource::Table {
table_name: ObjectName::from(vec!["users".into()]),
columns: vec![],
@ -1396,8 +1418,8 @@ fn parse_copy_from_before_v9_0() {
CopyLegacyCsvOption::Escape('\\'),
]),
],
values: vec![],
}
values: None,
})
);
}
@ -1406,7 +1428,7 @@ fn parse_copy_to_before_v9_0() {
let stmt = pg().verified_stmt("COPY users TO 'data.csv' BINARY DELIMITER ',' NULL 'null' CSV HEADER QUOTE '\"' ESCAPE '\\' FORCE QUOTE column");
assert_eq!(
stmt,
Statement::Copy {
Statement::Copy(Copy {
source: CopySource::Table {
table_name: ObjectName::from(vec!["users".into()]),
columns: vec![],
@ -1427,8 +1449,8 @@ fn parse_copy_to_before_v9_0() {
CopyLegacyCsvOption::ForceQuote(vec!["column".into()]),
]),
],
values: vec![],
}
values: None,
})
)
}

View file

@ -563,12 +563,7 @@ fn test_snowflake_single_line_tokenize() {
let expected = vec![
Token::make_keyword("CREATE"),
Token::Whitespace(Whitespace::Space),
Token::make_keyword("TABLE"),
Token::Whitespace(Whitespace::SingleLineComment {
prefix: "#".to_string(),
comment: " this is a comment \n".to_string(),
}),
Token::make_word("table_1", None),
];
@ -579,13 +574,7 @@ fn test_snowflake_single_line_tokenize() {
let expected = vec![
Token::make_keyword("CREATE"),
Token::Whitespace(Whitespace::Space),
Token::make_keyword("TABLE"),
Token::Whitespace(Whitespace::Space),
Token::Whitespace(Whitespace::SingleLineComment {
prefix: "//".to_string(),
comment: " this is a comment \n".to_string(),
}),
Token::make_word("table_1", None),
];