From c75f11bf478a2911fb1ad0ffe70b5aeda8e72bef Mon Sep 17 00:00:00 2001
From: Luca <cappelletti.luca94@gmail.com>
Date: Tue, 28 Oct 2025 16:26:35 +0100
Subject: [PATCH] Extended placeholder syntax test and moved check in tokenizer

---
 src/parser/mod.rs           |  2 +-
 src/tokenizer.rs            | 26 ++++++++++++++++++++++++--
 tests/sqlparser_bigquery.rs |  5 ++++-
 3 files changed, 29 insertions(+), 4 deletions(-)
diff --git a/src/parser/mod.rs b/src/parser/mod.rs
index ab981f9f..a51781fb 100644
--- a/src/parser/mod.rs
+++ b/src/parser/mod.rs
@@ -18475,7 +18475,7 @@ mod tests {
 
     #[test]
     fn test_placeholder_invalid_whitespace() {
-        for w in ["  ", "/*invalid*/"] {
+        for w in [" ", "  ", "/*invalid*/", "\n", "\t", "\r\n", "--comment\n"] {
             let sql = format!("\nSELECT\n  :{w}fooBar");
             assert!(Parser::parse_sql(&GenericDialect, &sql).is_err());
         }
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index 45154515..1dffb8c5 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -908,6 +908,22 @@ impl<'a> Tokenizer<'a> {
         Ok(Some(Token::make_word(&word, None)))
     }
 
+    /// Returns an error if `prev_token` is a `:`; otherwise returns `Ok(None)`.
+    /// Callers invoke this right before consuming whitespace or a comment.
+    fn handle_colon_space_error(
+        &self,
+        chars: &State,
+        prev_token: Option<&Token>,
+    ) -> Result<Option<Token>, TokenizerError> {
+        if let Some(Token::Colon) = prev_token {
+            return Err(TokenizerError {
+                message: "Unexpected whitespace after ':'; did you mean ':placeholder' or '::'?".to_string(),
+                location: chars.location(),
+            });
+        }
+        Ok(None) // no token is produced; call sites only propagate the error via `?`
+    }
+
     /// Get the next token or return None
     fn next_token(
         &self,
@@ -919,6 +935,7 @@ impl<'a> Tokenizer<'a> {
         match chars.peek() {
             Some(&ch) => match ch {
                 ' ' | '\t' | '\n' | '\r' => {
+                    self.handle_colon_space_error(chars, prev_token)?;
                     chars.next(); // consume
                     *location = chars.location();
                     self.next_token(location, chars, prev_token, true)
@@ -1166,7 +1183,7 @@ impl<'a> Tokenizer<'a> {
                     // if the prev token is not a word, then this is not a valid sql
                     // word or number.
                     if ch == '.' && chars.peekable.clone().nth(1) == Some('_') {
-                        if let Some(Token::Word(_)) = prev_token {
+                        if !preceded_by_whitespace {
                             chars.next();
                             return Ok(Some(Token::Period));
                         }
@@ -1210,7 +1227,7 @@ impl<'a> Tokenizer<'a> {
                     // we should yield the dot as a dedicated token so compound identifiers
                     // starting with digits can be parsed correctly.
                     if s == "." && self.dialect.supports_numeric_prefix() {
-                        if let Some(Token::Word(_)) = prev_token {
+                        if !preceded_by_whitespace {
                             return Ok(Some(Token::Period));
                         }
                     }
@@ -1300,6 +1317,7 @@ impl<'a> Tokenizer<'a> {
                             }
 
                             if is_comment {
+                                self.handle_colon_space_error(chars, prev_token)?;
                                 chars.next(); // consume second '-'
                                 // Consume the rest of the line as comment
                                 let _comment = self.tokenize_single_line_comment(chars);
@@ -1324,12 +1342,14 @@ impl<'a> Tokenizer<'a> {
                     chars.next(); // consume the '/'
                     match chars.peek() {
                         Some('*') => {
+                            self.handle_colon_space_error(chars, prev_token)?;
                             chars.next(); // consume the '*', starting a multi-line comment
                             let _comment = self.consume_multiline_comment(chars)?;
                             *location = chars.location();
                             self.next_token(location, chars, prev_token, true)
                         }
                         Some('/') if dialect_of!(self is SnowflakeDialect) => {
+                            self.handle_colon_space_error(chars, prev_token)?;
                             chars.next(); // consume the second '/', starting a snowflake single-line comment
                             // Consume the rest of the line as comment
                             let _comment = self.tokenize_single_line_comment(chars);
@@ -1534,6 +1554,7 @@ impl<'a> Tokenizer<'a> {
                 '}' => self.consume_and_return(chars, Token::RBrace),
                 '#' if dialect_of!(self is SnowflakeDialect | BigQueryDialect | MySqlDialect | HiveDialect) =>
                 {
+                    self.handle_colon_space_error(chars, prev_token)?;
                     chars.next(); // consume the '#', starting a snowflake single-line comment
                     // Consume the rest of the line as comment
                     let _comment = self.tokenize_single_line_comment(chars);
@@ -1668,6 +1689,7 @@ impl<'a> Tokenizer<'a> {
 
                 // whitespace check (including unicode chars) should be last as it covers some of the chars above
                 ch if ch.is_whitespace() => {
+                    self.handle_colon_space_error(chars, prev_token)?;
                     chars.next(); // consume
                     *location = chars.location();
                     self.next_token(location, chars, prev_token, true)
diff --git a/tests/sqlparser_bigquery.rs b/tests/sqlparser_bigquery.rs
index 03a0ac81..9f1e72aa 100644
--- a/tests/sqlparser_bigquery.rs
+++ b/tests/sqlparser_bigquery.rs
@@ -1567,7 +1567,10 @@ fn parse_table_identifiers() {
 
     fn test_table_ident_err(ident: &str) {
         let sql = format!("SELECT 1 FROM {ident}");
-        assert!(bigquery().parse_sql_statements(&sql).is_err());
+        assert!(
+            bigquery().parse_sql_statements(&sql).is_err(),
+            "Expected error parsing identifier: `{ident}`, within SQL: `{sql}`"
+        );
     }
 
     test_table_ident("`spa ce`", None, vec![Ident::with_quote('`', "spa ce")]);