From 819c0958d6f59120794fb9f15d6a910c659b53e2 Mon Sep 17 00:00:00 2001
From: Luca
Date: Wed, 29 Oct 2025 10:16:19 +0100
Subject: [PATCH] Tentatively added support for path identifiers

---
 src/dialect/mod.rs       |  5 +++++
 src/dialect/snowflake.rs | 14 +++++++++++++-
 src/tokenizer.rs         | 24 ++++++++++++++++++++++++
 3 files changed, 42 insertions(+), 1 deletion(-)

diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs
index abc8291d..df19a598 100644
--- a/src/dialect/mod.rs
+++ b/src/dialect/mod.rs
@@ -183,6 +183,11 @@ pub trait Dialect: Debug + Any {
         false
     }
 
+    /// Returns true if the dialect lexes `/`-separated paths (e.g. `~/dir/file`) as one identifier
+    fn supports_path_like_identifiers(&self) -> bool {
+        false
+    }
+
     /// Most dialects do not have custom operators. Override this method to provide custom operators.
     fn is_custom_operator_part(&self, _ch: char) -> bool {
         false
diff --git a/src/dialect/snowflake.rs b/src/dialect/snowflake.rs
index 6b40125e..ba370b34 100644
--- a/src/dialect/snowflake.rs
+++ b/src/dialect/snowflake.rs
@@ -158,6 +158,10 @@ impl Dialect for SnowflakeDialect {
             || ch == '_'
     }
 
+    fn supports_path_like_identifiers(&self) -> bool {
+        true
+    }
+
     // See https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#escape_sequences
     fn supports_string_literal_backslash_escape(&self) -> bool {
         true
@@ -1067,8 +1071,16 @@ pub fn parse_stage_name_identifier(parser: &mut Parser) -> Result<Ident, ParserE
             Token::Plus => ident.push('+'),
             Token::Minus => ident.push('-'),
             Token::Number(n, _) => ident.push_str(n),
-            Token::Word(w) => ident.push_str(&w.to_string()),
+            Token::Word(w) => {
+                // Only plain (non-keyword) words extend the name; a keyword ends it.
+                if matches!(w.keyword, Keyword::NoKeyword) {
+                    ident.push_str(w.to_string().as_str());
+                } else {
+                    parser.prev_token();
+                    break;
+                }
+            }
             _ => return parser.expected("stage name identifier", parser.peek_token()),
         }
     }
     Ok(Ident::new(ident))
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index f49468fe..9b1094f8 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -1669,6 +1669,11 @@ impl<'a> Tokenizer<'a> {
                                 _ => self.start_binop(chars, "~~", Token::DoubleTilde),
                             }
                         }
+                        Some('/') if self.dialect.supports_path_like_identifiers() => {
+                            // path-like identifier starting with "~/": lex it as a single word
+                            let s = self.tokenize_word("~", chars, prev_keyword)?;
+                            Ok(Some(Token::make_word(s, None)))
+                        }
                         _ => self.start_binop(chars, "~", Token::Tilde),
                     }
                 }
@@ -1969,6 +1974,25 @@ impl<'a> Tokenizer<'a> {
         s.push_str(&peeking_take_while(chars, |ch| {
             self.dialect.is_identifier_part(ch)
         }));
+        // NOTE(review): if prev_keyword != SELECT, `a/b` (a division) also lexes as one word.
+        while !matches!(prev_keyword, Some(Keyword::SELECT))
+            && self.dialect.supports_path_like_identifiers()
+            && chars.peek().map(|&ch| ch == '/').unwrap_or(false)
+            && chars
+                .peekable
+                .clone()
+                .nth(1) // the char right after the '/'
+                .map(|ch| ch.is_alphabetic())
+                .unwrap_or(false)
+        {
+            s.push('/');
+            chars.next(); // consume the '/'
+
+            s.push_str(&peeking_take_while(chars, |ch| {
+                self.dialect.is_identifier_part(ch)
+            }));
+        }
+
         if !matches!(prev_keyword, Some(Keyword::SELECT))
             && self.dialect.supports_hyphenated_identifiers()
         {