diff --git a/src/ast/operator.rs b/src/ast/operator.rs index 9fb1bf02..1cab9c24 100644 --- a/src/ast/operator.rs +++ b/src/ast/operator.rs @@ -131,6 +131,8 @@ pub enum BinaryOperator { PGRegexNotMatch, /// String does not match regular expression (case insensitively), e.g. `a !~* b` (PostgreSQL-specific) PGRegexNotIMatch, + /// String "starts with", e.g. `a ^@ b` (PostgreSQL-specific) + PGStartsWith, /// PostgreSQL-specific custom operator. /// /// See [CREATE OPERATOR](https://www.postgresql.org/docs/current/sql-createoperator.html) @@ -172,6 +174,7 @@ impl fmt::Display for BinaryOperator { BinaryOperator::PGRegexIMatch => f.write_str("~*"), BinaryOperator::PGRegexNotMatch => f.write_str("!~"), BinaryOperator::PGRegexNotIMatch => f.write_str("!~*"), + BinaryOperator::PGStartsWith => f.write_str("^@"), BinaryOperator::PGCustomBinaryOperator(idents) => { write!(f, "OPERATOR({})", display_separated(idents, ".")) } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index fa30bbbc..1f00478b 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -2196,6 +2196,9 @@ impl<'a> Parser<'a> { Token::Overlap if dialect_of!(self is PostgreSqlDialect | GenericDialect) => { Some(BinaryOperator::PGOverlap) } + Token::CaretAt if dialect_of!(self is PostgreSqlDialect | GenericDialect) => { + Some(BinaryOperator::PGStartsWith) + } Token::Tilde => Some(BinaryOperator::PGRegexMatch), Token::TildeAsterisk => Some(BinaryOperator::PGRegexIMatch), Token::ExclamationMarkTilde => Some(BinaryOperator::PGRegexNotMatch), @@ -2630,6 +2633,7 @@ impl<'a> Parser<'a> { | Token::LongArrow | Token::Arrow | Token::Overlap + | Token::CaretAt | Token::HashArrow | Token::HashLongArrow | Token::AtArrow diff --git a/src/tokenizer.rs b/src/tokenizer.rs index e2d1843b..2156d068 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -60,7 +60,8 @@ pub enum Token { DoubleQuotedString(String), /// Dollar quoted string: i.e: $$string$$ or $tag_name$string$tag_name$ DollarQuotedString(DollarQuotedString), - /// 
Byte string literal: i.e: b'string' or B'string' + /// Byte string literal: i.e: b'string' or B'string' (note that some backends, such as + /// PostgreSQL, may treat this syntax as a bit string literal instead, i.e: b'10010101') SingleQuotedByteStringLiteral(String), /// Byte string literal: i.e: b"string" or B"string" DoubleQuotedByteStringLiteral(String), @@ -114,7 +115,7 @@ pub enum Token { Period, /// Colon `:` Colon, - /// DoubleColon `::` (used for casting in postgresql) + /// DoubleColon `::` (used for casting in PostgreSQL) DoubleColon, /// Assignment `:=` (used for keyword argument in DuckDB macros) DuckAssignment, @@ -152,7 +153,7 @@ pub enum Token { ShiftLeft, /// `>>`, a bitwise shift right operator in PostgreSQL ShiftRight, - /// '&&', an overlap operator in PostgreSQL + /// `&&`, an overlap operator in PostgreSQL Overlap, /// Exclamation Mark `!` used for PostgreSQL factorial operator ExclamationMark, @@ -160,19 +161,21 @@ pub enum Token { DoubleExclamationMark, /// AtSign `@` used for PostgreSQL abs operator AtSign, + /// `^@`, a "starts with" string operator in PostgreSQL + CaretAt, /// `|/`, a square root math operator in PostgreSQL PGSquareRoot, /// `||/`, a cube root math operator in PostgreSQL PGCubeRoot, /// `?` or `$` , a prepared statement arg placeholder Placeholder(String), - /// ->, used as a operator to extract json field in PostgreSQL + /// `->`, used as an operator to extract json field in PostgreSQL Arrow, - /// ->>, used as a operator to extract json field as text in PostgreSQL + /// `->>`, used as an operator to extract json field as text in PostgreSQL LongArrow, - /// #> Extracts JSON sub-object at the specified path + /// `#>`, extracts JSON sub-object at the specified path HashArrow, - /// #>> Extracts JSON sub-object at the specified path as text + /// `#>>`, extracts JSON sub-object at the specified path as text HashLongArrow, /// jsonb @> jsonb -> boolean: Test whether left json contains the right json AtArrow, @@ -247,6 +250,7 
@@ impl fmt::Display for Token { Token::ExclamationMarkTilde => f.write_str("!~"), Token::ExclamationMarkTildeAsterisk => f.write_str("!~*"), Token::AtSign => f.write_str("@"), + Token::CaretAt => f.write_str("^@"), Token::ShiftLeft => f.write_str("<<"), Token::ShiftRight => f.write_str(">>"), Token::Overlap => f.write_str("&&"), @@ -940,7 +944,13 @@ impl<'a> Tokenizer<'a> { _ => Ok(Some(Token::Ampersand)), } } - '^' => self.consume_and_return(chars, Token::Caret), + '^' => { + chars.next(); // consume the '^' + match chars.peek() { + Some('@') => self.consume_and_return(chars, Token::CaretAt), + _ => Ok(Some(Token::Caret)), + } + } '{' => self.consume_and_return(chars, Token::LBrace), '}' => self.consume_and_return(chars, Token::RBrace), '#' if dialect_of!(self is SnowflakeDialect) => { diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 8469adbb..8d90ca91 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -1728,6 +1728,7 @@ fn parse_pg_binary_ops() { (">>", BinaryOperator::PGBitwiseShiftRight, pg_and_generic()), ("<<", BinaryOperator::PGBitwiseShiftLeft, pg_and_generic()), ("&&", BinaryOperator::PGOverlap, pg()), + ("^@", BinaryOperator::PGStartsWith, pg()), ]; for (str_op, op, dialects) in binary_ops {