datafusion-sqlparse/src/ast/value.rs
Sergey Olontsev 50c605a471
Some checks are pending
Rust / test (beta) (push) Waiting to run
license / Release Audit Tool (RAT) (push) Waiting to run
Rust / test (nightly) (push) Waiting to run
Rust / test (stable) (push) Waiting to run
Rust / codestyle (push) Waiting to run
Rust / lint (push) Waiting to run
Rust / benchmark-lint (push) Waiting to run
Rust / compile (push) Waiting to run
Rust / docs (push) Waiting to run
Rust / compile-no-std (push) Waiting to run
Support for Map values in ClickHouse settings (#1896)
Co-authored-by: Ifeanyi Ubah <ify1992@yahoo.com>
2025-06-28 08:13:11 +02:00

593 lines
22 KiB
Rust

// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#[cfg(not(feature = "std"))]
use alloc::string::String;
use core::fmt;
#[cfg(feature = "bigdecimal")]
use bigdecimal::BigDecimal;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use crate::{ast::Ident, tokenizer::Span};
#[cfg(feature = "visitor")]
use sqlparser_derive::{Visit, VisitMut};
/// Wraps a primitive SQL [`Value`] with its [`Span`] location
///
/// # Example: create a `ValueWithSpan` from a `Value`
/// ```
/// # use sqlparser::ast::{Value, ValueWithSpan};
/// # use sqlparser::tokenizer::{Location, Span};
/// let value = Value::SingleQuotedString(String::from("endpoint"));
/// // from line 1, column 1 to line 1, column 7
/// let span = Span::new(Location::new(1, 1), Location::new(1, 7));
/// let value_with_span = value.with_span(span);
/// ```
///
/// # Example: create a `ValueWithSpan` from a `Value` with an empty span
///
/// You can call [`Value::with_empty_span`] to create a `ValueWithSpan` with an empty span
/// ```
/// # use sqlparser::ast::{Value, ValueWithSpan};
/// # use sqlparser::tokenizer::{Location, Span};
/// let value = Value::SingleQuotedString(String::from("endpoint"));
/// let value_with_span = value.with_empty_span();
/// assert_eq!(value_with_span.span, Span::empty());
/// ```
///
/// You can also use the [`From`] trait to convert `ValueWithSpan` to/from `Value`s
/// ```
/// # use sqlparser::ast::{Value, ValueWithSpan};
/// # use sqlparser::tokenizer::{Location, Span};
/// let value = Value::SingleQuotedString(String::from("endpoint"));
/// // converting `Value` to `ValueWithSpan` results in an empty span
/// let value_with_span: ValueWithSpan = value.into();
/// assert_eq!(value_with_span.span, Span::empty());
/// // convert back to `Value`
/// let value: Value = value_with_span.into();
/// ```
#[derive(Debug, Clone, Eq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
pub struct ValueWithSpan {
pub value: Value,
pub span: Span,
}
impl PartialEq for ValueWithSpan {
fn eq(&self, other: &Self) -> bool {
self.value == other.value
}
}
impl Ord for ValueWithSpan {
fn cmp(&self, other: &Self) -> core::cmp::Ordering {
self.value.cmp(&other.value)
}
}
impl PartialOrd for ValueWithSpan {
fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
Some(Ord::cmp(self, other))
}
}
impl core::hash::Hash for ValueWithSpan {
fn hash<H: core::hash::Hasher>(&self, state: &mut H) {
self.value.hash(state);
}
}
impl From<Value> for ValueWithSpan {
fn from(value: Value) -> Self {
value.with_empty_span()
}
}
impl From<ValueWithSpan> for Value {
fn from(value: ValueWithSpan) -> Self {
value.value
}
}
/// Primitive SQL values such as number and string
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(
feature = "visitor",
derive(Visit, VisitMut),
visit(with = "visit_value")
)]
pub enum Value {
/// Numeric literal
#[cfg(not(feature = "bigdecimal"))]
Number(String, bool),
#[cfg(feature = "bigdecimal")]
// HINT: use `test_utils::number` to make an instance of
// Value::Number This might help if you your tests pass locally
// but fail on CI with the `--all-features` flag enabled
Number(BigDecimal, bool),
/// 'string value'
SingleQuotedString(String),
// $<tag_name>$string value$<tag_name>$ (postgres syntax)
DollarQuotedString(DollarQuotedString),
/// Triple single quoted strings: Example '''abc'''
/// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#quoted_literals)
TripleSingleQuotedString(String),
/// Triple double quoted strings: Example """abc"""
/// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#quoted_literals)
TripleDoubleQuotedString(String),
/// e'string value' (postgres extension)
/// See [Postgres docs](https://www.postgresql.org/docs/8.3/sql-syntax-lexical.html#SQL-SYNTAX-STRINGS)
/// for more details.
EscapedStringLiteral(String),
/// u&'string value' (postgres extension)
/// See [Postgres docs](https://www.postgresql.org/docs/current/sql-syntax-lexical.html#SQL-SYNTAX-STRINGS-UESCAPE)
/// for more details.
UnicodeStringLiteral(String),
/// B'string value'
SingleQuotedByteStringLiteral(String),
/// B"string value"
DoubleQuotedByteStringLiteral(String),
/// Triple single quoted literal with byte string prefix. Example `B'''abc'''`
/// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#quoted_literals)
TripleSingleQuotedByteStringLiteral(String),
/// Triple double quoted literal with byte string prefix. Example `B"""abc"""`
/// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#quoted_literals)
TripleDoubleQuotedByteStringLiteral(String),
/// Single quoted literal with raw string prefix. Example `R'abc'`
/// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#quoted_literals)
SingleQuotedRawStringLiteral(String),
/// Double quoted literal with raw string prefix. Example `R"abc"`
/// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#quoted_literals)
DoubleQuotedRawStringLiteral(String),
/// Triple single quoted literal with raw string prefix. Example `R'''abc'''`
/// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#quoted_literals)
TripleSingleQuotedRawStringLiteral(String),
/// Triple double quoted literal with raw string prefix. Example `R"""abc"""`
/// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#quoted_literals)
TripleDoubleQuotedRawStringLiteral(String),
/// N'string value'
NationalStringLiteral(String),
/// X'hex value'
HexStringLiteral(String),
DoubleQuotedString(String),
/// Boolean value true or false
Boolean(bool),
/// `NULL` value
Null,
/// `?` or `$` Prepared statement arg placeholder
Placeholder(String),
}
impl ValueWithSpan {
/// If the underlying literal is a string, regardless of quote style, returns the associated string value
pub fn into_string(self) -> Option<String> {
self.value.into_string()
}
}
impl Value {
/// If the underlying literal is a string, regardless of quote style, returns the associated string value
pub fn into_string(self) -> Option<String> {
match self {
Value::SingleQuotedString(s)
| Value::DoubleQuotedString(s)
| Value::TripleSingleQuotedString(s)
| Value::TripleDoubleQuotedString(s)
| Value::SingleQuotedByteStringLiteral(s)
| Value::DoubleQuotedByteStringLiteral(s)
| Value::TripleSingleQuotedByteStringLiteral(s)
| Value::TripleDoubleQuotedByteStringLiteral(s)
| Value::SingleQuotedRawStringLiteral(s)
| Value::DoubleQuotedRawStringLiteral(s)
| Value::TripleSingleQuotedRawStringLiteral(s)
| Value::TripleDoubleQuotedRawStringLiteral(s)
| Value::EscapedStringLiteral(s)
| Value::UnicodeStringLiteral(s)
| Value::NationalStringLiteral(s)
| Value::HexStringLiteral(s) => Some(s),
Value::DollarQuotedString(s) => Some(s.value),
_ => None,
}
}
pub fn with_span(self, span: Span) -> ValueWithSpan {
ValueWithSpan { value: self, span }
}
pub fn with_empty_span(self) -> ValueWithSpan {
self.with_span(Span::empty())
}
}
impl fmt::Display for ValueWithSpan {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{}", self.value)
}
}
impl fmt::Display for Value {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
Value::Number(v, l) => write!(f, "{}{long}", v, long = if *l { "L" } else { "" }),
Value::DoubleQuotedString(v) => write!(f, "\"{}\"", escape_double_quote_string(v)),
Value::SingleQuotedString(v) => write!(f, "'{}'", escape_single_quote_string(v)),
Value::TripleSingleQuotedString(v) => {
write!(f, "'''{v}'''")
}
Value::TripleDoubleQuotedString(v) => {
write!(f, r#""""{v}""""#)
}
Value::DollarQuotedString(v) => write!(f, "{v}"),
Value::EscapedStringLiteral(v) => write!(f, "E'{}'", escape_escaped_string(v)),
Value::UnicodeStringLiteral(v) => write!(f, "U&'{}'", escape_unicode_string(v)),
Value::NationalStringLiteral(v) => write!(f, "N'{v}'"),
Value::HexStringLiteral(v) => write!(f, "X'{v}'"),
Value::Boolean(v) => write!(f, "{v}"),
Value::SingleQuotedByteStringLiteral(v) => write!(f, "B'{v}'"),
Value::DoubleQuotedByteStringLiteral(v) => write!(f, "B\"{v}\""),
Value::TripleSingleQuotedByteStringLiteral(v) => write!(f, "B'''{v}'''"),
Value::TripleDoubleQuotedByteStringLiteral(v) => write!(f, r#"B"""{v}""""#),
Value::SingleQuotedRawStringLiteral(v) => write!(f, "R'{v}'"),
Value::DoubleQuotedRawStringLiteral(v) => write!(f, "R\"{v}\""),
Value::TripleSingleQuotedRawStringLiteral(v) => write!(f, "R'''{v}'''"),
Value::TripleDoubleQuotedRawStringLiteral(v) => write!(f, r#"R"""{v}""""#),
Value::Null => write!(f, "NULL"),
Value::Placeholder(v) => write!(f, "{v}"),
}
}
}
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
pub struct DollarQuotedString {
pub value: String,
pub tag: Option<String>,
}
impl fmt::Display for DollarQuotedString {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match &self.tag {
Some(tag) => {
write!(f, "${}${}${}$", tag, self.value, tag)
}
None => {
write!(f, "$${}$$", self.value)
}
}
}
}
#[derive(Debug, Clone, PartialEq, Eq, Ord, PartialOrd, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
pub enum DateTimeField {
Year,
Years,
Month,
Months,
/// Week optionally followed by a WEEKDAY.
///
/// ```sql
/// WEEK(MONDAY)
/// ```
///
/// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/date_functions#extract)
Week(Option<Ident>),
Weeks,
Day,
DayOfWeek,
DayOfYear,
Days,
Date,
Datetime,
Hour,
Hours,
Minute,
Minutes,
Second,
Seconds,
Century,
Decade,
Dow,
Doy,
Epoch,
Isodow,
IsoWeek,
Isoyear,
Julian,
Microsecond,
Microseconds,
Millenium,
Millennium,
Millisecond,
Milliseconds,
Nanosecond,
Nanoseconds,
Quarter,
Time,
Timezone,
TimezoneAbbr,
TimezoneHour,
TimezoneMinute,
TimezoneRegion,
NoDateTime,
/// Arbitrary abbreviation or custom date-time part.
///
/// ```sql
/// EXTRACT(q FROM CURRENT_TIMESTAMP)
/// ```
/// [Snowflake](https://docs.snowflake.com/en/sql-reference/functions-date-time#supported-date-and-time-parts)
Custom(Ident),
}
impl fmt::Display for DateTimeField {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
DateTimeField::Year => write!(f, "YEAR"),
DateTimeField::Years => write!(f, "YEARS"),
DateTimeField::Month => write!(f, "MONTH"),
DateTimeField::Months => write!(f, "MONTHS"),
DateTimeField::Week(week_day) => {
write!(f, "WEEK")?;
if let Some(week_day) = week_day {
write!(f, "({week_day})")?
}
Ok(())
}
DateTimeField::Weeks => write!(f, "WEEKS"),
DateTimeField::Day => write!(f, "DAY"),
DateTimeField::DayOfWeek => write!(f, "DAYOFWEEK"),
DateTimeField::DayOfYear => write!(f, "DAYOFYEAR"),
DateTimeField::Days => write!(f, "DAYS"),
DateTimeField::Date => write!(f, "DATE"),
DateTimeField::Datetime => write!(f, "DATETIME"),
DateTimeField::Hour => write!(f, "HOUR"),
DateTimeField::Hours => write!(f, "HOURS"),
DateTimeField::Minute => write!(f, "MINUTE"),
DateTimeField::Minutes => write!(f, "MINUTES"),
DateTimeField::Second => write!(f, "SECOND"),
DateTimeField::Seconds => write!(f, "SECONDS"),
DateTimeField::Century => write!(f, "CENTURY"),
DateTimeField::Decade => write!(f, "DECADE"),
DateTimeField::Dow => write!(f, "DOW"),
DateTimeField::Doy => write!(f, "DOY"),
DateTimeField::Epoch => write!(f, "EPOCH"),
DateTimeField::Isodow => write!(f, "ISODOW"),
DateTimeField::Isoyear => write!(f, "ISOYEAR"),
DateTimeField::IsoWeek => write!(f, "ISOWEEK"),
DateTimeField::Julian => write!(f, "JULIAN"),
DateTimeField::Microsecond => write!(f, "MICROSECOND"),
DateTimeField::Microseconds => write!(f, "MICROSECONDS"),
DateTimeField::Millenium => write!(f, "MILLENIUM"),
DateTimeField::Millennium => write!(f, "MILLENNIUM"),
DateTimeField::Millisecond => write!(f, "MILLISECOND"),
DateTimeField::Milliseconds => write!(f, "MILLISECONDS"),
DateTimeField::Nanosecond => write!(f, "NANOSECOND"),
DateTimeField::Nanoseconds => write!(f, "NANOSECONDS"),
DateTimeField::Quarter => write!(f, "QUARTER"),
DateTimeField::Time => write!(f, "TIME"),
DateTimeField::Timezone => write!(f, "TIMEZONE"),
DateTimeField::TimezoneAbbr => write!(f, "TIMEZONE_ABBR"),
DateTimeField::TimezoneHour => write!(f, "TIMEZONE_HOUR"),
DateTimeField::TimezoneMinute => write!(f, "TIMEZONE_MINUTE"),
DateTimeField::TimezoneRegion => write!(f, "TIMEZONE_REGION"),
DateTimeField::NoDateTime => write!(f, "NODATETIME"),
DateTimeField::Custom(custom) => write!(f, "{custom}"),
}
}
}
#[derive(Debug, Clone, PartialEq, Eq, Ord, PartialOrd, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
/// The Unicode Standard defines four normalization forms, which are intended to eliminate
/// certain distinctions between visually or functionally identical characters.
///
/// See [Unicode Normalization Forms](https://unicode.org/reports/tr15/) for details.
pub enum NormalizationForm {
/// Canonical Decomposition, followed by Canonical Composition.
NFC,
/// Canonical Decomposition.
NFD,
/// Compatibility Decomposition, followed by Canonical Composition.
NFKC,
/// Compatibility Decomposition.
NFKD,
}
impl fmt::Display for NormalizationForm {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
NormalizationForm::NFC => write!(f, "NFC"),
NormalizationForm::NFD => write!(f, "NFD"),
NormalizationForm::NFKC => write!(f, "NFKC"),
NormalizationForm::NFKD => write!(f, "NFKD"),
}
}
}
pub struct EscapeQuotedString<'a> {
string: &'a str,
quote: char,
}
impl fmt::Display for EscapeQuotedString<'_> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
// EscapeQuotedString doesn't know which mode of escape was
// chosen by the user. So this code must to correctly display
// strings without knowing if the strings are already escaped
// or not.
//
// If the quote symbol in the string is repeated twice, OR, if
// the quote symbol is after backslash, display all the chars
// without any escape. However, if the quote symbol is used
// just between usual chars, `fmt()` should display it twice."
//
// The following table has examples
//
// | original query | mode | AST Node | serialized |
// | ------------- | --------- | -------------------------------------------------- | ------------ |
// | `"A""B""A"` | no-escape | `DoubleQuotedString(String::from("A\"\"B\"\"A"))` | `"A""B""A"` |
// | `"A""B""A"` | default | `DoubleQuotedString(String::from("A\"B\"A"))` | `"A""B""A"` |
// | `"A\"B\"A"` | no-escape | `DoubleQuotedString(String::from("A\\\"B\\\"A"))` | `"A\"B\"A"` |
// | `"A\"B\"A"` | default | `DoubleQuotedString(String::from("A\"B\"A"))` | `"A""B""A"` |
let quote = self.quote;
let mut previous_char = char::default();
let mut start_idx = 0;
let mut peekable_chars = self.string.char_indices().peekable();
while let Some(&(idx, ch)) = peekable_chars.peek() {
match ch {
char if char == quote => {
if previous_char == '\\' {
// the quote is already escaped with a backslash, skip
peekable_chars.next();
continue;
}
peekable_chars.next();
match peekable_chars.peek() {
Some((_, c)) if *c == quote => {
// the quote is already escaped with another quote, skip
peekable_chars.next();
}
_ => {
// The quote is not escaped.
// Including idx in the range, so the quote at idx will be printed twice:
// in this call to write_str() and in the next one.
f.write_str(&self.string[start_idx..=idx])?;
start_idx = idx;
}
}
}
_ => {
peekable_chars.next();
}
}
previous_char = ch;
}
f.write_str(&self.string[start_idx..])?;
Ok(())
}
}
pub fn escape_quoted_string(string: &str, quote: char) -> EscapeQuotedString<'_> {
EscapeQuotedString { string, quote }
}
pub fn escape_single_quote_string(s: &str) -> EscapeQuotedString<'_> {
escape_quoted_string(s, '\'')
}
pub fn escape_double_quote_string(s: &str) -> EscapeQuotedString<'_> {
escape_quoted_string(s, '\"')
}
pub struct EscapeEscapedStringLiteral<'a>(&'a str);
impl fmt::Display for EscapeEscapedStringLiteral<'_> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
for c in self.0.chars() {
match c {
'\'' => {
write!(f, r#"\'"#)?;
}
'\\' => {
write!(f, r#"\\"#)?;
}
'\n' => {
write!(f, r#"\n"#)?;
}
'\t' => {
write!(f, r#"\t"#)?;
}
'\r' => {
write!(f, r#"\r"#)?;
}
_ => {
write!(f, "{c}")?;
}
}
}
Ok(())
}
}
pub fn escape_escaped_string(s: &str) -> EscapeEscapedStringLiteral<'_> {
EscapeEscapedStringLiteral(s)
}
pub struct EscapeUnicodeStringLiteral<'a>(&'a str);
impl fmt::Display for EscapeUnicodeStringLiteral<'_> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
for c in self.0.chars() {
match c {
'\'' => {
write!(f, "''")?;
}
'\\' => {
write!(f, r#"\\"#)?;
}
x if x.is_ascii() => {
write!(f, "{c}")?;
}
_ => {
let codepoint = c as u32;
// if the character fits in 32 bits, we can use the \XXXX format
// otherwise, we need to use the \+XXXXXX format
if codepoint <= 0xFFFF {
write!(f, "\\{codepoint:04X}")?;
} else {
write!(f, "\\+{codepoint:06X}")?;
}
}
}
}
Ok(())
}
}
pub fn escape_unicode_string(s: &str) -> EscapeUnicodeStringLiteral<'_> {
EscapeUnicodeStringLiteral(s)
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
pub enum TrimWhereField {
Both,
Leading,
Trailing,
}
impl fmt::Display for TrimWhereField {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
use TrimWhereField::*;
f.write_str(match self {
Both => "BOTH",
Leading => "LEADING",
Trailing => "TRAILING",
})
}
}