datafusion-sqlparse/src/ast/value.rs
Dmitry Patsura 2c0886d9fe
feat: Support escaped string literals (PostgreSQL) (#502)
* feat: Support escaped string literals (PostgreSQL)

Signed-off-by: Dmitry Patsura <talk@dmtry.me>

* lint

* escape ', \r, \t

* Update src/ast/value.rs

Co-authored-by: Andrew Lamb <andrew@nerdnetworks.org>

* Update src/tokenizer.rs

Co-authored-by: Andrew Lamb <andrew@nerdnetworks.org>

* test: two slashes

* remove dead code

* test: parsing error

* support generic dialect too (for DF)

Co-authored-by: Andrew Lamb <andrew@nerdnetworks.org>
2022-05-25 14:42:14 -04:00

251 lines
8.3 KiB
Rust

// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#[cfg(not(feature = "std"))]
use alloc::string::String;
use core::fmt;
#[cfg(feature = "bigdecimal")]
use bigdecimal::BigDecimal;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
/// Primitive SQL values such as number and string
///
/// Each variant stores the literal's payload; the `Display` impl for this
/// type adds the surrounding quotes, prefixes and keywords when rendering.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum Value {
/// Numeric literal, kept as its textual form (built without the
/// `bigdecimal` feature). When the `bool` is `true`, `Display` appends an
/// `L` suffix to the number.
#[cfg(not(feature = "bigdecimal"))]
Number(String, bool),
/// Numeric literal stored as a `BigDecimal` (built with the `bigdecimal`
/// feature). When the `bool` is `true`, `Display` appends an `L` suffix
/// to the number.
#[cfg(feature = "bigdecimal")]
Number(BigDecimal, bool),
/// `'string value'`
SingleQuotedString(String),
/// `e'string value'` (postgres extension)
/// <https://www.postgresql.org/docs/8.3/sql-syntax-lexical.html#SQL-SYNTAX-STRINGS>
EscapedStringLiteral(String),
/// `N'string value'`
NationalStringLiteral(String),
/// `X'hex value'`
HexStringLiteral(String),
/// `"string value"`
DoubleQuotedString(String),
/// Boolean value true or false
Boolean(bool),
/// INTERVAL literals, roughly in the following format:
/// `INTERVAL '<value>' [ <leading_field> [ (<leading_precision>) ] ]
/// [ TO <last_field> [ (<fractional_seconds_precision>) ] ]`,
/// e.g. `INTERVAL '123:45.67' MINUTE(3) TO SECOND(2)`.
///
/// The parser does not validate the `<value>`, nor does it ensure
/// that the `<leading_field>` units >= the units in `<last_field>`,
/// so the user will have to reject intervals like `HOUR TO YEAR`.
Interval {
/// The quoted `<value>` text, without the surrounding quotes.
value: String,
/// The `<leading_field>` unit, if one was given.
leading_field: Option<DateTimeField>,
/// The parenthesized `<leading_precision>`, if one was given.
leading_precision: Option<u64>,
/// The `<last_field>` unit following `TO`, if one was given.
last_field: Option<DateTimeField>,
/// The seconds precision can be specified in SQL source as
/// `INTERVAL '__' SECOND(_, x)` (in which case the `leading_field`
/// will be `Second` and the `last_field` will be `None`),
/// or as `__ TO SECOND(x)`.
fractional_seconds_precision: Option<u64>,
},
/// `NULL` value
Null,
/// `?` or `$` Prepared statement arg placeholder; the stored text is
/// written back verbatim by `Display`.
Placeholder(String),
}
impl fmt::Display for Value {
    /// Writes the value as SQL literal text.
    ///
    /// Single-quoted strings and interval values have embedded `'` doubled,
    /// `E'...'` strings are backslash-escaped, and the remaining string-like
    /// variants are written verbatim between their delimiters.
    ///
    /// This never panics: an `Interval` that does not have the exact
    /// `SECOND (p, s)` shape is rendered through the general clause-by-clause
    /// path instead of tripping an assertion.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match self {
            // The flag selects the trailing `L` suffix.
            Value::Number(v, l) => write!(f, "{}{long}", v, long = if *l { "L" } else { "" }),
            Value::DoubleQuotedString(v) => write!(f, "\"{}\"", v),
            Value::SingleQuotedString(v) => write!(f, "'{}'", escape_single_quote_string(v)),
            Value::EscapedStringLiteral(v) => write!(f, "E'{}'", escape_escaped_string(v)),
            // NOTE(review): national and hex literals are emitted verbatim, so
            // an embedded `'` is not escaped here -- confirm against the tokenizer.
            Value::NationalStringLiteral(v) => write!(f, "N'{}'", v),
            Value::HexStringLiteral(v) => write!(f, "X'{}'", v),
            Value::Boolean(v) => write!(f, "{}", v),
            // `INTERVAL '...' SECOND (p, s)` form. The parser only produces
            // this shape with `last_field == None`, so that is matched
            // structurally. A hand-built value that also carries a
            // `last_field` falls through to the general arm below rather
            // than panicking inside `Display`.
            Value::Interval {
                value,
                leading_field: Some(DateTimeField::Second),
                leading_precision: Some(leading_precision),
                last_field: None,
                fractional_seconds_precision: Some(fractional_seconds_precision),
            } => write!(
                f,
                "INTERVAL '{}' SECOND ({}, {})",
                escape_single_quote_string(value),
                leading_precision,
                fractional_seconds_precision
            ),
            // General form: each optional clause is appended only when present.
            Value::Interval {
                value,
                leading_field,
                leading_precision,
                last_field,
                fractional_seconds_precision,
            } => {
                write!(f, "INTERVAL '{}'", escape_single_quote_string(value))?;
                if let Some(leading_field) = leading_field {
                    write!(f, " {}", leading_field)?;
                }
                if let Some(leading_precision) = leading_precision {
                    write!(f, " ({})", leading_precision)?;
                }
                if let Some(last_field) = last_field {
                    write!(f, " TO {}", last_field)?;
                }
                if let Some(fractional_seconds_precision) = fractional_seconds_precision {
                    write!(f, " ({})", fractional_seconds_precision)?;
                }
                Ok(())
            }
            Value::Null => write!(f, "NULL"),
            Value::Placeholder(v) => write!(f, "{}", v),
        }
    }
}
/// Date/time field keywords, such as the `<leading_field>` and `<last_field>`
/// units of an `INTERVAL` literal.
///
/// `Display` renders each variant as its upper-case SQL keyword. Note that
/// `Millenium` (keyword `MILLENIUM`) is spelled with a single `n`.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum DateTimeField {
    Year,
    Month,
    Week,
    Day,
    Hour,
    Minute,
    Second,
    Century,
    Decade,
    Dow,
    Doy,
    Epoch,
    Isodow,
    Isoyear,
    Julian,
    Microseconds,
    Millenium,
    Milliseconds,
    Quarter,
    Timezone,
    TimezoneHour,
    TimezoneMinute,
}

impl fmt::Display for DateTimeField {
    /// Writes the upper-case SQL keyword for this field.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        // Resolve the keyword first, then emit it in a single write.
        let keyword = match self {
            Self::Year => "YEAR",
            Self::Month => "MONTH",
            Self::Week => "WEEK",
            Self::Day => "DAY",
            Self::Hour => "HOUR",
            Self::Minute => "MINUTE",
            Self::Second => "SECOND",
            Self::Century => "CENTURY",
            Self::Decade => "DECADE",
            Self::Dow => "DOW",
            Self::Doy => "DOY",
            Self::Epoch => "EPOCH",
            Self::Isodow => "ISODOW",
            Self::Isoyear => "ISOYEAR",
            Self::Julian => "JULIAN",
            Self::Microseconds => "MICROSECONDS",
            Self::Millenium => "MILLENIUM",
            Self::Milliseconds => "MILLISECONDS",
            Self::Quarter => "QUARTER",
            Self::Timezone => "TIMEZONE",
            Self::TimezoneHour => "TIMEZONE_HOUR",
            Self::TimezoneMinute => "TIMEZONE_MINUTE",
        };
        f.write_str(keyword)
    }
}
/// Display adapter that renders the wrapped text with every `'` doubled
/// (`''`), so the result can be placed between single quotes.
///
/// Nothing is allocated: the escaping happens while formatting.
pub struct EscapeSingleQuoteString<'a>(&'a str);

impl<'a> fmt::Display for EscapeSingleQuoteString<'a> {
    /// Writes the wrapped text, replacing each `'` with `''`.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        // Splitting on the quote yields the quote-free runs; every boundary
        // between two runs is exactly one original `'`.
        let mut runs = self.0.split('\'');
        if let Some(head) = runs.next() {
            f.write_str(head)?;
        }
        for run in runs {
            f.write_str("''")?;
            f.write_str(run)?;
        }
        Ok(())
    }
}

/// Wraps `s` in an [`EscapeSingleQuoteString`] so it is quote-escaped when
/// formatted with `{}`.
pub fn escape_single_quote_string(s: &str) -> EscapeSingleQuoteString<'_> {
    EscapeSingleQuoteString(s)
}
/// Display adapter that renders the wrapped text with backslash escapes,
/// for use inside an `E'...'` string literal.
///
/// `'`, `\`, newline, tab and carriage return become `\'`, `\\`, `\n`, `\t`
/// and `\r`; every other character is written unchanged.
pub struct EscapeEscapedStringLiteral<'a>(&'a str);

impl<'a> fmt::Display for EscapeEscapedStringLiteral<'a> {
    /// Writes the wrapped text with the escapes described on the type.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        for ch in self.0.chars() {
            // Look up the two-character escape, if this character needs one.
            let escape = match ch {
                '\'' => Some(r#"\'"#),
                '\\' => Some(r#"\\"#),
                '\n' => Some(r#"\n"#),
                '\t' => Some(r#"\t"#),
                '\r' => Some(r#"\r"#),
                _ => None,
            };
            match escape {
                Some(sequence) => f.write_str(sequence)?,
                None => write!(f, "{}", ch)?,
            }
        }
        Ok(())
    }
}

/// Wraps `s` in an [`EscapeEscapedStringLiteral`] so it is backslash-escaped
/// when formatted with `{}`.
pub fn escape_escaped_string(s: &str) -> EscapeEscapedStringLiteral<'_> {
    EscapeEscapedStringLiteral(s)
}
/// Trim-side keywords: `BOTH`, `LEADING` or `TRAILING`.
///
/// `Display` renders each variant as its upper-case keyword.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum TrimWhereField {
    Both,
    Leading,
    Trailing,
}

impl fmt::Display for TrimWhereField {
    /// Writes the upper-case keyword for this variant.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let keyword = match self {
            Self::Both => "BOTH",
            Self::Leading => "LEADING",
            Self::Trailing => "TRAILING",
        };
        f.write_str(keyword)
    }
}