mirror of
https://github.com/apache/datafusion-sqlparser-rs.git
synced 2025-08-31 19:27:21 +00:00
Add support for char length units on small character string types. (#663)
This results in complete support for ANSI CHARACTER, CHAR, CHARACTER VARYING, CHAR VARYING, and VARCHAR.
This commit is contained in:
parent
777672625f
commit
cacdf3305f
6 changed files with 244 additions and 34 deletions
|
@ -26,15 +26,15 @@ use super::value::escape_single_quote_string;
|
||||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||||
pub enum DataType {
|
pub enum DataType {
|
||||||
/// Fixed-length character type e.g. CHARACTER(10)
|
/// Fixed-length character type e.g. CHARACTER(10)
|
||||||
Character(Option<u64>),
|
Character(Option<CharacterLength>),
|
||||||
/// Fixed-length char type e.g. CHAR(10)
|
/// Fixed-length char type e.g. CHAR(10)
|
||||||
Char(Option<u64>),
|
Char(Option<CharacterLength>),
|
||||||
/// Character varying type e.g. CHARACTER VARYING(10)
|
/// Character varying type e.g. CHARACTER VARYING(10)
|
||||||
CharacterVarying(Option<u64>),
|
CharacterVarying(Option<CharacterLength>),
|
||||||
/// Char varying type e.g. CHAR VARYING(10)
|
/// Char varying type e.g. CHAR VARYING(10)
|
||||||
CharVarying(Option<u64>),
|
CharVarying(Option<CharacterLength>),
|
||||||
/// Variable-length character type e.g. VARCHAR(10)
|
/// Variable-length character type e.g. VARCHAR(10)
|
||||||
Varchar(Option<u64>),
|
Varchar(Option<CharacterLength>),
|
||||||
/// Variable-length character type e.g. NVARCHAR(10)
|
/// Variable-length character type e.g. NVARCHAR(10)
|
||||||
Nvarchar(Option<u64>),
|
Nvarchar(Option<u64>),
|
||||||
/// Uuid type
|
/// Uuid type
|
||||||
|
@ -133,17 +133,14 @@ pub enum DataType {
|
||||||
impl fmt::Display for DataType {
|
impl fmt::Display for DataType {
|
||||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||||
match self {
|
match self {
|
||||||
DataType::Character(size) => {
|
DataType::Character(size) => format_character_string_type(f, "CHARACTER", size),
|
||||||
format_type_with_optional_length(f, "CHARACTER", size, false)
|
DataType::Char(size) => format_character_string_type(f, "CHAR", size),
|
||||||
}
|
|
||||||
DataType::Char(size) => format_type_with_optional_length(f, "CHAR", size, false),
|
|
||||||
DataType::CharacterVarying(size) => {
|
DataType::CharacterVarying(size) => {
|
||||||
format_type_with_optional_length(f, "CHARACTER VARYING", size, false)
|
format_character_string_type(f, "CHARACTER VARYING", size)
|
||||||
}
|
}
|
||||||
DataType::CharVarying(size) => {
|
|
||||||
format_type_with_optional_length(f, "CHAR VARYING", size, false)
|
DataType::CharVarying(size) => format_character_string_type(f, "CHAR VARYING", size),
|
||||||
}
|
DataType::Varchar(size) => format_character_string_type(f, "VARCHAR", size),
|
||||||
DataType::Varchar(size) => format_type_with_optional_length(f, "VARCHAR", size, false),
|
|
||||||
DataType::Nvarchar(size) => {
|
DataType::Nvarchar(size) => {
|
||||||
format_type_with_optional_length(f, "NVARCHAR", size, false)
|
format_type_with_optional_length(f, "NVARCHAR", size, false)
|
||||||
}
|
}
|
||||||
|
@ -247,6 +244,18 @@ fn format_type_with_optional_length(
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn format_character_string_type(
|
||||||
|
f: &mut fmt::Formatter,
|
||||||
|
sql_type: &str,
|
||||||
|
size: &Option<CharacterLength>,
|
||||||
|
) -> fmt::Result {
|
||||||
|
write!(f, "{}", sql_type)?;
|
||||||
|
if let Some(size) = size {
|
||||||
|
write!(f, "({})", size)?;
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
/// Timestamp and Time data types information about TimeZone formatting.
|
/// Timestamp and Time data types information about TimeZone formatting.
|
||||||
///
|
///
|
||||||
/// This is more related to a display information than real differences between each variant. To
|
/// This is more related to a display information than real differences between each variant. To
|
||||||
|
@ -324,3 +333,50 @@ impl fmt::Display for ExactNumberInfo {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Information about [character length][1], including length and possibly unit.
|
||||||
|
///
|
||||||
|
/// [1]: https://jakewheat.github.io/sql-overview/sql-2016-foundation-grammar.html#character-length
|
||||||
|
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
|
||||||
|
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||||
|
pub struct CharacterLength {
|
||||||
|
/// Default (if VARYING) or maximum (if not VARYING) length
|
||||||
|
pub length: u64,
|
||||||
|
/// Optional unit. If not informed, the ANSI handles it as CHARACTERS implicitly
|
||||||
|
pub unit: Option<CharLengthUnits>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl fmt::Display for CharacterLength {
|
||||||
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||||
|
write!(f, "{}", self.length)?;
|
||||||
|
if let Some(unit) = &self.unit {
|
||||||
|
write!(f, " {}", unit)?;
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Possible units for a character length, initially based on the 2016 ANSI [standard][1].
///
/// The enum is fieldless, so it is `Copy`: values can be passed around freely
/// without explicit clones.
///
/// [1]: https://jakewheat.github.io/sql-overview/sql-2016-foundation-grammar.html#char-length-units
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum CharLengthUnits {
    /// CHARACTERS unit
    Characters,
    /// OCTETS unit
    Octets,
}

impl fmt::Display for CharLengthUnits {
    /// Writes the SQL keyword for the unit (`CHARACTERS` or `OCTETS`).
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Constant keywords need no formatting machinery; write them directly.
        f.write_str(match self {
            Self::Characters => "CHARACTERS",
            Self::Octets => "OCTETS",
        })
    }
}
|
||||||
|
|
|
@ -22,9 +22,9 @@ use core::fmt;
|
||||||
#[cfg(feature = "serde")]
|
#[cfg(feature = "serde")]
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
|
|
||||||
pub use self::data_type::DataType;
|
pub use self::data_type::{
|
||||||
pub use self::data_type::ExactNumberInfo;
|
CharLengthUnits, CharacterLength, DataType, ExactNumberInfo, TimezoneInfo,
|
||||||
pub use self::data_type::TimezoneInfo;
|
};
|
||||||
pub use self::ddl::{
|
pub use self::ddl::{
|
||||||
AlterColumnOperation, AlterTableOperation, ColumnDef, ColumnOption, ColumnOptionDef,
|
AlterColumnOperation, AlterTableOperation, ColumnDef, ColumnOption, ColumnOptionDef,
|
||||||
ReferentialAction, TableConstraint,
|
ReferentialAction, TableConstraint,
|
||||||
|
|
|
@ -123,6 +123,7 @@ define_keywords!(
|
||||||
CHANGE,
|
CHANGE,
|
||||||
CHAR,
|
CHAR,
|
||||||
CHARACTER,
|
CHARACTER,
|
||||||
|
CHARACTERS,
|
||||||
CHARACTER_LENGTH,
|
CHARACTER_LENGTH,
|
||||||
CHARSET,
|
CHARSET,
|
||||||
CHAR_LENGTH,
|
CHAR_LENGTH,
|
||||||
|
@ -372,6 +373,7 @@ define_keywords!(
|
||||||
NVARCHAR,
|
NVARCHAR,
|
||||||
OBJECT,
|
OBJECT,
|
||||||
OCCURRENCES_REGEX,
|
OCCURRENCES_REGEX,
|
||||||
|
OCTETS,
|
||||||
OCTET_LENGTH,
|
OCTET_LENGTH,
|
||||||
OF,
|
OF,
|
||||||
OFFSET,
|
OFFSET,
|
||||||
|
|
156
src/parser.rs
156
src/parser.rs
|
@ -3426,20 +3426,24 @@ impl<'a> Parser<'a> {
|
||||||
Ok(DataType::BigInt(optional_precision?))
|
Ok(DataType::BigInt(optional_precision?))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Keyword::VARCHAR => Ok(DataType::Varchar(self.parse_optional_precision()?)),
|
Keyword::VARCHAR => Ok(DataType::Varchar(self.parse_optional_character_length()?)),
|
||||||
Keyword::NVARCHAR => Ok(DataType::Nvarchar(self.parse_optional_precision()?)),
|
Keyword::NVARCHAR => Ok(DataType::Nvarchar(self.parse_optional_precision()?)),
|
||||||
Keyword::CHARACTER => {
|
Keyword::CHARACTER => {
|
||||||
if self.parse_keyword(Keyword::VARYING) {
|
if self.parse_keyword(Keyword::VARYING) {
|
||||||
Ok(DataType::CharacterVarying(self.parse_optional_precision()?))
|
Ok(DataType::CharacterVarying(
|
||||||
|
self.parse_optional_character_length()?,
|
||||||
|
))
|
||||||
} else {
|
} else {
|
||||||
Ok(DataType::Character(self.parse_optional_precision()?))
|
Ok(DataType::Character(self.parse_optional_character_length()?))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Keyword::CHAR => {
|
Keyword::CHAR => {
|
||||||
if self.parse_keyword(Keyword::VARYING) {
|
if self.parse_keyword(Keyword::VARYING) {
|
||||||
Ok(DataType::CharVarying(self.parse_optional_precision()?))
|
Ok(DataType::CharVarying(
|
||||||
|
self.parse_optional_character_length()?,
|
||||||
|
))
|
||||||
} else {
|
} else {
|
||||||
Ok(DataType::Char(self.parse_optional_precision()?))
|
Ok(DataType::Char(self.parse_optional_character_length()?))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Keyword::CLOB => Ok(DataType::Clob(self.parse_optional_precision()?)),
|
Keyword::CLOB => Ok(DataType::Clob(self.parse_optional_precision()?)),
|
||||||
|
@ -3680,6 +3684,31 @@ impl<'a> Parser<'a> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn parse_optional_character_length(
|
||||||
|
&mut self,
|
||||||
|
) -> Result<Option<CharacterLength>, ParserError> {
|
||||||
|
if self.consume_token(&Token::LParen) {
|
||||||
|
let character_length = self.parse_character_length()?;
|
||||||
|
self.expect_token(&Token::RParen)?;
|
||||||
|
Ok(Some(character_length))
|
||||||
|
} else {
|
||||||
|
Ok(None)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn parse_character_length(&mut self) -> Result<CharacterLength, ParserError> {
|
||||||
|
let length = self.parse_literal_uint()?;
|
||||||
|
let unit = if self.parse_keyword(Keyword::CHARACTERS) {
|
||||||
|
Some(CharLengthUnits::Characters)
|
||||||
|
} else if self.parse_keyword(Keyword::OCTETS) {
|
||||||
|
Some(CharLengthUnits::Octets)
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
};
|
||||||
|
|
||||||
|
Ok(CharacterLength { length, unit })
|
||||||
|
}
|
||||||
|
|
||||||
pub fn parse_optional_precision_scale(
|
pub fn parse_optional_precision_scale(
|
||||||
&mut self,
|
&mut self,
|
||||||
) -> Result<(Option<u64>, Option<u64>), ParserError> {
|
) -> Result<(Option<u64>, Option<u64>), ParserError> {
|
||||||
|
@ -5337,7 +5366,9 @@ mod tests {
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod test_parse_data_type {
|
mod test_parse_data_type {
|
||||||
use crate::ast::{DataType, ExactNumberInfo, TimezoneInfo};
|
use crate::ast::{
|
||||||
|
CharLengthUnits, CharacterLength, DataType, ExactNumberInfo, TimezoneInfo,
|
||||||
|
};
|
||||||
use crate::dialect::{AnsiDialect, GenericDialect};
|
use crate::dialect::{AnsiDialect, GenericDialect};
|
||||||
use crate::test_utils::TestedDialects;
|
use crate::test_utils::TestedDialects;
|
||||||
|
|
||||||
|
@ -5360,21 +5391,124 @@ mod tests {
|
||||||
|
|
||||||
test_parse_data_type!(dialect, "CHARACTER", DataType::Character(None));
|
test_parse_data_type!(dialect, "CHARACTER", DataType::Character(None));
|
||||||
|
|
||||||
test_parse_data_type!(dialect, "CHARACTER(20)", DataType::Character(Some(20)));
|
test_parse_data_type!(
|
||||||
|
dialect,
|
||||||
|
"CHARACTER(20)",
|
||||||
|
DataType::Character(Some(CharacterLength {
|
||||||
|
length: 20,
|
||||||
|
unit: None
|
||||||
|
}))
|
||||||
|
);
|
||||||
|
|
||||||
|
test_parse_data_type!(
|
||||||
|
dialect,
|
||||||
|
"CHARACTER(20 CHARACTERS)",
|
||||||
|
DataType::Character(Some(CharacterLength {
|
||||||
|
length: 20,
|
||||||
|
unit: Some(CharLengthUnits::Characters)
|
||||||
|
}))
|
||||||
|
);
|
||||||
|
|
||||||
|
test_parse_data_type!(
|
||||||
|
dialect,
|
||||||
|
"CHARACTER(20 OCTETS)",
|
||||||
|
DataType::Character(Some(CharacterLength {
|
||||||
|
length: 20,
|
||||||
|
unit: Some(CharLengthUnits::Octets)
|
||||||
|
}))
|
||||||
|
);
|
||||||
|
|
||||||
test_parse_data_type!(dialect, "CHAR", DataType::Char(None));
|
test_parse_data_type!(dialect, "CHAR", DataType::Char(None));
|
||||||
|
|
||||||
test_parse_data_type!(dialect, "CHAR(20)", DataType::Char(Some(20)));
|
test_parse_data_type!(
|
||||||
|
dialect,
|
||||||
|
"CHAR(20)",
|
||||||
|
DataType::Char(Some(CharacterLength {
|
||||||
|
length: 20,
|
||||||
|
unit: None
|
||||||
|
}))
|
||||||
|
);
|
||||||
|
|
||||||
|
test_parse_data_type!(
|
||||||
|
dialect,
|
||||||
|
"CHAR(20 CHARACTERS)",
|
||||||
|
DataType::Char(Some(CharacterLength {
|
||||||
|
length: 20,
|
||||||
|
unit: Some(CharLengthUnits::Characters)
|
||||||
|
}))
|
||||||
|
);
|
||||||
|
|
||||||
|
test_parse_data_type!(
|
||||||
|
dialect,
|
||||||
|
"CHAR(20 OCTETS)",
|
||||||
|
DataType::Char(Some(CharacterLength {
|
||||||
|
length: 20,
|
||||||
|
unit: Some(CharLengthUnits::Octets)
|
||||||
|
}))
|
||||||
|
);
|
||||||
|
|
||||||
test_parse_data_type!(
|
test_parse_data_type!(
|
||||||
dialect,
|
dialect,
|
||||||
"CHARACTER VARYING(20)",
|
"CHARACTER VARYING(20)",
|
||||||
DataType::CharacterVarying(Some(20))
|
DataType::CharacterVarying(Some(CharacterLength {
|
||||||
|
length: 20,
|
||||||
|
unit: None
|
||||||
|
}))
|
||||||
);
|
);
|
||||||
|
|
||||||
test_parse_data_type!(dialect, "CHAR VARYING(20)", DataType::CharVarying(Some(20)));
|
test_parse_data_type!(
|
||||||
|
dialect,
|
||||||
|
"CHARACTER VARYING(20 CHARACTERS)",
|
||||||
|
DataType::CharacterVarying(Some(CharacterLength {
|
||||||
|
length: 20,
|
||||||
|
unit: Some(CharLengthUnits::Characters)
|
||||||
|
}))
|
||||||
|
);
|
||||||
|
|
||||||
test_parse_data_type!(dialect, "VARCHAR(20)", DataType::Varchar(Some(20)));
|
test_parse_data_type!(
|
||||||
|
dialect,
|
||||||
|
"CHARACTER VARYING(20 OCTETS)",
|
||||||
|
DataType::CharacterVarying(Some(CharacterLength {
|
||||||
|
length: 20,
|
||||||
|
unit: Some(CharLengthUnits::Octets)
|
||||||
|
}))
|
||||||
|
);
|
||||||
|
|
||||||
|
test_parse_data_type!(
|
||||||
|
dialect,
|
||||||
|
"CHAR VARYING(20)",
|
||||||
|
DataType::CharVarying(Some(CharacterLength {
|
||||||
|
length: 20,
|
||||||
|
unit: None
|
||||||
|
}))
|
||||||
|
);
|
||||||
|
|
||||||
|
test_parse_data_type!(
|
||||||
|
dialect,
|
||||||
|
"CHAR VARYING(20 CHARACTERS)",
|
||||||
|
DataType::CharVarying(Some(CharacterLength {
|
||||||
|
length: 20,
|
||||||
|
unit: Some(CharLengthUnits::Characters)
|
||||||
|
}))
|
||||||
|
);
|
||||||
|
|
||||||
|
test_parse_data_type!(
|
||||||
|
dialect,
|
||||||
|
"CHAR VARYING(20 OCTETS)",
|
||||||
|
DataType::CharVarying(Some(CharacterLength {
|
||||||
|
length: 20,
|
||||||
|
unit: Some(CharLengthUnits::Octets)
|
||||||
|
}))
|
||||||
|
);
|
||||||
|
|
||||||
|
test_parse_data_type!(
|
||||||
|
dialect,
|
||||||
|
"VARCHAR(20)",
|
||||||
|
DataType::Varchar(Some(CharacterLength {
|
||||||
|
length: 20,
|
||||||
|
unit: None
|
||||||
|
}))
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
|
|
@ -1945,7 +1945,10 @@ fn parse_create_table() {
|
||||||
vec![
|
vec![
|
||||||
ColumnDef {
|
ColumnDef {
|
||||||
name: "name".into(),
|
name: "name".into(),
|
||||||
data_type: DataType::Varchar(Some(100)),
|
data_type: DataType::Varchar(Some(CharacterLength {
|
||||||
|
length: 100,
|
||||||
|
unit: None
|
||||||
|
})),
|
||||||
collation: None,
|
collation: None,
|
||||||
options: vec![ColumnOptionDef {
|
options: vec![ColumnOptionDef {
|
||||||
name: None,
|
name: None,
|
||||||
|
@ -2401,7 +2404,10 @@ fn parse_create_external_table() {
|
||||||
vec![
|
vec![
|
||||||
ColumnDef {
|
ColumnDef {
|
||||||
name: "name".into(),
|
name: "name".into(),
|
||||||
data_type: DataType::Varchar(Some(100)),
|
data_type: DataType::Varchar(Some(CharacterLength {
|
||||||
|
length: 100,
|
||||||
|
unit: None
|
||||||
|
})),
|
||||||
collation: None,
|
collation: None,
|
||||||
options: vec![ColumnOptionDef {
|
options: vec![ColumnOptionDef {
|
||||||
name: None,
|
name: None,
|
||||||
|
@ -2469,7 +2475,10 @@ fn parse_create_or_replace_external_table() {
|
||||||
columns,
|
columns,
|
||||||
vec![ColumnDef {
|
vec![ColumnDef {
|
||||||
name: "name".into(),
|
name: "name".into(),
|
||||||
data_type: DataType::Varchar(Some(100)),
|
data_type: DataType::Varchar(Some(CharacterLength {
|
||||||
|
length: 100,
|
||||||
|
unit: None
|
||||||
|
})),
|
||||||
collation: None,
|
collation: None,
|
||||||
options: vec![ColumnOptionDef {
|
options: vec![ColumnOptionDef {
|
||||||
name: None,
|
name: None,
|
||||||
|
|
|
@ -74,7 +74,10 @@ fn parse_create_table_with_defaults() {
|
||||||
},
|
},
|
||||||
ColumnDef {
|
ColumnDef {
|
||||||
name: "first_name".into(),
|
name: "first_name".into(),
|
||||||
data_type: DataType::CharacterVarying(Some(45)),
|
data_type: DataType::CharacterVarying(Some(CharacterLength {
|
||||||
|
length: 45,
|
||||||
|
unit: None
|
||||||
|
})),
|
||||||
collation: None,
|
collation: None,
|
||||||
options: vec![ColumnOptionDef {
|
options: vec![ColumnOptionDef {
|
||||||
name: None,
|
name: None,
|
||||||
|
@ -83,7 +86,10 @@ fn parse_create_table_with_defaults() {
|
||||||
},
|
},
|
||||||
ColumnDef {
|
ColumnDef {
|
||||||
name: "last_name".into(),
|
name: "last_name".into(),
|
||||||
data_type: DataType::CharacterVarying(Some(45)),
|
data_type: DataType::CharacterVarying(Some(CharacterLength {
|
||||||
|
length: 45,
|
||||||
|
unit: None
|
||||||
|
})),
|
||||||
collation: Some(ObjectName(vec![Ident::with_quote('"', "es_ES")])),
|
collation: Some(ObjectName(vec![Ident::with_quote('"', "es_ES")])),
|
||||||
options: vec![ColumnOptionDef {
|
options: vec![ColumnOptionDef {
|
||||||
name: None,
|
name: None,
|
||||||
|
@ -92,7 +98,10 @@ fn parse_create_table_with_defaults() {
|
||||||
},
|
},
|
||||||
ColumnDef {
|
ColumnDef {
|
||||||
name: "email".into(),
|
name: "email".into(),
|
||||||
data_type: DataType::CharacterVarying(Some(50)),
|
data_type: DataType::CharacterVarying(Some(CharacterLength {
|
||||||
|
length: 50,
|
||||||
|
unit: None
|
||||||
|
})),
|
||||||
collation: None,
|
collation: None,
|
||||||
options: vec![],
|
options: vec![],
|
||||||
},
|
},
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue