Add support for char length units on small character string types. (#663)

This results in complete support for ANSI CHARACTER, CHAR, CHARACTER VARYING,
CHAR VARYING, and VARCHAR.
This commit is contained in:
AugustoFKL 2022-10-11 09:54:15 -03:00 committed by GitHub
parent 777672625f
commit cacdf3305f
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 244 additions and 34 deletions

View file

@ -26,15 +26,15 @@ use super::value::escape_single_quote_string;
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum DataType { pub enum DataType {
/// Fixed-length character type e.g. CHARACTER(10) /// Fixed-length character type e.g. CHARACTER(10)
Character(Option<u64>), Character(Option<CharacterLength>),
/// Fixed-length char type e.g. CHAR(10) /// Fixed-length char type e.g. CHAR(10)
Char(Option<u64>), Char(Option<CharacterLength>),
/// Character varying type e.g. CHARACTER VARYING(10) /// Character varying type e.g. CHARACTER VARYING(10)
CharacterVarying(Option<u64>), CharacterVarying(Option<CharacterLength>),
/// Char varying type e.g. CHAR VARYING(10) /// Char varying type e.g. CHAR VARYING(10)
CharVarying(Option<u64>), CharVarying(Option<CharacterLength>),
/// Variable-length character type e.g. VARCHAR(10) /// Variable-length character type e.g. VARCHAR(10)
Varchar(Option<u64>), Varchar(Option<CharacterLength>),
/// Variable-length character type e.g. NVARCHAR(10) /// Variable-length character type e.g. NVARCHAR(10)
Nvarchar(Option<u64>), Nvarchar(Option<u64>),
/// Uuid type /// Uuid type
@ -133,17 +133,14 @@ pub enum DataType {
impl fmt::Display for DataType { impl fmt::Display for DataType {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self { match self {
DataType::Character(size) => { DataType::Character(size) => format_character_string_type(f, "CHARACTER", size),
format_type_with_optional_length(f, "CHARACTER", size, false) DataType::Char(size) => format_character_string_type(f, "CHAR", size),
}
DataType::Char(size) => format_type_with_optional_length(f, "CHAR", size, false),
DataType::CharacterVarying(size) => { DataType::CharacterVarying(size) => {
format_type_with_optional_length(f, "CHARACTER VARYING", size, false) format_character_string_type(f, "CHARACTER VARYING", size)
} }
DataType::CharVarying(size) => {
format_type_with_optional_length(f, "CHAR VARYING", size, false) DataType::CharVarying(size) => format_character_string_type(f, "CHAR VARYING", size),
} DataType::Varchar(size) => format_character_string_type(f, "VARCHAR", size),
DataType::Varchar(size) => format_type_with_optional_length(f, "VARCHAR", size, false),
DataType::Nvarchar(size) => { DataType::Nvarchar(size) => {
format_type_with_optional_length(f, "NVARCHAR", size, false) format_type_with_optional_length(f, "NVARCHAR", size, false)
} }
@ -247,6 +244,18 @@ fn format_type_with_optional_length(
Ok(()) Ok(())
} }
/// Writes a character string type name followed by its optional length.
///
/// `sql_type` is emitted verbatim. When `size` is present, its `Display`
/// output is appended inside parentheses, e.g. `VARCHAR(20)`; when it is
/// absent only the type name is written.
fn format_character_string_type(
    f: &mut fmt::Formatter,
    sql_type: &str,
    size: &Option<CharacterLength>,
) -> fmt::Result {
    match size {
        Some(length) => write!(f, "{}({})", sql_type, length),
        None => write!(f, "{}", sql_type),
    }
}
/// Timestamp and Time data types information about TimeZone formatting. /// Timestamp and Time data types information about TimeZone formatting.
/// ///
/// This is more related to a display information than real differences between each variant. To /// This is more related to a display information than real differences between each variant. To
@ -324,3 +333,50 @@ impl fmt::Display for ExactNumberInfo {
} }
} }
} }
/// Information about [character length][1], including length and possibly unit.
///
/// Displayed as the length alone (e.g. `20`) or, when a unit is set, as the
/// length followed by a space and the unit (e.g. `20 OCTETS`).
///
/// [1]: https://jakewheat.github.io/sql-overview/sql-2016-foundation-grammar.html#character-length
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct CharacterLength {
    /// Declared length, e.g. the `20` in `CHARACTER(20)`: the fixed length for
    /// non-VARYING types, or the maximum length for VARYING types
    pub length: u64,
    /// Optional unit. When omitted, ANSI SQL implicitly treats the length as CHARACTERS
    pub unit: Option<CharLengthUnits>,
}

impl fmt::Display for CharacterLength {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match &self.unit {
            // The unit is separated from the length by a single space.
            Some(unit) => write!(f, "{} {}", self.length, unit),
            None => write!(f, "{}", self.length),
        }
    }
}
/// Units that may qualify a character length, initially based on the 2016 ANSI
/// [standard][1].
///
/// Each variant is displayed as its upper-case SQL keyword.
///
/// [1]: https://jakewheat.github.io/sql-overview/sql-2016-foundation-grammar.html#char-length-units
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum CharLengthUnits {
    /// The `CHARACTERS` unit
    Characters,
    /// The `OCTETS` unit
    Octets,
}

impl fmt::Display for CharLengthUnits {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Pick the keyword first so the write happens in exactly one place.
        let keyword = match self {
            Self::Characters => "CHARACTERS",
            Self::Octets => "OCTETS",
        };
        f.write_str(keyword)
    }
}

View file

@ -22,9 +22,9 @@ use core::fmt;
#[cfg(feature = "serde")] #[cfg(feature = "serde")]
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
pub use self::data_type::DataType; pub use self::data_type::{
pub use self::data_type::ExactNumberInfo; CharLengthUnits, CharacterLength, DataType, ExactNumberInfo, TimezoneInfo,
pub use self::data_type::TimezoneInfo; };
pub use self::ddl::{ pub use self::ddl::{
AlterColumnOperation, AlterTableOperation, ColumnDef, ColumnOption, ColumnOptionDef, AlterColumnOperation, AlterTableOperation, ColumnDef, ColumnOption, ColumnOptionDef,
ReferentialAction, TableConstraint, ReferentialAction, TableConstraint,

View file

@ -123,6 +123,7 @@ define_keywords!(
CHANGE, CHANGE,
CHAR, CHAR,
CHARACTER, CHARACTER,
CHARACTERS,
CHARACTER_LENGTH, CHARACTER_LENGTH,
CHARSET, CHARSET,
CHAR_LENGTH, CHAR_LENGTH,
@ -372,6 +373,7 @@ define_keywords!(
NVARCHAR, NVARCHAR,
OBJECT, OBJECT,
OCCURRENCES_REGEX, OCCURRENCES_REGEX,
OCTETS,
OCTET_LENGTH, OCTET_LENGTH,
OF, OF,
OFFSET, OFFSET,

View file

@ -3426,20 +3426,24 @@ impl<'a> Parser<'a> {
Ok(DataType::BigInt(optional_precision?)) Ok(DataType::BigInt(optional_precision?))
} }
} }
Keyword::VARCHAR => Ok(DataType::Varchar(self.parse_optional_precision()?)), Keyword::VARCHAR => Ok(DataType::Varchar(self.parse_optional_character_length()?)),
Keyword::NVARCHAR => Ok(DataType::Nvarchar(self.parse_optional_precision()?)), Keyword::NVARCHAR => Ok(DataType::Nvarchar(self.parse_optional_precision()?)),
Keyword::CHARACTER => { Keyword::CHARACTER => {
if self.parse_keyword(Keyword::VARYING) { if self.parse_keyword(Keyword::VARYING) {
Ok(DataType::CharacterVarying(self.parse_optional_precision()?)) Ok(DataType::CharacterVarying(
self.parse_optional_character_length()?,
))
} else { } else {
Ok(DataType::Character(self.parse_optional_precision()?)) Ok(DataType::Character(self.parse_optional_character_length()?))
} }
} }
Keyword::CHAR => { Keyword::CHAR => {
if self.parse_keyword(Keyword::VARYING) { if self.parse_keyword(Keyword::VARYING) {
Ok(DataType::CharVarying(self.parse_optional_precision()?)) Ok(DataType::CharVarying(
self.parse_optional_character_length()?,
))
} else { } else {
Ok(DataType::Char(self.parse_optional_precision()?)) Ok(DataType::Char(self.parse_optional_character_length()?))
} }
} }
Keyword::CLOB => Ok(DataType::Clob(self.parse_optional_precision()?)), Keyword::CLOB => Ok(DataType::Clob(self.parse_optional_precision()?)),
@ -3680,6 +3684,31 @@ impl<'a> Parser<'a> {
} }
} }
/// Parses an optional, parenthesized character length such as `(20)` or
/// `(20 OCTETS)`.
///
/// Returns `Ok(None)` when `consume_token` does not find an opening
/// parenthesis. Otherwise the length is read with
/// [`Self::parse_character_length`] and a closing parenthesis is required;
/// an error from either step is propagated.
pub fn parse_optional_character_length(
    &mut self,
) -> Result<Option<CharacterLength>, ParserError> {
    if !self.consume_token(&Token::LParen) {
        return Ok(None);
    }
    let length = self.parse_character_length()?;
    self.expect_token(&Token::RParen)?;
    Ok(Some(length))
}
/// Parses a character length: a length read with `parse_literal_uint`,
/// optionally followed by the `CHARACTERS` or `OCTETS` keyword.
///
/// The keywords are tried in that order and the first match decides the
/// unit; when neither matches, `unit` is `None`. An error from reading the
/// length is propagated.
pub fn parse_character_length(&mut self) -> Result<CharacterLength, ParserError> {
    let length = self.parse_literal_uint()?;
    // Return as soon as a keyword matches so the second keyword is never
    // attempted after the first one has been accepted.
    if self.parse_keyword(Keyword::CHARACTERS) {
        return Ok(CharacterLength {
            length,
            unit: Some(CharLengthUnits::Characters),
        });
    }
    if self.parse_keyword(Keyword::OCTETS) {
        return Ok(CharacterLength {
            length,
            unit: Some(CharLengthUnits::Octets),
        });
    }
    Ok(CharacterLength { length, unit: None })
}
pub fn parse_optional_precision_scale( pub fn parse_optional_precision_scale(
&mut self, &mut self,
) -> Result<(Option<u64>, Option<u64>), ParserError> { ) -> Result<(Option<u64>, Option<u64>), ParserError> {
@ -5337,7 +5366,9 @@ mod tests {
#[cfg(test)] #[cfg(test)]
mod test_parse_data_type { mod test_parse_data_type {
use crate::ast::{DataType, ExactNumberInfo, TimezoneInfo}; use crate::ast::{
CharLengthUnits, CharacterLength, DataType, ExactNumberInfo, TimezoneInfo,
};
use crate::dialect::{AnsiDialect, GenericDialect}; use crate::dialect::{AnsiDialect, GenericDialect};
use crate::test_utils::TestedDialects; use crate::test_utils::TestedDialects;
@ -5360,21 +5391,124 @@ mod tests {
test_parse_data_type!(dialect, "CHARACTER", DataType::Character(None)); test_parse_data_type!(dialect, "CHARACTER", DataType::Character(None));
test_parse_data_type!(dialect, "CHARACTER(20)", DataType::Character(Some(20))); test_parse_data_type!(
dialect,
"CHARACTER(20)",
DataType::Character(Some(CharacterLength {
length: 20,
unit: None
}))
);
test_parse_data_type!(
dialect,
"CHARACTER(20 CHARACTERS)",
DataType::Character(Some(CharacterLength {
length: 20,
unit: Some(CharLengthUnits::Characters)
}))
);
test_parse_data_type!(
dialect,
"CHARACTER(20 OCTETS)",
DataType::Character(Some(CharacterLength {
length: 20,
unit: Some(CharLengthUnits::Octets)
}))
);
test_parse_data_type!(dialect, "CHAR", DataType::Char(None)); test_parse_data_type!(dialect, "CHAR", DataType::Char(None));
test_parse_data_type!(dialect, "CHAR(20)", DataType::Char(Some(20))); test_parse_data_type!(
dialect,
"CHAR(20)",
DataType::Char(Some(CharacterLength {
length: 20,
unit: None
}))
);
test_parse_data_type!(
dialect,
"CHAR(20 CHARACTERS)",
DataType::Char(Some(CharacterLength {
length: 20,
unit: Some(CharLengthUnits::Characters)
}))
);
test_parse_data_type!(
dialect,
"CHAR(20 OCTETS)",
DataType::Char(Some(CharacterLength {
length: 20,
unit: Some(CharLengthUnits::Octets)
}))
);
test_parse_data_type!( test_parse_data_type!(
dialect, dialect,
"CHARACTER VARYING(20)", "CHARACTER VARYING(20)",
DataType::CharacterVarying(Some(20)) DataType::CharacterVarying(Some(CharacterLength {
length: 20,
unit: None
}))
); );
test_parse_data_type!(dialect, "CHAR VARYING(20)", DataType::CharVarying(Some(20))); test_parse_data_type!(
dialect,
"CHARACTER VARYING(20 CHARACTERS)",
DataType::CharacterVarying(Some(CharacterLength {
length: 20,
unit: Some(CharLengthUnits::Characters)
}))
);
test_parse_data_type!(dialect, "VARCHAR(20)", DataType::Varchar(Some(20))); test_parse_data_type!(
dialect,
"CHARACTER VARYING(20 OCTETS)",
DataType::CharacterVarying(Some(CharacterLength {
length: 20,
unit: Some(CharLengthUnits::Octets)
}))
);
test_parse_data_type!(
dialect,
"CHAR VARYING(20)",
DataType::CharVarying(Some(CharacterLength {
length: 20,
unit: None
}))
);
test_parse_data_type!(
dialect,
"CHAR VARYING(20 CHARACTERS)",
DataType::CharVarying(Some(CharacterLength {
length: 20,
unit: Some(CharLengthUnits::Characters)
}))
);
test_parse_data_type!(
dialect,
"CHAR VARYING(20 OCTETS)",
DataType::CharVarying(Some(CharacterLength {
length: 20,
unit: Some(CharLengthUnits::Octets)
}))
);
test_parse_data_type!(
dialect,
"VARCHAR(20)",
DataType::Varchar(Some(CharacterLength {
length: 20,
unit: None
}))
);
} }
#[test] #[test]

View file

@ -1945,7 +1945,10 @@ fn parse_create_table() {
vec![ vec![
ColumnDef { ColumnDef {
name: "name".into(), name: "name".into(),
data_type: DataType::Varchar(Some(100)), data_type: DataType::Varchar(Some(CharacterLength {
length: 100,
unit: None
})),
collation: None, collation: None,
options: vec![ColumnOptionDef { options: vec![ColumnOptionDef {
name: None, name: None,
@ -2401,7 +2404,10 @@ fn parse_create_external_table() {
vec![ vec![
ColumnDef { ColumnDef {
name: "name".into(), name: "name".into(),
data_type: DataType::Varchar(Some(100)), data_type: DataType::Varchar(Some(CharacterLength {
length: 100,
unit: None
})),
collation: None, collation: None,
options: vec![ColumnOptionDef { options: vec![ColumnOptionDef {
name: None, name: None,
@ -2469,7 +2475,10 @@ fn parse_create_or_replace_external_table() {
columns, columns,
vec![ColumnDef { vec![ColumnDef {
name: "name".into(), name: "name".into(),
data_type: DataType::Varchar(Some(100)), data_type: DataType::Varchar(Some(CharacterLength {
length: 100,
unit: None
})),
collation: None, collation: None,
options: vec![ColumnOptionDef { options: vec![ColumnOptionDef {
name: None, name: None,

View file

@ -74,7 +74,10 @@ fn parse_create_table_with_defaults() {
}, },
ColumnDef { ColumnDef {
name: "first_name".into(), name: "first_name".into(),
data_type: DataType::CharacterVarying(Some(45)), data_type: DataType::CharacterVarying(Some(CharacterLength {
length: 45,
unit: None
})),
collation: None, collation: None,
options: vec![ColumnOptionDef { options: vec![ColumnOptionDef {
name: None, name: None,
@ -83,7 +86,10 @@ fn parse_create_table_with_defaults() {
}, },
ColumnDef { ColumnDef {
name: "last_name".into(), name: "last_name".into(),
data_type: DataType::CharacterVarying(Some(45)), data_type: DataType::CharacterVarying(Some(CharacterLength {
length: 45,
unit: None
})),
collation: Some(ObjectName(vec![Ident::with_quote('"', "es_ES")])), collation: Some(ObjectName(vec![Ident::with_quote('"', "es_ES")])),
options: vec![ColumnOptionDef { options: vec![ColumnOptionDef {
name: None, name: None,
@ -92,7 +98,10 @@ fn parse_create_table_with_defaults() {
}, },
ColumnDef { ColumnDef {
name: "email".into(), name: "email".into(),
data_type: DataType::CharacterVarying(Some(50)), data_type: DataType::CharacterVarying(Some(CharacterLength {
length: 50,
unit: None
})),
collation: None, collation: None,
options: vec![], options: vec![],
}, },