mirror of
https://github.com/apache/datafusion-sqlparser-rs.git
synced 2025-08-04 06:18:17 +00:00
Add support for char length units (CHARACTERS / OCTETS) on small character string types. (#663)
This results in complete support for ANSI CHARACTER, CHAR, CHARACTER VARYING, CHAR VARYING, and VARCHAR.
This commit is contained in:
parent
777672625f
commit
cacdf3305f
6 changed files with 244 additions and 34 deletions
|
@ -26,15 +26,15 @@ use super::value::escape_single_quote_string;
|
|||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
pub enum DataType {
|
||||
/// Fixed-length character type e.g. CHARACTER(10)
|
||||
Character(Option<u64>),
|
||||
Character(Option<CharacterLength>),
|
||||
/// Fixed-length char type e.g. CHAR(10)
|
||||
Char(Option<u64>),
|
||||
Char(Option<CharacterLength>),
|
||||
/// Character varying type e.g. CHARACTER VARYING(10)
|
||||
CharacterVarying(Option<u64>),
|
||||
CharacterVarying(Option<CharacterLength>),
|
||||
/// Char varying type e.g. CHAR VARYING(10)
|
||||
CharVarying(Option<u64>),
|
||||
CharVarying(Option<CharacterLength>),
|
||||
/// Variable-length character type e.g. VARCHAR(10)
|
||||
Varchar(Option<u64>),
|
||||
Varchar(Option<CharacterLength>),
|
||||
/// Variable-length character type e.g. NVARCHAR(10)
|
||||
Nvarchar(Option<u64>),
|
||||
/// Uuid type
|
||||
|
@ -133,17 +133,14 @@ pub enum DataType {
|
|||
impl fmt::Display for DataType {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
match self {
|
||||
DataType::Character(size) => {
|
||||
format_type_with_optional_length(f, "CHARACTER", size, false)
|
||||
}
|
||||
DataType::Char(size) => format_type_with_optional_length(f, "CHAR", size, false),
|
||||
DataType::Character(size) => format_character_string_type(f, "CHARACTER", size),
|
||||
DataType::Char(size) => format_character_string_type(f, "CHAR", size),
|
||||
DataType::CharacterVarying(size) => {
|
||||
format_type_with_optional_length(f, "CHARACTER VARYING", size, false)
|
||||
format_character_string_type(f, "CHARACTER VARYING", size)
|
||||
}
|
||||
DataType::CharVarying(size) => {
|
||||
format_type_with_optional_length(f, "CHAR VARYING", size, false)
|
||||
}
|
||||
DataType::Varchar(size) => format_type_with_optional_length(f, "VARCHAR", size, false),
|
||||
|
||||
DataType::CharVarying(size) => format_character_string_type(f, "CHAR VARYING", size),
|
||||
DataType::Varchar(size) => format_character_string_type(f, "VARCHAR", size),
|
||||
DataType::Nvarchar(size) => {
|
||||
format_type_with_optional_length(f, "NVARCHAR", size, false)
|
||||
}
|
||||
|
@ -247,6 +244,18 @@ fn format_type_with_optional_length(
|
|||
Ok(())
|
||||
}
|
||||
|
||||
fn format_character_string_type(
|
||||
f: &mut fmt::Formatter,
|
||||
sql_type: &str,
|
||||
size: &Option<CharacterLength>,
|
||||
) -> fmt::Result {
|
||||
write!(f, "{}", sql_type)?;
|
||||
if let Some(size) = size {
|
||||
write!(f, "({})", size)?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Timestamp and Time data types information about TimeZone formatting.
|
||||
///
|
||||
/// This is more related to a display information than real differences between each variant. To
|
||||
|
@ -324,3 +333,50 @@ impl fmt::Display for ExactNumberInfo {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Information about [character length][1], including length and possibly unit.
|
||||
///
|
||||
/// [1]: https://jakewheat.github.io/sql-overview/sql-2016-foundation-grammar.html#character-length
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
pub struct CharacterLength {
|
||||
/// Default (if VARYING) or maximum (if not VARYING) length
|
||||
pub length: u64,
|
||||
/// Optional unit. If not informed, the ANSI handles it as CHARACTERS implicitly
|
||||
pub unit: Option<CharLengthUnits>,
|
||||
}
|
||||
|
||||
impl fmt::Display for CharacterLength {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
write!(f, "{}", self.length)?;
|
||||
if let Some(unit) = &self.unit {
|
||||
write!(f, " {}", unit)?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
/// Possible units for characters, initially based on 2016 ANSI [standard][1].
///
/// Each variant is displayed as its SQL keyword.
///
/// [1]: https://jakewheat.github.io/sql-overview/sql-2016-foundation-grammar.html#char-length-units
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum CharLengthUnits {
    /// CHARACTERS unit
    Characters,
    /// OCTETS unit
    Octets,
}

impl fmt::Display for CharLengthUnits {
    /// Writes the unit as its SQL keyword (`CHARACTERS` or `OCTETS`).
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // The keywords are plain literals, so write them directly instead of
        // going through the formatting machinery.
        f.write_str(match self {
            Self::Characters => "CHARACTERS",
            Self::Octets => "OCTETS",
        })
    }
}
|
||||
|
|
|
@ -22,9 +22,9 @@ use core::fmt;
|
|||
#[cfg(feature = "serde")]
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
pub use self::data_type::DataType;
|
||||
pub use self::data_type::ExactNumberInfo;
|
||||
pub use self::data_type::TimezoneInfo;
|
||||
pub use self::data_type::{
|
||||
CharLengthUnits, CharacterLength, DataType, ExactNumberInfo, TimezoneInfo,
|
||||
};
|
||||
pub use self::ddl::{
|
||||
AlterColumnOperation, AlterTableOperation, ColumnDef, ColumnOption, ColumnOptionDef,
|
||||
ReferentialAction, TableConstraint,
|
||||
|
|
|
@ -123,6 +123,7 @@ define_keywords!(
|
|||
CHANGE,
|
||||
CHAR,
|
||||
CHARACTER,
|
||||
CHARACTERS,
|
||||
CHARACTER_LENGTH,
|
||||
CHARSET,
|
||||
CHAR_LENGTH,
|
||||
|
@ -372,6 +373,7 @@ define_keywords!(
|
|||
NVARCHAR,
|
||||
OBJECT,
|
||||
OCCURRENCES_REGEX,
|
||||
OCTETS,
|
||||
OCTET_LENGTH,
|
||||
OF,
|
||||
OFFSET,
|
||||
|
|
156
src/parser.rs
156
src/parser.rs
|
@ -3426,20 +3426,24 @@ impl<'a> Parser<'a> {
|
|||
Ok(DataType::BigInt(optional_precision?))
|
||||
}
|
||||
}
|
||||
Keyword::VARCHAR => Ok(DataType::Varchar(self.parse_optional_precision()?)),
|
||||
Keyword::VARCHAR => Ok(DataType::Varchar(self.parse_optional_character_length()?)),
|
||||
Keyword::NVARCHAR => Ok(DataType::Nvarchar(self.parse_optional_precision()?)),
|
||||
Keyword::CHARACTER => {
|
||||
if self.parse_keyword(Keyword::VARYING) {
|
||||
Ok(DataType::CharacterVarying(self.parse_optional_precision()?))
|
||||
Ok(DataType::CharacterVarying(
|
||||
self.parse_optional_character_length()?,
|
||||
))
|
||||
} else {
|
||||
Ok(DataType::Character(self.parse_optional_precision()?))
|
||||
Ok(DataType::Character(self.parse_optional_character_length()?))
|
||||
}
|
||||
}
|
||||
Keyword::CHAR => {
|
||||
if self.parse_keyword(Keyword::VARYING) {
|
||||
Ok(DataType::CharVarying(self.parse_optional_precision()?))
|
||||
Ok(DataType::CharVarying(
|
||||
self.parse_optional_character_length()?,
|
||||
))
|
||||
} else {
|
||||
Ok(DataType::Char(self.parse_optional_precision()?))
|
||||
Ok(DataType::Char(self.parse_optional_character_length()?))
|
||||
}
|
||||
}
|
||||
Keyword::CLOB => Ok(DataType::Clob(self.parse_optional_precision()?)),
|
||||
|
@ -3680,6 +3684,31 @@ impl<'a> Parser<'a> {
|
|||
}
|
||||
}
|
||||
|
||||
pub fn parse_optional_character_length(
|
||||
&mut self,
|
||||
) -> Result<Option<CharacterLength>, ParserError> {
|
||||
if self.consume_token(&Token::LParen) {
|
||||
let character_length = self.parse_character_length()?;
|
||||
self.expect_token(&Token::RParen)?;
|
||||
Ok(Some(character_length))
|
||||
} else {
|
||||
Ok(None)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn parse_character_length(&mut self) -> Result<CharacterLength, ParserError> {
|
||||
let length = self.parse_literal_uint()?;
|
||||
let unit = if self.parse_keyword(Keyword::CHARACTERS) {
|
||||
Some(CharLengthUnits::Characters)
|
||||
} else if self.parse_keyword(Keyword::OCTETS) {
|
||||
Some(CharLengthUnits::Octets)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
Ok(CharacterLength { length, unit })
|
||||
}
|
||||
|
||||
pub fn parse_optional_precision_scale(
|
||||
&mut self,
|
||||
) -> Result<(Option<u64>, Option<u64>), ParserError> {
|
||||
|
@ -5337,7 +5366,9 @@ mod tests {
|
|||
|
||||
#[cfg(test)]
|
||||
mod test_parse_data_type {
|
||||
use crate::ast::{DataType, ExactNumberInfo, TimezoneInfo};
|
||||
use crate::ast::{
|
||||
CharLengthUnits, CharacterLength, DataType, ExactNumberInfo, TimezoneInfo,
|
||||
};
|
||||
use crate::dialect::{AnsiDialect, GenericDialect};
|
||||
use crate::test_utils::TestedDialects;
|
||||
|
||||
|
@ -5360,21 +5391,124 @@ mod tests {
|
|||
|
||||
test_parse_data_type!(dialect, "CHARACTER", DataType::Character(None));
|
||||
|
||||
test_parse_data_type!(dialect, "CHARACTER(20)", DataType::Character(Some(20)));
|
||||
test_parse_data_type!(
|
||||
dialect,
|
||||
"CHARACTER(20)",
|
||||
DataType::Character(Some(CharacterLength {
|
||||
length: 20,
|
||||
unit: None
|
||||
}))
|
||||
);
|
||||
|
||||
test_parse_data_type!(
|
||||
dialect,
|
||||
"CHARACTER(20 CHARACTERS)",
|
||||
DataType::Character(Some(CharacterLength {
|
||||
length: 20,
|
||||
unit: Some(CharLengthUnits::Characters)
|
||||
}))
|
||||
);
|
||||
|
||||
test_parse_data_type!(
|
||||
dialect,
|
||||
"CHARACTER(20 OCTETS)",
|
||||
DataType::Character(Some(CharacterLength {
|
||||
length: 20,
|
||||
unit: Some(CharLengthUnits::Octets)
|
||||
}))
|
||||
);
|
||||
|
||||
test_parse_data_type!(dialect, "CHAR", DataType::Char(None));
|
||||
|
||||
test_parse_data_type!(dialect, "CHAR(20)", DataType::Char(Some(20)));
|
||||
test_parse_data_type!(
|
||||
dialect,
|
||||
"CHAR(20)",
|
||||
DataType::Char(Some(CharacterLength {
|
||||
length: 20,
|
||||
unit: None
|
||||
}))
|
||||
);
|
||||
|
||||
test_parse_data_type!(
|
||||
dialect,
|
||||
"CHAR(20 CHARACTERS)",
|
||||
DataType::Char(Some(CharacterLength {
|
||||
length: 20,
|
||||
unit: Some(CharLengthUnits::Characters)
|
||||
}))
|
||||
);
|
||||
|
||||
test_parse_data_type!(
|
||||
dialect,
|
||||
"CHAR(20 OCTETS)",
|
||||
DataType::Char(Some(CharacterLength {
|
||||
length: 20,
|
||||
unit: Some(CharLengthUnits::Octets)
|
||||
}))
|
||||
);
|
||||
|
||||
test_parse_data_type!(
|
||||
dialect,
|
||||
"CHARACTER VARYING(20)",
|
||||
DataType::CharacterVarying(Some(20))
|
||||
DataType::CharacterVarying(Some(CharacterLength {
|
||||
length: 20,
|
||||
unit: None
|
||||
}))
|
||||
);
|
||||
|
||||
test_parse_data_type!(dialect, "CHAR VARYING(20)", DataType::CharVarying(Some(20)));
|
||||
test_parse_data_type!(
|
||||
dialect,
|
||||
"CHARACTER VARYING(20 CHARACTERS)",
|
||||
DataType::CharacterVarying(Some(CharacterLength {
|
||||
length: 20,
|
||||
unit: Some(CharLengthUnits::Characters)
|
||||
}))
|
||||
);
|
||||
|
||||
test_parse_data_type!(dialect, "VARCHAR(20)", DataType::Varchar(Some(20)));
|
||||
test_parse_data_type!(
|
||||
dialect,
|
||||
"CHARACTER VARYING(20 OCTETS)",
|
||||
DataType::CharacterVarying(Some(CharacterLength {
|
||||
length: 20,
|
||||
unit: Some(CharLengthUnits::Octets)
|
||||
}))
|
||||
);
|
||||
|
||||
test_parse_data_type!(
|
||||
dialect,
|
||||
"CHAR VARYING(20)",
|
||||
DataType::CharVarying(Some(CharacterLength {
|
||||
length: 20,
|
||||
unit: None
|
||||
}))
|
||||
);
|
||||
|
||||
test_parse_data_type!(
|
||||
dialect,
|
||||
"CHAR VARYING(20 CHARACTERS)",
|
||||
DataType::CharVarying(Some(CharacterLength {
|
||||
length: 20,
|
||||
unit: Some(CharLengthUnits::Characters)
|
||||
}))
|
||||
);
|
||||
|
||||
test_parse_data_type!(
|
||||
dialect,
|
||||
"CHAR VARYING(20 OCTETS)",
|
||||
DataType::CharVarying(Some(CharacterLength {
|
||||
length: 20,
|
||||
unit: Some(CharLengthUnits::Octets)
|
||||
}))
|
||||
);
|
||||
|
||||
test_parse_data_type!(
|
||||
dialect,
|
||||
"VARCHAR(20)",
|
||||
DataType::Varchar(Some(CharacterLength {
|
||||
length: 20,
|
||||
unit: None
|
||||
}))
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
|
@ -1945,7 +1945,10 @@ fn parse_create_table() {
|
|||
vec![
|
||||
ColumnDef {
|
||||
name: "name".into(),
|
||||
data_type: DataType::Varchar(Some(100)),
|
||||
data_type: DataType::Varchar(Some(CharacterLength {
|
||||
length: 100,
|
||||
unit: None
|
||||
})),
|
||||
collation: None,
|
||||
options: vec![ColumnOptionDef {
|
||||
name: None,
|
||||
|
@ -2401,7 +2404,10 @@ fn parse_create_external_table() {
|
|||
vec![
|
||||
ColumnDef {
|
||||
name: "name".into(),
|
||||
data_type: DataType::Varchar(Some(100)),
|
||||
data_type: DataType::Varchar(Some(CharacterLength {
|
||||
length: 100,
|
||||
unit: None
|
||||
})),
|
||||
collation: None,
|
||||
options: vec![ColumnOptionDef {
|
||||
name: None,
|
||||
|
@ -2469,7 +2475,10 @@ fn parse_create_or_replace_external_table() {
|
|||
columns,
|
||||
vec![ColumnDef {
|
||||
name: "name".into(),
|
||||
data_type: DataType::Varchar(Some(100)),
|
||||
data_type: DataType::Varchar(Some(CharacterLength {
|
||||
length: 100,
|
||||
unit: None
|
||||
})),
|
||||
collation: None,
|
||||
options: vec![ColumnOptionDef {
|
||||
name: None,
|
||||
|
|
|
@ -74,7 +74,10 @@ fn parse_create_table_with_defaults() {
|
|||
},
|
||||
ColumnDef {
|
||||
name: "first_name".into(),
|
||||
data_type: DataType::CharacterVarying(Some(45)),
|
||||
data_type: DataType::CharacterVarying(Some(CharacterLength {
|
||||
length: 45,
|
||||
unit: None
|
||||
})),
|
||||
collation: None,
|
||||
options: vec![ColumnOptionDef {
|
||||
name: None,
|
||||
|
@ -83,7 +86,10 @@ fn parse_create_table_with_defaults() {
|
|||
},
|
||||
ColumnDef {
|
||||
name: "last_name".into(),
|
||||
data_type: DataType::CharacterVarying(Some(45)),
|
||||
data_type: DataType::CharacterVarying(Some(CharacterLength {
|
||||
length: 45,
|
||||
unit: None
|
||||
})),
|
||||
collation: Some(ObjectName(vec![Ident::with_quote('"', "es_ES")])),
|
||||
options: vec![ColumnOptionDef {
|
||||
name: None,
|
||||
|
@ -92,7 +98,10 @@ fn parse_create_table_with_defaults() {
|
|||
},
|
||||
ColumnDef {
|
||||
name: "email".into(),
|
||||
data_type: DataType::CharacterVarying(Some(50)),
|
||||
data_type: DataType::CharacterVarying(Some(CharacterLength {
|
||||
length: 50,
|
||||
unit: None
|
||||
})),
|
||||
collation: None,
|
||||
options: vec![],
|
||||
},
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue