mirror of
https://github.com/apache/datafusion-sqlparser-rs.git
synced 2025-08-31 11:17:23 +00:00
647 Adding all ansii character string types, parsing them, and differentiating between each one (#648)
This commit is contained in:
parent
977cdb2270
commit
f7f14df4b1
4 changed files with 106 additions and 78 deletions
|
@ -25,8 +25,14 @@ use super::value::escape_single_quote_string;
|
|||
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
pub enum DataType {
|
||||
/// Fixed-length character type e.g. CHAR(10)
|
||||
/// Fixed-length character type e.g. CHARACTER(10)
|
||||
Character(Option<u64>),
|
||||
/// Fixed-length char type e.g. CHAR(10)
|
||||
Char(Option<u64>),
|
||||
/// Character varying type e.g. CHARACTER VARYING(10)
|
||||
CharacterVarying(Option<u64>),
|
||||
/// Char varying type e.g. CHAR VARYING(10)
|
||||
CharVarying(Option<u64>),
|
||||
/// Variable-length character type e.g. VARCHAR(10)
|
||||
Varchar(Option<u64>),
|
||||
/// Variable-length character type e.g. NVARCHAR(10)
|
||||
|
@ -127,10 +133,17 @@ pub enum DataType {
|
|||
impl fmt::Display for DataType {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
match self {
|
||||
DataType::Character(size) => {
|
||||
format_type_with_optional_length(f, "CHARACTER", size, false)
|
||||
}
|
||||
DataType::Char(size) => format_type_with_optional_length(f, "CHAR", size, false),
|
||||
DataType::Varchar(size) => {
|
||||
DataType::CharacterVarying(size) => {
|
||||
format_type_with_optional_length(f, "CHARACTER VARYING", size, false)
|
||||
}
|
||||
DataType::CharVarying(size) => {
|
||||
format_type_with_optional_length(f, "CHAR VARYING", size, false)
|
||||
}
|
||||
DataType::Varchar(size) => format_type_with_optional_length(f, "VARCHAR", size, false),
|
||||
DataType::Nvarchar(size) => {
|
||||
format_type_with_optional_length(f, "NVARCHAR", size, false)
|
||||
}
|
||||
|
|
161
src/parser.rs
161
src/parser.rs
|
@ -3414,9 +3414,16 @@ impl<'a> Parser<'a> {
|
|||
}
|
||||
Keyword::VARCHAR => Ok(DataType::Varchar(self.parse_optional_precision()?)),
|
||||
Keyword::NVARCHAR => Ok(DataType::Nvarchar(self.parse_optional_precision()?)),
|
||||
Keyword::CHAR | Keyword::CHARACTER => {
|
||||
Keyword::CHARACTER => {
|
||||
if self.parse_keyword(Keyword::VARYING) {
|
||||
Ok(DataType::Varchar(self.parse_optional_precision()?))
|
||||
Ok(DataType::CharacterVarying(self.parse_optional_precision()?))
|
||||
} else {
|
||||
Ok(DataType::Character(self.parse_optional_precision()?))
|
||||
}
|
||||
}
|
||||
Keyword::CHAR => {
|
||||
if self.parse_keyword(Keyword::VARYING) {
|
||||
Ok(DataType::CharVarying(self.parse_optional_precision()?))
|
||||
} else {
|
||||
Ok(DataType::Char(self.parse_optional_precision()?))
|
||||
}
|
||||
|
@ -5288,80 +5295,88 @@ mod tests {
|
|||
});
|
||||
}
|
||||
|
||||
// TODO add tests for all data types? https://github.com/sqlparser-rs/sqlparser-rs/issues/2
|
||||
// TODO when we have dialect validation by data type parsing, split test
|
||||
#[test]
|
||||
fn test_parse_data_type() {
|
||||
// BINARY data type
|
||||
test_parse_data_type("BINARY", DataType::Binary(None), "BINARY");
|
||||
test_parse_data_type("BINARY(20)", DataType::Binary(Some(20)), "BINARY(20)");
|
||||
#[cfg(test)]
|
||||
mod test_parse_data_type {
|
||||
use crate::ast::{DataType, TimezoneInfo};
|
||||
use crate::dialect::{AnsiDialect, GenericDialect};
|
||||
use crate::test_utils::TestedDialects;
|
||||
|
||||
// BLOB data type
|
||||
test_parse_data_type("BLOB", DataType::Blob(None), "BLOB");
|
||||
test_parse_data_type("BLOB(50)", DataType::Blob(Some(50)), "BLOB(50)");
|
||||
|
||||
// CLOB data type
|
||||
test_parse_data_type("CLOB", DataType::Clob(None), "CLOB");
|
||||
test_parse_data_type("CLOB(50)", DataType::Clob(Some(50)), "CLOB(50)");
|
||||
|
||||
// Double data type
|
||||
test_parse_data_type(
|
||||
"DOUBLE PRECISION",
|
||||
DataType::DoublePrecision,
|
||||
"DOUBLE PRECISION",
|
||||
);
|
||||
test_parse_data_type("DOUBLE", DataType::Double, "DOUBLE");
|
||||
|
||||
// Time data type
|
||||
test_parse_data_type("TIME", DataType::Time(TimezoneInfo::None), "TIME");
|
||||
test_parse_data_type(
|
||||
"TIME WITH TIME ZONE",
|
||||
DataType::Time(TimezoneInfo::WithTimeZone),
|
||||
"TIME WITH TIME ZONE",
|
||||
);
|
||||
test_parse_data_type(
|
||||
"TIME WITHOUT TIME ZONE",
|
||||
DataType::Time(TimezoneInfo::WithoutTimeZone),
|
||||
"TIME WITHOUT TIME ZONE",
|
||||
);
|
||||
test_parse_data_type("TIMETZ", DataType::Time(TimezoneInfo::Tz), "TIMETZ");
|
||||
|
||||
// Timestamp data type
|
||||
test_parse_data_type(
|
||||
"TIMESTAMP",
|
||||
DataType::Timestamp(TimezoneInfo::None),
|
||||
"TIMESTAMP",
|
||||
);
|
||||
test_parse_data_type(
|
||||
"TIMESTAMP WITH TIME ZONE",
|
||||
DataType::Timestamp(TimezoneInfo::WithTimeZone),
|
||||
"TIMESTAMP WITH TIME ZONE",
|
||||
);
|
||||
test_parse_data_type(
|
||||
"TIMESTAMP WITHOUT TIME ZONE",
|
||||
DataType::Timestamp(TimezoneInfo::WithoutTimeZone),
|
||||
"TIMESTAMP WITHOUT TIME ZONE",
|
||||
);
|
||||
test_parse_data_type(
|
||||
"TIMESTAMPTZ",
|
||||
DataType::Timestamp(TimezoneInfo::Tz),
|
||||
"TIMESTAMPTZ",
|
||||
);
|
||||
|
||||
// VARBINARY data type
|
||||
test_parse_data_type("VARBINARY", DataType::Varbinary(None), "VARBINARY");
|
||||
test_parse_data_type(
|
||||
"VARBINARY(20)",
|
||||
DataType::Varbinary(Some(20)),
|
||||
"VARBINARY(20)",
|
||||
);
|
||||
|
||||
fn test_parse_data_type(input: &str, expected_type: DataType, expected_str: &str) {
|
||||
all_dialects().run_parser_method(input, |parser| {
|
||||
macro_rules! test_parse_data_type {
|
||||
($dialect:expr, $input:expr, $expected_type:expr $(,)?) => {{
|
||||
$dialect.run_parser_method(&*$input, |parser| {
|
||||
let data_type = parser.parse_data_type().unwrap();
|
||||
assert_eq!(data_type, expected_type);
|
||||
assert_eq!(expected_type.to_string(), expected_str.to_string());
|
||||
assert_eq!(data_type, $expected_type);
|
||||
assert_eq!(data_type.to_string(), $input.to_string());
|
||||
});
|
||||
}};
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_ansii_character_string_types() {
|
||||
// Character string types: <https://jakewheat.github.io/sql-overview/sql-2016-foundation-grammar.html#character-string-type>
|
||||
let dialect = TestedDialects {
|
||||
dialects: vec![Box::new(GenericDialect {}), Box::new(AnsiDialect {})],
|
||||
};
|
||||
|
||||
test_parse_data_type!(dialect, "CHARACTER", DataType::Character(None));
|
||||
|
||||
test_parse_data_type!(dialect, "CHARACTER(20)", DataType::Character(Some(20)));
|
||||
|
||||
test_parse_data_type!(dialect, "CHAR", DataType::Char(None));
|
||||
|
||||
test_parse_data_type!(dialect, "CHAR(20)", DataType::Char(Some(20)));
|
||||
|
||||
test_parse_data_type!(
|
||||
dialect,
|
||||
"CHARACTER VARYING(20)",
|
||||
DataType::CharacterVarying(Some(20))
|
||||
);
|
||||
|
||||
test_parse_data_type!(dialect, "CHAR VARYING(20)", DataType::CharVarying(Some(20)));
|
||||
|
||||
test_parse_data_type!(dialect, "VARCHAR(20)", DataType::Varchar(Some(20)));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_ansii_datetime_types() {
|
||||
// Datetime types: <https://jakewheat.github.io/sql-overview/sql-2016-foundation-grammar.html#datetime-type>
|
||||
let dialect = TestedDialects {
|
||||
dialects: vec![Box::new(GenericDialect {}), Box::new(AnsiDialect {})],
|
||||
};
|
||||
|
||||
test_parse_data_type!(dialect, "DATE", DataType::Date);
|
||||
|
||||
test_parse_data_type!(dialect, "TIME", DataType::Time(TimezoneInfo::None));
|
||||
|
||||
test_parse_data_type!(
|
||||
dialect,
|
||||
"TIME WITH TIME ZONE",
|
||||
DataType::Time(TimezoneInfo::WithTimeZone)
|
||||
);
|
||||
|
||||
test_parse_data_type!(
|
||||
dialect,
|
||||
"TIME WITHOUT TIME ZONE",
|
||||
DataType::Time(TimezoneInfo::WithoutTimeZone)
|
||||
);
|
||||
|
||||
test_parse_data_type!(
|
||||
dialect,
|
||||
"TIMESTAMP",
|
||||
DataType::Timestamp(TimezoneInfo::None)
|
||||
);
|
||||
|
||||
test_parse_data_type!(
|
||||
dialect,
|
||||
"TIMESTAMP WITH TIME ZONE",
|
||||
DataType::Timestamp(TimezoneInfo::WithTimeZone)
|
||||
);
|
||||
|
||||
test_parse_data_type!(
|
||||
dialect,
|
||||
"TIMESTAMP WITHOUT TIME ZONE",
|
||||
DataType::Timestamp(TimezoneInfo::WithoutTimeZone)
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -1850,7 +1850,7 @@ fn parse_create_table() {
|
|||
let ast = one_statement_parses_to(
|
||||
sql,
|
||||
"CREATE TABLE uk_cities (\
|
||||
name CHARACTER VARYING(100) NOT NULL, \
|
||||
name VARCHAR(100) NOT NULL, \
|
||||
lat DOUBLE NULL, \
|
||||
lng DOUBLE, \
|
||||
constrained INT NULL CONSTRAINT pkey PRIMARY KEY NOT NULL UNIQUE CHECK (constrained > 0), \
|
||||
|
@ -2312,7 +2312,7 @@ fn parse_create_external_table() {
|
|||
let ast = one_statement_parses_to(
|
||||
sql,
|
||||
"CREATE EXTERNAL TABLE uk_cities (\
|
||||
name CHARACTER VARYING(100) NOT NULL, \
|
||||
name VARCHAR(100) NOT NULL, \
|
||||
lat DOUBLE NULL, \
|
||||
lng DOUBLE) \
|
||||
STORED AS TEXTFILE LOCATION '/tmp/example.csv'",
|
||||
|
@ -2382,7 +2382,7 @@ fn parse_create_or_replace_external_table() {
|
|||
let ast = one_statement_parses_to(
|
||||
sql,
|
||||
"CREATE OR REPLACE EXTERNAL TABLE uk_cities (\
|
||||
name CHARACTER VARYING(100) NOT NULL) \
|
||||
name VARCHAR(100) NOT NULL) \
|
||||
STORED AS TEXTFILE LOCATION '/tmp/example.csv'",
|
||||
);
|
||||
match ast {
|
||||
|
@ -2435,7 +2435,7 @@ fn parse_create_external_table_lowercase() {
|
|||
let ast = one_statement_parses_to(
|
||||
sql,
|
||||
"CREATE EXTERNAL TABLE uk_cities (\
|
||||
name CHARACTER VARYING(100) NOT NULL, \
|
||||
name VARCHAR(100) NOT NULL, \
|
||||
lat DOUBLE NULL, \
|
||||
lng DOUBLE) \
|
||||
STORED AS PARQUET LOCATION '/tmp/example.csv'",
|
||||
|
|
|
@ -74,7 +74,7 @@ fn parse_create_table_with_defaults() {
|
|||
},
|
||||
ColumnDef {
|
||||
name: "first_name".into(),
|
||||
data_type: DataType::Varchar(Some(45)),
|
||||
data_type: DataType::CharacterVarying(Some(45)),
|
||||
collation: None,
|
||||
options: vec![ColumnOptionDef {
|
||||
name: None,
|
||||
|
@ -83,7 +83,7 @@ fn parse_create_table_with_defaults() {
|
|||
},
|
||||
ColumnDef {
|
||||
name: "last_name".into(),
|
||||
data_type: DataType::Varchar(Some(45)),
|
||||
data_type: DataType::CharacterVarying(Some(45)),
|
||||
collation: Some(ObjectName(vec![Ident::with_quote('"', "es_ES")])),
|
||||
options: vec![ColumnOptionDef {
|
||||
name: None,
|
||||
|
@ -92,7 +92,7 @@ fn parse_create_table_with_defaults() {
|
|||
},
|
||||
ColumnDef {
|
||||
name: "email".into(),
|
||||
data_type: DataType::Varchar(Some(50)),
|
||||
data_type: DataType::CharacterVarying(Some(50)),
|
||||
collation: None,
|
||||
options: vec![],
|
||||
},
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue