mirror of
https://github.com/apache/datafusion-sqlparser-rs.git
synced 2025-09-03 20:50:33 +00:00
647 Adding all ANSI character string types, parsing them, and differentiating between each one (#648)
This commit is contained in:
parent
977cdb2270
commit
f7f14df4b1
4 changed files with 106 additions and 78 deletions
|
@ -25,8 +25,14 @@ use super::value::escape_single_quote_string;
|
||||||
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
|
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
|
||||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||||
pub enum DataType {
|
pub enum DataType {
|
||||||
/// Fixed-length character type e.g. CHAR(10)
|
/// Fixed-length character type e.g. CHARACTER(10)
|
||||||
|
Character(Option<u64>),
|
||||||
|
/// Fixed-length char type e.g. CHAR(10)
|
||||||
Char(Option<u64>),
|
Char(Option<u64>),
|
||||||
|
/// Character varying type e.g. CHARACTER VARYING(10)
|
||||||
|
CharacterVarying(Option<u64>),
|
||||||
|
/// Char varying type e.g. CHAR VARYING(10)
|
||||||
|
CharVarying(Option<u64>),
|
||||||
/// Variable-length character type e.g. VARCHAR(10)
|
/// Variable-length character type e.g. VARCHAR(10)
|
||||||
Varchar(Option<u64>),
|
Varchar(Option<u64>),
|
||||||
/// Variable-length character type e.g. NVARCHAR(10)
|
/// Variable-length character type e.g. NVARCHAR(10)
|
||||||
|
@ -127,10 +133,17 @@ pub enum DataType {
|
||||||
impl fmt::Display for DataType {
|
impl fmt::Display for DataType {
|
||||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||||
match self {
|
match self {
|
||||||
|
DataType::Character(size) => {
|
||||||
|
format_type_with_optional_length(f, "CHARACTER", size, false)
|
||||||
|
}
|
||||||
DataType::Char(size) => format_type_with_optional_length(f, "CHAR", size, false),
|
DataType::Char(size) => format_type_with_optional_length(f, "CHAR", size, false),
|
||||||
DataType::Varchar(size) => {
|
DataType::CharacterVarying(size) => {
|
||||||
format_type_with_optional_length(f, "CHARACTER VARYING", size, false)
|
format_type_with_optional_length(f, "CHARACTER VARYING", size, false)
|
||||||
}
|
}
|
||||||
|
DataType::CharVarying(size) => {
|
||||||
|
format_type_with_optional_length(f, "CHAR VARYING", size, false)
|
||||||
|
}
|
||||||
|
DataType::Varchar(size) => format_type_with_optional_length(f, "VARCHAR", size, false),
|
||||||
DataType::Nvarchar(size) => {
|
DataType::Nvarchar(size) => {
|
||||||
format_type_with_optional_length(f, "NVARCHAR", size, false)
|
format_type_with_optional_length(f, "NVARCHAR", size, false)
|
||||||
}
|
}
|
||||||
|
|
161
src/parser.rs
161
src/parser.rs
|
@ -3414,9 +3414,16 @@ impl<'a> Parser<'a> {
|
||||||
}
|
}
|
||||||
Keyword::VARCHAR => Ok(DataType::Varchar(self.parse_optional_precision()?)),
|
Keyword::VARCHAR => Ok(DataType::Varchar(self.parse_optional_precision()?)),
|
||||||
Keyword::NVARCHAR => Ok(DataType::Nvarchar(self.parse_optional_precision()?)),
|
Keyword::NVARCHAR => Ok(DataType::Nvarchar(self.parse_optional_precision()?)),
|
||||||
Keyword::CHAR | Keyword::CHARACTER => {
|
Keyword::CHARACTER => {
|
||||||
if self.parse_keyword(Keyword::VARYING) {
|
if self.parse_keyword(Keyword::VARYING) {
|
||||||
Ok(DataType::Varchar(self.parse_optional_precision()?))
|
Ok(DataType::CharacterVarying(self.parse_optional_precision()?))
|
||||||
|
} else {
|
||||||
|
Ok(DataType::Character(self.parse_optional_precision()?))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Keyword::CHAR => {
|
||||||
|
if self.parse_keyword(Keyword::VARYING) {
|
||||||
|
Ok(DataType::CharVarying(self.parse_optional_precision()?))
|
||||||
} else {
|
} else {
|
||||||
Ok(DataType::Char(self.parse_optional_precision()?))
|
Ok(DataType::Char(self.parse_optional_precision()?))
|
||||||
}
|
}
|
||||||
|
@ -5288,80 +5295,88 @@ mod tests {
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO add tests for all data types? https://github.com/sqlparser-rs/sqlparser-rs/issues/2
|
#[cfg(test)]
|
||||||
// TODO when we have dialect validation by data type parsing, split test
|
mod test_parse_data_type {
|
||||||
#[test]
|
use crate::ast::{DataType, TimezoneInfo};
|
||||||
fn test_parse_data_type() {
|
use crate::dialect::{AnsiDialect, GenericDialect};
|
||||||
// BINARY data type
|
use crate::test_utils::TestedDialects;
|
||||||
test_parse_data_type("BINARY", DataType::Binary(None), "BINARY");
|
|
||||||
test_parse_data_type("BINARY(20)", DataType::Binary(Some(20)), "BINARY(20)");
|
|
||||||
|
|
||||||
// BLOB data type
|
macro_rules! test_parse_data_type {
|
||||||
test_parse_data_type("BLOB", DataType::Blob(None), "BLOB");
|
($dialect:expr, $input:expr, $expected_type:expr $(,)?) => {{
|
||||||
test_parse_data_type("BLOB(50)", DataType::Blob(Some(50)), "BLOB(50)");
|
$dialect.run_parser_method(&*$input, |parser| {
|
||||||
|
|
||||||
// CLOB data type
|
|
||||||
test_parse_data_type("CLOB", DataType::Clob(None), "CLOB");
|
|
||||||
test_parse_data_type("CLOB(50)", DataType::Clob(Some(50)), "CLOB(50)");
|
|
||||||
|
|
||||||
// Double data type
|
|
||||||
test_parse_data_type(
|
|
||||||
"DOUBLE PRECISION",
|
|
||||||
DataType::DoublePrecision,
|
|
||||||
"DOUBLE PRECISION",
|
|
||||||
);
|
|
||||||
test_parse_data_type("DOUBLE", DataType::Double, "DOUBLE");
|
|
||||||
|
|
||||||
// Time data type
|
|
||||||
test_parse_data_type("TIME", DataType::Time(TimezoneInfo::None), "TIME");
|
|
||||||
test_parse_data_type(
|
|
||||||
"TIME WITH TIME ZONE",
|
|
||||||
DataType::Time(TimezoneInfo::WithTimeZone),
|
|
||||||
"TIME WITH TIME ZONE",
|
|
||||||
);
|
|
||||||
test_parse_data_type(
|
|
||||||
"TIME WITHOUT TIME ZONE",
|
|
||||||
DataType::Time(TimezoneInfo::WithoutTimeZone),
|
|
||||||
"TIME WITHOUT TIME ZONE",
|
|
||||||
);
|
|
||||||
test_parse_data_type("TIMETZ", DataType::Time(TimezoneInfo::Tz), "TIMETZ");
|
|
||||||
|
|
||||||
// Timestamp data type
|
|
||||||
test_parse_data_type(
|
|
||||||
"TIMESTAMP",
|
|
||||||
DataType::Timestamp(TimezoneInfo::None),
|
|
||||||
"TIMESTAMP",
|
|
||||||
);
|
|
||||||
test_parse_data_type(
|
|
||||||
"TIMESTAMP WITH TIME ZONE",
|
|
||||||
DataType::Timestamp(TimezoneInfo::WithTimeZone),
|
|
||||||
"TIMESTAMP WITH TIME ZONE",
|
|
||||||
);
|
|
||||||
test_parse_data_type(
|
|
||||||
"TIMESTAMP WITHOUT TIME ZONE",
|
|
||||||
DataType::Timestamp(TimezoneInfo::WithoutTimeZone),
|
|
||||||
"TIMESTAMP WITHOUT TIME ZONE",
|
|
||||||
);
|
|
||||||
test_parse_data_type(
|
|
||||||
"TIMESTAMPTZ",
|
|
||||||
DataType::Timestamp(TimezoneInfo::Tz),
|
|
||||||
"TIMESTAMPTZ",
|
|
||||||
);
|
|
||||||
|
|
||||||
// VARBINARY data type
|
|
||||||
test_parse_data_type("VARBINARY", DataType::Varbinary(None), "VARBINARY");
|
|
||||||
test_parse_data_type(
|
|
||||||
"VARBINARY(20)",
|
|
||||||
DataType::Varbinary(Some(20)),
|
|
||||||
"VARBINARY(20)",
|
|
||||||
);
|
|
||||||
|
|
||||||
fn test_parse_data_type(input: &str, expected_type: DataType, expected_str: &str) {
|
|
||||||
all_dialects().run_parser_method(input, |parser| {
|
|
||||||
let data_type = parser.parse_data_type().unwrap();
|
let data_type = parser.parse_data_type().unwrap();
|
||||||
assert_eq!(data_type, expected_type);
|
assert_eq!(data_type, $expected_type);
|
||||||
assert_eq!(expected_type.to_string(), expected_str.to_string());
|
assert_eq!(data_type.to_string(), $input.to_string());
|
||||||
});
|
});
|
||||||
|
}};
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_ansii_character_string_types() {
|
||||||
|
// Character string types: <https://jakewheat.github.io/sql-overview/sql-2016-foundation-grammar.html#character-string-type>
|
||||||
|
let dialect = TestedDialects {
|
||||||
|
dialects: vec![Box::new(GenericDialect {}), Box::new(AnsiDialect {})],
|
||||||
|
};
|
||||||
|
|
||||||
|
test_parse_data_type!(dialect, "CHARACTER", DataType::Character(None));
|
||||||
|
|
||||||
|
test_parse_data_type!(dialect, "CHARACTER(20)", DataType::Character(Some(20)));
|
||||||
|
|
||||||
|
test_parse_data_type!(dialect, "CHAR", DataType::Char(None));
|
||||||
|
|
||||||
|
test_parse_data_type!(dialect, "CHAR(20)", DataType::Char(Some(20)));
|
||||||
|
|
||||||
|
test_parse_data_type!(
|
||||||
|
dialect,
|
||||||
|
"CHARACTER VARYING(20)",
|
||||||
|
DataType::CharacterVarying(Some(20))
|
||||||
|
);
|
||||||
|
|
||||||
|
test_parse_data_type!(dialect, "CHAR VARYING(20)", DataType::CharVarying(Some(20)));
|
||||||
|
|
||||||
|
test_parse_data_type!(dialect, "VARCHAR(20)", DataType::Varchar(Some(20)));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_ansii_datetime_types() {
|
||||||
|
// Datetime types: <https://jakewheat.github.io/sql-overview/sql-2016-foundation-grammar.html#datetime-type>
|
||||||
|
let dialect = TestedDialects {
|
||||||
|
dialects: vec![Box::new(GenericDialect {}), Box::new(AnsiDialect {})],
|
||||||
|
};
|
||||||
|
|
||||||
|
test_parse_data_type!(dialect, "DATE", DataType::Date);
|
||||||
|
|
||||||
|
test_parse_data_type!(dialect, "TIME", DataType::Time(TimezoneInfo::None));
|
||||||
|
|
||||||
|
test_parse_data_type!(
|
||||||
|
dialect,
|
||||||
|
"TIME WITH TIME ZONE",
|
||||||
|
DataType::Time(TimezoneInfo::WithTimeZone)
|
||||||
|
);
|
||||||
|
|
||||||
|
test_parse_data_type!(
|
||||||
|
dialect,
|
||||||
|
"TIME WITHOUT TIME ZONE",
|
||||||
|
DataType::Time(TimezoneInfo::WithoutTimeZone)
|
||||||
|
);
|
||||||
|
|
||||||
|
test_parse_data_type!(
|
||||||
|
dialect,
|
||||||
|
"TIMESTAMP",
|
||||||
|
DataType::Timestamp(TimezoneInfo::None)
|
||||||
|
);
|
||||||
|
|
||||||
|
test_parse_data_type!(
|
||||||
|
dialect,
|
||||||
|
"TIMESTAMP WITH TIME ZONE",
|
||||||
|
DataType::Timestamp(TimezoneInfo::WithTimeZone)
|
||||||
|
);
|
||||||
|
|
||||||
|
test_parse_data_type!(
|
||||||
|
dialect,
|
||||||
|
"TIMESTAMP WITHOUT TIME ZONE",
|
||||||
|
DataType::Timestamp(TimezoneInfo::WithoutTimeZone)
|
||||||
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1850,7 +1850,7 @@ fn parse_create_table() {
|
||||||
let ast = one_statement_parses_to(
|
let ast = one_statement_parses_to(
|
||||||
sql,
|
sql,
|
||||||
"CREATE TABLE uk_cities (\
|
"CREATE TABLE uk_cities (\
|
||||||
name CHARACTER VARYING(100) NOT NULL, \
|
name VARCHAR(100) NOT NULL, \
|
||||||
lat DOUBLE NULL, \
|
lat DOUBLE NULL, \
|
||||||
lng DOUBLE, \
|
lng DOUBLE, \
|
||||||
constrained INT NULL CONSTRAINT pkey PRIMARY KEY NOT NULL UNIQUE CHECK (constrained > 0), \
|
constrained INT NULL CONSTRAINT pkey PRIMARY KEY NOT NULL UNIQUE CHECK (constrained > 0), \
|
||||||
|
@ -2312,7 +2312,7 @@ fn parse_create_external_table() {
|
||||||
let ast = one_statement_parses_to(
|
let ast = one_statement_parses_to(
|
||||||
sql,
|
sql,
|
||||||
"CREATE EXTERNAL TABLE uk_cities (\
|
"CREATE EXTERNAL TABLE uk_cities (\
|
||||||
name CHARACTER VARYING(100) NOT NULL, \
|
name VARCHAR(100) NOT NULL, \
|
||||||
lat DOUBLE NULL, \
|
lat DOUBLE NULL, \
|
||||||
lng DOUBLE) \
|
lng DOUBLE) \
|
||||||
STORED AS TEXTFILE LOCATION '/tmp/example.csv'",
|
STORED AS TEXTFILE LOCATION '/tmp/example.csv'",
|
||||||
|
@ -2382,7 +2382,7 @@ fn parse_create_or_replace_external_table() {
|
||||||
let ast = one_statement_parses_to(
|
let ast = one_statement_parses_to(
|
||||||
sql,
|
sql,
|
||||||
"CREATE OR REPLACE EXTERNAL TABLE uk_cities (\
|
"CREATE OR REPLACE EXTERNAL TABLE uk_cities (\
|
||||||
name CHARACTER VARYING(100) NOT NULL) \
|
name VARCHAR(100) NOT NULL) \
|
||||||
STORED AS TEXTFILE LOCATION '/tmp/example.csv'",
|
STORED AS TEXTFILE LOCATION '/tmp/example.csv'",
|
||||||
);
|
);
|
||||||
match ast {
|
match ast {
|
||||||
|
@ -2435,7 +2435,7 @@ fn parse_create_external_table_lowercase() {
|
||||||
let ast = one_statement_parses_to(
|
let ast = one_statement_parses_to(
|
||||||
sql,
|
sql,
|
||||||
"CREATE EXTERNAL TABLE uk_cities (\
|
"CREATE EXTERNAL TABLE uk_cities (\
|
||||||
name CHARACTER VARYING(100) NOT NULL, \
|
name VARCHAR(100) NOT NULL, \
|
||||||
lat DOUBLE NULL, \
|
lat DOUBLE NULL, \
|
||||||
lng DOUBLE) \
|
lng DOUBLE) \
|
||||||
STORED AS PARQUET LOCATION '/tmp/example.csv'",
|
STORED AS PARQUET LOCATION '/tmp/example.csv'",
|
||||||
|
|
|
@ -74,7 +74,7 @@ fn parse_create_table_with_defaults() {
|
||||||
},
|
},
|
||||||
ColumnDef {
|
ColumnDef {
|
||||||
name: "first_name".into(),
|
name: "first_name".into(),
|
||||||
data_type: DataType::Varchar(Some(45)),
|
data_type: DataType::CharacterVarying(Some(45)),
|
||||||
collation: None,
|
collation: None,
|
||||||
options: vec![ColumnOptionDef {
|
options: vec![ColumnOptionDef {
|
||||||
name: None,
|
name: None,
|
||||||
|
@ -83,7 +83,7 @@ fn parse_create_table_with_defaults() {
|
||||||
},
|
},
|
||||||
ColumnDef {
|
ColumnDef {
|
||||||
name: "last_name".into(),
|
name: "last_name".into(),
|
||||||
data_type: DataType::Varchar(Some(45)),
|
data_type: DataType::CharacterVarying(Some(45)),
|
||||||
collation: Some(ObjectName(vec![Ident::with_quote('"', "es_ES")])),
|
collation: Some(ObjectName(vec![Ident::with_quote('"', "es_ES")])),
|
||||||
options: vec![ColumnOptionDef {
|
options: vec![ColumnOptionDef {
|
||||||
name: None,
|
name: None,
|
||||||
|
@ -92,7 +92,7 @@ fn parse_create_table_with_defaults() {
|
||||||
},
|
},
|
||||||
ColumnDef {
|
ColumnDef {
|
||||||
name: "email".into(),
|
name: "email".into(),
|
||||||
data_type: DataType::Varchar(Some(50)),
|
data_type: DataType::CharacterVarying(Some(50)),
|
||||||
collation: None,
|
collation: None,
|
||||||
options: vec![],
|
options: vec![],
|
||||||
},
|
},
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue