mirror of https://github.com/apache/datafusion-sqlparser-rs.git (synced 2025-07-07 17:04:59 +00:00)
Remove some non ANSI SQL support
This commit is contained in: parent 751a6f0880, commit 5bac9fd131
3 changed files with 75 additions and 164 deletions
@@ -69,12 +69,6 @@ pub enum ASTNode {
        name: String,
        /// Optional schema
        columns: Vec<SQLColumnDef>,
        /// File type (CSV or Parquet)
        file_type: FileType,
        /// For CSV files, indicate whether the file has a header row or not
        header_row: bool,
        /// Path to file or directory containing files
        location: String,
    },
}
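After this hunk the SQLCreateTable variant carries only the table name and the column definitions. A minimal construction sketch, assuming no other fields remain in this variant (the table name and empty column list are illustrative only):

    // Sketch: building the trimmed-down variant after this change.
    let _create = ASTNode::SQLCreateTable {
        name: "uk_cities".to_string(),
        columns: vec![],
    };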
@@ -101,22 +95,29 @@ pub enum SQLType {
    Varbinary(usize),
    /// Large binary object e.g. BLOB(1000)
    Blob(usize),

    //TODO: remove these non ANSI SQL types

    /// Decimal type with precision and optional scale e.g. DECIMAL(10,2)
    Decimal(usize, Option<usize>),
    /// Small integer
    SmallInt,
    /// Integer
    Int,
    /// Big integer
    BigInt,
    /// Floating point with precision e.g. FLOAT(8)
    Float(usize),
    /// Floating point e.g. REAL
    Real,
    /// Double e.g. DOUBLE PRECISION
    Double,
    /// Boolean
    Boolean,
    UInt8,
    UInt16,
    UInt32,
    UInt64,
    Int8,
    Int16,
    Int32,
    Int64,
    Float32,
    Double64,
    Utf8(usize),
    /// Date
    Date,
    /// Time
    Time,
    /// Timestamp
    Timestamp,
}

/// SQL Operator
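The ANSI-style variants above map one-to-one onto standard SQL type names. A hedged sketch of that correspondence; the to_sql helper below is hypothetical and not part of this commit:

    // Hypothetical helper: render a few ANSI variants back to SQL type names.
    fn to_sql(t: &SQLType) -> String {
        match t {
            SQLType::Varchar(n) => format!("VARCHAR({})", n),
            SQLType::Decimal(p, Some(s)) => format!("DECIMAL({},{})", p, s),
            SQLType::Decimal(p, None) => format!("DECIMAL({})", p),
            SQLType::SmallInt => "SMALLINT".to_string(),
            SQLType::Int => "INT".to_string(),
            SQLType::BigInt => "BIGINT".to_string(),
            SQLType::Float(p) => format!("FLOAT({})", p),
            SQLType::Real => "REAL".to_string(),
            SQLType::Double => "DOUBLE PRECISION".to_string(),
            SQLType::Boolean => "BOOLEAN".to_string(),
            other => format!("{:?}", other), // non-ANSI variants slated for removal
        }
    }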
src/sqlparser.rs (162 lines changed)
@@ -309,13 +309,8 @@ impl Parser {
        true
    }

    // fn parse_identifier(&mut self) -> Result<ASTNode::SQLIdentifier, Err> {
    //     let expr = self.parse_expr()?;
    //     match expr {
    //         Some(ASTNode::SQLIdentifier { .. }) => Ok(expr),
    //         _ => parser_err!(format!("Expected identifier but found {:?}", expr)))
    //     }
    // }

    //TODO: this function is inconsistent and sometimes returns bool and sometimes fails
    /// Consume the next token if it matches the expected token, otherwise return an error
    fn consume_token(&mut self, expected: &Token) -> Result<bool, ParserError> {
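The call sites in this diff (for example parse_optional_precision further down) branch on the returned bool, while the doc comment promises an error on a mismatch, which is exactly the inconsistency the TODO flags. A minimal sketch of the bool-only behaviour, assuming peek_token returns an owned Option<Token> as elsewhere in this file; this is not the body shipped in this commit:

    // Illustrative only: advance and return Ok(true) on a match, Ok(false) otherwise,
    // reserving Err for lower-level failures.
    fn consume_token(&mut self, expected: &Token) -> Result<bool, ParserError> {
        match self.peek_token() {
            Some(ref t) if t == expected => {
                self.next_token();
                Ok(true)
            }
            _ => Ok(false),
        }
    }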
@@ -336,7 +331,7 @@ impl Parser {

    /// Parse a SQL CREATE statement
    fn parse_create(&mut self) -> Result<ASTNode, ParserError> {
        if self.parse_keywords(vec!["EXTERNAL", "TABLE"]) {
        if self.parse_keywords(vec!["TABLE"]) {
            match self.next_token() {
                Some(Token::Identifier(id)) => {
                    // parse optional column list (schema)

@@ -388,39 +383,9 @@ impl Parser {
                        }
                    }

                    //println!("Parsed {} column defs", columns.len());

                    let mut headers = true;
                    let file_type: FileType = if self.parse_keywords(vec!["STORED", "AS", "CSV"]) {
                        if self.parse_keywords(vec!["WITH", "HEADER", "ROW"]) {
                            headers = true;
                        } else if self.parse_keywords(vec!["WITHOUT", "HEADER", "ROW"]) {
                            headers = false;
                        }
                        FileType::CSV
                    } else if self.parse_keywords(vec!["STORED", "AS", "NDJSON"]) {
                        FileType::NdJson
                    } else if self.parse_keywords(vec!["STORED", "AS", "PARQUET"]) {
                        FileType::Parquet
                    } else {
                        return parser_err!(format!(
                            "Expected 'STORED AS' clause, found {:?}",
                            self.peek_token()
                        ));
                    };

                    let location: String = if self.parse_keywords(vec!["LOCATION"]) {
                        self.parse_literal_string()?
                    } else {
                        return parser_err!("Missing 'LOCATION' clause");
                    };

                    Ok(ASTNode::SQLCreateTable {
                        name: id,
                        columns,
                        file_type,
                        header_row: headers,
                        location,
                    })
                }
                _ => parser_err!(format!(
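With the STORED AS and LOCATION clauses removed, parse_create now accepts a plain column-list definition. A usage sketch in the style of the tests later in this diff, using the parse_sql test helper; the table and column names are arbitrary:

    // Sketch: parse a plain CREATE TABLE and inspect the resulting node.
    let sql = String::from("CREATE TABLE t (id BIGINT NOT NULL, name VARCHAR(100) NULL)");
    match parse_sql(&sql) {
        ASTNode::SQLCreateTable { name, columns, .. } => {
            assert_eq!("t", name);
            assert_eq!(2, columns.len());
        }
        _ => panic!("expected SQLCreateTable"),
    }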
@@ -459,32 +424,35 @@ impl Parser {
        match self.next_token() {
            Some(Token::Keyword(k)) => match k.to_uppercase().as_ref() {
                "BOOLEAN" => Ok(SQLType::Boolean),
                "UINT8" => Ok(SQLType::UInt8),
                "UINT16" => Ok(SQLType::UInt16),
                "UINT32" => Ok(SQLType::UInt32),
                "UINT64" => Ok(SQLType::UInt64),
                "INT8" => Ok(SQLType::Int8),
                "INT16" => Ok(SQLType::Int16),
                "INT32" | "INT" | "INTEGER" => Ok(SQLType::Int32),
                "INT64" | "LONG" => Ok(SQLType::Int64),
                "FLOAT32" | "FLOAT" => Ok(SQLType::Float32),
                "FLOAT64" | "DOUBLE" => Ok(SQLType::Double64),
                "UTF8" | "VARCHAR" | "STRING" => {
                    // optional length
                    if self.consume_token(&Token::LParen)? {
                        let n = self.parse_literal_int()?;
                        self.consume_token(&Token::RParen)?;
                        Ok(SQLType::Utf8(n as usize))
                    } else {
                        Ok(SQLType::Utf8(100 as usize))
                    }
                }
                "FLOAT" => Ok(SQLType::Float(self.parse_precision()?)),
                "REAL" => Ok(SQLType::Real),
                "DOUBLE" => Ok(SQLType::Double),
                "SMALLINT" => Ok(SQLType::SmallInt),
                "INT" | "INTEGER" => Ok(SQLType::Int),
                "BIGINT" => Ok(SQLType::BigInt),
                "VARCHAR" => Ok(SQLType::Varchar(self.parse_precision()?)),
                _ => parser_err!(format!("Invalid data type '{:?}'", k)),
            },
            other => parser_err!(format!("Invalid data type: '{:?}'", other)),
        }
    }

    fn parse_precision(&mut self) -> Result<usize, ParserError> {
        //TODO: error handling
        Ok(self.parse_optional_precision()?.unwrap())
    }

    fn parse_optional_precision(&mut self) -> Result<Option<usize>, ParserError> {
        if self.consume_token(&Token::LParen)? {
            let n = self.parse_literal_int()?;
            //TODO: check return value of reading rparen
            self.consume_token(&Token::RParen)?;
            Ok(Some(n as usize))
        } else {
            Ok(None)
        }
    }

    /// Parse a SELECT statement
    fn parse_select(&mut self) -> Result<ASTNode, ParserError> {
        let projection = self.parse_expr_list()?;
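Note that parse_precision unwraps parse_optional_precision, so the VARCHAR arm above effectively requires an explicit length. A hedged illustration of the resulting CAST behaviour, modelled on the parse_cast test below; the column and table names are made up and this is not a test from this commit:

    // Sketch: VARCHAR with an explicit precision flows through parse_precision.
    let ast = parse_sql(&String::from("SELECT CAST(x AS VARCHAR(20)) FROM t"));
    if let ASTNode::SQLSelect { projection, .. } = ast {
        assert_eq!(
            ASTNode::SQLCast {
                expr: Box::new(ASTNode::SQLIdentifier("x".to_string())),
                data_type: SQLType::Varchar(20)
            },
            projection[0]
        );
    }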
@@ -807,7 +775,7 @@ mod tests {

    #[test]
    fn parse_cast() {
        let sql = String::from("SELECT CAST(id AS DOUBLE) FROM customer");
        let sql = String::from("SELECT CAST(id AS BIGINT) FROM customer");
        let ast = parse_sql(&sql);
        match ast {
            ASTNode::SQLSelect { projection, .. } => {

@@ -815,7 +783,7 @@ mod tests {
                assert_eq!(
                    ASTNode::SQLCast {
                        expr: Box::new(ASTNode::SQLIdentifier("id".to_string())),
                        data_type: SQLType::Double64
                        data_type: SQLType::BigInt
                    },
                    projection[0]
                );
@@ -825,103 +793,41 @@ mod tests {
    }

    #[test]
    fn parse_create_external_table_csv_with_header_row() {
    fn parse_create_table() {
        let sql = String::from(
            "CREATE EXTERNAL TABLE uk_cities (\
            "CREATE TABLE uk_cities (\
             name VARCHAR(100) NOT NULL,\
             lat DOUBLE NULL,\
             lng DOUBLE NULL) \
             STORED AS CSV WITH HEADER ROW \
             LOCATION '/mnt/ssd/uk_cities.csv'",
             lng DOUBLE NULL)",
        );
        let ast = parse_sql(&sql);
        match ast {
            ASTNode::SQLCreateTable {
                name,
                columns,
                file_type,
                header_row,
                location,
            } => {
                assert_eq!("uk_cities", name);
                assert_eq!(3, columns.len());
                assert_eq!(FileType::CSV, file_type);
                assert_eq!(true, header_row);
                assert_eq!("/mnt/ssd/uk_cities.csv", location);

                let c_name = &columns[0];
                assert_eq!("name", c_name.name);
                assert_eq!(SQLType::Utf8(100), c_name.data_type);
                assert_eq!(SQLType::Varchar(100), c_name.data_type);
                assert_eq!(false, c_name.allow_null);

                let c_lat = &columns[1];
                assert_eq!("lat", c_lat.name);
                assert_eq!(SQLType::Double64, c_lat.data_type);
                assert_eq!(SQLType::Double, c_lat.data_type);
                assert_eq!(true, c_lat.allow_null);

                let c_lng = &columns[2];
                assert_eq!("lng", c_lng.name);
                assert_eq!(SQLType::Double64, c_lng.data_type);
                assert_eq!(SQLType::Double, c_lng.data_type);
                assert_eq!(true, c_lng.allow_null);
            }
            _ => assert!(false),
        }
    }

    #[test]
    fn parse_create_external_table_csv_without_header_row() {
        let sql = String::from(
            "CREATE EXTERNAL TABLE uk_cities (\
             name VARCHAR(100) NOT NULL,\
             lat DOUBLE NOT NULL,\
             lng DOUBLE NOT NULL) \
             STORED AS CSV WITHOUT HEADER ROW \
             LOCATION '/mnt/ssd/uk_cities.csv'",
        );
        let ast = parse_sql(&sql);
        match ast {
            ASTNode::SQLCreateTable {
                name,
                columns,
                file_type,
                header_row,
                location,
            } => {
                assert_eq!("uk_cities", name);
                assert_eq!(3, columns.len());
                assert_eq!(FileType::CSV, file_type);
                assert_eq!(false, header_row);
                assert_eq!("/mnt/ssd/uk_cities.csv", location);
            }
            _ => assert!(false),
        }
    }

    #[test]
    fn parse_create_external_table_parquet() {
        let sql = String::from(
            "CREATE EXTERNAL TABLE uk_cities \
             STORED AS PARQUET \
             LOCATION '/mnt/ssd/uk_cities.parquet'",
        );
        let ast = parse_sql(&sql);
        match ast {
            ASTNode::SQLCreateTable {
                name,
                columns,
                file_type,
                location,
                ..
            } => {
                assert_eq!("uk_cities", name);
                assert_eq!(0, columns.len());
                assert_eq!(FileType::Parquet, file_type);
                assert_eq!("/mnt/ssd/uk_cities.parquet", location);
            }
            _ => assert!(false),
        }
    }

    #[test]
    fn parse_scalar_function_in_projection() {
        let sql = String::from("SELECT sqrt(id) FROM foo");
@@ -108,27 +108,31 @@ lazy_static! {
        m.insert("ROW");

        // SQL types
        m.insert("STRING");
        m.insert("CHAR");
        m.insert("CHARACTER");
        m.insert("VARYING");
        m.insert("LARGE");
        m.insert("OBJECT");
        m.insert("VARCHAR");
        m.insert("CLOB");
        m.insert("BINARY");
        m.insert("VARBINARY");
        m.insert("BLOB");
        m.insert("FLOAT");
        m.insert("REAL");
        m.insert("DOUBLE");
        m.insert("PRECISION");
        m.insert("INT");
        m.insert("INTEGER");
        m.insert("LONG");

        // Arrow native types
        m.insert("SMALLINT");
        m.insert("BIGINT");
        m.insert("NUMERIC");
        m.insert("DECIMAL");
        m.insert("DEC");
        m.insert("BOOLEAN");
        m.insert("UINT8");
        m.insert("UINT16");
        m.insert("UINT32");
        m.insert("UINT64");
        m.insert("INT8");
        m.insert("INT16");
        m.insert("INT32");
        m.insert("INT64");
        m.insert("FLOAT32");
        m.insert("FLOAT64");
        m.insert("UTF8");
        m.insert("DATE");
        m.insert("TIME");
        m.insert("TIMESTAMP");

        m
    };
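These entries extend the tokenizer's lazy_static keyword set: an input word is treated as a keyword only when its uppercased form is a member. A self-contained sketch of that membership check, independent of this crate's actual tokenizer code:

    // Generic illustration of the keyword-set lookup pattern used above.
    use std::collections::HashSet;

    fn main() {
        let mut keywords: HashSet<&'static str> = HashSet::new();
        keywords.insert("VARCHAR");
        keywords.insert("DOUBLE");
        keywords.insert("PRECISION");

        // A word is classified as a keyword only if its uppercased form is in the set.
        for word in ["varchar", "uk_cities"] {
            let is_keyword = keywords.contains(word.to_uppercase().as_str());
            println!("{}: keyword = {}", word, is_keyword);
        }
    }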