From f0f6082eff61ab005490b1b7bf38a0a8e749d865 Mon Sep 17 00:00:00 2001 From: Zhiyuan Zheng Date: Mon, 8 Apr 2019 21:36:02 +0800 Subject: [PATCH 1/4] support create external table --- sql.sql | 1 + src/sqlast/mod.rs | 70 +++++++++++++++++++++++++ src/sqlparser.rs | 128 ++++++++++++++++++++++++++++------------------ 3 files changed, 150 insertions(+), 49 deletions(-) create mode 100644 sql.sql diff --git a/sql.sql b/sql.sql new file mode 100644 index 00000000..bc6fd49d --- /dev/null +++ b/sql.sql @@ -0,0 +1 @@ +CREATE EXTERNAL TABLE T (X INT) STORED AS TEXTFILE LOCATION '/home/admin/a.csv'; diff --git a/src/sqlast/mod.rs b/src/sqlast/mod.rs index fe30586b..31605b21 100644 --- a/src/sqlast/mod.rs +++ b/src/sqlast/mod.rs @@ -250,6 +250,15 @@ pub enum SQLStatement { /// Optional schema columns: Vec, }, + /// CREATE EXTERNAL TABLE + SQLCreateExternalTable { + /// Table name + name: SQLObjectName, + /// Optional schema + columns: Vec, + file_format: FileFormat, + location: String, + }, /// ALTER TABLE SQLAlterTable { /// Table name @@ -370,6 +379,17 @@ impl ToString for SQLStatement { .collect::>() .join(", ") ), + SQLStatement::SQLCreateExternalTable { name, columns, file_format, location } => format!( + "CREATE TABLE {} ({}) STORED AS {} LOCATION {}", + name.to_string(), + columns + .iter() + .map(|c| c.to_string()) + .collect::>() + .join(", "), + file_format.to_string(), + location + ), SQLStatement::SQLAlterTable { name, operation } => { format!("ALTER TABLE {} {}", name.to_string(), operation.to_string()) } @@ -429,3 +449,53 @@ impl ToString for SQLColumnDef { s } } + +/// External table's available file format +#[derive(Debug, Clone, PartialEq)] +pub enum FileFormat { + TEXTFILE, + SEQUENCEFILE, + ORC, + PARQUET, + AVRO, + RCFILE, + JSONFILE, +} + +impl ToString for FileFormat { + fn to_string(&self) -> String { + use self::FileFormat::*; + match self { + TEXTFILE => "TEXTFILE".to_string(), + SEQUENCEFILE => "SEQUENCEFILE".to_string(), + ORC => "ORC".to_string(), + PARQUET => "PARQUET".to_string(), + AVRO => "AVRO".to_string(), + RCFILE => "RCFILE".to_string(), + JSONFILE => "TEXTFILE".to_string(), + } + } +} + +use std::str::FromStr; +use sqlparser::ParserError; +impl FromStr for FileFormat { + type Err = ParserError; + + fn from_str(s: &str) -> Result { + use self::FileFormat::*; + match s { + "TEXTFILE" => Ok(TEXTFILE), + "SEQUENCEFILE" => Ok(SEQUENCEFILE), + "ORC" => Ok(ORC), + "PARQUET" => Ok(PARQUET), + "AVRO" => Ok(AVRO), + "RCFILE" => Ok(RCFILE), + "JSONFILE" => Ok(JSONFILE), + _ => Err(ParserError::ParserError(format!( + "Unexpected token for file format: {}", + s + ))) + } + } +} \ No newline at end of file diff --git a/src/sqlparser.rs b/src/sqlparser.rs index 22d0e70c..89b7698c 100644 --- a/src/sqlparser.rs +++ b/src/sqlparser.rs @@ -620,6 +620,8 @@ impl Parser { } else if self.parse_keyword("MATERIALIZED") || self.parse_keyword("VIEW") { self.prev_token(); self.parse_create_view() + } else if self.parse_keyword("EXTERNAL") { + self.parse_create_external_table() } else { parser_err!(format!( "Unexpected token after CREATE: {:?}", @@ -628,6 +630,25 @@ impl Parser { } } + pub fn parse_create_external_table(&mut self) -> Result { + self.expect_keyword("TABLE")?; + let table_name = self.parse_object_name()?; + let columns = self.parse_columns()?; + self.expect_keyword("STORED")?; + self.expect_keyword("AS")?; + let file_format = self.parse_identifier()?.parse::()?; + + self.expect_keyword("LOCATION")?; + let location = self.parse_literal_string()?; + + Ok(SQLStatement::SQLCreateExternalTable { + name: table_name, + columns, + file_format, + location + }) + } + pub fn parse_create_view(&mut self) -> Result { let materialized = self.parse_keyword("MATERIALIZED"); self.expect_keyword("VIEW")?; @@ -650,62 +671,71 @@ impl Parser { pub fn parse_create_table(&mut self) -> Result { let table_name = self.parse_object_name()?; // parse optional column list (schema) - let mut columns = vec![]; - if self.consume_token(&Token::LParen) { - loop { - match self.next_token() { - Some(Token::SQLWord(column_name)) => { - let data_type = self.parse_data_type()?; - let is_primary = self.parse_keywords(vec!["PRIMARY", "KEY"]); - let is_unique = self.parse_keyword("UNIQUE"); - let default = if self.parse_keyword("DEFAULT") { - let expr = self.parse_default_expr(0)?; - Some(expr) - } else { - None - }; - let allow_null = if self.parse_keywords(vec!["NOT", "NULL"]) { - false - } else if self.parse_keyword("NULL") { - true - } else { - true - }; - debug!("default: {:?}", default); + let columns = self.parse_columns()?; - columns.push(SQLColumnDef { - name: column_name.as_sql_ident(), - data_type: data_type, - allow_null, - is_primary, - is_unique, - default, - }); - match self.next_token() { - Some(Token::Comma) => {} - Some(Token::RParen) => { - break; - } - other => { - return parser_err!(format!( - "Expected ',' or ')' after column definition but found {:?}", - other - )); - } - } - } - unexpected => { - return parser_err!(format!("Expected column name, got {:?}", unexpected)); - } - } - } - } Ok(SQLStatement::SQLCreateTable { name: table_name, columns, }) } + fn parse_columns(&mut self) -> Result, ParserError> { + let mut columns = vec![]; + if !self.consume_token(&Token::LParen) { + return Ok(columns); + } + + loop { + match self.next_token() { + Some(Token::SQLWord(column_name)) => { + let data_type = self.parse_data_type()?; + let is_primary = self.parse_keywords(vec!["PRIMARY", "KEY"]); + let is_unique = self.parse_keyword("UNIQUE"); + let default = if self.parse_keyword("DEFAULT") { + let expr = self.parse_default_expr(0)?; + Some(expr) + } else { + None + }; + let allow_null = if self.parse_keywords(vec!["NOT", "NULL"]) { + false + } else if self.parse_keyword("NULL") { + true + } else { + true + }; + debug!("default: {:?}", default); + + columns.push(SQLColumnDef { + name: column_name.as_sql_ident(), + data_type, + allow_null, + is_primary, + is_unique, + default, + }); + match self.next_token() { + Some(Token::Comma) => {} + Some(Token::RParen) => { + break; + } + other => { + return parser_err!(format!( + "Expected ',' or ')' after column definition but found {:?}", + other + )); + } + } + } + unexpected => { + return parser_err!(format!("Expected column name, got {:?}", unexpected)); + } + } + } + + Ok(columns) + } + pub fn parse_table_key(&mut self, constraint_name: SQLIdent) -> Result { let is_primary_key = self.parse_keywords(vec!["PRIMARY", "KEY"]); let is_unique_key = self.parse_keywords(vec!["UNIQUE", "KEY"]); From 26940920ac18ae9713f190bfe5b8ac56c8e89a10 Mon Sep 17 00:00:00 2001 From: Zhiyuan Zheng Date: Tue, 9 Apr 2019 13:28:01 +0800 Subject: [PATCH 2/4] Add unit tests. --- src/sqlast/mod.rs | 17 ++++--- src/sqlparser.rs | 8 +-- tests/sqlparser_generic.rs | 99 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 114 insertions(+), 10 deletions(-) diff --git a/src/sqlast/mod.rs b/src/sqlast/mod.rs index 31605b21..bd2de545 100644 --- a/src/sqlast/mod.rs +++ b/src/sqlast/mod.rs @@ -379,8 +379,13 @@ impl ToString for SQLStatement { .collect::>() .join(", ") ), - SQLStatement::SQLCreateExternalTable { name, columns, file_format, location } => format!( - "CREATE TABLE {} ({}) STORED AS {} LOCATION {}", + SQLStatement::SQLCreateExternalTable { + name, + columns, + file_format, + location, + } => format!( + "CREATE EXTERNAL TABLE {} ({}) STORED AS {} LOCATION '{}'", name.to_string(), columns .iter() @@ -477,8 +482,8 @@ impl ToString for FileFormat { } } -use std::str::FromStr; use sqlparser::ParserError; +use std::str::FromStr; impl FromStr for FileFormat { type Err = ParserError; @@ -493,9 +498,9 @@ impl FromStr for FileFormat { "RCFILE" => Ok(RCFILE), "JSONFILE" => Ok(JSONFILE), _ => Err(ParserError::ParserError(format!( - "Unexpected token for file format: {}", + "Unexpected file format: {}", s - ))) + ))), } } -} \ No newline at end of file +} diff --git a/src/sqlparser.rs b/src/sqlparser.rs index 89b7698c..726d9abe 100644 --- a/src/sqlparser.rs +++ b/src/sqlparser.rs @@ -645,7 +645,7 @@ impl Parser { name: table_name, columns, file_format, - location + location, }) } @@ -721,9 +721,9 @@ impl Parser { } other => { return parser_err!(format!( - "Expected ',' or ')' after column definition but found {:?}", - other - )); + "Expected ',' or ')' after column definition but found {:?}", + other + )); } } } diff --git a/tests/sqlparser_generic.rs b/tests/sqlparser_generic.rs index b3e418a0..a46b3ee0 100644 --- a/tests/sqlparser_generic.rs +++ b/tests/sqlparser_generic.rs @@ -457,6 +457,105 @@ fn parse_create_table() { } } +#[test] +fn parse_create_external_table() { + let sql = String::from( + "CREATE EXTERNAL TABLE uk_cities (\ + name VARCHAR(100) NOT NULL,\ + lat DOUBLE NULL,\ + lng DOUBLE NULL)\ + STORED AS TEXTFILE LOCATION '/tmp/example.csv", + ); + let ast = one_statement_parses_to( + &sql, + "CREATE EXTERNAL TABLE uk_cities (\ + name character varying(100) NOT NULL, \ + lat double, \ + lng double) \ + STORED AS TEXTFILE LOCATION '/tmp/example.csv'", + ); + match ast { + SQLStatement::SQLCreateExternalTable { + name, + columns, + file_format, + location, + } => { + assert_eq!("uk_cities", name.to_string()); + assert_eq!(3, columns.len()); + + let c_name = &columns[0]; + assert_eq!("name", c_name.name); + assert_eq!(SQLType::Varchar(Some(100)), c_name.data_type); + assert_eq!(false, c_name.allow_null); + + let c_lat = &columns[1]; + assert_eq!("lat", c_lat.name); + assert_eq!(SQLType::Double, c_lat.data_type); + assert_eq!(true, c_lat.allow_null); + + let c_lng = &columns[2]; + assert_eq!("lng", c_lng.name); + assert_eq!(SQLType::Double, c_lng.data_type); + assert_eq!(true, c_lng.allow_null); + + assert_eq!(FileFormat::TEXTFILE, file_format); + assert_eq!("/tmp/example.csv", location); + } + _ => assert!(false), + } +} + +#[test] +fn parse_create_external_table_newline() { + let sql = String::from( + "CREATE EXTERNAL TABLE uk_cities (\ + name VARCHAR(100) NOT NULL,\ + lat DOUBLE NULL,\ + lng DOUBLE NULL)\ + STORED AS TEXTFILE + LOCATION '/tmp/example.csv", + ); + let ast = one_statement_parses_to( + &sql, + "CREATE EXTERNAL TABLE uk_cities (\ + name character varying(100) NOT NULL, \ + lat double, \ + lng double) \ + STORED AS TEXTFILE LOCATION '/tmp/example.csv'", + ); + match ast { + SQLStatement::SQLCreateExternalTable { + name, + columns, + file_format, + location, + } => { + assert_eq!("uk_cities", name.to_string()); + assert_eq!(3, columns.len()); + + let c_name = &columns[0]; + assert_eq!("name", c_name.name); + assert_eq!(SQLType::Varchar(Some(100)), c_name.data_type); + assert_eq!(false, c_name.allow_null); + + let c_lat = &columns[1]; + assert_eq!("lat", c_lat.name); + assert_eq!(SQLType::Double, c_lat.data_type); + assert_eq!(true, c_lat.allow_null); + + let c_lng = &columns[2]; + assert_eq!("lng", c_lng.name); + assert_eq!(SQLType::Double, c_lng.data_type); + assert_eq!(true, c_lng.allow_null); + + assert_eq!(FileFormat::TEXTFILE, file_format); + assert_eq!("/tmp/example.csv", location); + } + _ => assert!(false), + } +} + #[test] fn parse_scalar_function_in_projection() { let sql = "SELECT sqrt(id) FROM foo"; From 35556593f5a35fb368841273a89c36ab5f8164a5 Mon Sep 17 00:00:00 2001 From: Zhiyuan Zheng Date: Thu, 11 Apr 2019 10:53:33 +0800 Subject: [PATCH 3/4] Delete test sql file. --- sql.sql | 1 - 1 file changed, 1 deletion(-) delete mode 100644 sql.sql diff --git a/sql.sql b/sql.sql deleted file mode 100644 index bc6fd49d..00000000 --- a/sql.sql +++ /dev/null @@ -1 +0,0 @@ -CREATE EXTERNAL TABLE T (X INT) STORED AS TEXTFILE LOCATION '/home/admin/a.csv'; From d8f824c400ddf573ef7103631b6ba0677892912f Mon Sep 17 00:00:00 2001 From: Zhiyuan Zheng Date: Sun, 14 Apr 2019 01:05:26 +0800 Subject: [PATCH 4/4] merge CreateExternalTable & CreateTable. --- src/sqlast/mod.rs | 45 ++++++++++++------------- src/sqlparser.rs | 10 ++++-- tests/sqlparser_generic.rs | 66 +++++++------------------------------ tests/sqlparser_postgres.rs | 24 ++++++++++++-- 4 files changed, 63 insertions(+), 82 deletions(-) diff --git a/src/sqlast/mod.rs b/src/sqlast/mod.rs index bd2de545..eef10fcd 100644 --- a/src/sqlast/mod.rs +++ b/src/sqlast/mod.rs @@ -249,15 +249,9 @@ pub enum SQLStatement { name: SQLObjectName, /// Optional schema columns: Vec, - }, - /// CREATE EXTERNAL TABLE - SQLCreateExternalTable { - /// Table name - name: SQLObjectName, - /// Optional schema - columns: Vec, - file_format: FileFormat, - location: String, + external: bool, + file_format: Option, + location: Option, }, /// ALTER TABLE SQLAlterTable { @@ -370,21 +364,13 @@ impl ToString for SQLStatement { query.to_string() ) } - SQLStatement::SQLCreateTable { name, columns } => format!( - "CREATE TABLE {} ({})", - name.to_string(), - columns - .iter() - .map(|c| c.to_string()) - .collect::>() - .join(", ") - ), - SQLStatement::SQLCreateExternalTable { + SQLStatement::SQLCreateTable { name, columns, + external, file_format, location, - } => format!( + } if *external => format!( "CREATE EXTERNAL TABLE {} ({}) STORED AS {} LOCATION '{}'", name.to_string(), columns @@ -392,8 +378,23 @@ impl ToString for SQLStatement { .map(|c| c.to_string()) .collect::>() .join(", "), - file_format.to_string(), - location + file_format.as_ref().map(|f| f.to_string()).unwrap(), + location.as_ref().unwrap() + ), + SQLStatement::SQLCreateTable { + name, + columns, + external: _, + file_format: _, + location: _, + } => format!( + "CREATE TABLE {} ({})", + name.to_string(), + columns + .iter() + .map(|c| c.to_string()) + .collect::>() + .join(", ") ), SQLStatement::SQLAlterTable { name, operation } => { format!("ALTER TABLE {} {}", name.to_string(), operation.to_string()) diff --git a/src/sqlparser.rs b/src/sqlparser.rs index 726d9abe..624805a1 100644 --- a/src/sqlparser.rs +++ b/src/sqlparser.rs @@ -641,11 +641,12 @@ impl Parser { self.expect_keyword("LOCATION")?; let location = self.parse_literal_string()?; - Ok(SQLStatement::SQLCreateExternalTable { + Ok(SQLStatement::SQLCreateTable { name: table_name, columns, - file_format, - location, + external: true, + file_format: Some(file_format), + location: Some(location), }) } @@ -676,6 +677,9 @@ impl Parser { Ok(SQLStatement::SQLCreateTable { name: table_name, columns, + external: false, + file_format: None, + location: None, }) } diff --git a/tests/sqlparser_generic.rs b/tests/sqlparser_generic.rs index a46b3ee0..9946448f 100644 --- a/tests/sqlparser_generic.rs +++ b/tests/sqlparser_generic.rs @@ -434,7 +434,13 @@ fn parse_create_table() { lng double)", ); match ast { - SQLStatement::SQLCreateTable { name, columns } => { + SQLStatement::SQLCreateTable { + name, + columns, + external: _, + file_format: _, + location: _, + } => { assert_eq!("uk_cities", name.to_string()); assert_eq!(3, columns.len()); @@ -475,9 +481,10 @@ fn parse_create_external_table() { STORED AS TEXTFILE LOCATION '/tmp/example.csv'", ); match ast { - SQLStatement::SQLCreateExternalTable { + SQLStatement::SQLCreateTable { name, columns, + external, file_format, location, } => { @@ -499,58 +506,9 @@ fn parse_create_external_table() { assert_eq!(SQLType::Double, c_lng.data_type); assert_eq!(true, c_lng.allow_null); - assert_eq!(FileFormat::TEXTFILE, file_format); - assert_eq!("/tmp/example.csv", location); - } - _ => assert!(false), - } -} - -#[test] -fn parse_create_external_table_newline() { - let sql = String::from( - "CREATE EXTERNAL TABLE uk_cities (\ - name VARCHAR(100) NOT NULL,\ - lat DOUBLE NULL,\ - lng DOUBLE NULL)\ - STORED AS TEXTFILE - LOCATION '/tmp/example.csv", - ); - let ast = one_statement_parses_to( - &sql, - "CREATE EXTERNAL TABLE uk_cities (\ - name character varying(100) NOT NULL, \ - lat double, \ - lng double) \ - STORED AS TEXTFILE LOCATION '/tmp/example.csv'", - ); - match ast { - SQLStatement::SQLCreateExternalTable { - name, - columns, - file_format, - location, - } => { - assert_eq!("uk_cities", name.to_string()); - assert_eq!(3, columns.len()); - - let c_name = &columns[0]; - assert_eq!("name", c_name.name); - assert_eq!(SQLType::Varchar(Some(100)), c_name.data_type); - assert_eq!(false, c_name.allow_null); - - let c_lat = &columns[1]; - assert_eq!("lat", c_lat.name); - assert_eq!(SQLType::Double, c_lat.data_type); - assert_eq!(true, c_lat.allow_null); - - let c_lng = &columns[2]; - assert_eq!("lng", c_lng.name); - assert_eq!(SQLType::Double, c_lng.data_type); - assert_eq!(true, c_lng.allow_null); - - assert_eq!(FileFormat::TEXTFILE, file_format); - assert_eq!("/tmp/example.csv", location); + assert!(external); + assert_eq!(FileFormat::TEXTFILE, file_format.unwrap()); + assert_eq!("/tmp/example.csv", location.unwrap()); } _ => assert!(false), } diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 80e57176..4c35a3cf 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -163,7 +163,13 @@ fn parse_create_table_with_defaults() { active integer NOT NULL)", ); match one_statement_parses_to(&sql, "") { - SQLStatement::SQLCreateTable { name, columns } => { + SQLStatement::SQLCreateTable { + name, + columns, + external: _, + file_format: _, + location: _, + } => { assert_eq!("public.customer", name.to_string()); assert_eq!(10, columns.len()); @@ -204,7 +210,13 @@ fn parse_create_table_from_pg_dump() { active integer )"); match one_statement_parses_to(&sql, "") { - SQLStatement::SQLCreateTable { name, columns } => { + SQLStatement::SQLCreateTable { + name, + columns, + external: _, + file_format: _, + location: _, + } => { assert_eq!("public.customer", name.to_string()); let c_customer_id = &columns[0]; @@ -261,7 +273,13 @@ fn parse_create_table_with_inherit() { )", ); match verified_stmt(&sql) { - SQLStatement::SQLCreateTable { name, columns } => { + SQLStatement::SQLCreateTable { + name, + columns, + external: _, + file_format: _, + location: _, + } => { assert_eq!("bazaar.settings", name.to_string()); let c_name = &columns[0];