mirror of
https://github.com/apache/datafusion-sqlparser-rs.git
synced 2025-08-07 07:48:02 +00:00
Add implementation for parsing SQL COPY
This commit is contained in:
parent
719df789e4
commit
199ec67da7
4 changed files with 123 additions and 0 deletions
|
@ -441,6 +441,8 @@ impl Dialect for GenericSqlDialect {
|
||||||
"BYTEA",
|
"BYTEA",
|
||||||
"TRUE",
|
"TRUE",
|
||||||
"FALSE",
|
"FALSE",
|
||||||
|
"COPY",
|
||||||
|
"STDIN",
|
||||||
];
|
];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -85,6 +85,14 @@ pub enum ASTNode {
|
||||||
/// VALUES (vector of rows to insert)
|
/// VALUES (vector of rows to insert)
|
||||||
values: Vec<Vec<ASTNode>>,
|
values: Vec<Vec<ASTNode>>,
|
||||||
},
|
},
|
||||||
|
SQLCopy{
|
||||||
|
/// TABLE
|
||||||
|
table_name: String,
|
||||||
|
/// COLUMNS
|
||||||
|
columns: Vec<String>,
|
||||||
|
/// VALUES a vector of values to be copied
|
||||||
|
values: Vec<SQLValue>,
|
||||||
|
},
|
||||||
/// UPDATE
|
/// UPDATE
|
||||||
SQLUpdate {
|
SQLUpdate {
|
||||||
/// TABLE
|
/// TABLE
|
||||||
|
@ -113,6 +121,23 @@ pub enum ASTNode {
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// A single literal SQL value (integer, float, string, boolean, NULL or
/// timestamp), as carried in the `values` of a COPY statement.
#[derive(Debug, Clone, PartialEq)]
pub enum SQLValue {
    /// Signed 64-bit integer literal
    SQLLiteralLong(i64),
    /// 64-bit floating point literal
    SQLLiteralDouble(f64),
    /// String literal
    SQLLiteralString(String),
    /// Boolean literal, `true` or `false`
    SQLBoolean(bool),
    /// The NULL value
    SQLNullValue,
    /// Timestamp literal, carried as a `String`
    SQLLiteralTimestamp(String),
}
|
||||||
|
|
||||||
/// SQL assignment `foo = expr` as used in SQLUpdate
|
/// SQL assignment `foo = expr` as used in SQLUpdate
|
||||||
#[derive(Debug, Clone, PartialEq)]
|
#[derive(Debug, Clone, PartialEq)]
|
||||||
pub struct SQLAssigment {
|
pub struct SQLAssigment {
|
||||||
|
|
|
@ -93,6 +93,7 @@ impl Parser {
|
||||||
"CREATE" => Ok(self.parse_create()?),
|
"CREATE" => Ok(self.parse_create()?),
|
||||||
"DELETE" => Ok(self.parse_delete()?),
|
"DELETE" => Ok(self.parse_delete()?),
|
||||||
"INSERT" => Ok(self.parse_insert()?),
|
"INSERT" => Ok(self.parse_insert()?),
|
||||||
|
"COPY" => Ok(self.parse_copy()?),
|
||||||
"TRUE" => Ok(ASTNode::SQLBoolean(true)),
|
"TRUE" => Ok(ASTNode::SQLBoolean(true)),
|
||||||
"FALSE" => Ok(ASTNode::SQLBoolean(false)),
|
"FALSE" => Ok(ASTNode::SQLBoolean(false)),
|
||||||
"NULL" => Ok(ASTNode::SQLNullValue),
|
"NULL" => Ok(ASTNode::SQLNullValue),
|
||||||
|
@ -450,6 +451,70 @@ impl Parser {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Parse a copy statement
|
||||||
|
pub fn parse_copy(&mut self) -> Result<ASTNode, ParserError> {
|
||||||
|
let table_name = self.parse_tablename()?;
|
||||||
|
let columns = if self.consume_token(&Token::LParen)?{
|
||||||
|
let column_names = self.parse_column_names()?;
|
||||||
|
self.consume_token(&Token::RParen)?;
|
||||||
|
column_names
|
||||||
|
}else{
|
||||||
|
vec![]
|
||||||
|
};
|
||||||
|
self.parse_keyword("FROM");
|
||||||
|
self.parse_keyword("STDIN");
|
||||||
|
self.consume_token(&Token::SemiColon);
|
||||||
|
let values = self.parse_tsv()?;
|
||||||
|
Ok(ASTNode::SQLCopy{table_name, columns, values})
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Parse a tab separated values in
|
||||||
|
/// COPY payload
|
||||||
|
fn parse_tsv(&mut self) -> Result<Vec<SQLValue>, ParserError>{
|
||||||
|
let mut values: Vec<SQLValue> = vec![];
|
||||||
|
loop {
|
||||||
|
if let Ok(true) = self.consume_token(&Token::Backslash){
|
||||||
|
if let Ok(true) = self.consume_token(&Token::Period) {
|
||||||
|
break;
|
||||||
|
}else{
|
||||||
|
//TODO: handle escape of values in characters
|
||||||
|
}
|
||||||
|
}else{
|
||||||
|
values.push(self.parse_sql_value()?);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Ok(values)
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
fn parse_sql_value(&mut self) -> Result<SQLValue, ParserError> {
|
||||||
|
match self.next_token() {
|
||||||
|
Some(t) => {
|
||||||
|
match t {
|
||||||
|
Token::Keyword(k) => match k.to_uppercase().as_ref() {
|
||||||
|
"TRUE" => Ok(SQLValue::SQLBoolean(true)),
|
||||||
|
"FALSE" => Ok(SQLValue::SQLBoolean(false)),
|
||||||
|
"NULL" => Ok(SQLValue::SQLNullValue),
|
||||||
|
_ => return parser_err!(format!("No value parser for keyword {}", k)),
|
||||||
|
},
|
||||||
|
//TODO: parse the timestamp here
|
||||||
|
Token::Number(ref n) if n.contains(".") => match n.parse::<f64>() {
|
||||||
|
Ok(n) => Ok(SQLValue::SQLLiteralDouble(n)),
|
||||||
|
Err(e) => parser_err!(format!("Could not parse '{}' as i64: {}", n, e)),
|
||||||
|
},
|
||||||
|
Token::Number(ref n) => match n.parse::<i64>() {
|
||||||
|
Ok(n) => Ok(SQLValue::SQLLiteralLong(n)),
|
||||||
|
Err(e) => parser_err!(format!("Could not parse '{}' as i64: {}", n, e)),
|
||||||
|
},
|
||||||
|
Token::Identifier(id) => Ok(SQLValue::SQLLiteralString(id.to_string())),
|
||||||
|
Token::String(ref s) => Ok(SQLValue::SQLLiteralString(s.to_string())),
|
||||||
|
other => parser_err!(format!("Unsupported value: {:?}", self.peek_token())),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
None => parser_err!("Expecting a value, but found EOF"),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// Parse a literal integer/long
|
/// Parse a literal integer/long
|
||||||
pub fn parse_literal_int(&mut self) -> Result<i64, ParserError> {
|
pub fn parse_literal_int(&mut self) -> Result<i64, ParserError> {
|
||||||
match self.next_token() {
|
match self.next_token() {
|
||||||
|
@ -1299,6 +1364,31 @@ mod tests {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Parses a `COPY ... FROM stdin;` statement followed by a payload that is
/// terminated with `\.` and checks that the parser accepts it.
///
/// NOTE(review): the payload rows contain timestamps, and `parse_sql_value`
/// still carries a TODO for timestamp parsing — confirm this input is
/// expected to parse as written.
#[test]
fn parse_copy_example() {
    let sql = String::from("
COPY public.actor (actor_id, first_name, last_name, last_update) FROM stdin;
1 PENELOPE GUINESS 2006-02-15 09:34:33
2 NICK WAHLBERG 2006-02-15 09:34:33
3 ED CHASE 2006-02-15 09:34:33
4 JENNIFER DAVIS 2006-02-15 09:34:33
5 JOHNNY LOLLOBRIGIDA 2006-02-15 09:34:33
6 BETTE NICHOLSON 2006-02-15 09:34:33
7 GRACE MOSTEL 2006-02-15 09:34:33
8 MATTHEW JOHANSSON 2006-02-15 09:34:33
9 JOE SWANK 2006-02-15 09:34:33
10 CHRISTIAN GABLE 2006-02-15 09:34:33
11 ZERO CAGE 2006-02-15 09:34:33
12 KARL BERRY 2006-02-15 09:34:33
\\.
");
    let mut parser = parser(&sql);
    let ast = parser.parse();
    println!("ast: {:?}", ast);
    // The outcome is decided by the assertion alone; on failure the message
    // shows the parser's result.
    assert!(ast.is_ok(), "failed to parse COPY statement: {:?}", ast);
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn parse_scalar_function_in_projection() {
|
fn parse_scalar_function_in_projection() {
|
||||||
let sql = String::from("SELECT sqrt(id) FROM foo");
|
let sql = String::from("SELECT sqrt(id) FROM foo");
|
||||||
|
|
|
@ -70,6 +70,10 @@ pub enum Token {
|
||||||
Colon,
|
Colon,
|
||||||
/// DoubleColon `::` (used for casting in postgresql)
|
/// DoubleColon `::` (used for casting in postgresql)
|
||||||
DoubleColon,
|
DoubleColon,
|
||||||
|
/// SemiColon `;` used as separator for COPY and payload
|
||||||
|
SemiColon,
|
||||||
|
/// Backslash `\` used in terminating the COPY payload with `\.`
|
||||||
|
Backslash,
|
||||||
/// Left bracket `[`
|
/// Left bracket `[`
|
||||||
LBracket,
|
LBracket,
|
||||||
/// Right bracket `]`
|
/// Right bracket `]`
|
||||||
|
@ -265,6 +269,8 @@ impl<'a> Tokenizer<'a> {
|
||||||
None => Ok(Some(Token::Colon)),
|
None => Ok(Some(Token::Colon)),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
';' => self.consume_and_return(chars, Token::SemiColon),
|
||||||
|
'\\' => self.consume_and_return(chars, Token::Backslash),
|
||||||
// brackets
|
// brackets
|
||||||
'[' => self.consume_and_return(chars, Token::LBracket),
|
'[' => self.consume_and_return(chars, Token::LBracket),
|
||||||
']' => self.consume_and_return(chars, Token::RBracket),
|
']' => self.consume_and_return(chars, Token::RBracket),
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue