mirror of
https://github.com/apache/datafusion-sqlparser-rs.git
synced 2025-08-07 07:48:02 +00:00
Add implementation for parsing SQL COPY
This commit is contained in:
parent
719df789e4
commit
199ec67da7
4 changed files with 123 additions and 0 deletions
|
@ -441,6 +441,8 @@ impl Dialect for GenericSqlDialect {
|
|||
"BYTEA",
|
||||
"TRUE",
|
||||
"FALSE",
|
||||
"COPY",
|
||||
"STDIN",
|
||||
];
|
||||
}
|
||||
|
||||
|
|
|
@ -85,6 +85,14 @@ pub enum ASTNode {
|
|||
/// VALUES (vector of rows to insert)
|
||||
values: Vec<Vec<ASTNode>>,
|
||||
},
|
||||
SQLCopy{
|
||||
/// TABLE
|
||||
table_name: String,
|
||||
/// COLUMNS
|
||||
columns: Vec<String>,
|
||||
/// VALUES a vector of values to be copied
|
||||
values: Vec<SQLValue>,
|
||||
},
|
||||
/// UPDATE
|
||||
SQLUpdate {
|
||||
/// TABLE
|
||||
|
@ -113,6 +121,23 @@ pub enum ASTNode {
|
|||
},
|
||||
}
|
||||
|
||||
/// Scalar SQL values: integer, floating point, string, boolean, NULL and timestamp
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
pub enum SQLValue{
|
||||
/// Literal signed 64-bit integer
|
||||
SQLLiteralLong(i64),
|
||||
/// Literal 64-bit floating point value
|
||||
SQLLiteralDouble(f64),
|
||||
/// Literal string
|
||||
SQLLiteralString(String),
|
||||
/// Boolean value, `true` or `false`
|
||||
SQLBoolean(bool),
|
||||
/// `NULL` value
|
||||
SQLNullValue,
|
||||
/// Timestamp literal, carried as a string
|
||||
SQLLiteralTimestamp(String),
|
||||
}
|
||||
|
||||
/// SQL assignment `foo = expr` as used in SQLUpdate
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
pub struct SQLAssigment {
|
||||
|
|
|
@ -93,6 +93,7 @@ impl Parser {
|
|||
"CREATE" => Ok(self.parse_create()?),
|
||||
"DELETE" => Ok(self.parse_delete()?),
|
||||
"INSERT" => Ok(self.parse_insert()?),
|
||||
"COPY" => Ok(self.parse_copy()?),
|
||||
"TRUE" => Ok(ASTNode::SQLBoolean(true)),
|
||||
"FALSE" => Ok(ASTNode::SQLBoolean(false)),
|
||||
"NULL" => Ok(ASTNode::SQLNullValue),
|
||||
|
@ -450,6 +451,70 @@ impl Parser {
|
|||
}
|
||||
}
|
||||
|
||||
/// Parse a copy statement
|
||||
pub fn parse_copy(&mut self) -> Result<ASTNode, ParserError> {
|
||||
let table_name = self.parse_tablename()?;
|
||||
let columns = if self.consume_token(&Token::LParen)?{
|
||||
let column_names = self.parse_column_names()?;
|
||||
self.consume_token(&Token::RParen)?;
|
||||
column_names
|
||||
}else{
|
||||
vec![]
|
||||
};
|
||||
self.parse_keyword("FROM");
|
||||
self.parse_keyword("STDIN");
|
||||
self.consume_token(&Token::SemiColon);
|
||||
let values = self.parse_tsv()?;
|
||||
Ok(ASTNode::SQLCopy{table_name, columns, values})
|
||||
}
|
||||
|
||||
/// Parse a tab separated values in
|
||||
/// COPY payload
|
||||
fn parse_tsv(&mut self) -> Result<Vec<SQLValue>, ParserError>{
|
||||
let mut values: Vec<SQLValue> = vec![];
|
||||
loop {
|
||||
if let Ok(true) = self.consume_token(&Token::Backslash){
|
||||
if let Ok(true) = self.consume_token(&Token::Period) {
|
||||
break;
|
||||
}else{
|
||||
//TODO: handle escape of values in characters
|
||||
}
|
||||
}else{
|
||||
values.push(self.parse_sql_value()?);
|
||||
}
|
||||
}
|
||||
Ok(values)
|
||||
|
||||
}
|
||||
|
||||
fn parse_sql_value(&mut self) -> Result<SQLValue, ParserError> {
|
||||
match self.next_token() {
|
||||
Some(t) => {
|
||||
match t {
|
||||
Token::Keyword(k) => match k.to_uppercase().as_ref() {
|
||||
"TRUE" => Ok(SQLValue::SQLBoolean(true)),
|
||||
"FALSE" => Ok(SQLValue::SQLBoolean(false)),
|
||||
"NULL" => Ok(SQLValue::SQLNullValue),
|
||||
_ => return parser_err!(format!("No value parser for keyword {}", k)),
|
||||
},
|
||||
//TODO: parse the timestamp here
|
||||
Token::Number(ref n) if n.contains(".") => match n.parse::<f64>() {
|
||||
Ok(n) => Ok(SQLValue::SQLLiteralDouble(n)),
|
||||
Err(e) => parser_err!(format!("Could not parse '{}' as i64: {}", n, e)),
|
||||
},
|
||||
Token::Number(ref n) => match n.parse::<i64>() {
|
||||
Ok(n) => Ok(SQLValue::SQLLiteralLong(n)),
|
||||
Err(e) => parser_err!(format!("Could not parse '{}' as i64: {}", n, e)),
|
||||
},
|
||||
Token::Identifier(id) => Ok(SQLValue::SQLLiteralString(id.to_string())),
|
||||
Token::String(ref s) => Ok(SQLValue::SQLLiteralString(s.to_string())),
|
||||
other => parser_err!(format!("Unsupported value: {:?}", self.peek_token())),
|
||||
}
|
||||
}
|
||||
None => parser_err!("Expecting a value, but found EOF"),
|
||||
}
|
||||
}
|
||||
|
||||
/// Parse a literal integer/long
|
||||
pub fn parse_literal_int(&mut self) -> Result<i64, ParserError> {
|
||||
match self.next_token() {
|
||||
|
@ -1299,6 +1364,31 @@ mod tests {
|
|||
}
|
||||
}
|
||||
|
||||
/// Parses a `COPY ... FROM stdin;` statement followed by a payload that is
/// terminated with `\.` and expects the parse to succeed.
#[test]
fn parse_copy_example() {
    // NOTE(review): the payload rows contain timestamps, and
    // `parse_sql_value` still carries a TODO for timestamp parsing -
    // confirm this assertion holds once the parser supports them.
    let sql = String::from("
COPY public.actor (actor_id, first_name, last_name, last_update) FROM stdin;
1 PENELOPE GUINESS 2006-02-15 09:34:33
2 NICK WAHLBERG 2006-02-15 09:34:33
3 ED CHASE 2006-02-15 09:34:33
4 JENNIFER DAVIS 2006-02-15 09:34:33
5 JOHNNY LOLLOBRIGIDA 2006-02-15 09:34:33
6 BETTE NICHOLSON 2006-02-15 09:34:33
7 GRACE MOSTEL 2006-02-15 09:34:33
8 MATTHEW JOHANSSON 2006-02-15 09:34:33
9 JOE SWANK 2006-02-15 09:34:33
10 CHRISTIAN GABLE 2006-02-15 09:34:33
11 ZERO CAGE 2006-02-15 09:34:33
12 KARL BERRY 2006-02-15 09:34:33
\\.
");
    let mut parser = parser(&sql);
    let ast = parser.parse();
    // Printed output is only shown by the test harness when the test fails.
    println!("ast: {:?}", ast);
    assert!(ast.is_ok());
}
|
||||
|
||||
#[test]
|
||||
fn parse_scalar_function_in_projection() {
|
||||
let sql = String::from("SELECT sqrt(id) FROM foo");
|
||||
|
|
|
@ -70,6 +70,10 @@ pub enum Token {
|
|||
Colon,
|
||||
/// DoubleColon `::` (used for casting in postgresql)
|
||||
DoubleColon,
|
||||
/// SemiColon `;` used as separator for COPY and payload
|
||||
SemiColon,
|
||||
/// Backslash `\` used in terminating the COPY payload with `\.`
|
||||
Backslash,
|
||||
/// Left bracket `[`
|
||||
LBracket,
|
||||
/// Right bracket `]`
|
||||
|
@ -265,6 +269,8 @@ impl<'a> Tokenizer<'a> {
|
|||
None => Ok(Some(Token::Colon)),
|
||||
}
|
||||
}
|
||||
';' => self.consume_and_return(chars, Token::SemiColon),
|
||||
'\\' => self.consume_and_return(chars, Token::Backslash),
|
||||
// brackets
|
||||
'[' => self.consume_and_return(chars, Token::LBracket),
|
||||
']' => self.consume_and_return(chars, Token::RBracket),
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue