From 72cff632c1e58a703955d51fe1e30d986296c1ed Mon Sep 17 00:00:00 2001
From: Andy Grove
Date: Sat, 1 Sep 2018 08:39:27 -0600
Subject: [PATCH] Roughing out pratt parser logic

---
 examples/acme_parser.rs | 61 +++++++++++++++++++++++++++----------
 src/ansi/parser.rs      |  2 +-
 src/ansi/tokenizer.rs   |  4 +++
 src/parser.rs           | 67 +++++++++++++++++++++++------------------
 src/tokenizer.rs        |  3 ++
 5 files changed, 91 insertions(+), 46 deletions(-)

diff --git a/examples/acme_parser.rs b/examples/acme_parser.rs
index a6f94a1e..5a9755ca 100644
--- a/examples/acme_parser.rs
+++ b/examples/acme_parser.rs
@@ -7,24 +7,21 @@ use datafusion_sql::ansi::tokenizer::ANSISQLTokenizer;
 use datafusion_sql::tokenizer::*;
 use datafusion_sql::parser::*;
 
-///
 /// This example demonstrates building a custom ACME parser that extends the generic parser
-/// by adding support for a factorial operator !!
-///
+/// by adding support for a factorial expression `!! expr`.
 
+/// Custom SQLToken
 #[derive(Debug,PartialEq)]
 enum AcmeToken {
-    /// Factorial operator `!!`
+    /// Factorial token `!!`
     Factorial
 }
 
+/// Custom SQLExpr
 #[derive(Debug)]
-enum AcmeOperator {
-    Factorial
-}
-
-#[derive(Debug)]
-enum AcmeTokenizerError {
+enum AcmeExpr {
+    /// Factorial expression
+    Factorial(Box<SQLExpr<AcmeExpr>>)
 }
 
 struct AcmeTokenizer {
@@ -34,6 +31,10 @@ struct AcmeTokenizer {
 /// The ACME tokenizer looks for the factorial operator `!!` but delegates everything else
 impl SQLTokenizer<AcmeToken> for AcmeTokenizer {
 
+    fn precedence(&self, token: &SQLToken<AcmeToken>) -> usize {
+        unimplemented!()
+    }
+
     fn peek_token(&self, chars: &mut Peekable<Chars>) -> Result<Option<SQLToken<AcmeToken>>, TokenizerError<AcmeToken>> {
         unimplemented!()
     }
@@ -53,7 +54,7 @@ impl SQLTokenizer<AcmeToken> for AcmeTokenizer {
                     },
                     None => Ok(Some(SQLToken::Not))
                 }
-            },
+            }
             _ => self.generic.next_token(chars)
         }
         _ => self.generic.next_token(chars)
@@ -61,19 +62,47 @@ impl SQLTokenizer<AcmeToken> for AcmeTokenizer {
     }
 }
 
+struct AcmeParser<'a> {
+    chars: Peekable<Chars<'a>>
+}
+
+impl<'a> AcmeParser<'a> {
+
+    pub fn new(sql: &'a str) -> Self {
+        AcmeParser {
+            chars: sql.chars().peekable()
+        }
+    }
+}
+
+impl<'a> SQLParser<AcmeToken, AcmeExpr> for AcmeParser<'a> {
+
+    fn parse_prefix(&mut self) -> Result<Box<SQLExpr<AcmeExpr>>, ParserError<AcmeToken>> {
+        unimplemented!()
+    }
+
+    fn parse_infix(&mut self, left: &SQLExpr<AcmeExpr>, precedence: usize) -> Result<Option<Box<SQLExpr<AcmeExpr>>>, ParserError<AcmeToken>> {
+        unimplemented!()
+    }
+}
 
 fn main() {
 
     let sql = "1 + !! 5 * 2";
 
-    let mut acme_tokenizer = AcmeTokenizer {
-        generic: ANSISQLTokenizer { }
-    };
+    let acme_parser = AcmeParser::new(sql);
 
-    let tokens = tokenize(&sql, &mut acme_tokenizer).unwrap();
-    println!("tokens = {:?}", tokens);
+    //acme_parser
+
+//    let mut acme_tokenizer = AcmeTokenizer {
+//        generic: ANSISQLTokenizer { }
+//    };
+//
+//    let tokens = tokenize(&sql, &mut acme_tokenizer).unwrap();
+//
+//    println!("tokens = {:?}", tokens);
diff --git a/src/ansi/parser.rs b/src/ansi/parser.rs
index e2580519..bf93dd20 100644
--- a/src/ansi/parser.rs
+++ b/src/ansi/parser.rs
@@ -29,7 +29,7 @@ impl<'a, TokenType, ExprType> SQLParser<TokenType, ExprType> for ANSISQLParser<'a, TokenType>
         }
     }
 
-    fn parse_infix(&mut self, left: SQLExpr<ExprType>) -> Result<Option<Box<SQLExpr<ExprType>>>, ParserError<TokenType>> {
+    fn parse_infix(&mut self, left: &SQLExpr<ExprType>, precedence: usize) -> Result<Option<Box<SQLExpr<ExprType>>>, ParserError<TokenType>> {
         unimplemented!()
     }
 }
diff --git a/src/ansi/tokenizer.rs b/src/ansi/tokenizer.rs
index 81d88eda..b2144ff4 100644
--- a/src/ansi/tokenizer.rs
+++ b/src/ansi/tokenizer.rs
@@ -10,6 +10,10 @@ pub struct ANSISQLTokenizer {}
 impl<TokenType> SQLTokenizer<TokenType> for ANSISQLTokenizer
     where TokenType: Debug + PartialEq {
 
+    fn precedence(&self, token: &SQLToken<TokenType>) -> usize {
+        unimplemented!()
+    }
+
     fn peek_token(&self, chars: &mut Peekable<Chars>) -> Result<Option<SQLToken<TokenType>>, TokenizerError<TokenType>> {
         unimplemented!()
     }
diff --git a/src/parser.rs b/src/parser.rs
index 2c32f01f..260984c6 100644
--- a/src/parser.rs
+++ b/src/parser.rs
@@ -1,5 +1,8 @@
 use std::cmp::PartialEq;
 use std::fmt::Debug;
+use std::rc::Rc;
+use std::str::Chars;
+use std::iter::Peekable;
 
 use super::tokenizer::*;
 
@@ -105,41 +108,47 @@ impl<TokenType> From<TokenizerError<TokenType>> for ParserError<TokenType>
 
 pub trait SQLParser<TokenType, ExprType>
-    where TokenType: Debug + PartialEq, ExprType: Debug + PartialEq {
+    where TokenType: Debug + PartialEq, ExprType: Debug {
 
     /// parse the prefix and stop once an infix operator is reached
     fn parse_prefix(&mut self) -> Result<Box<SQLExpr<ExprType>>, ParserError<TokenType>> ;
     /// parse the next infix expression, returning None if the precedence has changed
-    fn parse_infix(&mut self, left: SQLExpr<ExprType>) -> Result<Option<Box<SQLExpr<ExprType>>>, ParserError<TokenType>>;
+    fn parse_infix(&mut self, left: &SQLExpr<ExprType>, precedence: usize) -> Result<Option<Box<SQLExpr<ExprType>>>, ParserError<TokenType>>;
+}
+
+
+
+struct PrattParser<'a, TokenType, ExprType> {
+    chars: Peekable<Chars<'a>>,
+    tokenizer: Rc<SQLTokenizer<TokenType>>,
+    parser: SQLParser<TokenType, ExprType>
+}
+
+impl<'a, TokenType, ExprType> PrattParser<'a, TokenType, ExprType>
+    where TokenType: Debug + PartialEq, ExprType: Debug {
+
+    fn parse_expr(&mut self) -> Result<Box<SQLExpr<ExprType>>, ParserError<TokenType>> {
+
+        let precedence: usize = 0;
+
+        let mut expr = self.parser.parse_prefix()?;
+
+        while let Some(token) = self.tokenizer.peek_token(&mut self.chars)? {
+
+            let next_precedence = self.tokenizer.precedence(&token);
+
+            if precedence >= next_precedence {
+                break;
+            }
+
+            expr = self.parser.parse_infix(&expr, next_precedence)?.unwrap(); //TODO: fix me
+        }
+
+        Ok(expr)
+    }
+
 }
 
-//
-//
-//struct GenericParser {
-//    tokenizer: SQLTokenizer
-//}
-//
-//impl GenericParser {
-//
-//    fn parse_expr(&mut self, precedence: u8) -> Result<Box<SQLExpr>, ParserError> {
-//
-//        let mut expr = self.parse_prefix()?;
-//
-//        // loop while there are more tokens and until the precedence changes
-//        while let Some(token) = self.tokenizer.peek_token()? {
-//
-//            let next_precedence = self.get_precedence(&token);
-//
-//            if precedence >= next_precedence {
-//                break;
-//            }
-//
-//            expr = self.parse_infix(expr, next_precedence)?;
-//        }
-//
-//        Ok(expr)
-//    }
-//
 //    fn parse_prefix(&mut self) -> Result<Box<SQLExpr>, ParserError> {
 //
 //        match self.tokenizer.peek_token()? {
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index 067f41b4..55604aa9 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -50,6 +50,9 @@ pub enum SQLToken<TokenType> {
 pub trait SQLTokenizer<TokenType>
     where TokenType: Debug + PartialEq {
 
+    /// get the precedence of a token
+    fn precedence(&self, token: &SQLToken<TokenType>) -> usize;
+
     /// return a reference to the next token but do not advance the index
     fn peek_token(&self, chars: &mut Peekable<Chars>) -> Result<Option<SQLToken<TokenType>>, TokenizerError<TokenType>>;
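
Note: `PrattParser::parse_expr` in src/parser.rs above roughs out the core Pratt (top-down operator precedence) loop: parse a prefix expression, then keep folding in infix expressions while the next token binds more tightly than the enclosing precedence. The following is a minimal, self-contained sketch of that loop; the `Token`, `Expr`, and `Parser` types, the precedence values, and the `!!` handling are illustrative stand-ins, not the crate's `SQLToken`/`SQLExpr`/`SQLParser` types.

```rust
// Illustrative sketch only; names and precedence values are hypothetical.

#[derive(Debug, Clone)]
enum Token {
    Number(i64),
    Plus,
    Star,
    Factorial, // the `!!` prefix token from the ACME example
}

#[derive(Debug)]
enum Expr {
    Number(i64),
    Binary(Box<Expr>, Token, Box<Expr>),
    Factorial(Box<Expr>),
}

struct Parser {
    tokens: Vec<Token>,
    pos: usize,
}

impl Parser {
    fn peek(&self) -> Option<&Token> {
        self.tokens.get(self.pos)
    }

    fn advance(&mut self) -> Option<Token> {
        let token = self.tokens.get(self.pos).cloned();
        self.pos += 1;
        token
    }

    /// Binding power of an infix operator; 0 means "not an infix operator".
    fn precedence(token: &Token) -> usize {
        match token {
            Token::Plus => 10,
            Token::Star => 20,
            _ => 0,
        }
    }

    /// The Pratt loop: parse a prefix expression, then keep consuming infix
    /// operators whose precedence is higher than the enclosing precedence.
    fn parse_expr(&mut self, precedence: usize) -> Result<Expr, String> {
        let mut expr = self.parse_prefix()?;
        while let Some(token) = self.peek() {
            let next_precedence = Self::precedence(token);
            if precedence >= next_precedence {
                break;
            }
            expr = self.parse_infix(expr, next_precedence)?;
        }
        Ok(expr)
    }

    fn parse_prefix(&mut self) -> Result<Expr, String> {
        match self.advance() {
            Some(Token::Number(n)) => Ok(Expr::Number(n)),
            Some(Token::Factorial) => {
                // `!!` binds tighter than any infix operator in this sketch
                let operand = self.parse_expr(100)?;
                Ok(Expr::Factorial(Box::new(operand)))
            }
            other => Err(format!("unexpected token in prefix position: {:?}", other)),
        }
    }

    fn parse_infix(&mut self, left: Expr, precedence: usize) -> Result<Expr, String> {
        let op = self.advance().ok_or("expected an infix operator")?;
        let right = self.parse_expr(precedence)?;
        Ok(Expr::Binary(Box::new(left), op, Box::new(right)))
    }
}

fn main() {
    // "1 + !! 5 * 2" from the example, pre-tokenized for brevity
    let mut parser = Parser {
        tokens: vec![
            Token::Number(1),
            Token::Plus,
            Token::Factorial,
            Token::Number(5),
            Token::Star,
            Token::Number(2),
        ],
        pos: 0,
    };
    println!("{:?}", parser.parse_expr(0));
}
```

Calling `parse_expr(0)` at the top level lets every infix operator bind, while each recursive call passes the current operator's precedence, so `*` groups tighter than `+` and the example input parses as `1 + ((!!5) * 2)`.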