From b6c230f3caa7e25249799691f8d74724d6f8a902 Mon Sep 17 00:00:00 2001 From: Charlie Marsh Date: Sun, 6 Nov 2022 20:43:41 -0500 Subject: [PATCH] Implement Tok::Comment --- parser/python.lalrpop | 1 + parser/src/lexer.rs | 18 ++++++++++++------ parser/src/parser.rs | 11 ++++++++--- parser/src/token.rs | 2 ++ 4 files changed, 23 insertions(+), 9 deletions(-) diff --git a/parser/python.lalrpop b/parser/python.lalrpop index b2212a3..cfd6779 100644 --- a/parser/python.lalrpop +++ b/parser/python.lalrpop @@ -1414,5 +1414,6 @@ extern { name => lexer::Tok::Name { name: }, "\n" => lexer::Tok::Newline, ";" => lexer::Tok::Semi, + "#" => lexer::Tok::Comment, } } diff --git a/parser/src/lexer.rs b/parser/src/lexer.rs index 2f72cf7..37b4194 100644 --- a/parser/src/lexer.rs +++ b/parser/src/lexer.rs @@ -399,13 +399,16 @@ where } /// Skip everything until end of line - fn lex_comment(&mut self) { + fn lex_comment(&mut self) -> LexResult { + let start_pos = self.get_pos(); self.next_char(); loop { match self.chr0 { - Some('\n') => return, + Some('\n') | None => { + let end_pos = self.get_pos(); + return Ok((start_pos, Tok::Comment, end_pos)); + } Some(_) => {} - None => return, } self.next_char(); } @@ -690,7 +693,8 @@ where tabs += 1; } Some('#') => { - self.lex_comment(); + let comment = self.lex_comment()?; + self.emit(comment); spaces = 0; tabs = 0; } @@ -832,7 +836,8 @@ where self.emit(number); } '#' => { - self.lex_comment(); + let comment = self.lex_comment()?; + self.emit(comment); } '"' | '\'' => { let string = self.lex_string(false, false, false, false)?; @@ -1350,7 +1355,7 @@ mod tests { fn $name() { let source = format!(r"99232 # {}", $eol); let tokens = lex_source(&source); - assert_eq!(tokens, vec![Tok::Int { value: BigInt::from(99232) }, Tok::Newline]); + assert_eq!(tokens, vec![Tok::Int { value: BigInt::from(99232) }, Tok::Comment, Tok::Newline]); } )* } @@ -1374,6 +1379,7 @@ mod tests { tokens, vec![ Tok::Int { value: BigInt::from(123) }, + Tok::Comment, 
Tok::Newline, Tok::Int { value: BigInt::from(456) }, Tok::Newline, diff --git a/parser/src/parser.rs b/parser/src/parser.rs index e46aa28..d0f71fc 100644 --- a/parser/src/parser.rs +++ b/parser/src/parser.rs @@ -5,9 +5,10 @@ //! parse a whole program, a single statement, or a single //! expression. -use crate::lexer::LexResult; +use crate::lexer::{LexResult, Tok}; pub use crate::mode::Mode; use crate::{ast, error::ParseError, lexer, python}; +use itertools::Itertools; use std::iter; /* @@ -74,7 +75,9 @@ pub fn parse_expression(source: &str, path: &str) -> Result Result { let lxr = lexer::make_tokenizer(source); let marker_token = (Default::default(), mode.to_marker(), Default::default()); - let tokenizer = iter::once(Ok(marker_token)).chain(lxr); + let tokenizer = iter::once(Ok(marker_token)) + .chain(lxr) + .filter_ok(|(_, tok, _)| !matches!(tok, Tok::Comment)); python::TopParser::new() .parse(tokenizer) @@ -88,7+91,9 @@ pub fn parse_tokens( source_path: &str, ) -> Result { let marker_token = (Default::default(), mode.to_marker(), Default::default()); - let tokenizer = iter::once(Ok(marker_token)).chain(lxr); + let tokenizer = iter::once(Ok(marker_token)) + .chain(lxr) + .filter_ok(|(_, tok, _)| !matches!(tok, Tok::Comment)); python::TopParser::new() .parse(tokenizer) diff --git a/parser/src/token.rs b/parser/src/token.rs index 5fd6728..ba76f6a 100644 --- a/parser/src/token.rs +++ b/parser/src/token.rs @@ -25,6 +25,7 @@ pub enum Tok { Rsqb, Colon, Comma, + Comment, Semi, Plus, Minus, @@ -155,6 +156,7 @@ impl fmt::Display for Tok { Rsqb => f.write_str("']'"), Colon => f.write_str("':'"), Comma => f.write_str("','"), + Comment => f.write_str("'#'"), Semi => f.write_str("';'"), Plus => f.write_str("'+'"), Minus => f.write_str("'-'"),