diff --git a/crates/mbe/src/lib.rs b/crates/mbe/src/lib.rs index b58b86b38d..1a56878fdb 100644 --- a/crates/mbe/src/lib.rs +++ b/crates/mbe/src/lib.rs @@ -10,7 +10,7 @@ mod parser; mod expander; mod syntax_bridge; mod tt_iter; -mod subtree_source; +mod to_parser_tokens; #[cfg(test)] mod benchmark; diff --git a/crates/mbe/src/subtree_source.rs b/crates/mbe/src/subtree_source.rs deleted file mode 100644 index 6bdd787e30..0000000000 --- a/crates/mbe/src/subtree_source.rs +++ /dev/null @@ -1,174 +0,0 @@ -//! Our parser is generic over the source of tokens it parses. -//! -//! This module defines tokens sourced from declarative macros. - -use parser::{Token, TokenSource}; -use syntax::{lex_single_syntax_kind, SmolStr, SyntaxKind, SyntaxKind::*, T}; -use tt::buffer::TokenBuffer; - -#[derive(Debug, Clone, Eq, PartialEq)] -struct TtToken { - tt: Token, - text: SmolStr, -} - -pub(crate) struct SubtreeTokenSource { - cached: Vec, - curr: (Token, usize), -} - -impl<'a> SubtreeTokenSource { - pub(crate) fn new(buffer: &TokenBuffer) -> SubtreeTokenSource { - let mut current = buffer.begin(); - let mut cached = Vec::with_capacity(100); - - while !current.eof() { - let cursor = current; - let tt = cursor.token_tree(); - - // Check if it is lifetime - if let Some(tt::buffer::TokenTreeRef::Leaf(tt::Leaf::Punct(punct), _)) = tt { - if punct.char == '\'' { - let next = cursor.bump(); - if let Some(tt::buffer::TokenTreeRef::Leaf(tt::Leaf::Ident(ident), _)) = - next.token_tree() - { - let text = SmolStr::new("'".to_string() + &ident.text); - cached.push(TtToken { - tt: Token { kind: LIFETIME_IDENT, is_jointed_to_next: false }, - text, - }); - current = next.bump(); - continue; - } else { - panic!("Next token must be ident : {:#?}", next.token_tree()); - } - } - } - - current = match tt { - Some(tt::buffer::TokenTreeRef::Leaf(leaf, _)) => { - cached.push(convert_leaf(leaf)); - cursor.bump() - } - Some(tt::buffer::TokenTreeRef::Subtree(subtree, _)) => { - if let Some(d) = 
subtree.delimiter_kind() { - cached.push(convert_delim(d, false)); - } - cursor.subtree().unwrap() - } - None => match cursor.end() { - Some(subtree) => { - if let Some(d) = subtree.delimiter_kind() { - cached.push(convert_delim(d, true)); - } - cursor.bump() - } - None => continue, - }, - }; - } - - let mut res = SubtreeTokenSource { - curr: (Token { kind: EOF, is_jointed_to_next: false }, 0), - cached, - }; - res.curr = (res.token(0), 0); - res - } - - fn token(&self, pos: usize) -> Token { - match self.cached.get(pos) { - Some(it) => it.tt, - None => Token { kind: EOF, is_jointed_to_next: false }, - } - } -} - -impl<'a> TokenSource for SubtreeTokenSource { - fn current(&self) -> Token { - self.curr.0 - } - - /// Lookahead n token - fn lookahead_nth(&self, n: usize) -> Token { - self.token(self.curr.1 + n) - } - - /// bump cursor to next token - fn bump(&mut self) { - if self.current().kind == EOF { - return; - } - self.curr = (self.token(self.curr.1 + 1), self.curr.1 + 1); - } - - /// Is the current token a specified keyword? 
- fn is_keyword(&self, kw: &str) -> bool { - match self.cached.get(self.curr.1) { - Some(t) => t.text == *kw, - None => false, - } - } -} - -fn convert_delim(d: tt::DelimiterKind, closing: bool) -> TtToken { - let (kinds, texts) = match d { - tt::DelimiterKind::Parenthesis => ([T!['('], T![')']], "()"), - tt::DelimiterKind::Brace => ([T!['{'], T!['}']], "{}"), - tt::DelimiterKind::Bracket => ([T!['['], T![']']], "[]"), - }; - - let idx = closing as usize; - let kind = kinds[idx]; - let text = &texts[idx..texts.len() - (1 - idx)]; - TtToken { tt: Token { kind, is_jointed_to_next: false }, text: SmolStr::new(text) } -} - -fn convert_literal(l: &tt::Literal) -> TtToken { - let is_negated = l.text.starts_with('-'); - let inner_text = &l.text[if is_negated { 1 } else { 0 }..]; - - let kind = lex_single_syntax_kind(inner_text) - .map(|(kind, _error)| kind) - .filter(|kind| { - kind.is_literal() && (!is_negated || matches!(kind, FLOAT_NUMBER | INT_NUMBER)) - }) - .unwrap_or_else(|| panic!("Fail to convert given literal {:#?}", &l)); - - TtToken { tt: Token { kind, is_jointed_to_next: false }, text: l.text.clone() } -} - -fn convert_ident(ident: &tt::Ident) -> TtToken { - let kind = match ident.text.as_ref() { - "true" => T![true], - "false" => T![false], - "_" => UNDERSCORE, - i if i.starts_with('\'') => LIFETIME_IDENT, - _ => SyntaxKind::from_keyword(ident.text.as_str()).unwrap_or(IDENT), - }; - - TtToken { tt: Token { kind, is_jointed_to_next: false }, text: ident.text.clone() } -} - -fn convert_punct(p: tt::Punct) -> TtToken { - let kind = match SyntaxKind::from_char(p.char) { - None => panic!("{:#?} is not a valid punct", p), - Some(kind) => kind, - }; - - let text = { - let mut buf = [0u8; 4]; - let s: &str = p.char.encode_utf8(&mut buf); - SmolStr::new(s) - }; - TtToken { tt: Token { kind, is_jointed_to_next: p.spacing == tt::Spacing::Joint }, text } -} - -fn convert_leaf(leaf: &tt::Leaf) -> TtToken { - match leaf { - tt::Leaf::Literal(l) => convert_literal(l), - 
tt::Leaf::Ident(ident) => convert_ident(ident), - tt::Leaf::Punct(punct) => convert_punct(*punct), - } -} diff --git a/crates/mbe/src/syntax_bridge.rs b/crates/mbe/src/syntax_bridge.rs index 0b65fa171f..28a23f6be2 100644 --- a/crates/mbe/src/syntax_bridge.rs +++ b/crates/mbe/src/syntax_bridge.rs @@ -12,7 +12,7 @@ use syntax::{ use tt::buffer::{Cursor, TokenBuffer}; use crate::{ - subtree_source::SubtreeTokenSource, tt_iter::TtIter, ExpandError, ParserEntryPoint, TokenMap, + to_parser_tokens::to_parser_tokens, tt_iter::TtIter, ExpandError, ParserEntryPoint, TokenMap, }; /// Convert the syntax node to a `TokenTree` (what macro @@ -56,9 +56,9 @@ pub fn token_tree_to_syntax_node( } _ => TokenBuffer::from_subtree(tt), }; - let mut token_source = SubtreeTokenSource::new(&buffer); + let parser_tokens = to_parser_tokens(&buffer); let mut tree_sink = TtTreeSink::new(buffer.begin()); - parser::parse(&mut token_source, &mut tree_sink, entry_point); + parser::parse(&parser_tokens, &mut tree_sink, entry_point); if tree_sink.roots.len() != 1 { return Err(ExpandError::ConversionError); } diff --git a/crates/mbe/src/to_parser_tokens.rs b/crates/mbe/src/to_parser_tokens.rs new file mode 100644 index 0000000000..435226342e --- /dev/null +++ b/crates/mbe/src/to_parser_tokens.rs @@ -0,0 +1,97 @@ +//! Convert macro-by-example tokens which are specific to macro expansion into a +//! format that works for our parser. 
+ +use syntax::{lex_single_syntax_kind, SyntaxKind, SyntaxKind::*, T}; +use tt::buffer::TokenBuffer; + +pub(crate) fn to_parser_tokens(buffer: &TokenBuffer) -> parser::Tokens { + let mut res = parser::Tokens::default(); + + let mut current = buffer.begin(); + + while !current.eof() { + let cursor = current; + let tt = cursor.token_tree(); + + // Check if it is lifetime + if let Some(tt::buffer::TokenTreeRef::Leaf(tt::Leaf::Punct(punct), _)) = tt { + if punct.char == '\'' { + let next = cursor.bump(); + match next.token_tree() { + Some(tt::buffer::TokenTreeRef::Leaf(tt::Leaf::Ident(_ident), _)) => { + res.push(LIFETIME_IDENT); + current = next.bump(); + continue; + } + _ => panic!("Next token must be ident : {:#?}", next.token_tree()), + } + } + } + + current = match tt { + Some(tt::buffer::TokenTreeRef::Leaf(leaf, _)) => { + match leaf { + tt::Leaf::Literal(lit) => { + let is_negated = lit.text.starts_with('-'); + let inner_text = &lit.text[if is_negated { 1 } else { 0 }..]; + + let kind = lex_single_syntax_kind(inner_text) + .map(|(kind, _error)| kind) + .filter(|kind| { + kind.is_literal() + && (!is_negated || matches!(kind, FLOAT_NUMBER | INT_NUMBER)) + }) + .unwrap_or_else(|| panic!("Fail to convert given literal {:#?}", &lit)); + + res.push(kind); + } + tt::Leaf::Ident(ident) => match ident.text.as_ref() { + "_" => res.push(T![_]), + i if i.starts_with('\'') => res.push(LIFETIME_IDENT), + _ => match SyntaxKind::from_keyword(&ident.text) { + Some(kind) => res.push(kind), + None => { + let contextual_keyword = + SyntaxKind::from_contextual_keyword(&ident.text) + .unwrap_or(SyntaxKind::IDENT); + res.push_ident(contextual_keyword); + } + }, + }, + tt::Leaf::Punct(punct) => { + let kind = SyntaxKind::from_char(punct.char) + .unwrap_or_else(|| panic!("{:#?} is not a valid punct", punct)); + res.push(kind); + res.was_joint(punct.spacing == tt::Spacing::Joint); + } + } + cursor.bump() + } + Some(tt::buffer::TokenTreeRef::Subtree(subtree, _)) => { + if let Some(d) = 
subtree.delimiter_kind() { + res.push(match d { + tt::DelimiterKind::Parenthesis => T!['('], + tt::DelimiterKind::Brace => T!['{'], + tt::DelimiterKind::Bracket => T!['['], + }); + } + cursor.subtree().unwrap() + } + None => match cursor.end() { + Some(subtree) => { + if let Some(d) = subtree.delimiter_kind() { + res.push(match d { + tt::DelimiterKind::Parenthesis => T![')'], + tt::DelimiterKind::Brace => T!['}'], + tt::DelimiterKind::Bracket => T![']'], + }) + } + cursor.bump() + } + None => continue, + }, + }; + } + + res +} diff --git a/crates/mbe/src/tt_iter.rs b/crates/mbe/src/tt_iter.rs index ff0272808b..d05e84b0f0 100644 --- a/crates/mbe/src/tt_iter.rs +++ b/crates/mbe/src/tt_iter.rs @@ -1,7 +1,7 @@ //! A "Parser" structure for token trees. We use this when parsing a declarative //! macro definition into a list of patterns and templates. -use crate::{subtree_source::SubtreeTokenSource, ExpandError, ExpandResult, ParserEntryPoint}; +use crate::{to_parser_tokens::to_parser_tokens, ExpandError, ExpandResult, ParserEntryPoint}; use parser::TreeSink; use syntax::SyntaxKind; @@ -116,10 +116,10 @@ impl<'a> TtIter<'a> { } let buffer = TokenBuffer::from_tokens(self.inner.as_slice()); - let mut src = SubtreeTokenSource::new(&buffer); + let parser_tokens = to_parser_tokens(&buffer); let mut sink = OffsetTokenSink { cursor: buffer.begin(), error: false }; - parser::parse(&mut src, &mut sink, entry_point); + parser::parse(&parser_tokens, &mut sink, entry_point); let mut err = if !sink.cursor.is_root() || sink.error { Some(err!("expected {:?}", entry_point)) diff --git a/crates/parser/src/lib.rs b/crates/parser/src/lib.rs index 1e9f59fa53..2e2d96d027 100644 --- a/crates/parser/src/lib.rs +++ b/crates/parser/src/lib.rs @@ -1,8 +1,11 @@ //! The Rust parser. //! +//! NOTE: The crate is undergoing refactors, don't believe everything the docs +//! say :-) +//! //! The parser doesn't know about concrete representation of tokens and syntax -//! trees. 
Abstract [`TokenSource`] and [`TreeSink`] traits are used instead. -//! As a consequence, this crate does not contain a lexer. +//! trees. Abstract [`TokenSource`] and [`TreeSink`] traits are used instead. As +//! a consequence, this crate does not contain a lexer. //! //! The [`Parser`] struct from the [`parser`] module is a cursor into the //! sequence of tokens. Parsing routines use [`Parser`] to inspect current diff --git a/crates/parser/src/tokens.rs b/crates/parser/src/tokens.rs index e1aea6acfc..dff5e583b1 100644 --- a/crates/parser/src/tokens.rs +++ b/crates/parser/src/tokens.rs @@ -1,3 +1,8 @@ +//! Input for the parser -- a sequence of tokens. +//! +//! As of now, parser doesn't have access to the *text* of the tokens, and makes +//! decisions based solely on their classification. + use crate::SyntaxKind; #[allow(non_camel_case_types)] @@ -28,6 +33,22 @@ impl Tokens { pub fn push(&mut self, kind: SyntaxKind) { self.push_impl(kind, SyntaxKind::EOF) } + /// Sets jointness for the last token we've pushed. + /// + /// This is a separate API rather than an argument to the `push` to make it + /// convenient both for textual and mbe tokens. With text, you know whether + /// the *previous* token was joint, with mbe, you know whether the *current* + /// one is joint. This API allows for both styles of usage: + /// + /// ``` + /// // In text: + /// tokens.was_joint(prev_joint); + /// tokens.push(curr); + /// + /// // In MBE: + /// tokens.push(curr); + /// tokens.was_joint(curr_joint); + /// ``` pub fn was_joint(&mut self, yes: bool) { let idx = self.len(); if yes && idx > 0 {