mirror of https://github.com/rust-lang/rust-analyzer.git
synced 2025-09-27 04:19:13 +00:00

port mbe to soa tokens

This commit is contained in:
parent 965585748e
commit 1055a6111a

7 changed files with 130 additions and 183 deletions
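For context, "soa tokens" means the parser's new struct-of-arrays token buffer: instead of one vector of token structs, each per-token attribute lives in its own parallel column. This diff only shows the type's public surface (`Tokens::default`, `push`, `push_ident`, `was_joint`, `len`), so the following is a minimal sketch with assumed field names, not the actual `parser::Tokens` definition:

use crate::SyntaxKind;

// Sketch of a struct-of-arrays token buffer; field names are assumptions.
#[derive(Default)]
pub struct Tokens {
    kind: Vec<SyntaxKind>,            // classification of token i
    joint: Vec<bool>,                 // is token i glued to token i + 1 (e.g. `>` `>` in `>>`)?
    contextual_kind: Vec<SyntaxKind>, // keyword-ness of IDENTs such as `union`
}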
crates/mbe/src/lib.rs
@@ -10,7 +10,7 @@ mod parser;
 mod expander;
 mod syntax_bridge;
 mod tt_iter;
-mod subtree_source;
+mod to_parser_tokens;

 #[cfg(test)]
 mod benchmark;
crates/mbe/src/subtree_source.rs (deleted, 174 lines)
@@ -1,174 +0,0 @@
-//! Our parser is generic over the source of tokens it parses.
-//!
-//! This module defines tokens sourced from declarative macros.
-
-use parser::{Token, TokenSource};
-use syntax::{lex_single_syntax_kind, SmolStr, SyntaxKind, SyntaxKind::*, T};
-use tt::buffer::TokenBuffer;
-
-#[derive(Debug, Clone, Eq, PartialEq)]
-struct TtToken {
-    tt: Token,
-    text: SmolStr,
-}
-
-pub(crate) struct SubtreeTokenSource {
-    cached: Vec<TtToken>,
-    curr: (Token, usize),
-}
-
-impl<'a> SubtreeTokenSource {
-    pub(crate) fn new(buffer: &TokenBuffer) -> SubtreeTokenSource {
-        let mut current = buffer.begin();
-        let mut cached = Vec::with_capacity(100);
-
-        while !current.eof() {
-            let cursor = current;
-            let tt = cursor.token_tree();
-
-            // Check if it is lifetime
-            if let Some(tt::buffer::TokenTreeRef::Leaf(tt::Leaf::Punct(punct), _)) = tt {
-                if punct.char == '\'' {
-                    let next = cursor.bump();
-                    if let Some(tt::buffer::TokenTreeRef::Leaf(tt::Leaf::Ident(ident), _)) =
-                        next.token_tree()
-                    {
-                        let text = SmolStr::new("'".to_string() + &ident.text);
-                        cached.push(TtToken {
-                            tt: Token { kind: LIFETIME_IDENT, is_jointed_to_next: false },
-                            text,
-                        });
-                        current = next.bump();
-                        continue;
-                    } else {
-                        panic!("Next token must be ident : {:#?}", next.token_tree());
-                    }
-                }
-            }
-
-            current = match tt {
-                Some(tt::buffer::TokenTreeRef::Leaf(leaf, _)) => {
-                    cached.push(convert_leaf(leaf));
-                    cursor.bump()
-                }
-                Some(tt::buffer::TokenTreeRef::Subtree(subtree, _)) => {
-                    if let Some(d) = subtree.delimiter_kind() {
-                        cached.push(convert_delim(d, false));
-                    }
-                    cursor.subtree().unwrap()
-                }
-                None => match cursor.end() {
-                    Some(subtree) => {
-                        if let Some(d) = subtree.delimiter_kind() {
-                            cached.push(convert_delim(d, true));
-                        }
-                        cursor.bump()
-                    }
-                    None => continue,
-                },
-            };
-        }
-
-        let mut res = SubtreeTokenSource {
-            curr: (Token { kind: EOF, is_jointed_to_next: false }, 0),
-            cached,
-        };
-        res.curr = (res.token(0), 0);
-        res
-    }
-
-    fn token(&self, pos: usize) -> Token {
-        match self.cached.get(pos) {
-            Some(it) => it.tt,
-            None => Token { kind: EOF, is_jointed_to_next: false },
-        }
-    }
-}
-
-impl<'a> TokenSource for SubtreeTokenSource {
-    fn current(&self) -> Token {
-        self.curr.0
-    }
-
-    /// Lookahead n token
-    fn lookahead_nth(&self, n: usize) -> Token {
-        self.token(self.curr.1 + n)
-    }
-
-    /// bump cursor to next token
-    fn bump(&mut self) {
-        if self.current().kind == EOF {
-            return;
-        }
-        self.curr = (self.token(self.curr.1 + 1), self.curr.1 + 1);
-    }
-
-    /// Is the current token a specified keyword?
-    fn is_keyword(&self, kw: &str) -> bool {
-        match self.cached.get(self.curr.1) {
-            Some(t) => t.text == *kw,
-            None => false,
-        }
-    }
-}
-
-fn convert_delim(d: tt::DelimiterKind, closing: bool) -> TtToken {
-    let (kinds, texts) = match d {
-        tt::DelimiterKind::Parenthesis => ([T!['('], T![')']], "()"),
-        tt::DelimiterKind::Brace => ([T!['{'], T!['}']], "{}"),
-        tt::DelimiterKind::Bracket => ([T!['['], T![']']], "[]"),
-    };
-
-    let idx = closing as usize;
-    let kind = kinds[idx];
-    let text = &texts[idx..texts.len() - (1 - idx)];
-    TtToken { tt: Token { kind, is_jointed_to_next: false }, text: SmolStr::new(text) }
-}
-
-fn convert_literal(l: &tt::Literal) -> TtToken {
-    let is_negated = l.text.starts_with('-');
-    let inner_text = &l.text[if is_negated { 1 } else { 0 }..];
-
-    let kind = lex_single_syntax_kind(inner_text)
-        .map(|(kind, _error)| kind)
-        .filter(|kind| {
-            kind.is_literal() && (!is_negated || matches!(kind, FLOAT_NUMBER | INT_NUMBER))
-        })
-        .unwrap_or_else(|| panic!("Fail to convert given literal {:#?}", &l));
-
-    TtToken { tt: Token { kind, is_jointed_to_next: false }, text: l.text.clone() }
-}
-
-fn convert_ident(ident: &tt::Ident) -> TtToken {
-    let kind = match ident.text.as_ref() {
-        "true" => T![true],
-        "false" => T![false],
-        "_" => UNDERSCORE,
-        i if i.starts_with('\'') => LIFETIME_IDENT,
-        _ => SyntaxKind::from_keyword(ident.text.as_str()).unwrap_or(IDENT),
-    };
-
-    TtToken { tt: Token { kind, is_jointed_to_next: false }, text: ident.text.clone() }
-}
-
-fn convert_punct(p: tt::Punct) -> TtToken {
-    let kind = match SyntaxKind::from_char(p.char) {
-        None => panic!("{:#?} is not a valid punct", p),
-        Some(kind) => kind,
-    };
-
-    let text = {
-        let mut buf = [0u8; 4];
-        let s: &str = p.char.encode_utf8(&mut buf);
-        SmolStr::new(s)
-    };
-    TtToken { tt: Token { kind, is_jointed_to_next: p.spacing == tt::Spacing::Joint }, text }
-}
-
-fn convert_leaf(leaf: &tt::Leaf) -> TtToken {
-    match leaf {
-        tt::Leaf::Literal(l) => convert_literal(l),
-        tt::Leaf::Ident(ident) => convert_ident(ident),
-        tt::Leaf::Punct(punct) => convert_punct(*punct),
-    }
-}
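For orientation, the deleted type implemented the parser's pull-based `TokenSource` trait. Its shape can be reconstructed from the impl above; this is a sketch, not the verbatim declaration from crates/parser:

// Reconstructed from the methods implemented above; exact declaration assumed.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub struct Token {
    pub kind: SyntaxKind,
    pub is_jointed_to_next: bool,
}

pub trait TokenSource {
    fn current(&self) -> Token;
    fn lookahead_nth(&self, n: usize) -> Token; // peek n tokens ahead
    fn bump(&mut self);                         // advance past the current token
    fn is_keyword(&self, kw: &str) -> bool;     // text-based keyword check
}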
crates/mbe/src/syntax_bridge.rs
@@ -12,7 +12,7 @@ use syntax::{
 use tt::buffer::{Cursor, TokenBuffer};

 use crate::{
-    subtree_source::SubtreeTokenSource, tt_iter::TtIter, ExpandError, ParserEntryPoint, TokenMap,
+    to_parser_tokens::to_parser_tokens, tt_iter::TtIter, ExpandError, ParserEntryPoint, TokenMap,
 };

 /// Convert the syntax node to a `TokenTree` (what macro
@@ -56,9 +56,9 @@ pub fn token_tree_to_syntax_node(
         }
         _ => TokenBuffer::from_subtree(tt),
     };
-    let mut token_source = SubtreeTokenSource::new(&buffer);
+    let parser_tokens = to_parser_tokens(&buffer);
     let mut tree_sink = TtTreeSink::new(buffer.begin());
-    parser::parse(&mut token_source, &mut tree_sink, entry_point);
+    parser::parse(&parser_tokens, &mut tree_sink, entry_point);
     if tree_sink.roots.len() != 1 {
         return Err(ExpandError::ConversionError);
     }
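The call sites above (and in tt_iter.rs below) imply that `parser::parse` now takes the eagerly built token list by shared reference instead of a `&mut dyn TokenSource`. The new signature is presumably along these lines, inferred from the call sites rather than copied from crates/parser:

// Presumed shape of the updated entry point; inferred from this diff.
pub fn parse(tokens: &Tokens, tree_sink: &mut dyn TreeSink, entry_point: ParserEntryPoint) {
    // ...
}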
crates/mbe/src/to_parser_tokens.rs (new file, 97 lines)
@@ -0,0 +1,97 @@
+//! Convert macro-by-example tokens which are specific to macro expansion into a
+//! format that works for our parser.
+
+use syntax::{lex_single_syntax_kind, SyntaxKind, SyntaxKind::*, T};
+use tt::buffer::TokenBuffer;
+
+pub(crate) fn to_parser_tokens(buffer: &TokenBuffer) -> parser::Tokens {
+    let mut res = parser::Tokens::default();
+
+    let mut current = buffer.begin();
+
+    while !current.eof() {
+        let cursor = current;
+        let tt = cursor.token_tree();
+
+        // Check if it is lifetime
+        if let Some(tt::buffer::TokenTreeRef::Leaf(tt::Leaf::Punct(punct), _)) = tt {
+            if punct.char == '\'' {
+                let next = cursor.bump();
+                match next.token_tree() {
+                    Some(tt::buffer::TokenTreeRef::Leaf(tt::Leaf::Ident(_ident), _)) => {
+                        res.push(LIFETIME_IDENT);
+                        current = next.bump();
+                        continue;
+                    }
+                    _ => panic!("Next token must be ident : {:#?}", next.token_tree()),
+                }
+            }
+        }
+
+        current = match tt {
+            Some(tt::buffer::TokenTreeRef::Leaf(leaf, _)) => {
+                match leaf {
+                    tt::Leaf::Literal(lit) => {
+                        let is_negated = lit.text.starts_with('-');
+                        let inner_text = &lit.text[if is_negated { 1 } else { 0 }..];
+
+                        let kind = lex_single_syntax_kind(inner_text)
+                            .map(|(kind, _error)| kind)
+                            .filter(|kind| {
+                                kind.is_literal()
+                                    && (!is_negated || matches!(kind, FLOAT_NUMBER | INT_NUMBER))
+                            })
+                            .unwrap_or_else(|| panic!("Fail to convert given literal {:#?}", &lit));
+
+                        res.push(kind);
+                    }
+                    tt::Leaf::Ident(ident) => match ident.text.as_ref() {
+                        "_" => res.push(T![_]),
+                        i if i.starts_with('\'') => res.push(LIFETIME_IDENT),
+                        _ => match SyntaxKind::from_keyword(&ident.text) {
+                            Some(kind) => res.push(kind),
+                            None => {
+                                let contextual_keyword =
+                                    SyntaxKind::from_contextual_keyword(&ident.text)
+                                        .unwrap_or(SyntaxKind::IDENT);
+                                res.push_ident(contextual_keyword);
+                            }
+                        },
+                    },
+                    tt::Leaf::Punct(punct) => {
+                        let kind = SyntaxKind::from_char(punct.char)
+                            .unwrap_or_else(|| panic!("{:#?} is not a valid punct", punct));
+                        res.push(kind);
+                        res.was_joint(punct.spacing == tt::Spacing::Joint);
+                    }
+                }
+                cursor.bump()
+            }
+            Some(tt::buffer::TokenTreeRef::Subtree(subtree, _)) => {
+                if let Some(d) = subtree.delimiter_kind() {
+                    res.push(match d {
+                        tt::DelimiterKind::Parenthesis => T!['('],
+                        tt::DelimiterKind::Brace => T!['{'],
+                        tt::DelimiterKind::Bracket => T!['['],
+                    });
+                }
+                cursor.subtree().unwrap()
+            }
+            None => match cursor.end() {
+                Some(subtree) => {
+                    if let Some(d) = subtree.delimiter_kind() {
+                        res.push(match d {
+                            tt::DelimiterKind::Parenthesis => T![')'],
+                            tt::DelimiterKind::Brace => T!['}'],
+                            tt::DelimiterKind::Bracket => T![']'],
+                        })
+                    }
+                    cursor.bump()
+                }
+                None => continue,
+            },
+        };
+    }

+    res
+}
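One subtlety in the function above: in a token tree, a lifetime like `'a` arrives as two leaves, a `'` punct followed by an ident, and `to_parser_tokens` re-glues the pair into a single `LIFETIME_IDENT`. A self-contained toy model of that gluing rule (illustration only, not rust-analyzer code):

// Toy model: a `'` followed by an identifier collapses into one lifetime token.
fn glue_lifetimes(leaves: &[&str]) -> Vec<String> {
    let mut out = Vec::new();
    let mut i = 0;
    while i < leaves.len() {
        if leaves[i] == "'" && i + 1 < leaves.len() {
            out.push(format!("'{}", leaves[i + 1])); // "'" + "a" => "'a"
            i += 2;
        } else {
            out.push(leaves[i].to_string());
            i += 1;
        }
    }
    out
}

fn main() {
    assert_eq!(glue_lifetimes(&["&", "'", "a", "str"]), ["&", "'a", "str"]);
}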
crates/mbe/src/tt_iter.rs
@@ -1,7 +1,7 @@
 //! A "Parser" structure for token trees. We use this when parsing a declarative
 //! macro definition into a list of patterns and templates.

-use crate::{subtree_source::SubtreeTokenSource, ExpandError, ExpandResult, ParserEntryPoint};
+use crate::{to_parser_tokens::to_parser_tokens, ExpandError, ExpandResult, ParserEntryPoint};

 use parser::TreeSink;
 use syntax::SyntaxKind;
@@ -116,10 +116,10 @@ impl<'a> TtIter<'a> {
         }

         let buffer = TokenBuffer::from_tokens(self.inner.as_slice());
-        let mut src = SubtreeTokenSource::new(&buffer);
+        let parser_tokens = to_parser_tokens(&buffer);
         let mut sink = OffsetTokenSink { cursor: buffer.begin(), error: false };

-        parser::parse(&mut src, &mut sink, entry_point);
+        parser::parse(&parser_tokens, &mut sink, entry_point);

         let mut err = if !sink.cursor.is_root() || sink.error {
             Some(err!("expected {:?}", entry_point))
crates/parser/src/lib.rs
@@ -1,8 +1,11 @@
 //! The Rust parser.
 //!
+//! NOTE: The crate is undergoing refactors, don't believe everything the docs
+//! say :-)
+//!
 //! The parser doesn't know about concrete representation of tokens and syntax
-//! trees. Abstract [`TokenSource`] and [`TreeSink`] traits are used instead.
-//! As a consequence, this crate does not contain a lexer.
+//! trees. Abstract [`TokenSource`] and [`TreeSink`] traits are used instead. As
+//! a consequence, this crate does not contain a lexer.
 //!
 //! The [`Parser`] struct from the [`parser`] module is a cursor into the
 //! sequence of tokens. Parsing routines use [`Parser`] to inspect current
crates/parser/src/tokens.rs
@@ -1,3 +1,8 @@
+//! Input for the parser -- a sequence of tokens.
+//!
+//! As of now, the parser doesn't have access to the *text* of the tokens, and makes
+//! decisions based solely on their classification.
+
 use crate::SyntaxKind;

 #[allow(non_camel_case_types)]
@@ -28,6 +33,22 @@ impl Tokens {
     pub fn push(&mut self, kind: SyntaxKind) {
         self.push_impl(kind, SyntaxKind::EOF)
     }
+    /// Sets jointness for the last token we've pushed.
+    ///
+    /// This is a separate API rather than an argument to `push` to make it
+    /// convenient for both textual and mbe tokens. With text, you know whether
+    /// the *previous* token was joint; with mbe, you know whether the *current*
+    /// one is joint. This API allows for both styles of usage:
+    ///
+    /// ```
+    /// // In text:
+    /// tokens.was_joint(prev_joint);
+    /// tokens.push(curr);
+    ///
+    /// // In MBE:
+    /// tokens.push(curr);
+    /// tokens.was_joint(curr_joint);
+    /// ```
     pub fn was_joint(&mut self, yes: bool) {
         let idx = self.len();
         if yes && idx > 0 {
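The hunk ends inside `was_joint`. Presumably the remaining body marks the previously pushed token as glued to the one that follows; a minimal sketch, assuming a hypothetical `set_joint` helper on the struct-of-arrays buffer:

// Sketch only; `set_joint` is an assumed helper, not shown in this diff.
pub fn was_joint(&mut self, yes: bool) {
    let idx = self.len();
    if yes && idx > 0 {
        // Token idx - 1 (the last one pushed) is joint with the next token.
        self.set_joint(idx - 1);
    }
}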