Switch token trees to use Symbols

This commit is contained in:
Lukas Wirth 2024-07-16 09:59:39 +02:00
parent 0c95aaa08e
commit 93024ad411
51 changed files with 593 additions and 399 deletions

View file

@ -1,10 +1,11 @@
//! This module adds real-world mbe examples for benchmark tests
use intern::Symbol;
use rustc_hash::FxHashMap;
use span::{Edition, Span};
use syntax::{
ast::{self, HasName},
AstNode, SmolStr,
AstNode,
};
use test_utils::{bench, bench_fixture, skip_slow_tests};
@ -228,7 +229,7 @@ fn invocation_fixtures(
fn make_ident(ident: &str) -> tt::TokenTree<Span> {
tt::Leaf::Ident(tt::Ident {
span: DUMMY,
text: SmolStr::new(ident),
sym: Symbol::intern(ident),
is_raw: tt::IdentIsRaw::No,
})
.into()
@ -239,7 +240,7 @@ fn invocation_fixtures(
fn make_literal(lit: &str) -> tt::TokenTree<Span> {
tt::Leaf::Literal(tt::Literal {
span: DUMMY,
text: SmolStr::new(lit),
symbol: Symbol::intern(lit),
kind: tt::LitKind::Str,
suffix: None,
})

View file

@ -5,9 +5,9 @@
mod matcher;
mod transcriber;
use intern::Symbol;
use rustc_hash::FxHashMap;
use span::{Edition, Span};
use syntax::SmolStr;
use crate::{parser::MetaVarKind, ExpandError, ExpandResult, MatchedArmIndex};
@ -110,12 +110,12 @@ pub(crate) fn expand_rules(
/// the `Bindings` we should take. We push to the stack when we enter a
/// repetition.
///
/// In other words, `Bindings` is a *multi* mapping from `SmolStr` to
/// In other words, `Bindings` is a *multi* mapping from `Symbol` to
/// `tt::TokenTree`, where the index to select a particular `TokenTree` among
/// many is not a plain `usize`, but a `&[usize]`.
#[derive(Debug, Default, Clone, PartialEq, Eq)]
struct Bindings {
inner: FxHashMap<SmolStr, Binding>,
inner: FxHashMap<Symbol, Binding>,
}
#[derive(Debug, Clone, PartialEq, Eq)]

View file

@ -61,9 +61,9 @@
use std::{rc::Rc, sync::Arc};
use intern::{sym, Symbol};
use smallvec::{smallvec, SmallVec};
use span::{Edition, Span};
use syntax::SmolStr;
use tt::{iter::TtIter, DelimSpan};
use crate::{
@ -74,12 +74,12 @@ use crate::{
};
impl Bindings {
fn push_optional(&mut self, name: &SmolStr) {
self.inner.insert(name.clone(), Binding::Fragment(Fragment::Empty));
fn push_optional(&mut self, name: Symbol) {
self.inner.insert(name, Binding::Fragment(Fragment::Empty));
}
fn push_empty(&mut self, name: &SmolStr) {
self.inner.insert(name.clone(), Binding::Empty);
fn push_empty(&mut self, name: Symbol) {
self.inner.insert(name, Binding::Empty);
}
fn bindings(&self) -> impl Iterator<Item = &Binding> {
@ -127,10 +127,10 @@ pub(super) fn match_(pattern: &MetaTemplate, input: &tt::Subtree<Span>, edition:
#[derive(Debug, Clone)]
enum BindingKind {
Empty(SmolStr),
Optional(SmolStr),
Fragment(SmolStr, Fragment),
Missing(SmolStr, MetaVarKind),
Empty(Symbol),
Optional(Symbol),
Fragment(Symbol, Fragment),
Missing(Symbol, MetaVarKind),
Nested(usize, usize),
}
@ -178,20 +178,20 @@ impl BindingsBuilder {
}
}
fn push_empty(&mut self, idx: &mut BindingsIdx, var: &SmolStr) {
fn push_empty(&mut self, idx: &mut BindingsIdx, var: &Symbol) {
self.nodes[idx.0].push(LinkNode::Node(Rc::new(BindingKind::Empty(var.clone()))));
}
fn push_optional(&mut self, idx: &mut BindingsIdx, var: &SmolStr) {
fn push_optional(&mut self, idx: &mut BindingsIdx, var: &Symbol) {
self.nodes[idx.0].push(LinkNode::Node(Rc::new(BindingKind::Optional(var.clone()))));
}
fn push_fragment(&mut self, idx: &mut BindingsIdx, var: &SmolStr, fragment: Fragment) {
fn push_fragment(&mut self, idx: &mut BindingsIdx, var: &Symbol, fragment: Fragment) {
self.nodes[idx.0]
.push(LinkNode::Node(Rc::new(BindingKind::Fragment(var.clone(), fragment))));
}
fn push_missing(&mut self, idx: &mut BindingsIdx, var: &SmolStr, kind: MetaVarKind) {
fn push_missing(&mut self, idx: &mut BindingsIdx, var: &Symbol, kind: MetaVarKind) {
self.nodes[idx.0].push(LinkNode::Node(Rc::new(BindingKind::Missing(var.clone(), kind))));
}
@ -219,10 +219,10 @@ impl BindingsBuilder {
for cmd in nodes {
match cmd {
BindingKind::Empty(name) => {
bindings.push_empty(name);
bindings.push_empty(name.clone());
}
BindingKind::Optional(name) => {
bindings.push_optional(name);
bindings.push_optional(name.clone());
}
BindingKind::Fragment(name, fragment) => {
bindings.inner.insert(name.clone(), Binding::Fragment(fragment.clone()));
@ -507,7 +507,7 @@ fn match_loop_inner<'t>(
}
OpDelimited::Op(Op::Literal(lhs)) => {
if let Ok(rhs) = src.clone().expect_leaf() {
if matches!(rhs, tt::Leaf::Literal(it) if it.text == lhs.text) {
if matches!(rhs, tt::Leaf::Literal(it) if it.symbol == lhs.symbol) {
item.dot.next();
} else {
res.add_err(ExpandError::UnexpectedToken);
@ -521,7 +521,7 @@ fn match_loop_inner<'t>(
}
OpDelimited::Op(Op::Ident(lhs)) => {
if let Ok(rhs) = src.clone().expect_leaf() {
if matches!(rhs, tt::Leaf::Ident(it) if it.text == lhs.text) {
if matches!(rhs, tt::Leaf::Ident(it) if it.sym == lhs.sym) {
item.dot.next();
} else {
res.add_err(ExpandError::UnexpectedToken);
@ -554,7 +554,7 @@ fn match_loop_inner<'t>(
// ident, not a punct.
ExpandError::UnexpectedToken
} else {
let lhs: SmolStr = lhs.collect();
let lhs = lhs.collect::<String>();
ExpandError::binding_error(format!("expected punct: `{lhs}`"))
}
} else {
@ -759,7 +759,9 @@ fn match_meta_var(
// [1]: https://github.com/rust-lang/rust/blob/f0c4da499/compiler/rustc_expand/src/mbe/macro_parser.rs#L576
match input.peek_n(0) {
Some(tt::TokenTree::Leaf(tt::Leaf::Ident(it)))
if it.text == "_" || it.text == "let" || it.text == "const" =>
if it.sym == sym::underscore
|| it.sym == sym::let_
|| it.sym == sym::const_ =>
{
return ExpandResult::only_err(ExpandError::NoMatchingRule)
}
@ -824,7 +826,7 @@ fn match_meta_var(
expect_fragment(input, fragment, edition).map(|it| it.map(Fragment::Tokens))
}
fn collect_vars(collector_fun: &mut impl FnMut(SmolStr), pattern: &MetaTemplate) {
fn collect_vars(collector_fun: &mut impl FnMut(Symbol), pattern: &MetaTemplate) {
for op in pattern.iter() {
match op {
Op::Var { name, .. } => collector_fun(name.clone()),
@ -908,13 +910,13 @@ fn expect_separator<S: Copy>(iter: &mut TtIter<'_, S>, separator: &Separator) ->
let mut fork = iter.clone();
let ok = match separator {
Separator::Ident(lhs) => match fork.expect_ident_or_underscore() {
Ok(rhs) => rhs.text == lhs.text,
Ok(rhs) => rhs.sym == lhs.sym,
Err(_) => false,
},
Separator::Literal(lhs) => match fork.expect_literal() {
Ok(rhs) => match rhs {
tt::Leaf::Literal(rhs) => rhs.text == lhs.text,
tt::Leaf::Ident(rhs) => rhs.text == lhs.text,
tt::Leaf::Literal(rhs) => rhs.symbol == lhs.symbol,
tt::Leaf::Ident(rhs) => rhs.sym == lhs.symbol,
tt::Leaf::Punct(_) => false,
},
Err(_) => false,

View file

@ -1,8 +1,8 @@
//! Transcriber takes a template, like `fn $ident() {}`, a set of bindings like
//! `$ident => foo`, interpolates variables in the template, to get `fn foo() {}`
use intern::{sym, Symbol};
use span::Span;
use syntax::{format_smolstr, SmolStr};
use tt::Delimiter;
use crate::{
@ -12,16 +12,16 @@ use crate::{
};
impl Bindings {
fn get(&self, name: &str) -> Result<&Binding, ExpandError> {
fn get(&self, name: &Symbol) -> Result<&Binding, ExpandError> {
match self.inner.get(name) {
Some(binding) => Ok(binding),
None => Err(ExpandError::UnresolvedBinding(Box::new(Box::from(name)))),
None => Err(ExpandError::UnresolvedBinding(Box::new(Box::from(name.as_str())))),
}
}
fn get_fragment(
&self,
name: &str,
name: &Symbol,
mut span: Span,
nesting: &mut [NestingState],
marker: impl Fn(&mut Span),
@ -97,7 +97,7 @@ impl Bindings {
| MetaVarKind::Expr
| MetaVarKind::Ident => {
Fragment::Tokens(tt::TokenTree::Leaf(tt::Leaf::Ident(tt::Ident {
text: SmolStr::new_static("missing"),
sym: sym::missing.clone(),
span,
is_raw: tt::IdentIsRaw::No,
})))
@ -112,7 +112,7 @@ impl Bindings {
spacing: tt::Spacing::Joint,
})),
tt::TokenTree::Leaf(tt::Leaf::Ident(tt::Ident {
text: SmolStr::new_static("missing"),
sym: sym::missing.clone(),
span,
is_raw: tt::IdentIsRaw::No,
})),
@ -121,7 +121,7 @@ impl Bindings {
}
MetaVarKind::Literal => {
Fragment::Tokens(tt::TokenTree::Leaf(tt::Leaf::Ident(tt::Ident {
text: SmolStr::new_static("\"missing\""),
sym: sym::missing.clone(),
span,
is_raw: tt::IdentIsRaw::No,
})))
@ -239,7 +239,7 @@ fn expand_subtree(
ctx.nesting.get(ctx.nesting.len() - 1 - depth).map_or(0, |nest| nest.idx);
arena.push(
tt::Leaf::Literal(tt::Literal {
text: format_smolstr!("{index}"),
symbol: Symbol::integer(index),
span: ctx.call_site,
kind: tt::LitKind::Integer,
suffix: None,
@ -254,7 +254,7 @@ fn expand_subtree(
});
arena.push(
tt::Leaf::Literal(tt::Literal {
text: format_smolstr!("{length}"),
symbol: Symbol::integer(length),
span: ctx.call_site,
kind: tt::LitKind::Integer,
suffix: None,
@ -263,7 +263,7 @@ fn expand_subtree(
);
}
Op::Count { name, depth } => {
let mut binding = match ctx.bindings.get(name.as_str()) {
let mut binding = match ctx.bindings.get(name) {
Ok(b) => b,
Err(e) => {
if err.is_none() {
@ -321,7 +321,7 @@ fn expand_subtree(
};
arena.push(
tt::Leaf::Literal(tt::Literal {
text: format_smolstr!("{c}"),
symbol: Symbol::integer(c),
span: ctx.call_site,
suffix: None,
kind: tt::LitKind::Integer,
@ -344,12 +344,12 @@ fn expand_subtree(
fn expand_var(
ctx: &mut ExpandCtx<'_>,
v: &SmolStr,
v: &Symbol,
id: Span,
marker: impl Fn(&mut Span),
) -> ExpandResult<Fragment> {
// We already handle $crate case in mbe parser
debug_assert!(v != "crate");
debug_assert!(*v != sym::crate_);
match ctx.bindings.get_fragment(v, id, &mut ctx.nesting, marker) {
Ok(it) => ExpandResult::ok(it),
@ -373,7 +373,7 @@ fn expand_var(
tt::Leaf::from(tt::Punct { char: '$', spacing: tt::Spacing::Alone, span: id })
.into(),
tt::Leaf::from(tt::Ident {
text: v.clone(),
sym: v.clone(),
span: id,
is_raw: tt::IdentIsRaw::No,
})

View file

@ -4,8 +4,8 @@
use std::sync::Arc;
use arrayvec::ArrayVec;
use intern::{sym, Symbol};
use span::{Edition, Span, SyntaxContextId};
use syntax::SmolStr;
use tt::iter::TtIter;
use crate::ParseError;
@ -67,12 +67,12 @@ impl MetaTemplate {
#[derive(Clone, Debug, PartialEq, Eq)]
pub(crate) enum Op {
Var {
name: SmolStr,
name: Symbol,
kind: Option<MetaVarKind>,
id: Span,
},
Ignore {
name: SmolStr,
name: Symbol,
id: Span,
},
Index {
@ -82,7 +82,7 @@ pub(crate) enum Op {
depth: usize,
},
Count {
name: SmolStr,
name: Symbol,
// FIXME: `usize` once we drop support for 1.76
depth: Option<usize>,
},
@ -138,8 +138,8 @@ impl PartialEq for Separator {
use Separator::*;
match (self, other) {
(Ident(a), Ident(b)) => a.text == b.text,
(Literal(a), Literal(b)) => a.text == b.text,
(Ident(a), Ident(b)) => a.sym == b.sym,
(Literal(a), Literal(b)) => a.symbol == b.symbol,
(Puncts(a), Puncts(b)) if a.len() == b.len() => {
let a_iter = a.iter().map(|a| a.char);
let b_iter = b.iter().map(|b| b.char);
@ -203,23 +203,23 @@ fn next_op(
}
},
tt::TokenTree::Leaf(leaf) => match leaf {
tt::Leaf::Ident(ident) if ident.text == "crate" => {
tt::Leaf::Ident(ident) if ident.sym == sym::crate_ => {
// We simply produce identifier `$crate` here. And it will be resolved when lowering ast to Path.
Op::Ident(tt::Ident {
text: "$crate".into(),
sym: sym::dollar_crate.clone(),
span: ident.span,
is_raw: tt::IdentIsRaw::No,
})
}
tt::Leaf::Ident(ident) => {
let kind = eat_fragment_kind(edition, src, mode)?;
let name = ident.text.clone();
let name = ident.sym.clone();
let id = ident.span;
Op::Var { name, kind, id }
}
tt::Leaf::Literal(lit) if is_boolean_literal(lit) => {
let kind = eat_fragment_kind(edition, src, mode)?;
let name = lit.text.clone();
let name = lit.symbol.clone();
let id = lit.span;
Op::Var { name, kind, id }
}
@ -277,7 +277,7 @@ fn eat_fragment_kind(
let ident = src
.expect_ident()
.map_err(|()| ParseError::unexpected("missing fragment specifier"))?;
let kind = match ident.text.as_str() {
let kind = match ident.sym.as_str() {
"path" => MetaVarKind::Path,
"ty" => MetaVarKind::Ty,
"pat" => match edition(ident.span.ctx) {
@ -303,7 +303,7 @@ fn eat_fragment_kind(
}
fn is_boolean_literal(lit: &tt::Literal<Span>) -> bool {
matches!(lit.text.as_str(), "true" | "false")
matches!(lit.symbol.as_str(), "true" | "false")
}
fn parse_repeat(src: &mut TtIter<'_, Span>) -> Result<(Option<Separator>, RepeatKind), ParseError> {
@ -353,23 +353,23 @@ fn parse_metavar_expr(new_meta_vars: bool, src: &mut TtIter<'_, Span>) -> Result
let mut args = TtIter::new(args);
let op = match &*func.text {
"ignore" => {
let op = match &func.sym {
s if sym::ignore == *s => {
if new_meta_vars {
args.expect_dollar()?;
}
let ident = args.expect_ident()?;
Op::Ignore { name: ident.text.clone(), id: ident.span }
Op::Ignore { name: ident.sym.clone(), id: ident.span }
}
"index" => Op::Index { depth: parse_depth(&mut args)? },
"len" => Op::Len { depth: parse_depth(&mut args)? },
"count" => {
s if sym::index == *s => Op::Index { depth: parse_depth(&mut args)? },
s if sym::len == *s => Op::Len { depth: parse_depth(&mut args)? },
s if sym::count == *s => {
if new_meta_vars {
args.expect_dollar()?;
}
let ident = args.expect_ident()?;
let depth = if try_eat_comma(&mut args) { Some(parse_depth(&mut args)?) } else { None };
Op::Count { name: ident.text.clone(), depth }
Op::Count { name: ident.sym.clone(), depth }
}
_ => return Err(()),
};
@ -384,11 +384,11 @@ fn parse_metavar_expr(new_meta_vars: bool, src: &mut TtIter<'_, Span>) -> Result
fn parse_depth(src: &mut TtIter<'_, Span>) -> Result<usize, ()> {
if src.len() == 0 {
Ok(0)
} else if let tt::Leaf::Literal(tt::Literal { text, suffix: None, .. }) =
} else if let tt::Leaf::Literal(tt::Literal { symbol: text, suffix: None, .. }) =
src.expect_literal()?
{
// Suffixes are not allowed.
text.parse().map_err(|_| ())
text.as_str().parse().map_err(|_| ())
} else {
Err(())
}

View file

@ -2,6 +2,7 @@
use std::fmt;
use intern::Symbol;
use rustc_hash::{FxHashMap, FxHashSet};
use span::{Edition, SpanAnchor, SpanData, SpanMap};
use stdx::{format_to, never, non_empty_vec::NonEmptyVec};
@ -322,7 +323,7 @@ where
() => {
tt::Ident {
span: conv.span_for(abs_range),
text: token.to_text(conv),
sym: Symbol::intern(&token.to_text(conv)),
is_raw: tt::IdentIsRaw::No,
}
.into()
@ -332,14 +333,14 @@ where
T![true] | T![false] => make_ident!(),
IDENT => {
let text = token.to_text(conv);
tt::Ident::new(text, conv.span_for(abs_range)).into()
tt::Ident::new(&text, conv.span_for(abs_range)).into()
}
UNDERSCORE => make_ident!(),
k if k.is_keyword() => make_ident!(),
k if k.is_literal() => {
let text = token.to_text(conv);
let span = conv.span_for(abs_range);
token_to_literal(text, span).into()
token_to_literal(&text, span).into()
}
LIFETIME_IDENT => {
let apostrophe = tt::Leaf::from(tt::Punct {
@ -351,7 +352,7 @@ where
token_trees.push(apostrophe.into());
let ident = tt::Leaf::from(tt::Ident {
text: SmolStr::new(&token.to_text(conv)[1..]),
sym: Symbol::intern(&token.to_text(conv)[1..]),
span: conv.span_for(TextRange::new(
abs_range.start() + TextSize::of('\''),
abs_range.end(),
@ -436,7 +437,7 @@ fn is_single_token_op(kind: SyntaxKind) -> bool {
/// And then quote the string, which is needed to convert to `tt::Literal`
///
/// Note that proc-macros desugar with string literals, whereas macro_rules macros desugar with raw string literals.
pub fn desugar_doc_comment_text(text: &str, mode: DocCommentDesugarMode) -> (SmolStr, tt::LitKind) {
pub fn desugar_doc_comment_text(text: &str, mode: DocCommentDesugarMode) -> (Symbol, tt::LitKind) {
match mode {
DocCommentDesugarMode::Mbe => {
let mut num_of_hashes = 0;
@ -451,11 +452,11 @@ pub fn desugar_doc_comment_text(text: &str, mode: DocCommentDesugarMode) -> (Smo
}
// Quote raw string with delimiters
(text.into(), tt::LitKind::StrRaw(num_of_hashes))
(Symbol::intern(text), tt::LitKind::StrRaw(num_of_hashes))
}
// Quote string with delimiters
DocCommentDesugarMode::ProcMacro => {
(format_smolstr!("{}", text.escape_debug()), tt::LitKind::Str)
(Symbol::intern(&format_smolstr!("{}", text.escape_debug())), tt::LitKind::Str)
}
}
}
@ -471,7 +472,7 @@ fn convert_doc_comment<S: Copy>(
let mk_ident = |s: &str| {
tt::TokenTree::from(tt::Leaf::from(tt::Ident {
text: s.into(),
sym: Symbol::intern(s),
span,
is_raw: tt::IdentIsRaw::No,
}))
@ -494,7 +495,7 @@ fn convert_doc_comment<S: Copy>(
text = &text[0..text.len() - 2];
}
let (text, kind) = desugar_doc_comment_text(text, mode);
let lit = tt::Literal { text, span, kind, suffix: None };
let lit = tt::Literal { symbol: text, span, kind, suffix: None };
tt::TokenTree::from(tt::Leaf::from(lit))
};
@ -928,7 +929,12 @@ where
fn float_split(&mut self, has_pseudo_dot: bool) {
let (text, span) = match self.cursor.token_tree() {
Some(tt::buffer::TokenTreeRef::Leaf(
tt::Leaf::Literal(tt::Literal { text, span, kind: tt::LitKind::Float, suffix: _ }),
tt::Leaf::Literal(tt::Literal {
symbol: text,
span,
kind: tt::LitKind::Float,
suffix: _,
}),
_,
)) => (text.as_str(), *span),
tt => unreachable!("{tt:?}"),
@ -988,7 +994,7 @@ where
self.buf.push_str("r#");
self.text_pos += TextSize::of("r#");
}
let r = (ident.text.as_str(), ident.span);
let r = (ident.sym.as_str(), ident.span);
self.cursor = self.cursor.bump();
r
}

View file

@ -49,23 +49,22 @@ pub(crate) fn to_parser_input<S: Copy + fmt::Debug>(buffer: &TokenBuffer<'_, S>)
};
res.push(kind);
if kind == FLOAT_NUMBER && !lit.text.ends_with('.') {
if kind == FLOAT_NUMBER && !lit.symbol.as_str().ends_with('.') {
// Tag the token as joint if it is a float with a fractional part;
// we use this jointness to inform the parser about what token split
// event to emit when we encounter a float literal in a field access
res.was_joint();
}
}
tt::Leaf::Ident(ident) => match ident.text.as_ref() {
tt::Leaf::Ident(ident) => match ident.sym.as_str() {
"_" => res.push(T![_]),
i if i.starts_with('\'') => res.push(LIFETIME_IDENT),
_ if ident.is_raw.yes() => res.push(IDENT),
_ => match SyntaxKind::from_keyword(&ident.text) {
text => match SyntaxKind::from_keyword(text) {
Some(kind) => res.push(kind),
None => {
let contextual_keyword =
SyntaxKind::from_contextual_keyword(&ident.text)
.unwrap_or(SyntaxKind::IDENT);
let contextual_keyword = SyntaxKind::from_contextual_keyword(text)
.unwrap_or(SyntaxKind::IDENT);
res.push_ident(contextual_keyword);
}
},