Auto merge of #13548 - lowr:fix/tt-punct-spacing, r=Veykril

Fix `tt::Punct`'s spacing calculation

Fixes #13499

We currently set a `tt::Punct`'s spacing to `Spacing::Joint` unless the next token is trivia (i.e. whitespace or a comment). As I understand it, rustc only [sets `Spacing::Joint` if the next token is an operator](5b3e909075/compiler/rustc_parse/src/lexer/tokentrees.rs (L77-L78)), and we should follow suit to guarantee consistent proc-macro behavior.
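
For context, a minimal sketch of what `Spacing` means observably — using the standalone `proc-macro2` crate as an illustration aid (an assumption for demonstration purposes, not code from this PR; the PR itself touches rust-analyzer's own `tt` types). Its fallback printer glues a `Joint` punct to the following token and separates an `Alone` one:

```rust
use proc_macro2::{Punct, Spacing, TokenStream, TokenTree};

fn main() {
    let glued: TokenStream = vec![
        TokenTree::Punct(Punct::new('<', Spacing::Joint)),
        TokenTree::Punct(Punct::new('=', Spacing::Alone)),
    ]
    .into_iter()
    .collect();
    // The two puncts render as a single `<=` operator.
    assert_eq!(glued.to_string(), "<=");

    let split: TokenStream = vec![
        TokenTree::Punct(Punct::new('<', Spacing::Alone)),
        TokenTree::Punct(Punct::new('=', Spacing::Alone)),
    ]
    .into_iter()
    .collect();
    // Two separate tokens: `<` and `=`.
    assert_eq!(split.to_string(), "< =");
}
```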
Committed by bors, 2022-11-11 12:19:30 +00:00 · commit 6f313cef8e
5 changed files with 229 additions and 65 deletions

View file

@@ -94,11 +94,11 @@ macro_rules! m {
     ($($s:stmt)*) => (stringify!($($s |)*);)
 }
 stringify!(;
-|;
-|92|;
-|let x = 92|;
+| ;
+|92| ;
+|let x = 92| ;
 |loop {}
-|;
+| ;
 |);
 "#]],
     );
@@ -118,7 +118,7 @@ m!(.. .. ..);
 macro_rules! m {
     ($($p:pat)*) => (stringify!($($p |)*);)
 }
-stringify!(.. .. ..|);
+stringify!(.. .. .. |);
 "#]],
     );
 }

View file

@@ -82,14 +82,14 @@ fn attribute_macro_syntax_completion_2() {
 #[proc_macros::identity_when_valid]
 fn foo() { bar.; blub }
 "#,
-        expect![[r##"
+        expect![[r#"
 #[proc_macros::identity_when_valid]
 fn foo() { bar.; blub }

 fn foo() {
-    bar.;
+    bar. ;
     blub
-}"##]],
+}"#]],
     );
 }

View file

@@ -4,6 +4,7 @@ use std::mem;

 use mbe::{SyntheticToken, SyntheticTokenId, TokenMap};
 use rustc_hash::FxHashMap;
+use smallvec::SmallVec;
 use syntax::{
     ast::{self, AstNode, HasLoopBody},
     match_ast, SyntaxElement, SyntaxKind, SyntaxNode, TextRange,
@@ -292,25 +293,34 @@ pub(crate) fn reverse_fixups(
     token_map: &TokenMap,
     undo_info: &SyntaxFixupUndoInfo,
 ) {
-    tt.token_trees.retain(|tt| match tt {
-        tt::TokenTree::Leaf(leaf) => {
-            token_map.synthetic_token_id(leaf.id()).is_none()
-                || token_map.synthetic_token_id(leaf.id()) != Some(EMPTY_ID)
-        }
-        tt::TokenTree::Subtree(st) => st.delimiter.map_or(true, |d| {
-            token_map.synthetic_token_id(d.id).is_none()
-                || token_map.synthetic_token_id(d.id) != Some(EMPTY_ID)
-        }),
-    });
-    tt.token_trees.iter_mut().for_each(|tt| match tt {
-        tt::TokenTree::Subtree(tt) => reverse_fixups(tt, token_map, undo_info),
-        tt::TokenTree::Leaf(leaf) => {
-            if let Some(id) = token_map.synthetic_token_id(leaf.id()) {
-                let original = &undo_info.original[id.0 as usize];
-                *tt = tt::TokenTree::Subtree(original.clone());
-            }
-        }
-    });
+    let tts = std::mem::take(&mut tt.token_trees);
+    tt.token_trees = tts
+        .into_iter()
+        .filter(|tt| match tt {
+            tt::TokenTree::Leaf(leaf) => token_map.synthetic_token_id(leaf.id()) != Some(EMPTY_ID),
+            tt::TokenTree::Subtree(st) => {
+                st.delimiter.map_or(true, |d| token_map.synthetic_token_id(d.id) != Some(EMPTY_ID))
+            }
+        })
+        .flat_map(|tt| match tt {
+            tt::TokenTree::Subtree(mut tt) => {
+                reverse_fixups(&mut tt, token_map, undo_info);
+                SmallVec::from_const([tt.into()])
+            }
+            tt::TokenTree::Leaf(leaf) => {
+                if let Some(id) = token_map.synthetic_token_id(leaf.id()) {
+                    let original = undo_info.original[id.0 as usize].clone();
+                    if original.delimiter.is_none() {
+                        original.token_trees.into()
+                    } else {
+                        SmallVec::from_const([original.into()])
+                    }
+                } else {
+                    SmallVec::from_const([leaf.into()])
+                }
+            }
+        })
+        .collect();
 }

 #[cfg(test)]
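
The behavioral change in this hunk: when a reversed original subtree has no delimiter, its children are now spliced inline into the parent's token list (via `flat_map` and `SmallVec`) instead of being re-wrapped as a nested subtree. A self-contained toy model of that splice, with simplified stand-in types rather than rust-analyzer's real `tt` crate:

```rust
#[derive(Clone, Debug, PartialEq)]
enum Tree {
    Leaf(String),
    // `Some('(')` etc. models a real delimiter; `None` models an
    // invisible/absent delimiter.
    Subtree(Option<char>, Vec<Tree>),
}

fn splice(trees: Vec<Tree>) -> Vec<Tree> {
    trees
        .into_iter()
        .flat_map(|t| match t {
            // A delimiter-less subtree contributes its children inline.
            Tree::Subtree(None, children) => children,
            // Everything else is kept as a single tree.
            other => vec![other],
        })
        .collect()
}

fn main() {
    let input = vec![
        Tree::Leaf("a".into()),
        Tree::Subtree(None, vec![Tree::Leaf("b".into()), Tree::Leaf("c".into())]),
        Tree::Subtree(Some('('), vec![Tree::Leaf("d".into())]),
    ];
    assert_eq!(
        splice(input),
        vec![
            Tree::Leaf("a".into()),
            Tree::Leaf("b".into()),
            Tree::Leaf("c".into()),
            Tree::Subtree(Some('('), vec![Tree::Leaf("d".into())]),
        ]
    );
}
```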
@@ -319,6 +329,31 @@ mod tests {

     use super::reverse_fixups;

+    // The following three functions are only meant to check partial structural equivalence of
+    // `TokenTree`s, see the last assertion in `check()`.
+    fn check_leaf_eq(a: &tt::Leaf, b: &tt::Leaf) -> bool {
+        match (a, b) {
+            (tt::Leaf::Literal(a), tt::Leaf::Literal(b)) => a.text == b.text,
+            (tt::Leaf::Punct(a), tt::Leaf::Punct(b)) => a.char == b.char,
+            (tt::Leaf::Ident(a), tt::Leaf::Ident(b)) => a.text == b.text,
+            _ => false,
+        }
+    }
+
+    fn check_subtree_eq(a: &tt::Subtree, b: &tt::Subtree) -> bool {
+        a.delimiter.map(|it| it.kind) == b.delimiter.map(|it| it.kind)
+            && a.token_trees.len() == b.token_trees.len()
+            && a.token_trees.iter().zip(&b.token_trees).all(|(a, b)| check_tt_eq(a, b))
+    }
+
+    fn check_tt_eq(a: &tt::TokenTree, b: &tt::TokenTree) -> bool {
+        match (a, b) {
+            (tt::TokenTree::Leaf(a), tt::TokenTree::Leaf(b)) => check_leaf_eq(a, b),
+            (tt::TokenTree::Subtree(a), tt::TokenTree::Subtree(b)) => check_subtree_eq(a, b),
+            _ => false,
+        }
+    }
+
     #[track_caller]
     fn check(ra_fixture: &str, mut expect: Expect) {
         let parsed = syntax::SourceFile::parse(ra_fixture);
@@ -331,17 +366,15 @@ mod tests {
             fixups.append,
         );

-        let mut actual = tt.to_string();
-        actual.push('\n');
+        let actual = format!("{}\n", tt);

         expect.indent(false);
         expect.assert_eq(&actual);

         // the fixed-up tree should be syntactically valid
         let (parse, _) = mbe::token_tree_to_syntax_node(&tt, ::mbe::TopEntryPoint::MacroItems);
-        assert_eq!(
-            parse.errors(),
-            &[],
+        assert!(
+            parse.errors().is_empty(),
             "parse has syntax errors. parse tree:\n{:#?}",
             parse.syntax_node()
         );
@@ -349,9 +382,12 @@ mod tests {
         reverse_fixups(&mut tt, &tmap, &fixups.undo_info);

         // the fixed-up + reversed version should be equivalent to the original input
-        // (but token IDs don't matter)
+        // modulo token IDs and `Punct`s' spacing.
         let (original_as_tt, _) = mbe::syntax_node_to_token_tree(&parsed.syntax_node());
-        assert_eq!(tt.to_string(), original_as_tt.to_string());
+        assert!(
+            check_subtree_eq(&tt, &original_as_tt),
+            "different token tree: {tt:?}, {original_as_tt:?}"
+        );
     }

     #[test]
@@ -468,7 +504,7 @@ fn foo() {
 }
 "#,
         expect![[r#"
-fn foo () {a .__ra_fixup}
+fn foo () {a . __ra_fixup}
 "#]],
     )
 }
@@ -482,7 +518,7 @@ fn foo() {
 }
 "#,
         expect![[r#"
-fn foo () {a .__ra_fixup ;}
+fn foo () {a . __ra_fixup ;}
 "#]],
     )
 }
@@ -497,7 +533,7 @@ fn foo() {
 }
 "#,
         expect![[r#"
-fn foo () {a .__ra_fixup ; bar () ;}
+fn foo () {a . __ra_fixup ; bar () ;}
 "#]],
     )
 }
@@ -525,7 +561,7 @@ fn foo() {
 }
 "#,
         expect![[r#"
-fn foo () {let x = a .__ra_fixup ;}
+fn foo () {let x = a . __ra_fixup ;}
 "#]],
     )
 }
@@ -541,7 +577,7 @@ fn foo() {
 }
 "#,
         expect![[r#"
-fn foo () {a .b ; bar () ;}
+fn foo () {a . b ; bar () ;}
 "#]],
     )
 }

View file

@@ -12,6 +12,9 @@ use tt::buffer::{Cursor, TokenBuffer};

 use crate::{to_parser_input::to_parser_input, tt_iter::TtIter, TokenMap};

+#[cfg(test)]
+mod tests;
+
 /// Convert the syntax node to a `TokenTree` (what macro
 /// will consume).
 pub fn syntax_node_to_token_tree(node: &SyntaxNode) -> (tt::Subtree, TokenMap) {
@@ -35,7 +38,7 @@ pub fn syntax_node_to_token_tree_with_modifications(
     append: FxHashMap<SyntaxElement, Vec<SyntheticToken>>,
 ) -> (tt::Subtree, TokenMap, u32) {
     let global_offset = node.text_range().start();
-    let mut c = Convertor::new(node, global_offset, existing_token_map, next_id, replace, append);
+    let mut c = Converter::new(node, global_offset, existing_token_map, next_id, replace, append);
     let subtree = convert_tokens(&mut c);
     c.id_alloc.map.shrink_to_fit();
     always!(c.replace.is_empty(), "replace: {:?}", c.replace);
@@ -100,7 +103,7 @@ pub fn parse_to_token_tree(text: &str) -> Option<(tt::Subtree, TokenMap)> {
         return None;
     }

-    let mut conv = RawConvertor {
+    let mut conv = RawConverter {
         lexed,
         pos: 0,
         id_alloc: TokenIdAlloc {
@@ -148,7 +151,7 @@ pub fn parse_exprs_with_sep(tt: &tt::Subtree, sep: char) -> Vec<tt::Subtree> {
     res
 }

-fn convert_tokens<C: TokenConvertor>(conv: &mut C) -> tt::Subtree {
+fn convert_tokens<C: TokenConverter>(conv: &mut C) -> tt::Subtree {
     struct StackEntry {
         subtree: tt::Subtree,
         idx: usize,
@@ -228,7 +231,7 @@ fn convert_tokens<C: TokenConvertor>(conv: &mut C) -> tt::Subtree {
                 }

                 let spacing = match conv.peek().map(|next| next.kind(conv)) {
-                    Some(kind) if !kind.is_trivia() => tt::Spacing::Joint,
+                    Some(kind) if is_single_token_op(kind) => tt::Spacing::Joint,
                     _ => tt::Spacing::Alone,
                 };
                 let char = match token.to_char(conv) {
@@ -307,6 +310,35 @@ fn convert_tokens<C: TokenConvertor>(conv: &mut C) -> tt::Subtree {
     }
 }

+fn is_single_token_op(kind: SyntaxKind) -> bool {
+    matches!(
+        kind,
+        EQ | L_ANGLE
+            | R_ANGLE
+            | BANG
+            | AMP
+            | PIPE
+            | TILDE
+            | AT
+            | DOT
+            | COMMA
+            | SEMICOLON
+            | COLON
+            | POUND
+            | DOLLAR
+            | QUESTION
+            | PLUS
+            | MINUS
+            | STAR
+            | SLASH
+            | PERCENT
+            | CARET
+            // LIFETIME_IDENT will be split into a sequence of `'` (a single quote) and an
+            // identifier.
+            | LIFETIME_IDENT
+    )
+}
+
 /// Returns the textual content of a doc comment block as a quoted string
 /// That is, strips leading `///` (or `/**`, etc)
 /// and strips the ending `*/`
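
To see why the old `!kind.is_trivia()` test was too permissive, consider the `m!(.. .. ..)` regression fixed above: the final `.` is followed by `)`, which is not itself an operator token, so rustc marks it `Alone`, while the old rule marked it `Joint` and the printer glued the transcribed `|` onto it. A reduced model of the before/after rule (hand-rolled enum as a stand-in for `SyntaxKind`, for illustration only):

```rust
#[derive(Clone, Copy, PartialEq, Debug)]
enum Kind { Dot, RParen, Whitespace }

fn is_trivia(k: Kind) -> bool {
    matches!(k, Kind::Whitespace)
}

fn is_single_token_op(k: Kind) -> bool {
    matches!(k, Kind::Dot) // `.`; the real list has ~20 more kinds
}

// Old rule: Joint unless the next token is trivia.
fn old_spacing(next: Option<Kind>) -> &'static str {
    match next {
        Some(k) if !is_trivia(k) => "Joint",
        _ => "Alone",
    }
}

// New rule: Joint only if the next token is a single-token operator.
fn new_spacing(next: Option<Kind>) -> &'static str {
    match next {
        Some(k) if is_single_token_op(k) => "Joint",
        _ => "Alone",
    }
}

fn main() {
    // The second `.` of the last `..` in `m!(.. .. ..)` peeks at `)`:
    assert_eq!(old_spacing(Some(Kind::RParen)), "Joint"); // wrongly glued
    assert_eq!(new_spacing(Some(Kind::RParen)), "Alone"); // matches rustc
}
```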
@@ -425,8 +457,8 @@ impl TokenIdAlloc {
     }
 }

-/// A raw token (straight from lexer) convertor
-struct RawConvertor<'a> {
+/// A raw token (straight from lexer) converter
+struct RawConverter<'a> {
     lexed: parser::LexedStr<'a>,
     pos: usize,
     id_alloc: TokenIdAlloc,
@@ -442,7 +474,7 @@ trait SrcToken<Ctx>: std::fmt::Debug {
     fn synthetic_id(&self, ctx: &Ctx) -> Option<SyntheticTokenId>;
 }

-trait TokenConvertor: Sized {
+trait TokenConverter: Sized {
     type Token: SrcToken<Self>;

     fn convert_doc_comment(&self, token: &Self::Token) -> Option<Vec<tt::TokenTree>>;
@@ -454,25 +486,25 @@ trait TokenConvertor: Sized {
     fn id_alloc(&mut self) -> &mut TokenIdAlloc;
 }

-impl<'a> SrcToken<RawConvertor<'a>> for usize {
-    fn kind(&self, ctx: &RawConvertor<'a>) -> SyntaxKind {
+impl<'a> SrcToken<RawConverter<'a>> for usize {
+    fn kind(&self, ctx: &RawConverter<'a>) -> SyntaxKind {
         ctx.lexed.kind(*self)
     }

-    fn to_char(&self, ctx: &RawConvertor<'a>) -> Option<char> {
+    fn to_char(&self, ctx: &RawConverter<'a>) -> Option<char> {
         ctx.lexed.text(*self).chars().next()
     }

-    fn to_text(&self, ctx: &RawConvertor<'_>) -> SmolStr {
+    fn to_text(&self, ctx: &RawConverter<'_>) -> SmolStr {
         ctx.lexed.text(*self).into()
     }

-    fn synthetic_id(&self, _ctx: &RawConvertor<'a>) -> Option<SyntheticTokenId> {
+    fn synthetic_id(&self, _ctx: &RawConverter<'a>) -> Option<SyntheticTokenId> {
         None
     }
 }

-impl<'a> TokenConvertor for RawConvertor<'a> {
+impl<'a> TokenConverter for RawConverter<'a> {
     type Token = usize;

     fn convert_doc_comment(&self, &token: &usize) -> Option<Vec<tt::TokenTree>> {
@@ -504,7 +536,7 @@ impl<'a> TokenConvertor for RawConvertor<'a> {
     }
 }

-struct Convertor {
+struct Converter {
     id_alloc: TokenIdAlloc,
     current: Option<SyntaxToken>,
     current_synthetic: Vec<SyntheticToken>,
@@ -515,7 +547,7 @@ struct Convertor {
     punct_offset: Option<(SyntaxToken, TextSize)>,
 }

-impl Convertor {
+impl Converter {
     fn new(
         node: &SyntaxNode,
         global_offset: TextSize,
@@ -523,11 +555,11 @@ impl Convertor {
         next_id: u32,
         mut replace: FxHashMap<SyntaxElement, Vec<SyntheticToken>>,
         mut append: FxHashMap<SyntaxElement, Vec<SyntheticToken>>,
-    ) -> Convertor {
+    ) -> Converter {
         let range = node.text_range();
         let mut preorder = node.preorder_with_tokens();
         let (first, synthetic) = Self::next_token(&mut preorder, &mut replace, &mut append);
-        Convertor {
+        Converter {
             id_alloc: { TokenIdAlloc { map: existing_token_map, global_offset, next_id } },
             current: first,
             current_synthetic: synthetic,
@@ -590,15 +622,15 @@ impl SynToken {
     }
 }

-impl SrcToken<Convertor> for SynToken {
-    fn kind(&self, _ctx: &Convertor) -> SyntaxKind {
+impl SrcToken<Converter> for SynToken {
+    fn kind(&self, ctx: &Converter) -> SyntaxKind {
         match self {
             SynToken::Ordinary(token) => token.kind(),
-            SynToken::Punch(token, _) => token.kind(),
+            SynToken::Punch(..) => SyntaxKind::from_char(self.to_char(ctx).unwrap()).unwrap(),
             SynToken::Synthetic(token) => token.kind,
         }
     }
-    fn to_char(&self, _ctx: &Convertor) -> Option<char> {
+    fn to_char(&self, _ctx: &Converter) -> Option<char> {
         match self {
             SynToken::Ordinary(_) => None,
             SynToken::Punch(it, i) => it.text().chars().nth((*i).into()),
@@ -606,7 +638,7 @@ impl SrcToken<Convertor> for SynToken {
             SynToken::Synthetic(_) => None,
         }
     }
-    fn to_text(&self, _ctx: &Convertor) -> SmolStr {
+    fn to_text(&self, _ctx: &Converter) -> SmolStr {
         match self {
             SynToken::Ordinary(token) => token.text().into(),
             SynToken::Punch(token, _) => token.text().into(),
@@ -614,7 +646,7 @@ impl SrcToken<Convertor> for SynToken {
         }
     }

-    fn synthetic_id(&self, _ctx: &Convertor) -> Option<SyntheticTokenId> {
+    fn synthetic_id(&self, _ctx: &Converter) -> Option<SyntheticTokenId> {
         match self {
             SynToken::Synthetic(token) => Some(token.id),
             _ => None,
@@ -622,7 +654,7 @@ impl SrcToken<Convertor> for SynToken {
     }
 }

-impl TokenConvertor for Convertor {
+impl TokenConverter for Converter {
     type Token = SynToken;

     fn convert_doc_comment(&self, token: &Self::Token) -> Option<Vec<tt::TokenTree>> {
         convert_doc_comment(token.token()?)
@@ -651,7 +683,7 @@ impl TokenConvertor for Convertor {
         }

         let curr = self.current.clone()?;
-        if !&self.range.contains_range(curr.text_range()) {
+        if !self.range.contains_range(curr.text_range()) {
             return None;
         }
         let (new_current, new_synth) =
@@ -809,12 +841,15 @@ impl<'a> TtTreeSink<'a> {
         let next = last.bump();
         if let (
             Some(tt::buffer::TokenTreeRef::Leaf(tt::Leaf::Punct(curr), _)),
-            Some(tt::buffer::TokenTreeRef::Leaf(tt::Leaf::Punct(_), _)),
+            Some(tt::buffer::TokenTreeRef::Leaf(tt::Leaf::Punct(next), _)),
         ) = (last.token_tree(), next.token_tree())
         {
             // Note: We always assume the semi-colon would be the last token in
             // other parts of RA such that we don't add whitespace here.
-            if curr.spacing == tt::Spacing::Alone && curr.char != ';' {
+            //
+            // When `next` is a `Punct` of `'`, that's a part of a lifetime identifier so we don't
+            // need to add whitespace either.
+            if curr.spacing == tt::Spacing::Alone && curr.char != ';' && next.char != '\'' {
                 self.inner.token(WHITESPACE, " ");
                 self.text_pos += TextSize::of(' ');
             }
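
The new `next.char != '\''` guard matters because a lifetime is represented in a token stream as a `'` punct with `Spacing::Joint` immediately followed by an identifier; printing a space before the `'` would corrupt e.g. `Struct<'a>`. A small illustration, again via the `proc-macro2` crate rather than this PR's code:

```rust
use proc_macro2::{Ident, Punct, Spacing, Span, TokenStream, TokenTree};

fn main() {
    let lifetime: TokenStream = vec![
        TokenTree::Punct(Punct::new('\'', Spacing::Joint)),
        TokenTree::Ident(Ident::new("a", Span::call_site())),
    ]
    .into_iter()
    .collect();
    // Renders as `'a`; a space between `'` and `a` would no longer lex
    // as a lifetime, which is what the guard above protects against.
    assert_eq!(lifetime.to_string(), "'a");
}
```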

View file

@@ -0,0 +1,93 @@
+use std::collections::HashMap;
+
+use syntax::{ast, AstNode};
+use test_utils::extract_annotations;
+use tt::{
+    buffer::{TokenBuffer, TokenTreeRef},
+    Leaf, Punct, Spacing,
+};
+
+use super::syntax_node_to_token_tree;
+
+fn check_punct_spacing(fixture: &str) {
+    let source_file = ast::SourceFile::parse(fixture).ok().unwrap();
+    let (subtree, token_map) = syntax_node_to_token_tree(source_file.syntax());
+    let mut annotations: HashMap<_, _> = extract_annotations(fixture)
+        .into_iter()
+        .map(|(range, annotation)| {
+            let token = token_map.token_by_range(range).expect("no token found");
+            let spacing = match annotation.as_str() {
+                "Alone" => Spacing::Alone,
+                "Joint" => Spacing::Joint,
+                a => panic!("unknown annotation: {}", a),
+            };
+            (token, spacing)
+        })
+        .collect();
+
+    let buf = TokenBuffer::from_subtree(&subtree);
+    let mut cursor = buf.begin();
+    while !cursor.eof() {
+        while let Some(token_tree) = cursor.token_tree() {
+            if let TokenTreeRef::Leaf(Leaf::Punct(Punct { spacing, id, .. }), _) = token_tree {
+                if let Some(expected) = annotations.remove(&id) {
+                    assert_eq!(expected, *spacing);
+                }
+            }
+            cursor = cursor.bump_subtree();
+        }
+        cursor = cursor.bump();
+    }
+
+    assert!(annotations.is_empty(), "unchecked annotations: {:?}", annotations);
+}
+
+#[test]
+fn punct_spacing() {
+    check_punct_spacing(
+        r#"
+fn main() {
+    0+0;
+   //^ Alone
+    0+(0);
+   //^ Alone
+    0<=0;
+   //^ Joint
+   // ^ Alone
+    0<=(0);
+   // ^ Alone
+    a=0;
+   //^ Alone
+    a=(0);
+   //^ Alone
+    a+=0;
+   //^ Joint
+   // ^ Alone
+    a+=(0);
+   // ^ Alone
+    a&&b;
+   //^ Joint
+   // ^ Alone
+    a&&(b);
+   // ^ Alone
+    foo::bar;
+   //  ^ Joint
+   //   ^ Alone
+    use foo::{bar,baz,};
+   //       ^ Alone
+   //            ^ Alone
+   //                ^ Alone
+    struct Struct<'a> {};
+   //            ^ Joint
+   //             ^ Joint
+    Struct::<0>;
+   //       ^ Alone
+    Struct::<{0}>;
+   //       ^ Alone
+    ;;
+  //^ Joint
+  // ^ Alone
+}
+"#,
+    );
+}