Encode ident rawness and literal kind separately in tt::Leaf

This commit is contained in:
Lukas Wirth 2024-07-07 17:47:38 +02:00
parent 5784915618
commit e846c04fbe
33 changed files with 860 additions and 412 deletions

View file

@ -12,8 +12,54 @@ use stdx::impl_from;
pub use smol_str::SmolStr;
pub use text_size::{TextRange, TextSize};
#[derive(Clone, PartialEq, Debug)]
pub struct Lit {
pub kind: LitKind,
pub symbol: SmolStr,
pub suffix: Option<SmolStr>,
}
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
pub enum IdentIsRaw {
No,
Yes,
}
impl IdentIsRaw {
pub fn yes(self) -> bool {
matches!(self, IdentIsRaw::Yes)
}
pub fn as_str(self) -> &'static str {
match self {
IdentIsRaw::No => "",
IdentIsRaw::Yes => "r#",
}
}
pub fn split_from_symbol(sym: &str) -> (Self, &str) {
if let Some(sym) = sym.strip_prefix("r#") {
(IdentIsRaw::Yes, sym)
} else {
(IdentIsRaw::No, sym)
}
}
}
#[derive(Clone, Copy, PartialEq, Eq, Debug, Hash)]
pub enum LitKind {
Byte,
Char,
Integer, // e.g. `1`, `1u8`, `1f32`
Float, // e.g. `1.`, `1.0`, `1e3f32`
Str,
StrRaw(u8), // raw string delimited by `n` hash symbols
ByteStr,
ByteStrRaw(u8), // raw byte string delimited by `n` hash symbols
CStr,
CStrRaw(u8),
Err(()),
}
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum TokenTree<S> {
pub enum TokenTree<S = u32> {
Leaf(Leaf<S>),
Subtree(Subtree<S>),
}
@ -103,6 +149,15 @@ pub struct DelimSpan<S> {
pub close: S,
}
impl<Span: Copy> DelimSpan<Span> {
pub fn from_single(sp: Span) -> Self {
DelimSpan { open: sp, close: sp }
}
pub fn from_pair(open: Span, close: Span) -> Self {
DelimSpan { open, close }
}
}
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub struct Delimiter<S> {
pub open: S,
@ -134,8 +189,11 @@ pub enum DelimiterKind {
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct Literal<S> {
// escaped
pub text: SmolStr,
pub span: S,
pub kind: LitKind,
pub suffix: Option<Box<SmolStr>>,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
@ -145,23 +203,85 @@ pub struct Punct<S> {
pub span: S,
}
/// Indicates whether a token can join with the following token to form a
/// compound token. Used for conversions to `proc_macro::Spacing`. Also used to
/// guide pretty-printing, which is where the `JointHidden` value (which isn't
/// part of `proc_macro::Spacing`) comes in useful.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Spacing {
/// The token cannot join with the following token to form a compound
/// token.
///
/// In token streams parsed from source code, the compiler will use `Alone`
/// for any token immediately followed by whitespace, a non-doc comment, or
/// EOF.
///
/// When constructing token streams within the compiler, use this for each
/// token that (a) should be pretty-printed with a space after it, or (b)
/// is the last token in the stream. (In the latter case the choice of
/// spacing doesn't matter because it is never used for the last token. We
/// arbitrarily use `Alone`.)
///
/// Converts to `proc_macro::Spacing::Alone`, and
/// `proc_macro::Spacing::Alone` converts back to this.
Alone,
/// Whether the following token is joint to this one.
/// The token can join with the following token to form a compound token.
///
/// In token streams parsed from source code, the compiler will use `Joint`
/// for any token immediately followed by punctuation (as determined by
/// `Token::is_punct`).
///
/// When constructing token streams within the compiler, use this for each
/// token that (a) should be pretty-printed without a space after it, and
/// (b) is followed by a punctuation token.
///
/// Converts to `proc_macro::Spacing::Joint`, and
/// `proc_macro::Spacing::Joint` converts back to this.
Joint,
/// The token can join with the following token to form a compound token,
/// but this will not be visible at the proc macro level. (This is what the
/// `Hidden` means; see below.)
///
/// In token streams parsed from source code, the compiler will use
/// `JointHidden` for any token immediately followed by anything not
/// covered by the `Alone` and `Joint` cases: an identifier, lifetime,
/// literal, delimiter, doc comment.
///
/// When constructing token streams, use this for each token that (a)
/// should be pretty-printed without a space after it, and (b) is followed
/// by a non-punctuation token.
///
/// Converts to `proc_macro::Spacing::Alone`, but
/// `proc_macro::Spacing::Alone` converts back to `token::Spacing::Alone`.
/// Because of that, pretty-printing of `TokenStream`s produced by proc
/// macros is unavoidably uglier (with more whitespace between tokens) than
/// pretty-printing of `TokenStream`'s produced by other means (i.e. parsed
/// source code, internally constructed token streams, and token streams
/// produced by declarative macros).
JointHidden,
}
/// Identifier or keyword.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
/// Identifier or keyword. Unlike rustc, we keep "r#" prefix when it represents a raw identifier.
pub struct Ident<S> {
pub text: SmolStr,
pub span: S,
pub is_raw: IdentIsRaw,
}
impl<S> Ident<S> {
pub fn new(text: impl Into<SmolStr>, span: S) -> Self {
Ident { text: text.into(), span }
pub fn new(text: impl Into<SmolStr> + AsRef<str>, span: S) -> Self {
let t = text.as_ref();
// let raw_stripped = IdentIsRaw::split_from_symbol(text.as_ref());
let raw_stripped = t.strip_prefix("r#");
let is_raw = if raw_stripped.is_none() { IdentIsRaw::No } else { IdentIsRaw::Yes };
let text = match raw_stripped {
Some(derawed) => derawed.into(),
None => text.into(),
};
Ident { text, span, is_raw }
}
}
@ -207,22 +327,35 @@ fn print_debug_token<S: fmt::Debug>(
match tkn {
TokenTree::Leaf(leaf) => match leaf {
Leaf::Literal(lit) => {
write!(f, "{}LITERAL {}", align, lit.text)?;
fmt::Debug::fmt(&lit.span, f)?;
write!(
f,
"{}LITERAL {:?} {}{} {:#?}",
align,
lit.kind,
lit.text,
lit.suffix.as_ref().map(|it| &***it).unwrap_or(""),
lit.span
)?;
}
Leaf::Punct(punct) => {
write!(
f,
"{}PUNCH {} [{}] ",
"{}PUNCH {} [{}] {:#?}",
align,
punct.char,
if punct.spacing == Spacing::Alone { "alone" } else { "joint" },
punct.span
)?;
fmt::Debug::fmt(&punct.span, f)?;
}
Leaf::Ident(ident) => {
write!(f, "{}IDENT {} ", align, ident.text)?;
fmt::Debug::fmt(&ident.span, f)?;
write!(
f,
"{}IDENT {}{} {:#?}",
align,
ident.is_raw.as_str(),
ident.text,
ident.span
)?;
}
},
TokenTree::Subtree(subtree) => {
@ -288,13 +421,52 @@ impl<S> fmt::Display for Leaf<S> {
impl<S> fmt::Display for Ident<S> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
fmt::Display::fmt(&self.is_raw.as_str(), f)?;
fmt::Display::fmt(&self.text, f)
}
}
impl<S> fmt::Display for Literal<S> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
fmt::Display::fmt(&self.text, f)
match self.kind {
LitKind::Byte => write!(f, "b'{}'", self.text),
LitKind::Char => write!(f, "'{}'", self.text),
LitKind::Integer | LitKind::Float | LitKind::Err(_) => write!(f, "{}", self.text),
LitKind::Str => write!(f, "\"{}\"", self.text),
LitKind::ByteStr => write!(f, "b\"{}\"", self.text),
LitKind::CStr => write!(f, "c\"{}\"", self.text),
LitKind::StrRaw(num_of_hashes) => {
let num_of_hashes = num_of_hashes as usize;
write!(
f,
r#"r{0:#<num_of_hashes$}"{text}"{0:#<num_of_hashes$}"#,
"",
text = self.text
)
}
LitKind::ByteStrRaw(num_of_hashes) => {
let num_of_hashes = num_of_hashes as usize;
write!(
f,
r#"br{0:#<num_of_hashes$}"{text}"{0:#<num_of_hashes$}"#,
"",
text = self.text
)
}
LitKind::CStrRaw(num_of_hashes) => {
let num_of_hashes = num_of_hashes as usize;
write!(
f,
r#"cr{0:#<num_of_hashes$}"{text}"{0:#<num_of_hashes$}"#,
"",
text = self.text
)
}
}?;
if let Some(suffix) = &self.suffix {
write!(f, "{}", suffix)?;
}
Ok(())
}
}
@ -339,7 +511,7 @@ impl<S> Subtree<S> {
let s = match it {
Leaf::Literal(it) => it.text.to_string(),
Leaf::Punct(it) => it.char.to_string(),
Leaf::Ident(it) => it.text.to_string(),
Leaf::Ident(it) => format!("{}{}", it.is_raw.as_str(), it.text),
};
match (it, last) {
(Leaf::Ident(_), Some(&TokenTree::Leaf(Leaf::Ident(_)))) => {
@ -369,7 +541,9 @@ impl<S> Subtree<S> {
pub fn pretty<S>(tkns: &[TokenTree<S>]) -> String {
fn tokentree_to_text<S>(tkn: &TokenTree<S>) -> String {
match tkn {
TokenTree::Leaf(Leaf::Ident(ident)) => ident.text.clone().into(),
TokenTree::Leaf(Leaf::Ident(ident)) => {
format!("{}{}", ident.is_raw.as_str(), ident.text)
}
TokenTree::Leaf(Leaf::Literal(literal)) => literal.text.clone().into(),
TokenTree::Leaf(Leaf::Punct(punct)) => format!("{}", punct.char),
TokenTree::Subtree(subtree) => {