//! This module generates AST datatype used by rust-analyzer. //! //! Specifically, it generates the `SyntaxKind` enum and a number of newtype //! wrappers around `SyntaxNode` which implement `syntax::AstNode`. #![allow(clippy::disallowed_types)] use std::{ collections::{BTreeSet, HashSet}, fmt::Write, fs, }; use either::Either; use itertools::Itertools; use proc_macro2::{Punct, Spacing}; use quote::{format_ident, quote}; use stdx::panic_context; use ungrammar::{Grammar, Rule}; use crate::{ codegen::{add_preamble, ensure_file_contents, grammar::ast_src::generate_kind_src, reformat}, project_root, }; mod ast_src; use self::ast_src::{AstEnumSrc, AstNodeSrc, AstSrc, Cardinality, Field, KindsSrc}; pub(crate) fn generate(check: bool) { let grammar = fs::read_to_string(project_root().join("crates/syntax/rust.ungram")) .unwrap() .parse() .unwrap(); let ast = lower(&grammar); let kinds_src = generate_kind_src(&ast.nodes, &ast.enums, &grammar); let syntax_kinds = generate_syntax_kinds(kinds_src); let syntax_kinds_file = project_root().join("crates/parser/src/syntax_kind/generated.rs"); ensure_file_contents( crate::flags::CodegenType::Grammar, syntax_kinds_file.as_path(), &syntax_kinds, check, ); let ast_tokens = generate_tokens(&ast); let ast_tokens_file = project_root().join("crates/syntax/src/ast/generated/tokens.rs"); ensure_file_contents( crate::flags::CodegenType::Grammar, ast_tokens_file.as_path(), &ast_tokens, check, ); let ast_nodes = generate_nodes(kinds_src, &ast); let ast_nodes_file = project_root().join("crates/syntax/src/ast/generated/nodes.rs"); ensure_file_contents( crate::flags::CodegenType::Grammar, ast_nodes_file.as_path(), &ast_nodes, check, ); } fn generate_tokens(grammar: &AstSrc) -> String { let tokens = grammar.tokens.iter().map(|token| { let name = format_ident!("{}", token); let kind = format_ident!("{}", to_upper_snake_case(token)); quote! { pub struct #name { pub(crate) syntax: SyntaxToken, } impl std::fmt::Display for #name { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { std::fmt::Display::fmt(&self.syntax, f) } } impl AstToken for #name { fn can_cast(kind: SyntaxKind) -> bool { kind == #kind } fn cast(syntax: SyntaxToken) -> Option { if Self::can_cast(syntax.kind()) { Some(Self { syntax }) } else { None } } fn syntax(&self) -> &SyntaxToken { &self.syntax } } impl fmt::Debug for #name { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_struct(#token).field("syntax", &self.syntax).finish() } } impl Clone for #name { fn clone(&self) -> Self { Self { syntax: self.syntax.clone() } } } impl hash::Hash for #name { fn hash(&self, state: &mut H) { self.syntax.hash(state); } } impl Eq for #name {} impl PartialEq for #name { fn eq(&self, other: &Self) -> bool { self.syntax == other.syntax } } } }); add_preamble( crate::flags::CodegenType::Grammar, reformat( quote! { use std::{fmt, hash}; use crate::{SyntaxKind::{self, *}, SyntaxToken, ast::AstToken}; #(#tokens)* } .to_string(), ), ) .replace("#[derive", "\n#[derive") } fn generate_nodes(kinds: KindsSrc, grammar: &AstSrc) -> String { let (node_defs, node_boilerplate_impls): (Vec<_>, Vec<_>) = grammar .nodes .iter() .map(|node| { let node_str_name = &node.name; let name = format_ident!("{}", node.name); let kind = format_ident!("{}", to_upper_snake_case(&node.name)); let traits = node .traits .iter() .filter(|trait_name| { // Loops have two expressions so this might collide, therefore manual impl it node.name != "ForExpr" && node.name != "WhileExpr" || trait_name.as_str() != "HasLoopBody" }) .map(|trait_name| { let trait_name = format_ident!("{}", trait_name); quote!(impl ast::#trait_name for #name {}) }); let methods = node.fields.iter().map(|field| { let method_name = format_ident!("{}", field.method_name()); let ty = field.ty(); if field.is_many() { quote! { #[inline] pub fn #method_name(&self) -> AstChildren<#ty> { support::children(&self.syntax) } } } else if let Some(token_kind) = field.token_kind() { quote! { #[inline] pub fn #method_name(&self) -> Option<#ty> { support::token(&self.syntax, #token_kind) } } } else { quote! { #[inline] pub fn #method_name(&self) -> Option<#ty> { support::child(&self.syntax) } } } }); ( quote! { #[pretty_doc_comment_placeholder_workaround] pub struct #name { pub(crate) syntax: SyntaxNode, } #(#traits)* impl #name { #(#methods)* } }, quote! { impl AstNode for #name { #[inline] fn kind() -> SyntaxKind where Self: Sized { #kind } #[inline] fn can_cast(kind: SyntaxKind) -> bool { kind == #kind } #[inline] fn cast(syntax: SyntaxNode) -> Option { if Self::can_cast(syntax.kind()) { Some(Self { syntax }) } else { None } } #[inline] fn syntax(&self) -> &SyntaxNode { &self.syntax } } impl hash::Hash for #name { fn hash(&self, state: &mut H) { self.syntax.hash(state); } } impl Eq for #name {} impl PartialEq for #name { fn eq(&self, other: &Self) -> bool { self.syntax == other.syntax } } impl Clone for #name { fn clone(&self) -> Self { Self { syntax: self.syntax.clone() } } } impl fmt::Debug for #name { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_struct(#node_str_name).field("syntax", &self.syntax).finish() } } }, ) }) .unzip(); let (enum_defs, enum_boilerplate_impls): (Vec<_>, Vec<_>) = grammar .enums .iter() .map(|en| { let variants: Vec<_> = en.variants.iter().map(|var| format_ident!("{}", var)).sorted().collect(); let name = format_ident!("{}", en.name); let kinds: Vec<_> = variants .iter() .map(|name| format_ident!("{}", to_upper_snake_case(&name.to_string()))) .collect(); let traits = en.traits.iter().sorted().map(|trait_name| { let trait_name = format_ident!("{}", trait_name); quote!(impl ast::#trait_name for #name {}) }); let ast_node = if en.name == "Stmt" { quote! {} } else { quote! { impl AstNode for #name { #[inline] fn can_cast(kind: SyntaxKind) -> bool { matches!(kind, #(#kinds)|*) } #[inline] fn cast(syntax: SyntaxNode) -> Option { let res = match syntax.kind() { #( #kinds => #name::#variants(#variants { syntax }), )* _ => return None, }; Some(res) } #[inline] fn syntax(&self) -> &SyntaxNode { match self { #( #name::#variants(it) => &it.syntax, )* } } } } }; ( quote! { #[pretty_doc_comment_placeholder_workaround] #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub enum #name { #(#variants(#variants),)* } #(#traits)* }, quote! { #( impl From<#variants> for #name { #[inline] fn from(node: #variants) -> #name { #name::#variants(node) } } )* #ast_node }, ) }) .unzip(); let (any_node_defs, any_node_boilerplate_impls): (Vec<_>, Vec<_>) = grammar .nodes .iter() .flat_map(|node| node.traits.iter().map(move |t| (t, node))) .into_group_map() .into_iter() .sorted_by_key(|(name, _)| *name) .map(|(trait_name, nodes)| { let name = format_ident!("Any{}", trait_name); let node_str_name = name.to_string(); let trait_name = format_ident!("{}", trait_name); let kinds: Vec<_> = nodes .iter() .map(|name| format_ident!("{}", to_upper_snake_case(&name.name.to_string()))) .collect(); let nodes = nodes.iter().map(|node| format_ident!("{}", node.name)); ( quote! { #[pretty_doc_comment_placeholder_workaround] pub struct #name { pub(crate) syntax: SyntaxNode, } impl #name { #[inline] pub fn new(node: T) -> #name { #name { syntax: node.syntax().clone() } } } }, quote! { impl ast::#trait_name for #name {} impl AstNode for #name { #[inline] fn can_cast(kind: SyntaxKind) -> bool { matches!(kind, #(#kinds)|*) } #[inline] fn cast(syntax: SyntaxNode) -> Option { Self::can_cast(syntax.kind()).then_some(#name { syntax }) } #[inline] fn syntax(&self) -> &SyntaxNode { &self.syntax } } impl hash::Hash for #name { fn hash(&self, state: &mut H) { self.syntax.hash(state); } } impl Eq for #name {} impl PartialEq for #name { fn eq(&self, other: &Self) -> bool { self.syntax == other.syntax } } impl Clone for #name { fn clone(&self) -> Self { Self { syntax: self.syntax.clone() } } } impl fmt::Debug for #name { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_struct(#node_str_name).field("syntax", &self.syntax).finish() } } #( impl From<#nodes> for #name { #[inline] fn from(node: #nodes) -> #name { #name { syntax: node.syntax } } } )* }, ) }) .unzip(); let enum_names = grammar.enums.iter().map(|it| &it.name); let node_names = grammar.nodes.iter().map(|it| &it.name); let display_impls = enum_names.chain(node_names.clone()).map(|it| format_ident!("{}", it)).map(|name| { quote! { impl std::fmt::Display for #name { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { std::fmt::Display::fmt(self.syntax(), f) } } } }); let defined_nodes: HashSet<_> = node_names.collect(); for node in kinds .nodes .iter() .map(|kind| to_pascal_case(kind)) .filter(|name| !defined_nodes.iter().any(|&it| it == name)) { eprintln!("Warning: node {node} not defined in AST source"); drop(node); } let ast = quote! { #![allow(non_snake_case)] use std::{fmt, hash}; use crate::{ SyntaxNode, SyntaxToken, SyntaxKind::{self, *}, ast::{self, AstNode, AstChildren, support}, T, }; #(#node_defs)* #(#enum_defs)* #(#any_node_defs)* #(#node_boilerplate_impls)* #(#enum_boilerplate_impls)* #(#any_node_boilerplate_impls)* #(#display_impls)* }; let ast = ast.to_string().replace("T ! [", "T!["); let mut res = String::with_capacity(ast.len() * 2); let mut docs = grammar.nodes.iter().map(|it| &it.doc).chain(grammar.enums.iter().map(|it| &it.doc)); for chunk in ast.split("# [pretty_doc_comment_placeholder_workaround] ") { res.push_str(chunk); if let Some(doc) = docs.next() { write_doc_comment(doc, &mut res); } } let res = add_preamble(crate::flags::CodegenType::Grammar, reformat(res)); res.replace("#[derive", "\n#[derive") } fn write_doc_comment(contents: &[String], dest: &mut String) { for line in contents { writeln!(dest, "///{line}").unwrap(); } } fn generate_syntax_kinds(grammar: KindsSrc) -> String { let (single_byte_tokens_values, single_byte_tokens): (Vec<_>, Vec<_>) = grammar .punct .iter() .filter(|(token, _name)| token.len() == 1) .map(|(token, name)| (token.chars().next().unwrap(), format_ident!("{}", name))) .unzip(); let punctuation_values = grammar.punct.iter().map(|(token, _name)| { if "{}[]()".contains(token) { let c = token.chars().next().unwrap(); quote! { #c } // underscore is an identifier in the proc-macro api } else if *token == "_" { quote! { _ } } else { let cs = token.chars().map(|c| Punct::new(c, Spacing::Joint)); quote! { #(#cs)* } } }); let punctuation = grammar.punct.iter().map(|(_token, name)| format_ident!("{}", name)).collect::>(); let punctuation_texts = grammar.punct.iter().map(|&(text, _name)| text); let fmt_kw_as_variant = |&name| match name { "Self" => format_ident!("SELF_TYPE_KW"), name => format_ident!("{}_KW", to_upper_snake_case(name)), }; let strict_keywords = grammar.keywords; let strict_keywords_variants = strict_keywords.iter().map(fmt_kw_as_variant).collect::>(); let strict_keywords_tokens = strict_keywords.iter().map(|it| format_ident!("{it}")); let edition_dependent_keywords_variants_match_arm = grammar .edition_dependent_keywords .iter() .map(|(kw, ed)| { let kw = fmt_kw_as_variant(kw); quote! { #kw if #ed <= edition } }) .collect::>(); let edition_dependent_keywords_str_match_arm = grammar .edition_dependent_keywords .iter() .map(|(kw, ed)| { quote! { #kw if #ed <= edition } }) .collect::>(); let edition_dependent_keywords = grammar.edition_dependent_keywords.iter().map(|&(it, _)| it); let edition_dependent_keywords_variants = grammar .edition_dependent_keywords .iter() .map(|(kw, _)| fmt_kw_as_variant(kw)) .collect::>(); let edition_dependent_keywords_tokens = grammar.edition_dependent_keywords.iter().map(|(it, _)| format_ident!("{it}")); let contextual_keywords = grammar.contextual_keywords; let contextual_keywords_variants = contextual_keywords.iter().map(fmt_kw_as_variant).collect::>(); let contextual_keywords_tokens = contextual_keywords.iter().map(|it| format_ident!("{it}")); let contextual_keywords_str_match_arm = grammar.contextual_keywords.iter().map(|kw| { match grammar.edition_dependent_keywords.iter().find(|(ed_kw, _)| ed_kw == kw) { Some((_, ed)) => quote! { #kw if edition < #ed }, None => quote! { #kw }, } }); let contextual_keywords_variants_match_arm = grammar .contextual_keywords .iter() .map(|kw_s| { let kw = fmt_kw_as_variant(kw_s); match grammar.edition_dependent_keywords.iter().find(|(ed_kw, _)| ed_kw == kw_s) { Some((_, ed)) => quote! { #kw if edition < #ed }, None => quote! { #kw }, } }) .collect::>(); let non_strict_keyword_variants = contextual_keywords_variants .iter() .chain(edition_dependent_keywords_variants.iter()) .sorted() .dedup() .collect::>(); let literals = grammar.literals.iter().map(|name| format_ident!("{}", name)).collect::>(); let tokens = grammar.tokens.iter().map(|name| format_ident!("{}", name)).collect::>(); let nodes = grammar.nodes.iter().map(|name| format_ident!("{}", name)).collect::>(); let ast = quote! { #![allow(bad_style, missing_docs, unreachable_pub)] use crate::Edition; /// The kind of syntax node, e.g. `IDENT`, `USE_KW`, or `STRUCT`. #[derive(Debug)] #[repr(u16)] pub enum SyntaxKind { // Technical SyntaxKinds: they appear temporally during parsing, // but never end up in the final tree #[doc(hidden)] TOMBSTONE, #[doc(hidden)] EOF, #(#punctuation,)* #(#strict_keywords_variants,)* #(#non_strict_keyword_variants,)* #(#literals,)* #(#tokens,)* #(#nodes,)* // Technical kind so that we can cast from u16 safely #[doc(hidden)] __LAST, } use self::SyntaxKind::*; impl SyntaxKind { #[allow(unreachable_patterns)] pub const fn text(self) -> &'static str { match self { TOMBSTONE | EOF | __LAST #( | #literals )* #( | #nodes )* #( | #tokens )* => panic!("no text for these `SyntaxKind`s"), #( #punctuation => #punctuation_texts ,)* #( #strict_keywords_variants => #strict_keywords ,)* #( #contextual_keywords_variants => #contextual_keywords ,)* #( #edition_dependent_keywords_variants => #edition_dependent_keywords ,)* } } /// Checks whether this syntax kind is a strict keyword for the given edition. /// Strict keywords are identifiers that are always considered keywords. pub fn is_strict_keyword(self, edition: Edition) -> bool { matches!(self, #(#strict_keywords_variants)|*) || match self { #(#edition_dependent_keywords_variants_match_arm => true,)* _ => false, } } /// Checks whether this syntax kind is a weak keyword for the given edition. /// Weak keywords are identifiers that are considered keywords only in certain contexts. pub fn is_contextual_keyword(self, edition: Edition) -> bool { match self { #(#contextual_keywords_variants_match_arm => true,)* _ => false, } } /// Checks whether this syntax kind is a strict or weak keyword for the given edition. pub fn is_keyword(self, edition: Edition) -> bool { matches!(self, #(#strict_keywords_variants)|*) || match self { #(#edition_dependent_keywords_variants_match_arm => true,)* #(#contextual_keywords_variants_match_arm => true,)* _ => false, } } pub fn is_punct(self) -> bool { matches!(self, #(#punctuation)|*) } pub fn is_literal(self) -> bool { matches!(self, #(#literals)|*) } pub fn from_keyword(ident: &str, edition: Edition) -> Option { let kw = match ident { #(#strict_keywords => #strict_keywords_variants,)* #(#edition_dependent_keywords_str_match_arm => #edition_dependent_keywords_variants,)* _ => return None, }; Some(kw) } pub fn from_contextual_keyword(ident: &str, edition: Edition) -> Option { let kw = match ident { #(#contextual_keywords_str_match_arm => #contextual_keywords_variants,)* _ => return None, }; Some(kw) } pub fn from_char(c: char) -> Option { let tok = match c { #(#single_byte_tokens_values => #single_byte_tokens,)* _ => return None, }; Some(tok) } } #[macro_export] macro_rules! T_ { #([#punctuation_values] => { $crate::SyntaxKind::#punctuation };)* #([#strict_keywords_tokens] => { $crate::SyntaxKind::#strict_keywords_variants };)* #([#contextual_keywords_tokens] => { $crate::SyntaxKind::#contextual_keywords_variants };)* #([#edition_dependent_keywords_tokens] => { $crate::SyntaxKind::#edition_dependent_keywords_variants };)* [lifetime_ident] => { $crate::SyntaxKind::LIFETIME_IDENT }; [int_number] => { $crate::SyntaxKind::INT_NUMBER }; [ident] => { $crate::SyntaxKind::IDENT }; [string] => { $crate::SyntaxKind::STRING }; [shebang] => { $crate::SyntaxKind::SHEBANG }; [frontmatter] => { $crate::SyntaxKind::FRONTMATTER }; } impl ::core::marker::Copy for SyntaxKind {} impl ::core::clone::Clone for SyntaxKind { #[inline] fn clone(&self) -> Self { *self } } impl ::core::cmp::PartialEq for SyntaxKind { #[inline] fn eq(&self, other: &Self) -> bool { (*self as u16) == (*other as u16) } } impl ::core::cmp::Eq for SyntaxKind {} impl ::core::cmp::PartialOrd for SyntaxKind { #[inline] fn partial_cmp(&self, other: &Self) -> core::option::Option { Some(self.cmp(other)) } } impl ::core::cmp::Ord for SyntaxKind { #[inline] fn cmp(&self, other: &Self) -> core::cmp::Ordering { (*self as u16).cmp(&(*other as u16)) } } impl ::core::hash::Hash for SyntaxKind { fn hash(&self, state: &mut H) { ::core::mem::discriminant(self).hash(state); } } }; add_preamble(crate::flags::CodegenType::Grammar, reformat(ast.to_string())) } fn to_upper_snake_case(s: &str) -> String { let mut buf = String::with_capacity(s.len()); let mut prev = false; for c in s.chars() { if c.is_ascii_uppercase() && prev { buf.push('_') } prev = true; buf.push(c.to_ascii_uppercase()); } buf } fn to_lower_snake_case(s: &str) -> String { let mut buf = String::with_capacity(s.len()); let mut prev = false; for c in s.chars() { if c.is_ascii_uppercase() && prev { buf.push('_') } prev = true; buf.push(c.to_ascii_lowercase()); } buf } fn to_pascal_case(s: &str) -> String { let mut buf = String::with_capacity(s.len()); let mut prev_is_underscore = true; for c in s.chars() { if c == '_' { prev_is_underscore = true; } else if prev_is_underscore { buf.push(c.to_ascii_uppercase()); prev_is_underscore = false; } else { buf.push(c.to_ascii_lowercase()); } } buf } fn pluralize(s: &str) -> String { format!("{s}s") } impl Field { fn is_many(&self) -> bool { matches!(self, Field::Node { cardinality: Cardinality::Many, .. }) } fn token_kind(&self) -> Option { match self { Field::Token(token) => { let token: proc_macro2::TokenStream = token.parse().unwrap(); Some(quote! { T![#token] }) } _ => None, } } fn method_name(&self) -> String { match self { Field::Token(name) => { let name = match name.as_str() { ";" => "semicolon", "->" => "thin_arrow", "'{'" => "l_curly", "'}'" => "r_curly", "'('" => "l_paren", "')'" => "r_paren", "'['" => "l_brack", "']'" => "r_brack", "<" => "l_angle", ">" => "r_angle", "=" => "eq", "!" => "excl", "*" => "star", "&" => "amp", "-" => "minus", "_" => "underscore", "." => "dot", ".." => "dotdot", "..." => "dotdotdot", "..=" => "dotdoteq", "=>" => "fat_arrow", "@" => "at", ":" => "colon", "::" => "coloncolon", "#" => "pound", "?" => "question_mark", "," => "comma", "|" => "pipe", "~" => "tilde", _ => name, }; format!("{name}_token",) } Field::Node { name, .. } => { if name == "type" { String::from("ty") } else { name.to_owned() } } } } fn ty(&self) -> proc_macro2::Ident { match self { Field::Token(_) => format_ident!("SyntaxToken"), Field::Node { ty, .. } => format_ident!("{}", ty), } } } fn clean_token_name(name: &str) -> String { let cleaned = name.trim_start_matches(['@', '#', '?']); if cleaned.is_empty() { name.to_owned() } else { cleaned.to_owned() } } fn lower(grammar: &Grammar) -> AstSrc { let mut res = AstSrc { tokens: "Whitespace Comment String ByteString CString IntNumber FloatNumber Char Byte Ident" .split_ascii_whitespace() .map(|it| it.to_owned()) .collect::>(), ..Default::default() }; let nodes = grammar.iter().collect::>(); for &node in &nodes { let name = grammar[node].name.clone(); let rule = &grammar[node].rule; let _g = panic_context::enter(name.clone()); match lower_enum(grammar, rule) { Some(variants) => { let enum_src = AstEnumSrc { doc: Vec::new(), name, traits: Vec::new(), variants }; res.enums.push(enum_src); } None => { let mut fields = Vec::new(); lower_rule(&mut fields, grammar, None, rule); res.nodes.push(AstNodeSrc { doc: Vec::new(), name, traits: Vec::new(), fields }); } } } deduplicate_fields(&mut res); extract_enums(&mut res); extract_struct_traits(&mut res); extract_enum_traits(&mut res); res.nodes.sort_by_key(|it| it.name.clone()); res.enums.sort_by_key(|it| it.name.clone()); res.tokens.sort(); res.nodes.iter_mut().for_each(|it| { it.traits.sort(); it.fields.sort_by_key(|it| match it { Field::Token(name) => (true, name.clone()), Field::Node { name, .. } => (false, name.clone()), }); }); res.enums.iter_mut().for_each(|it| { it.traits.sort(); it.variants.sort(); }); res } fn lower_enum(grammar: &Grammar, rule: &Rule) -> Option> { let alternatives = match rule { Rule::Alt(it) => it, _ => return None, }; let mut variants = Vec::new(); for alternative in alternatives { match alternative { Rule::Node(it) => variants.push(grammar[*it].name.clone()), Rule::Token(it) if grammar[*it].name == ";" => (), _ => return None, } } Some(variants) } fn lower_rule(acc: &mut Vec, grammar: &Grammar, label: Option<&String>, rule: &Rule) { if lower_separated_list(acc, grammar, label, rule) { return; } match rule { Rule::Node(node) => { let ty = grammar[*node].name.clone(); let name = label.cloned().unwrap_or_else(|| to_lower_snake_case(&ty)); let field = Field::Node { name, ty, cardinality: Cardinality::Optional }; acc.push(field); } Rule::Token(token) => { assert!(label.is_none()); let mut name = clean_token_name(&grammar[*token].name); if "[]{}()".contains(&name) { name = format!("'{name}'"); } let field = Field::Token(name); acc.push(field); } Rule::Rep(inner) => { if let Rule::Node(node) = &**inner { let ty = grammar[*node].name.clone(); let name = label.cloned().unwrap_or_else(|| pluralize(&to_lower_snake_case(&ty))); let field = Field::Node { name, ty, cardinality: Cardinality::Many }; acc.push(field); return; } panic!("unhandled rule: {rule:?}") } Rule::Labeled { label: l, rule } => { assert!(label.is_none()); let manually_implemented = matches!( l.as_str(), "lhs" | "rhs" | "then_branch" | "else_branch" | "start" | "end" | "op" | "index" | "base" | "value" | "trait" | "self_ty" | "iterable" | "condition" | "args" | "body" ); if manually_implemented { return; } lower_rule(acc, grammar, Some(l), rule); } Rule::Seq(rules) | Rule::Alt(rules) => { for rule in rules { lower_rule(acc, grammar, label, rule) } } Rule::Opt(rule) => lower_rule(acc, grammar, label, rule), } } // (T (',' T)* ','?) fn lower_separated_list( acc: &mut Vec, grammar: &Grammar, label: Option<&String>, rule: &Rule, ) -> bool { let rule = match rule { Rule::Seq(it) => it, _ => return false, }; let (nt, repeat, trailing_sep) = match rule.as_slice() { [Rule::Node(node), Rule::Rep(repeat), Rule::Opt(trailing_sep)] => { (Either::Left(node), repeat, Some(trailing_sep)) } [Rule::Node(node), Rule::Rep(repeat)] => (Either::Left(node), repeat, None), [Rule::Token(token), Rule::Rep(repeat), Rule::Opt(trailing_sep)] => { (Either::Right(token), repeat, Some(trailing_sep)) } [Rule::Token(token), Rule::Rep(repeat)] => (Either::Right(token), repeat, None), _ => return false, }; let repeat = match &**repeat { Rule::Seq(it) => it, _ => return false, }; if !matches!( repeat.as_slice(), [comma, nt_] if trailing_sep.is_none_or(|it| comma == &**it) && match (nt, nt_) { (Either::Left(node), Rule::Node(nt_)) => node == nt_, (Either::Right(token), Rule::Token(nt_)) => token == nt_, _ => false, } ) { return false; } match nt { Either::Right(token) => { let name = clean_token_name(&grammar[*token].name); let field = Field::Token(name); acc.push(field); } Either::Left(node) => { let ty = grammar[*node].name.clone(); let name = label.cloned().unwrap_or_else(|| pluralize(&to_lower_snake_case(&ty))); let field = Field::Node { name, ty, cardinality: Cardinality::Many }; acc.push(field); } } true } fn deduplicate_fields(ast: &mut AstSrc) { for node in &mut ast.nodes { let mut i = 0; 'outer: while i < node.fields.len() { for j in 0..i { let f1 = &node.fields[i]; let f2 = &node.fields[j]; if f1 == f2 { node.fields.remove(i); continue 'outer; } } i += 1; } } } fn extract_enums(ast: &mut AstSrc) { for node in &mut ast.nodes { for enm in &ast.enums { let mut to_remove = Vec::new(); for (i, field) in node.fields.iter().enumerate() { let ty = field.ty().to_string(); if enm.variants.iter().any(|it| it == &ty) { to_remove.push(i); } } if to_remove.len() == enm.variants.len() { node.remove_field(to_remove); let ty = enm.name.clone(); let name = to_lower_snake_case(&ty); node.fields.push(Field::Node { name, ty, cardinality: Cardinality::Optional }); } } } } const TRAITS: &[(&str, &[&str])] = &[ ("HasAttrs", &["attrs"]), ("HasName", &["name"]), ("HasVisibility", &["visibility"]), ("HasGenericParams", &["generic_param_list", "where_clause"]), ("HasGenericArgs", &["generic_arg_list"]), ("HasTypeBounds", &["type_bound_list", "colon_token"]), ("HasModuleItem", &["items"]), ("HasLoopBody", &["label", "loop_body"]), ("HasArgList", &["arg_list"]), ]; fn extract_struct_traits(ast: &mut AstSrc) { for node in &mut ast.nodes { for (name, methods) in TRAITS { extract_struct_trait(node, name, methods); } } let nodes_with_doc_comments = [ "SourceFile", "Fn", "Struct", "Union", "RecordField", "TupleField", "Enum", "Variant", "Trait", "TraitAlias", "Module", "Static", "Const", "TypeAlias", "Impl", "ExternBlock", "ExternCrate", "MacroCall", "MacroRules", "MacroDef", "Use", ]; for node in &mut ast.nodes { if nodes_with_doc_comments.contains(&&*node.name) { node.traits.push("HasDocComments".into()); } } } fn extract_struct_trait(node: &mut AstNodeSrc, trait_name: &str, methods: &[&str]) { let mut to_remove = Vec::new(); for (i, field) in node.fields.iter().enumerate() { let method_name = field.method_name(); if methods.iter().any(|&it| it == method_name) { to_remove.push(i); } } if to_remove.len() == methods.len() { node.traits.push(trait_name.to_owned()); node.remove_field(to_remove); } } fn extract_enum_traits(ast: &mut AstSrc) { for enm in &mut ast.enums { if enm.name == "Stmt" { continue; } let nodes = &ast.nodes; let mut variant_traits = enm .variants .iter() .map(|var| nodes.iter().find(|it| &it.name == var).unwrap()) .map(|node| node.traits.iter().cloned().collect::>()); let mut enum_traits = match variant_traits.next() { Some(it) => it, None => continue, }; for traits in variant_traits { enum_traits = enum_traits.intersection(&traits).cloned().collect(); } enm.traits = enum_traits.into_iter().collect(); } } impl AstNodeSrc { fn remove_field(&mut self, to_remove: Vec) { to_remove.into_iter().rev().for_each(|idx| { self.fields.remove(idx); }); } } #[test] fn test() { generate(true); }