mirror of
https://github.com/rust-lang/rust-analyzer.git
synced 2025-09-27 20:42:04 +00:00
Switch to ungrammar from ast_src
The primary advantage of ungrammar is that it (eventually) allows one to describe the concrete syntax tree structure -- with alternatives and a specific sequence of tokens & nodes. That should be re-usable for: * generating `make` calls * the Rust reference * a hypothetical parser's evented API. We lose doc comments for the time being, unfortunately. I don't think we should add support for doc comments to ungrammar -- they'll make the grammar file hard to read. We might supply docs as out-of-band info, or maybe just via a reference, but we'll think about that once things are no longer in flux.
This commit is contained in:
parent
525ae706b3
commit
3d28292157
9 changed files with 2393 additions and 4994 deletions
|
@ -3,19 +3,27 @@
|
|||
//! Specifically, it generates the `SyntaxKind` enum and a number of newtype
|
||||
//! wrappers around `SyntaxNode` which implement `ra_syntax::AstNode`.
|
||||
|
||||
use std::{collections::HashSet, fmt::Write};
|
||||
use std::{
|
||||
collections::{BTreeSet, HashSet},
|
||||
fmt::Write,
|
||||
};
|
||||
|
||||
use proc_macro2::{Punct, Spacing};
|
||||
use quote::{format_ident, quote};
|
||||
use ungrammar::{Grammar, Rule};
|
||||
|
||||
use crate::{
|
||||
ast_src::{rust_ast, AstSrc, Field, FieldSrc, KindsSrc, KINDS_SRC},
|
||||
ast_src::{AstEnumSrc, AstNodeSrc, AstSrc, Field, FieldSrc, KindsSrc, KINDS_SRC},
|
||||
codegen::{self, update, Mode},
|
||||
project_root, Result,
|
||||
};
|
||||
|
||||
pub fn generate_syntax(mode: Mode) -> Result<()> {
|
||||
let ast = rust_ast();
|
||||
let grammar = include_str!("rust.ungram")
|
||||
.parse::<Grammar>()
|
||||
.unwrap_or_else(|err| panic!("\n \x1b[91merror\x1b[0m: {}\n", err));
|
||||
let ast = lower(&grammar);
|
||||
|
||||
let syntax_kinds_file = project_root().join(codegen::SYNTAX_KINDS);
|
||||
let syntax_kinds = generate_syntax_kinds(KINDS_SRC)?;
|
||||
update(syntax_kinds_file.as_path(), &syntax_kinds, mode)?;
|
||||
|
@ -215,7 +223,9 @@ fn generate_nodes(kinds: KindsSrc<'_>, grammar: &AstSrc) -> Result<String> {
|
|||
.map(|kind| to_pascal_case(kind))
|
||||
.filter(|name| !defined_nodes.iter().any(|&it| it == name))
|
||||
{
|
||||
eprintln!("Warning: node {} not defined in ast source", node);
|
||||
drop(node)
|
||||
// TODO: restore this
|
||||
// eprintln!("Warning: node {} not defined in ast source", node);
|
||||
}
|
||||
|
||||
let ast = quote! {
|
||||
|
@ -414,6 +424,10 @@ fn to_pascal_case(s: &str) -> String {
|
|||
buf
|
||||
}
|
||||
|
||||
/// Returns `s` with a single `s` appended.
///
/// This is a purely textual plural: no irregular forms are handled.
fn pluralize(s: &str) -> String {
    let mut plural = String::with_capacity(s.len() + 1);
    plural.push_str(s);
    plural.push('s');
    plural
}
|
||||
|
||||
impl Field {
|
||||
fn is_many(&self) -> bool {
|
||||
matches!(self, Field::Node { src: FieldSrc::Many(_), .. })
|
||||
|
@ -449,6 +463,7 @@ impl Field {
|
|||
"." => "dot",
|
||||
".." => "dotdot",
|
||||
"..." => "dotdotdot",
|
||||
"..=" => "dotdoteq",
|
||||
"=>" => "fat_arrow",
|
||||
"@" => "at",
|
||||
":" => "colon",
|
||||
|
@ -475,3 +490,204 @@ impl Field {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn lower(grammar: &Grammar) -> AstSrc {
|
||||
let mut res = AstSrc::default();
|
||||
res.tokens = vec!["Whitespace".into(), "Comment".into(), "String".into(), "RawString".into()];
|
||||
|
||||
let nodes = grammar
|
||||
.iter()
|
||||
.filter(|&node| match grammar[node].rule {
|
||||
Rule::Node(it) if it == node => false,
|
||||
_ => true,
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
for &node in &nodes {
|
||||
let name = grammar[node].name.clone();
|
||||
let rule = &grammar[node].rule;
|
||||
match lower_enum(grammar, rule) {
|
||||
Some(variants) => {
|
||||
let enum_src = AstEnumSrc { doc: Vec::new(), name, traits: Vec::new(), variants };
|
||||
res.enums.push(enum_src);
|
||||
}
|
||||
None => {
|
||||
let mut fields = Vec::new();
|
||||
lower_rule(&mut fields, grammar, rule);
|
||||
res.nodes.push(AstNodeSrc { doc: Vec::new(), name, traits: Vec::new(), fields });
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
deduplicate_fields(&mut res);
|
||||
extract_enums(&mut res);
|
||||
extract_struct_traits(&mut res);
|
||||
extract_enum_traits(&mut res);
|
||||
res
|
||||
}
|
||||
|
||||
fn lower_enum(grammar: &Grammar, rule: &Rule) -> Option<Vec<String>> {
|
||||
let alternatives = match rule {
|
||||
Rule::Alt(it) => it,
|
||||
_ => return None,
|
||||
};
|
||||
let mut variants = Vec::new();
|
||||
for alternative in alternatives {
|
||||
match alternative {
|
||||
Rule::Node(it) => variants.push(grammar[*it].name.clone()),
|
||||
_ => return None,
|
||||
}
|
||||
}
|
||||
Some(variants)
|
||||
}
|
||||
|
||||
fn lower_rule(acc: &mut Vec<Field>, grammar: &Grammar, rule: &Rule) {
|
||||
match rule {
|
||||
Rule::Node(node) => {
|
||||
let field = Field::Node { name: grammar[*node].name.clone(), src: FieldSrc::Shorthand };
|
||||
acc.push(field);
|
||||
}
|
||||
Rule::Token(token) => {
|
||||
let mut name = grammar[*token].name.clone();
|
||||
if name != "int_number" && name != "string" {
|
||||
if "[]{}()".contains(&name) {
|
||||
name = format!("'{}'", name);
|
||||
}
|
||||
let field = Field::Token(name);
|
||||
acc.push(field);
|
||||
}
|
||||
}
|
||||
Rule::Rep(inner) => {
|
||||
if let Rule::Node(node) = &**inner {
|
||||
let name = grammar[*node].name.clone();
|
||||
let label = pluralize(&to_lower_snake_case(&name));
|
||||
let field = Field::Node { name: label.clone(), src: FieldSrc::Many(name) };
|
||||
acc.push(field);
|
||||
return;
|
||||
}
|
||||
todo!("{:?}", rule)
|
||||
}
|
||||
Rule::Labeled { label, rule } => {
|
||||
let node = match &**rule {
|
||||
Rule::Rep(inner) | Rule::Opt(inner) => match &**inner {
|
||||
Rule::Node(node) => node,
|
||||
_ => todo!("{:?}", rule),
|
||||
},
|
||||
Rule::Node(node) => node,
|
||||
_ => todo!("{:?}", rule),
|
||||
};
|
||||
let field = Field::Node {
|
||||
name: label.clone(),
|
||||
src: match &**rule {
|
||||
Rule::Rep(_) => FieldSrc::Many(grammar[*node].name.clone()),
|
||||
_ => FieldSrc::Optional(grammar[*node].name.clone()),
|
||||
},
|
||||
};
|
||||
acc.push(field);
|
||||
}
|
||||
Rule::Seq(rules) | Rule::Alt(rules) => {
|
||||
for rule in rules {
|
||||
lower_rule(acc, grammar, rule)
|
||||
}
|
||||
}
|
||||
Rule::Opt(rule) => lower_rule(acc, grammar, rule),
|
||||
}
|
||||
}
|
||||
|
||||
fn deduplicate_fields(ast: &mut AstSrc) {
|
||||
eprintln!();
|
||||
for node in &mut ast.nodes {
|
||||
let mut i = 0;
|
||||
'outer: while i < node.fields.len() {
|
||||
for j in 0..i {
|
||||
let f1 = &node.fields[i];
|
||||
let f2 = &node.fields[j];
|
||||
if f1 == f2 {
|
||||
node.fields.remove(i);
|
||||
continue 'outer;
|
||||
}
|
||||
}
|
||||
i += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn extract_enums(ast: &mut AstSrc) {
|
||||
for node in &mut ast.nodes {
|
||||
for enm in &ast.enums {
|
||||
let mut to_remove = Vec::new();
|
||||
for (i, field) in node.fields.iter().enumerate() {
|
||||
let ty = field.ty().to_string();
|
||||
if enm.variants.iter().any(|it| it == &ty) {
|
||||
to_remove.push(i);
|
||||
}
|
||||
}
|
||||
if to_remove.len() == enm.variants.len() {
|
||||
node.remove_field(to_remove);
|
||||
node.fields.push(Field::Node { name: enm.name.clone(), src: FieldSrc::Shorthand });
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn extract_struct_traits(ast: &mut AstSrc) {
|
||||
let traits: &[(&str, &[&str])] = &[
|
||||
("AttrsOwner", &["attrs"]),
|
||||
("NameOwner", &["name"]),
|
||||
("VisibilityOwner", &["visibility"]),
|
||||
("TypeParamsOwner", &["type_param_list", "where_clause"]),
|
||||
("TypeBoundsOwner", &["type_bound_list", "colon_token"]),
|
||||
("ModuleItemOwner", &["items"]),
|
||||
("TypeAscriptionOwner", &["ascribed_type"]),
|
||||
("LoopBodyOwner", &["label", "loop_body"]),
|
||||
("ArgListOwner", &["arg_list"]),
|
||||
];
|
||||
|
||||
for node in &mut ast.nodes {
|
||||
for (name, methods) in traits {
|
||||
extract_struct_trait(node, name, methods);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn extract_struct_trait(node: &mut AstNodeSrc, trait_name: &str, methods: &[&str]) {
|
||||
let mut to_remove = Vec::new();
|
||||
for (i, field) in node.fields.iter().enumerate() {
|
||||
let method_name = field.method_name().to_string();
|
||||
if methods.iter().any(|&it| it == &method_name) {
|
||||
to_remove.push(i);
|
||||
}
|
||||
}
|
||||
if to_remove.len() == methods.len() {
|
||||
node.traits.push(trait_name.to_string());
|
||||
node.remove_field(to_remove);
|
||||
}
|
||||
}
|
||||
|
||||
fn extract_enum_traits(ast: &mut AstSrc) {
|
||||
for enm in &mut ast.enums {
|
||||
let nodes = &ast.nodes;
|
||||
let mut variant_traits = enm
|
||||
.variants
|
||||
.iter()
|
||||
.map(|var| nodes.iter().find(|it| &it.name == var).unwrap())
|
||||
.map(|node| node.traits.iter().cloned().collect::<BTreeSet<_>>());
|
||||
|
||||
let mut enum_traits = match variant_traits.next() {
|
||||
Some(it) => it,
|
||||
None => continue,
|
||||
};
|
||||
for traits in variant_traits {
|
||||
enum_traits = enum_traits.intersection(&traits).cloned().collect();
|
||||
}
|
||||
enm.traits = enum_traits.into_iter().collect();
|
||||
}
|
||||
}
|
||||
|
||||
impl AstNodeSrc {
|
||||
fn remove_field(&mut self, to_remove: Vec<usize>) {
|
||||
to_remove.into_iter().rev().for_each(|idx| {
|
||||
self.fields.remove(idx);
|
||||
});
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue