move whitespace handling to tree builder

This commit is contained in:
Aleksey Kladov 2019-02-21 12:03:42 +03:00
parent c84561bb62
commit 79ce0fa8d7
4 changed files with 157 additions and 186 deletions

View file

@ -9,11 +9,11 @@ mod grammar;
mod reparsing; mod reparsing;
use crate::{ use crate::{
SyntaxKind, SmolStr, SyntaxError, SyntaxKind, SyntaxError,
parsing::{ parsing::{
builder::GreenBuilder, builder::TreeBuilder,
input::ParserInput, input::ParserInput,
event::EventProcessor, event::process,
parser::Parser, parser::Parser,
}, },
syntax_node::GreenNode, syntax_node::GreenNode,
@ -28,22 +28,24 @@ pub(crate) use self::reparsing::incremental_reparse;
pub(crate) fn parse_text(text: &str) -> (GreenNode, Vec<SyntaxError>) { pub(crate) fn parse_text(text: &str) -> (GreenNode, Vec<SyntaxError>) {
let tokens = tokenize(&text); let tokens = tokenize(&text);
parse_with(GreenBuilder::default(), text, &tokens, grammar::root) let tree_sink = TreeBuilder::new(text, &tokens);
parse_with(tree_sink, text, &tokens, grammar::root)
} }
fn parse_with<S: TreeSink>( fn parse_with<S: TreeSink>(
tree_sink: S, mut tree_sink: S,
text: &str, text: &str,
tokens: &[Token], tokens: &[Token],
f: fn(&mut Parser), f: fn(&mut Parser),
) -> S::Tree { ) -> S::Tree {
let mut events = { let events = {
let input = ParserInput::new(text, &tokens); let input = ParserInput::new(text, &tokens);
let mut p = Parser::new(&input); let mut p = Parser::new(&input);
f(&mut p); f(&mut p);
p.finish() p.finish()
}; };
EventProcessor::new(tree_sink, text, tokens, &mut events).process().finish() process(&mut tree_sink, events);
tree_sink.finish()
} }
/// `TreeSink` abstracts details of a particular syntax tree implementation. /// `TreeSink` abstracts details of a particular syntax tree implementation.
@ -51,14 +53,14 @@ trait TreeSink {
type Tree; type Tree;
/// Adds new leaf to the current branch. /// Adds new leaf to the current branch.
fn leaf(&mut self, kind: SyntaxKind, text: SmolStr); fn leaf(&mut self, kind: SyntaxKind, n_tokens: u8);
/// Start new branch and make it current. /// Start new branch and make it current.
fn start_branch(&mut self, kind: SyntaxKind); fn start_branch(&mut self, kind: SyntaxKind, root: bool);
/// Finish current branch and restore previous /// Finish current branch and restore previous
/// branch as current. /// branch as current.
fn finish_branch(&mut self); fn finish_branch(&mut self, root: bool);
fn error(&mut self, error: ParseError); fn error(&mut self, error: ParseError);

View file

@ -1,40 +1,63 @@
use crate::{ use crate::{
SmolStr, SyntaxKind, SyntaxError, SyntaxErrorKind, TextUnit, SmolStr, SyntaxError, SyntaxErrorKind, TextUnit, TextRange,
parsing::{TreeSink, ParseError}, SyntaxKind::{self, *},
parsing::{TreeSink, ParseError, Token},
syntax_node::{GreenNode, RaTypes}, syntax_node::{GreenNode, RaTypes},
}; };
use rowan::GreenNodeBuilder; use rowan::GreenNodeBuilder;
pub(crate) struct GreenBuilder { pub(crate) struct TreeBuilder<'a> {
text: &'a str,
tokens: &'a [Token],
text_pos: TextUnit, text_pos: TextUnit,
token_pos: usize,
errors: Vec<SyntaxError>, errors: Vec<SyntaxError>,
inner: GreenNodeBuilder<RaTypes>, inner: GreenNodeBuilder<RaTypes>,
} }
impl Default for GreenBuilder { impl<'a> TreeSink for TreeBuilder<'a> {
fn default() -> GreenBuilder {
GreenBuilder {
text_pos: TextUnit::default(),
errors: Vec::new(),
inner: GreenNodeBuilder::new(),
}
}
}
impl TreeSink for GreenBuilder {
type Tree = (GreenNode, Vec<SyntaxError>); type Tree = (GreenNode, Vec<SyntaxError>);
fn leaf(&mut self, kind: SyntaxKind, text: SmolStr) { fn leaf(&mut self, kind: SyntaxKind, n_tokens: u8) {
self.text_pos += TextUnit::of_str(text.as_str()); self.eat_trivias();
self.inner.leaf(kind, text); let n_tokens = n_tokens as usize;
let len = self.tokens[self.token_pos..self.token_pos + n_tokens]
.iter()
.map(|it| it.len)
.sum::<TextUnit>();
self.do_leaf(kind, len, n_tokens);
} }
fn start_branch(&mut self, kind: SyntaxKind) { fn start_branch(&mut self, kind: SyntaxKind, root: bool) {
self.inner.start_internal(kind) if root {
self.inner.start_internal(kind);
return;
}
let n_trivias =
self.tokens[self.token_pos..].iter().take_while(|it| it.kind.is_trivia()).count();
let leading_trivias = &self.tokens[self.token_pos..self.token_pos + n_trivias];
let mut trivia_end =
self.text_pos + leading_trivias.iter().map(|it| it.len).sum::<TextUnit>();
let n_attached_trivias = {
let leading_trivias = leading_trivias.iter().rev().map(|it| {
let next_end = trivia_end - it.len;
let range = TextRange::from_to(next_end, trivia_end);
trivia_end = next_end;
(it.kind, &self.text[range])
});
n_attached_trivias(kind, leading_trivias)
};
self.eat_n_trivias(n_trivias - n_attached_trivias);
self.inner.start_internal(kind);
self.eat_n_trivias(n_attached_trivias);
} }
fn finish_branch(&mut self) { fn finish_branch(&mut self, root: bool) {
if root {
self.eat_trivias()
}
self.inner.finish_internal(); self.inner.finish_internal();
} }
@ -47,3 +70,67 @@ impl TreeSink for GreenBuilder {
(self.inner.finish(), self.errors) (self.inner.finish(), self.errors)
} }
} }
impl<'a> TreeBuilder<'a> {
pub(super) fn new(text: &'a str, tokens: &'a [Token]) -> TreeBuilder<'a> {
TreeBuilder {
text,
tokens,
text_pos: 0.into(),
token_pos: 0,
errors: Vec::new(),
inner: GreenNodeBuilder::new(),
}
}
fn eat_trivias(&mut self) {
while let Some(&token) = self.tokens.get(self.token_pos) {
if !token.kind.is_trivia() {
break;
}
self.do_leaf(token.kind, token.len, 1);
}
}
fn eat_n_trivias(&mut self, n: usize) {
for _ in 0..n {
let token = self.tokens[self.token_pos];
assert!(token.kind.is_trivia());
self.do_leaf(token.kind, token.len, 1);
}
}
fn do_leaf(&mut self, kind: SyntaxKind, len: TextUnit, n_tokens: usize) {
let range = TextRange::offset_len(self.text_pos, len);
let text: SmolStr = self.text[range].into();
self.text_pos += len;
self.token_pos += n_tokens;
self.inner.leaf(kind, text);
}
}
/// Counts how many of the given trivia tokens attach to a node of `kind`.
///
/// Only `CONST_DEF`, `TYPE_DEF`, `STRUCT_DEF`, `ENUM_DEF`, `ENUM_VARIANT`,
/// `FN_DEF`, `TRAIT_DEF`, `MODULE` and `NAMED_FIELD_DEF` attach trivia; for
/// every other kind the result is 0 and `trivias` is not iterated at all.
///
/// `trivias` is walked in the order given. The walk stops at the first
/// `WHITESPACE` whose text contains a blank line (`"\n\n"`). The result is
/// the one-based position of the last `COMMENT` seen before that stop, or 0
/// if there was none.
fn n_attached_trivias<'a>(
    kind: SyntaxKind,
    trivias: impl Iterator<Item = (SyntaxKind, &'a str)>,
) -> usize {
    let attaches_trivia = match kind {
        CONST_DEF | TYPE_DEF | STRUCT_DEF | ENUM_DEF | ENUM_VARIANT | FN_DEF | TRAIT_DEF
        | MODULE | NAMED_FIELD_DEF => true,
        _ => false,
    };
    if !attaches_trivia {
        return 0;
    }

    let mut attached = 0;
    for (idx, (trivia_kind, trivia_text)) in trivias.enumerate() {
        match trivia_kind {
            // A blank line separates the remaining trivia from the node.
            WHITESPACE if trivia_text.contains("\n\n") => break,
            // Everything up to and including this comment is attached.
            COMMENT => attached = idx + 1,
            _ => {}
        }
    }
    attached
}

View file

@ -10,13 +10,8 @@
use std::mem; use std::mem;
use crate::{ use crate::{
SmolStr,
SyntaxKind::{self, *}, SyntaxKind::{self, *},
TextRange, TextUnit, parsing::{ParseError, TreeSink},
parsing::{
ParseError, TreeSink,
lexer::Token,
},
}; };
/// `Parser` produces a flat list of `Event`s. /// `Parser` produces a flat list of `Event`s.
@ -88,31 +83,12 @@ impl Event {
} }
} }
pub(super) struct EventProcessor<'a, S: TreeSink> {
sink: S,
text_pos: TextUnit,
text: &'a str,
token_pos: usize,
tokens: &'a [Token],
events: &'a mut [Event],
}
impl<'a, S: TreeSink> EventProcessor<'a, S> {
pub(super) fn new(
sink: S,
text: &'a str,
tokens: &'a [Token],
events: &'a mut [Event],
) -> EventProcessor<'a, S> {
EventProcessor { sink, text_pos: 0.into(), text, token_pos: 0, tokens, events }
}
/// Generate the syntax tree with the control of events. /// Generate the syntax tree with the control of events.
pub(crate) fn process(mut self) -> S { pub(super) fn process(sink: &mut impl TreeSink, mut events: Vec<Event>) {
let mut forward_parents = Vec::new(); let mut forward_parents = Vec::new();
for i in 0..self.events.len() { for i in 0..events.len() {
match mem::replace(&mut self.events[i], Event::tombstone()) { match mem::replace(&mut events[i], Event::tombstone()) {
Event::Start { kind: TOMBSTONE, .. } => (), Event::Start { kind: TOMBSTONE, .. } => (),
Event::Start { kind, forward_parent } => { Event::Start { kind, forward_parent } => {
@ -127,7 +103,7 @@ impl<'a, S: TreeSink> EventProcessor<'a, S> {
while let Some(fwd) = fp { while let Some(fwd) = fp {
idx += fwd as usize; idx += fwd as usize;
// append `A`'s forward_parent `B` // append `A`'s forward_parent `B`
fp = match mem::replace(&mut self.events[idx], Event::tombstone()) { fp = match mem::replace(&mut events[idx], Event::tombstone()) {
Event::Start { kind, forward_parent } => { Event::Start { kind, forward_parent } => {
forward_parents.push(kind); forward_parents.push(kind);
forward_parent forward_parent
@ -137,111 +113,16 @@ impl<'a, S: TreeSink> EventProcessor<'a, S> {
// append `B`'s forward_parent `C` in the next stage. // append `B`'s forward_parent `C` in the next stage.
} }
for kind in forward_parents.drain(..).rev() { for (j, kind) in forward_parents.drain(..).rev().enumerate() {
self.start(kind); let is_root_node = i == 0 && j == 0;
sink.start_branch(kind, is_root_node);
} }
} }
Event::Finish => { Event::Finish => sink.finish_branch(i == events.len() - 1),
let is_last = i == self.events.len() - 1;
self.finish(is_last);
}
Event::Token { kind, n_raw_tokens } => { Event::Token { kind, n_raw_tokens } => {
self.eat_trivias(); sink.leaf(kind, n_raw_tokens);
let n_raw_tokens = n_raw_tokens as usize;
let len = self.tokens[self.token_pos..self.token_pos + n_raw_tokens]
.iter()
.map(|it| it.len)
.sum::<TextUnit>();
self.leaf(kind, len, n_raw_tokens);
} }
Event::Error { msg } => self.sink.error(msg), Event::Error { msg } => sink.error(msg),
} }
} }
self.sink
}
/// Add the node into syntax tree but discard the comments/whitespaces.
fn start(&mut self, kind: SyntaxKind) {
if kind == SOURCE_FILE {
self.sink.start_branch(kind);
return;
}
let n_trivias =
self.tokens[self.token_pos..].iter().take_while(|it| it.kind.is_trivia()).count();
let leading_trivias = &self.tokens[self.token_pos..self.token_pos + n_trivias];
let mut trivia_end =
self.text_pos + leading_trivias.iter().map(|it| it.len).sum::<TextUnit>();
let n_attached_trivias = {
let leading_trivias = leading_trivias.iter().rev().map(|it| {
let next_end = trivia_end - it.len;
let range = TextRange::from_to(next_end, trivia_end);
trivia_end = next_end;
(it.kind, &self.text[range])
});
n_attached_trivias(kind, leading_trivias)
};
self.eat_n_trivias(n_trivias - n_attached_trivias);
self.sink.start_branch(kind);
self.eat_n_trivias(n_attached_trivias);
}
fn finish(&mut self, is_last: bool) {
if is_last {
self.eat_trivias()
}
self.sink.finish_branch();
}
fn eat_trivias(&mut self) {
while let Some(&token) = self.tokens.get(self.token_pos) {
if !token.kind.is_trivia() {
break;
}
self.leaf(token.kind, token.len, 1);
}
}
fn eat_n_trivias(&mut self, n: usize) {
for _ in 0..n {
let token = self.tokens[self.token_pos];
assert!(token.kind.is_trivia());
self.leaf(token.kind, token.len, 1);
}
}
fn leaf(&mut self, kind: SyntaxKind, len: TextUnit, n_tokens: usize) {
let range = TextRange::offset_len(self.text_pos, len);
let text: SmolStr = self.text[range].into();
self.text_pos += len;
self.token_pos += n_tokens;
self.sink.leaf(kind, text);
}
}
fn n_attached_trivias<'a>(
kind: SyntaxKind,
trivias: impl Iterator<Item = (SyntaxKind, &'a str)>,
) -> usize {
match kind {
CONST_DEF | TYPE_DEF | STRUCT_DEF | ENUM_DEF | ENUM_VARIANT | FN_DEF | TRAIT_DEF
| MODULE | NAMED_FIELD_DEF => {
let mut res = 0;
for (i, (kind, text)) in trivias.enumerate() {
match kind {
WHITESPACE => {
if text.contains("\n\n") {
break;
}
}
COMMENT => {
res = i + 1;
}
_ => (),
}
}
res
}
_ => 0,
}
} }

View file

@ -5,7 +5,7 @@ use crate::{
syntax_error::SyntaxError, syntax_error::SyntaxError,
parsing::{ parsing::{
grammar, parse_with, grammar, parse_with,
builder::GreenBuilder, builder::TreeBuilder,
parser::Parser, parser::Parser,
lexer::{tokenize, Token}, lexer::{tokenize, Token},
} }
@ -61,7 +61,8 @@ fn reparse_block<'node>(
if !is_balanced(&tokens) { if !is_balanced(&tokens) {
return None; return None;
} }
let (green, new_errors) = parse_with(GreenBuilder::default(), &text, &tokens, reparser); let tree_sink = TreeBuilder::new(&text, &tokens);
let (green, new_errors) = parse_with(tree_sink, &text, &tokens, reparser);
Some((node, green, new_errors)) Some((node, green, new_errors))
} }