organizize

This commit is contained in:
Aleksey Kladov 2018-08-10 22:33:29 +03:00
parent 26262aaf05
commit 7c67612b8a
376 changed files with 27 additions and 145 deletions

View file

@ -0,0 +1,154 @@
//! This module provides a way to construct a `File`.
//! It is intended to be completely decoupled from the
//! parser, so as to allow to evolve the tree representation
//! and the parser algorithm independently.
//!
//! The `Sink` trait is the bridge between the parser and the
//! tree builder: the parser produces a stream of events like
//! `start node`, `finish node`, and `FileBuilder` converts
//! this stream to a real tree.
use std::mem;
use {
lexer::Token,
parser_impl::Sink,
SyntaxKind::{self, TOMBSTONE},
};
/// `Parser` produces a flat list of `Event`s.
/// They are converted to a tree-structure in
/// a separate pass, via `TreeBuilder`.
#[derive(Debug)]
pub(crate) enum Event {
/// This event signifies the start of the node.
/// It should be either abandoned (in which case the
/// `kind` is `TOMBSTONE`, and the event is ignored),
/// or completed via a `Finish` event.
///
/// All tokens between a `Start` and a `Finish` would
/// become the children of the respective node.
///
/// For left-recursive syntactic constructs, the parser produces
/// a child node before it sees a parent. `forward_parent`
/// exists to allow to tweak parent-child relationships.
///
/// Consider this path
///
/// foo::bar
///
/// The events for it would look like this:
///
///
/// START(PATH) IDENT('foo') FINISH START(PATH) COLONCOLON IDENT('bar') FINISH
/// | /\
/// | |
/// +------forward-parent------+
///
/// And the tree would look like this
///
/// +--PATH---------+
/// | | |
/// | | |
/// | '::' 'bar'
/// |
/// PATH
/// |
/// 'foo'
///
/// See also `CompletedMarker::precede`.
Start {
kind: SyntaxKind,
forward_parent: Option<u32>,
},
/// Complete the previous `Start` event
Finish,
/// Produce a single leaf-element.
/// `n_raw_tokens` is used to glue complex contextual tokens.
/// For example, lexer tokenizes `>>` as `>`, `>`, and
/// `n_raw_tokens = 2` is used to produced a single `>>`.
Token {
kind: SyntaxKind,
n_raw_tokens: u8,
},
Error {
msg: String,
},
}
pub(super) fn process<'a, S: Sink<'a>>(builder: &mut S, tokens: &[Token], mut events: Vec<Event>) {
fn tombstone() -> Event {
Event::Start { kind: TOMBSTONE, forward_parent: None }
}
let eat_ws = |idx: &mut usize, builder: &mut S| {
while let Some(token) = tokens.get(*idx) {
if !token.kind.is_trivia() {
break;
}
builder.leaf(token.kind, token.len);
*idx += 1
}
};
let events: &mut [Event] = &mut events;
let mut depth = 0;
let mut forward_parents = Vec::new();
let mut next_tok_idx = 0;
for i in 0..events.len() {
match mem::replace(&mut events[i], tombstone()) {
Event::Start {
kind: TOMBSTONE, ..
} => (),
Event::Start { kind, forward_parent } => {
forward_parents.push(kind);
let mut idx = i;
let mut fp = forward_parent;
while let Some(fwd) = fp {
idx += fwd as usize;
fp = match mem::replace(&mut events[idx], tombstone()) {
Event::Start {
kind,
forward_parent,
} => {
forward_parents.push(kind);
forward_parent
},
_ => unreachable!(),
};
}
for kind in forward_parents.drain(..).rev() {
if depth > 0 {
eat_ws(&mut next_tok_idx, builder);
}
depth += 1;
builder.start_internal(kind);
}
}
Event::Finish => {
depth -= 1;
if depth == 0 {
eat_ws(&mut next_tok_idx, builder);
}
builder.finish_internal();
}
Event::Token {
kind,
mut n_raw_tokens,
} => {
eat_ws(&mut next_tok_idx, builder);
let mut len = 0.into();
for _ in 0..n_raw_tokens {
len += tokens[next_tok_idx].len;
next_tok_idx += 1;
}
builder.leaf(kind, len);
}
Event::Error { msg } => builder.error(msg),
}
}
}

View file

@ -0,0 +1,86 @@
use {lexer::Token, SyntaxKind, SyntaxKind::EOF, TextRange, TextUnit};
use std::ops::{Add, AddAssign};
pub(crate) struct ParserInput<'t> {
text: &'t str,
start_offsets: Vec<TextUnit>,
tokens: Vec<Token>, // non-whitespace tokens
}
impl<'t> ParserInput<'t> {
pub fn new(text: &'t str, raw_tokens: &'t [Token]) -> ParserInput<'t> {
let mut tokens = Vec::new();
let mut start_offsets = Vec::new();
let mut len = 0.into();
for &token in raw_tokens.iter() {
if !token.kind.is_trivia() {
tokens.push(token);
start_offsets.push(len);
}
len += token.len;
}
ParserInput {
text,
start_offsets,
tokens,
}
}
pub fn kind(&self, pos: InputPosition) -> SyntaxKind {
let idx = pos.0 as usize;
if !(idx < self.tokens.len()) {
return EOF;
}
self.tokens[idx].kind
}
pub fn len(&self, pos: InputPosition) -> TextUnit {
let idx = pos.0 as usize;
if !(idx < self.tokens.len()) {
return 0.into();
}
self.tokens[idx].len
}
pub fn start(&self, pos: InputPosition) -> TextUnit {
let idx = pos.0 as usize;
if !(idx < self.tokens.len()) {
return 0.into();
}
self.start_offsets[idx]
}
pub fn text(&self, pos: InputPosition) -> &'t str {
let idx = pos.0 as usize;
if !(idx < self.tokens.len()) {
return "";
}
let range = TextRange::offset_len(self.start_offsets[idx], self.tokens[idx].len);
&self.text[range]
}
}
#[derive(Copy, Clone, Ord, PartialOrd, Eq, PartialEq)]
pub(crate) struct InputPosition(u32);
impl InputPosition {
pub fn new() -> Self {
InputPosition(0)
}
}
impl Add<u32> for InputPosition {
type Output = InputPosition;
fn add(self, rhs: u32) -> InputPosition {
InputPosition(self.0 + rhs)
}
}
impl AddAssign<u32> for InputPosition {
fn add_assign(&mut self, rhs: u32) {
self.0 += rhs
}
}

View file

@ -0,0 +1,170 @@
mod event;
mod input;
use {
grammar,
lexer::Token,
parser_api::Parser,
parser_impl::{
event::{process, Event},
input::{InputPosition, ParserInput},
},
TextUnit,
};
use SyntaxKind::{self, EOF, TOMBSTONE};
pub(crate) trait Sink<'a> {
type Tree;
fn new(text: &'a str) -> Self;
fn leaf(&mut self, kind: SyntaxKind, len: TextUnit);
fn start_internal(&mut self, kind: SyntaxKind);
fn finish_internal(&mut self);
fn error(&mut self, err: String);
fn finish(self) -> Self::Tree;
}
/// Parse a sequence of tokens into the representative node tree
pub(crate) fn parse<'a, S: Sink<'a>>(text: &'a str, tokens: &[Token]) -> S::Tree {
let events = {
let input = input::ParserInput::new(text, tokens);
let parser_impl = ParserImpl::new(&input);
let mut parser_api = Parser(parser_impl);
grammar::file(&mut parser_api);
parser_api.0.into_events()
};
let mut sink = S::new(text);
process(&mut sink, tokens, events);
sink.finish()
}
/// Implementation details of `Parser`, extracted
/// to a separate struct in order not to pollute
/// the public API of the `Parser`.
pub(crate) struct ParserImpl<'t> {
inp: &'t ParserInput<'t>,
pos: InputPosition,
events: Vec<Event>,
}
impl<'t> ParserImpl<'t> {
pub(crate) fn new(inp: &'t ParserInput<'t>) -> ParserImpl<'t> {
ParserImpl {
inp,
pos: InputPosition::new(),
events: Vec::new(),
}
}
pub(crate) fn into_events(self) -> Vec<Event> {
assert_eq!(self.nth(0), EOF);
self.events
}
pub(super) fn at_compound2(&self, c1: SyntaxKind, c2: SyntaxKind) -> bool {
self.inp.kind(self.pos) == c1 && self.inp.kind(self.pos + 1) == c2
&& self.inp.start(self.pos + 1) == self.inp.start(self.pos) + self.inp.len(self.pos)
}
pub(super) fn at_compound3(&self, c1: SyntaxKind, c2: SyntaxKind, c3: SyntaxKind) -> bool {
self.inp.kind(self.pos) == c1 && self.inp.kind(self.pos + 1) == c2 && self.inp.kind(self.pos + 2) == c3
&& self.inp.start(self.pos + 1) == self.inp.start(self.pos) + self.inp.len(self.pos)
&& self.inp.start(self.pos + 2) == self.inp.start(self.pos + 1) + self.inp.len(self.pos + 1)
}
pub(super) fn nth(&self, n: u32) -> SyntaxKind {
self.inp.kind(self.pos + n)
}
pub(super) fn at_kw(&self, t: &str) -> bool {
self.inp.text(self.pos) == t
}
pub(super) fn start(&mut self) -> u32 {
let pos = self.events.len() as u32;
self.event(Event::Start {
kind: TOMBSTONE,
forward_parent: None,
});
pos
}
pub(super) fn bump(&mut self) {
let kind = self.nth(0);
if kind == EOF {
return;
}
self.do_bump(kind, 1);
}
pub(super) fn bump_remap(&mut self, kind: SyntaxKind) {
if self.nth(0) == EOF {
// TODO: panic!?
return;
}
self.do_bump(kind, 1);
}
pub(super) fn bump_compound(&mut self, kind: SyntaxKind, n: u8) {
self.do_bump(kind, n);
}
fn do_bump(&mut self, kind: SyntaxKind, n_raw_tokens: u8) {
self.pos += u32::from(n_raw_tokens);
self.event(Event::Token {
kind,
n_raw_tokens,
});
}
pub(super) fn error(&mut self, msg: String) {
self.event(Event::Error { msg })
}
pub(super) fn complete(&mut self, pos: u32, kind: SyntaxKind) {
match self.events[pos as usize] {
Event::Start {
kind: ref mut slot, ..
} => {
*slot = kind;
}
_ => unreachable!(),
}
self.event(Event::Finish);
}
pub(super) fn abandon(&mut self, pos: u32) {
let idx = pos as usize;
if idx == self.events.len() - 1 {
match self.events.pop() {
Some(Event::Start {
kind: TOMBSTONE,
forward_parent: None,
}) => (),
_ => unreachable!(),
}
}
}
pub(super) fn precede(&mut self, pos: u32) -> u32 {
let new_pos = self.start();
match self.events[pos as usize] {
Event::Start {
ref mut forward_parent,
..
} => {
*forward_parent = Some(new_pos - pos);
}
_ => unreachable!(),
}
new_pos
}
fn event(&mut self, event: Event) {
self.events.push(event)
}
}