route parsing via TokenSource trait

This commit is contained in:
Aleksey Kladov 2019-02-20 21:50:07 +03:00
parent 3517c175ac
commit 0c81b9deee
3 changed files with 59 additions and 64 deletions

View file

@@ -17,7 +17,9 @@ use crate::{
/// tree, but rather a flat stream of events of the form /// tree, but rather a flat stream of events of the form
/// "start expression, consume number literal, /// "start expression, consume number literal,
/// finish expression". See `Event` docs for more. /// finish expression". See `Event` docs for more.
pub(crate) struct Parser<'t>(pub(super) ParserImpl<'t>); pub(crate) struct Parser<'t>(
pub(super) ParserImpl<crate::parsing::parser_impl::input::ParserInput<'t>>,
);
impl<'t> Parser<'t> { impl<'t> Parser<'t> {
/// Returns the kind of the current token. /// Returns the kind of the current token.

View file

@@ -1,5 +1,5 @@
mod event; mod event;
mod input; pub(crate) mod input;
use std::cell::Cell; use std::cell::Cell;
@@ -11,7 +11,7 @@ use crate::{
parser_api::Parser, parser_api::Parser,
parser_impl::{ parser_impl::{
event::{Event, EventProcessor}, event::{Event, EventProcessor},
input::{InputPosition, ParserInput}, input::InputPosition,
}, },
}, },
}; };
@@ -39,6 +39,12 @@ pub(super) trait TreeSink {
fn finish(self) -> Self::Tree; fn finish(self) -> Self::Tree;
} }
pub(super) trait TokenSource {
fn token_kind(&self, pos: InputPosition) -> SyntaxKind;
fn is_token_joint_to_next(&self, pos: InputPosition) -> bool;
fn is_keyword(&self, pos: InputPosition, kw: &str) -> bool;
}
/// Parse a sequence of tokens into the representative node tree /// Parse a sequence of tokens into the representative node tree
pub(super) fn parse_with<S: TreeSink>( pub(super) fn parse_with<S: TreeSink>(
sink: S, sink: S,
@@ -48,7 +54,7 @@ pub(super) fn parse_with<S: TreeSink>(
) -> S::Tree { ) -> S::Tree {
let mut events = { let mut events = {
let input = input::ParserInput::new(text, tokens); let input = input::ParserInput::new(text, tokens);
let parser_impl = ParserImpl::new(&input); let parser_impl = ParserImpl::new(input);
let mut parser_api = Parser(parser_impl); let mut parser_api = Parser(parser_impl);
parser(&mut parser_api); parser(&mut parser_api);
parser_api.0.into_events() parser_api.0.into_events()
@@ -59,17 +65,17 @@ pub(super) fn parse_with<S: TreeSink>(
/// Implementation details of `Parser`, extracted /// Implementation details of `Parser`, extracted
/// to a separate struct in order not to pollute /// to a separate struct in order not to pollute
/// the public API of the `Parser`. /// the public API of the `Parser`.
pub(super) struct ParserImpl<'t> { pub(super) struct ParserImpl<S> {
parser_input: &'t ParserInput<'t>, token_source: S,
pos: InputPosition, pos: InputPosition,
events: Vec<Event>, events: Vec<Event>,
steps: Cell<u32>, steps: Cell<u32>,
} }
impl<'t> ParserImpl<'t> { impl<S: TokenSource> ParserImpl<S> {
fn new(inp: &'t ParserInput<'t>) -> ParserImpl<'t> { fn new(token_source: S) -> ParserImpl<S> {
ParserImpl { ParserImpl {
parser_input: inp, token_source,
pos: InputPosition::new(), pos: InputPosition::new(),
events: Vec::new(), events: Vec::new(),
steps: Cell::new(0), steps: Cell::new(0),
@@ -82,11 +88,9 @@ impl<'t> ParserImpl<'t> {
} }
pub(super) fn current2(&self) -> Option<(SyntaxKind, SyntaxKind)> { pub(super) fn current2(&self) -> Option<(SyntaxKind, SyntaxKind)> {
let c1 = self.parser_input.kind(self.pos); let c1 = self.token_source.token_kind(self.pos);
let c2 = self.parser_input.kind(self.pos + 1); let c2 = self.token_source.token_kind(self.pos + 1);
if self.parser_input.token_start_at(self.pos + 1) if self.token_source.is_token_joint_to_next(self.pos) {
== self.parser_input.token_start_at(self.pos) + self.parser_input.token_len(self.pos)
{
Some((c1, c2)) Some((c1, c2))
} else { } else {
None None
@@ -94,14 +98,11 @@ impl<'t> ParserImpl<'t> {
} }
pub(super) fn current3(&self) -> Option<(SyntaxKind, SyntaxKind, SyntaxKind)> { pub(super) fn current3(&self) -> Option<(SyntaxKind, SyntaxKind, SyntaxKind)> {
let c1 = self.parser_input.kind(self.pos); let c1 = self.token_source.token_kind(self.pos);
let c2 = self.parser_input.kind(self.pos + 1); let c2 = self.token_source.token_kind(self.pos + 1);
let c3 = self.parser_input.kind(self.pos + 2); let c3 = self.token_source.token_kind(self.pos + 2);
if self.parser_input.token_start_at(self.pos + 1) if self.token_source.is_token_joint_to_next(self.pos)
== self.parser_input.token_start_at(self.pos) + self.parser_input.token_len(self.pos) && self.token_source.is_token_joint_to_next(self.pos + 1)
&& self.parser_input.token_start_at(self.pos + 2)
== self.parser_input.token_start_at(self.pos + 1)
+ self.parser_input.token_len(self.pos + 1)
{ {
Some((c1, c2, c3)) Some((c1, c2, c3))
} else { } else {
@@ -114,12 +115,11 @@ impl<'t> ParserImpl<'t> {
let steps = self.steps.get(); let steps = self.steps.get();
assert!(steps <= 10_000_000, "the parser seems stuck"); assert!(steps <= 10_000_000, "the parser seems stuck");
self.steps.set(steps + 1); self.steps.set(steps + 1);
self.token_source.token_kind(self.pos + n)
self.parser_input.kind(self.pos + n)
} }
pub(super) fn at_kw(&self, t: &str) -> bool { pub(super) fn at_kw(&self, kw: &str) -> bool {
self.parser_input.token_text(self.pos) == t self.token_source.is_keyword(self.pos, kw)
} }
/// Start parsing right behind the last event. /// Start parsing right behind the last event.

View file

@@ -1,10 +1,40 @@
use crate::{ use crate::{
SyntaxKind, SyntaxKind::EOF, TextRange, TextUnit, SyntaxKind, SyntaxKind::EOF, TextRange, TextUnit,
parsing::lexer::Token, parsing::{
parser_impl::TokenSource,
lexer::Token,
},
}; };
use std::ops::{Add, AddAssign}; use std::ops::{Add, AddAssign};
impl<'t> TokenSource for ParserInput<'t> {
fn token_kind(&self, pos: InputPosition) -> SyntaxKind {
let idx = pos.0 as usize;
if !(idx < self.tokens.len()) {
return EOF;
}
self.tokens[idx].kind
}
fn is_token_joint_to_next(&self, pos: InputPosition) -> bool {
let idx_curr = pos.0 as usize;
let idx_next = pos.0 as usize + 1;
if !(idx_next < self.tokens.len()) {
return true;
}
self.start_offsets[idx_curr] + self.tokens[idx_curr].len == self.start_offsets[idx_next]
}
fn is_keyword(&self, pos: InputPosition, kw: &str) -> bool {
let idx = pos.0 as usize;
if !(idx < self.tokens.len()) {
return false;
}
let range = TextRange::offset_len(self.start_offsets[idx], self.tokens[idx].len);
self.text[range] == *kw
}
}
pub(crate) struct ParserInput<'t> { pub(crate) struct ParserInput<'t> {
text: &'t str, text: &'t str,
/// start position of each token(expect whitespace and comment) /// start position of each token(expect whitespace and comment)
@@ -41,43 +71,6 @@ impl<'t> ParserInput<'t> {
ParserInput { text, start_offsets, tokens } ParserInput { text, start_offsets, tokens }
} }
/// Get the syntax kind of token at given input position.
pub fn kind(&self, pos: InputPosition) -> SyntaxKind {
let idx = pos.0 as usize;
if !(idx < self.tokens.len()) {
return EOF;
}
self.tokens[idx].kind
}
/// Get the length of a token at given input position.
pub fn token_len(&self, pos: InputPosition) -> TextUnit {
let idx = pos.0 as usize;
if !(idx < self.tokens.len()) {
return 0.into();
}
self.tokens[idx].len
}
/// Get the start position of a token at given input position.
pub fn token_start_at(&self, pos: InputPosition) -> TextUnit {
let idx = pos.0 as usize;
if !(idx < self.tokens.len()) {
return 0.into();
}
self.start_offsets[idx]
}
/// Get the raw text of a token at given input position.
pub fn token_text(&self, pos: InputPosition) -> &'t str {
let idx = pos.0 as usize;
if !(idx < self.tokens.len()) {
return "";
}
let range = TextRange::offset_len(self.start_offsets[idx], self.tokens[idx].len);
&self.text[range]
}
} }
#[derive(Copy, Clone, Ord, PartialOrd, Eq, PartialEq)] #[derive(Copy, Clone, Ord, PartialOrd, Eq, PartialEq)]