slint/sixtyfps_compiler/parser.rs
Simon Hausmann e914715d88 Rename Diagnostics to FileDiagnostics
As this structure holds the diagnostics just for one file.
2020-07-16 18:25:42 +02:00

616 lines
20 KiB
Rust

/*! The sixtyfps language parser
This module is responsible for parsing a string into a syntax tree.
The core of it is the `DefaultParser` struct, which holds a list of tokens and
generates a `rowan::GreenNode`.
This module has different submodules containing the actual parser functions.
*/
use crate::diagnostics::FileDiagnostics;
pub use rowan::SmolStr;
use std::convert::TryFrom;
mod document;
mod expressions;
mod statements;
/// Each parser submodule would simply do `use super::prelude::*` to import typically used items
///
/// The parser types (`DefaultParser`, `Parser`, `SyntaxKind`) are always re-exported;
/// the verification helpers and the `parser_test` macro are only re-exported in test builds.
mod prelude {
// Test-only: typed node wrappers and the trait used to verify their structure.
#[cfg(test)]
pub use super::{syntax_nodes, SyntaxNode, SyntaxNodeVerify};
// Always available: what the parsing functions need.
pub use super::{DefaultParser, Parser, SyntaxKind};
// Test-only: the `parser_test` macro from the `parser_test_macro` crate.
#[cfg(test)]
pub use parser_test_macro::parser_test;
}
/// Test-only trait implemented by every generated type in `syntax_nodes`.
///
/// It lets tests check that a parsed node has the structure that was declared
/// for it in the `declare_syntax!` invocation.
#[cfg(test)]
pub trait SyntaxNodeVerify {
/// The SyntaxKind corresponding to this type
const KIND: SyntaxKind;
/// Asserts that the node is of the given SyntaxKind and that it has the expected children
/// Panic if this is not the case
///
/// This default implementation only checks the kind; the implementations generated
/// by `declare_syntax!` additionally check the children through `verify_node!`.
fn verify(node: SyntaxNode) {
assert_eq!(node.kind(), Self::KIND)
}
}
/// Check that a node has the assumed children
///
/// Invoked as `verify_node!(node, spec)` where `spec` is either `_` (nothing to
/// check) or a bracketed, comma separated list of child specifications:
///
/// * `Kind`: exactly one child of that kind
/// * `? Kind`: zero or one child of that kind
/// * `* Kind`: any number of children of that kind
/// * `N Kind` (with `N` a literal): exactly `N` children of that kind
///
/// The macro panics (through `assert!`) when a count does not match or when a
/// child node has a kind that is not listed, and then calls `verify` on every
/// child node of a listed kind.
#[cfg(test)]
macro_rules! verify_node {
    // nothing to verify
    ($node:ident, _) => {};
    // Some combination of children
    ($node:ident, [ $($t1:tt $($t2:ident)?),* ]) => {
        // Check that every expected child is there
        $(verify_node!(@check_has_children $node, $t1 $($t2)* );)*

        // Check that there are no child nodes of an unexpected kind.
        // The message is passed as format arguments (not a pre-built `format!`
        // String), which is the form accepted by every Rust edition.
        for c in $node.children() {
            assert!(
                false $(|| c.kind() == verify_node!(@extract_kind $t1 $($t2)*))*,
                "Node is none of [{}]\n{:?}",
                stringify!($($t1 $($t2)*),*),
                c
            );
        }

        // recurse into every child node of a listed kind
        $(
            for _c in $node.children().filter(|n| n.kind() == verify_node!(@extract_kind $t1 $($t2)*)) {
                <verify_node!(@extract_type $t1 $($t2)*)>::verify(_c)
            }
        )*
    };

    // Any number of this kind.
    (@check_has_children $node:ident, * $kind:ident) => {};
    // 1 or 0
    (@check_has_children $node:ident, ? $kind:ident) => {
        let count = $node.children_with_tokens().filter(|n| n.kind() == SyntaxKind::$kind).count();
        assert!(count <= 1, "Expecting one or zero sub-node of type {}, found {}\n{:?}", stringify!($kind), count, $node);
    };
    // Exactly one
    (@check_has_children $node:ident, $kind:ident) => {
        let count = $node.children_with_tokens().filter(|n| n.kind() == SyntaxKind::$kind).count();
        assert_eq!(count, 1, "Expecting exactly one sub-node of type {}\n{:?}", stringify!($kind), $node);
    };
    // Exact number
    (@check_has_children $node:ident, $count:literal $kind:ident) => {
        let count = $node.children_with_tokens().filter(|n| n.kind() == SyntaxKind::$kind).count();
        assert_eq!(count, $count, "Expecting {} sub-node of type {}, found {}\n{:?}", $count, stringify!($kind), count, $node);
    };

    // Map a child specification to the `SyntaxKind` it refers to
    (@extract_kind * $kind:ident) => {SyntaxKind::$kind};
    (@extract_kind ? $kind:ident) => {SyntaxKind::$kind};
    (@extract_kind $count:literal $kind:ident) => {SyntaxKind::$kind};
    (@extract_kind $kind:ident) => {SyntaxKind::$kind};

    // Map a child specification to the generated wrapper type in `syntax_nodes`
    (@extract_type * $kind:ident) => {$crate::parser::syntax_nodes::$kind};
    (@extract_type ? $kind:ident) => {$crate::parser::syntax_nodes::$kind};
    (@extract_type $count:literal $kind:ident) => {$crate::parser::syntax_nodes::$kind};
    (@extract_type $kind:ident) => {$crate::parser::syntax_nodes::$kind};
}
// Generates the accessor methods of a node wrapper type from its children specification.
//
// It is invoked inside the `impl` block of each type generated by `declare_syntax!`,
// with the same children specification that `verify_node!` accepts. For every listed
// child kind it emits one method named after that kind; the return type depends on
// the multiplicity marker in front of the kind.
macro_rules! node_accessors {
// nothing
(_) => {};
// Some combination of children
([ $($t1:tt $($t2:ident)?),* ]) => {
$(node_accessors!{@ $t1 $($t2)*} )*
};
// `* Kind`: iterator over all child nodes of that kind
(@ * $kind:ident) => {
#[allow(non_snake_case)]
pub fn $kind(&self) -> impl Iterator<Item = $kind> {
self.0.children().filter(|n| n.kind() ==SyntaxKind::$kind).map(Into::into)
}
};
// `? Kind`: the first child node of that kind, if there is one
(@ ? $kind:ident) => {
#[allow(non_snake_case)]
pub fn $kind(&self) -> Option<$kind> {
self.0.child_node(SyntaxKind::$kind).map(Into::into)
}
};
// `2 Kind`: the two child nodes of that kind, as a pair.
// Panics if there are fewer than two; more than two only trips the debug assertion.
(@ 2 $kind:ident) => {
#[allow(non_snake_case)]
pub fn $kind(&self) -> ($kind, $kind) {
let mut it = self.0.children().filter(|n| n.kind() == SyntaxKind::$kind);
let a = it.next().unwrap();
let b = it.next().unwrap();
debug_assert!(it.next().is_none());
(a.into(), b.into())
}
};
// `3 Kind`: the three child nodes of that kind, as a triple.
// Panics if there are fewer than three; more than three only trips the debug assertion.
(@ 3 $kind:ident) => {
#[allow(non_snake_case)]
pub fn $kind(&self) -> ($kind, $kind, $kind) {
let mut it = self.0.children().filter(|n| n.kind() == SyntaxKind::$kind);
let a = it.next().unwrap();
let b = it.next().unwrap();
let c = it.next().unwrap();
debug_assert!(it.next().is_none());
(a.into(), b.into(), c.into())
}
};
// `Kind`: the first child node of that kind; panics if there is none
(@ $kind:ident) => {
#[allow(non_snake_case)]
pub fn $kind(&self) -> $kind {
self.0.child_node(SyntaxKind::$kind).unwrap().into()
}
};
}
/// This macro is invoked once, to declare all the token and syntax kind.
/// The purpose of this macro is to declare the token with its regexp at the same place,
/// and the nodes with their contents.
///
/// The input is made of two brace-delimited lists:
/// * `Token -> regexp,` entries, which become `SyntaxKind` variants and lexer rules;
/// * `NodeKind -> children,` entries (optionally preceded by attributes such as doc
///   comments), which become `SyntaxKind` variants and wrapper types in `syntax_nodes`.
///
/// It expands to the `SyntaxKind` enum, the `lexer()` function and the `syntax_nodes` module.
macro_rules! declare_syntax {
({
$($token:ident -> $rx:expr ,)*
}
{
$( $(#[$attr:meta])* $nodekind:ident -> $children:tt ,)*
})
=> {
// `u16` representation so the kind can be converted to and from the raw
// `rowan::SyntaxKind` / `m_lexer::TokenKind` value through the `num_enum` derives.
#[repr(u16)]
#[derive(Debug, Copy, Clone, Eq, PartialEq, num_enum::IntoPrimitive, num_enum::TryFromPrimitive)]
pub enum SyntaxKind {
Error,
Eof,
// Tokens:
$(
/// Token matching this regexp:
/// ```text
#[doc = $rx]
/// ```
$token,
)*
// Nodes:
$(
$(#[$attr])*
$nodekind,
)*
}
// Builds the lexer from the declared token regexps; input that matches no rule
// is reported with the `SyntaxKind::Error` token kind.
fn lexer() -> m_lexer::Lexer {
m_lexer::LexerBuilder::new()
.error_token(m_lexer::TokenKind(SyntaxKind::Error.into()))
.tokens(&[
$((m_lexer::TokenKind(SyntaxKind::$token.into()), $rx)),*
])
.build()
}
pub mod syntax_nodes {
use super::*;
use derive_more::*;
$(
// One newtype around `SyntaxNode` per declared node kind.
#[derive(Debug, Clone, From, Deref, DerefMut, Into)]
pub struct $nodekind(pub SyntaxNode);
// In test builds, the node can be checked against its declared children.
#[cfg(test)]
impl SyntaxNodeVerify for $nodekind {
const KIND: SyntaxKind = SyntaxKind::$nodekind;
fn verify(node: SyntaxNode) {
assert_eq!(node.kind(), Self::KIND);
verify_node!(node, $children);
}
}
// Accessors for the declared children, named after the child kind.
impl $nodekind {
node_accessors!{$children}
}
)*
}
}
}
// The single invocation of `declare_syntax!`: it lists every token (with the regexp
// given to the lexer) and every node kind (with the children it is expected to have).
declare_syntax! {
// Tokens.
// WARNING: when changing this, do not forget to update the tokenizer in the sixtyfps-rs-macro crate!
{
Whitespace -> r"\s+",
Comment -> r"//.*\n|(?sU)/\*.*\*/", // FIXME: comments within comments
StringLiteral -> r#""[^"]*""#, // FIXME: escapes
NumberLiteral -> r"[\d]+(\.[\d]*)?[\w]*%?",
ColorLiteral -> r"#[\w]+",
Identifier -> r"[\w]+",
LBrace -> r"\{",
RBrace -> r"\}",
LParent -> r"\(",
RParent -> r"\)",
LAngle -> r"<",
RAngle -> r">",
LBracket -> r"\[",
RBracket -> r"\]",
Plus -> r"\+",
Minus -> r"-",
Star -> r"\*",
Div -> r"/",
PlusEqual -> r"\+=",
MinusEqual -> r"-=",
StarEqual -> r"\*=",
DivEqual -> r"/=",
LessEqual -> r"<=",
GreaterEqual -> r">=",
EqualEqual -> r"==",
NotEqual -> r"!=",
ColonEqual -> r":=",
FatArrow -> r"=>",
OrOr -> r"\|\|",
AndAnd -> r"&&",
Equal -> r"=",
Colon -> r":",
Comma -> r",",
Semicolon -> r";",
Bang -> r"!",
Dot -> r"\.",
Question -> r"\?",
}
// Syntax node kinds.
// Children notation: `Kind` = exactly one, `?Kind` = zero or one, `*Kind` = any number,
// `N Kind` = exactly N.
{
Document -> [ *Component ],
Component -> [ Element ],
/// Note: This is in fact the same as Component as far as the parser is concerned
SubElement -> [ Element ],
Element -> [ QualifiedName, *PropertyDeclaration, *Binding, *SignalConnection, *SignalDeclaration, *SubElement, *RepeatedElement, *PropertyAnimation ],
RepeatedElement -> [ ?DeclaredIdentifier, ?RepeatedIndex, Expression , Element],
RepeatedIndex -> [],
ConditionalElement -> [ Expression , Element],
SignalDeclaration -> [ DeclaredIdentifier ],
SignalConnection -> [ CodeBlock ],
PropertyDeclaration-> [ QualifiedName , DeclaredIdentifier, ?BindingExpression ],
PropertyAnimation-> [ DeclaredIdentifier, *Binding ],
/// wraps Identifiers, like `Rectangle` or `SomeModule.SomeType`
QualifiedName-> [],
/// Wraps a single identifier (to disambiguate when there are other identifiers in the production)
DeclaredIdentifier -> [],
Binding-> [ BindingExpression ],
/// the right-hand-side of a binding
// FIXME: the test should check for one or the other (an "or"), not zero-or-one of each
BindingExpression-> [ ?CodeBlock, ?Expression ],
CodeBlock-> [ *Expression ],
// FIXME: the test should test that as alternative rather than several of them (but it can also be a literal)
Expression-> [ ?Expression, ?BangExpression, ?FunctionCallExpression, ?SelfAssignment,
?ConditionalExpression, ?QualifiedName, ?BinaryExpression, ?Array, ?ObjectLiteral,
?UnaryOpExpression],
/// `foo!bar`
BangExpression -> [Expression],
/// expression()
FunctionCallExpression -> [Expression],
/// `expression += expression`
SelfAssignment -> [2 Expression],
/// `condition ? first : second`
ConditionalExpression -> [3 Expression],
/// `expr + expr`
BinaryExpression -> [2 Expression],
/// `- expr`
UnaryOpExpression -> [Expression],
/// `[ ... ]`
Array -> [ *Expression ],
/// `{ foo: bar }`
ObjectLiteral -> [ *ObjectMember ],
/// `foo: bar` inside an ObjectLiteral
ObjectMember -> [ Expression ],
}
}
/// Converts our `SyntaxKind` into the raw kind type used by `rowan`.
impl From<SyntaxKind> for rowan::SyntaxKind {
    fn from(kind: SyntaxKind) -> Self {
        // `SyntaxKind` is `repr(u16)` and converts to its discriminant.
        let raw: u16 = kind.into();
        rowan::SyntaxKind(raw)
    }
}
/// A single token, as produced by the lexer and consumed by the parser.
#[derive(Clone, Debug)]
pub struct Token {
/// The kind of this token
pub kind: SyntaxKind,
/// The text of this token
pub text: SmolStr,
/// Offset of the start of this token; when lexed by `DefaultParser::new`,
/// this is the byte offset within the source string
pub offset: usize,
/// The proc-macro span of this token, if any
/// (only present with the `proc_macro_span` feature)
#[cfg(feature = "proc_macro_span")]
pub span: Option<proc_macro::Span>,
}
impl Default for Token {
fn default() -> Self {
Token {
kind: SyntaxKind::Eof,
text: Default::default(),
offset: 0,
#[cfg(feature = "proc_macro_span")]
span: None,
}
}
}
impl Token {
    /// Returns the text of this token as a string slice.
    pub fn as_str(&self) -> &str {
        let text: &str = self.text.as_str();
        text
    }
}
mod parser_trait {
    //! Module that keeps the implementation details of the node handling private
    use super::*;

    /// The interface used by the parsing functions to inspect tokens, consume
    /// them, report errors and open syntax nodes.
    pub trait Parser: Sized {
        /// A position obtained from `checkpoint` that can later be given to
        /// `start_node_at`.
        type Checkpoint: Clone;

        /// Enter a new node. The node is going to be finished when
        /// the return value of this function is dropped.
        ///
        /// (do not re-implement this function, re-implement
        /// start_node_impl and finish_node_impl)
        #[must_use = "The node will be finished when it is dropped"]
        fn start_node(&mut self, kind: SyntaxKind) -> Node<Self> {
            self.start_node_impl(kind, None, NodeToken(()));
            Node(self)
        }

        /// Returns a checkpoint that can be passed to `start_node_at`.
        #[must_use = "use start_node_at to use this checkpoint"]
        fn checkpoint(&mut self) -> Self::Checkpoint;

        /// Same as `start_node`, but the node is started at the given checkpoint.
        #[must_use = "The node will be finished when it is dropped"]
        fn start_node_at(&mut self, checkpoint: Self::Checkpoint, kind: SyntaxKind) -> Node<Self> {
            self.start_node_impl(kind, Some(checkpoint), NodeToken(()));
            Node(self)
        }

        /// Can only be called by Node::drop
        fn finish_node_impl(&mut self, token: NodeToken);

        /// Can only be called by Self::start_node
        fn start_node_impl(
            &mut self,
            kind: SyntaxKind,
            checkpoint: Option<Self::Checkpoint>,
            token: NodeToken,
        );

        /// Returns the current token.
        fn peek(&mut self) -> Token;

        /// Peek the n'th token, not including whitespaces and comments
        fn nth(&mut self, n: usize) -> SyntaxKind;

        /// Consume the current token.
        fn consume(&mut self);

        /// Report an error with the given message.
        fn error(&mut self, e: impl Into<String>);

        /// Consume the token if it has the right kind, otherwise report a syntax error.
        /// Returns true if the token was consumed.
        fn expect(&mut self, kind: SyntaxKind) -> bool {
            let consumed = self.test(kind);
            if !consumed {
                self.error(format!("Syntax error: expected {:?}", kind));
            }
            consumed
        }

        /// If the token is of this kind, consume it and return true, otherwise return false
        fn test(&mut self, kind: SyntaxKind) -> bool {
            if self.nth(0) == kind {
                self.consume();
                true
            } else {
                false
            }
        }

        /// Consume everything until reaching a token of this kind (or the end of
        /// the input), then expect a token of this kind.
        fn until(&mut self, kind: SyntaxKind) {
            // FIXME! match {} () []
            loop {
                let current = self.nth(0);
                if current == kind || current == SyntaxKind::Eof {
                    break;
                }
                self.consume();
            }
            self.expect(kind);
        }
    }

    /// A token that proves that start_node_impl and finish_node_impl are only
    /// called from the Node implementation
    ///
    /// Since the constructor is private, it cannot be produced by anything else.
    pub struct NodeToken(());

    /// The return value of `DefaultParser::start_node`. This borrows the parser
    /// and finishes the node on Drop
    #[derive(derive_more::DerefMut)]
    pub struct Node<'a, P: Parser>(&'a mut P);

    impl<'a, P: Parser> Drop for Node<'a, P> {
        fn drop(&mut self) {
            let parser: &mut P = &mut *self.0;
            parser.finish_node_impl(NodeToken(()));
        }
    }

    impl<'a, P: Parser> core::ops::Deref for Node<'a, P> {
        type Target = P;

        fn deref(&self) -> &Self::Target {
            &*self.0
        }
    }
}
#[doc(inline)]
pub use parser_trait::*;
/// The `Parser` implementation used by this module: it walks a list of tokens and
/// feeds them into a `rowan::GreenNodeBuilder`.
pub struct DefaultParser {
// The rowan builder that receives the started/finished nodes and the consumed tokens
builder: rowan::GreenNodeBuilder<'static>,
// All the tokens to parse, including whitespace and comments
tokens: Vec<Token>,
// Index in `tokens` of the current (not yet consumed) token
cursor: usize,
// The errors reported so far
diags: FileDiagnostics,
}
impl From<Vec<Token>> for DefaultParser {
fn from(tokens: Vec<Token>) -> Self {
Self { builder: Default::default(), tokens, cursor: 0, diags: Default::default() }
}
}
impl DefaultParser {
/// Constructor that create a parser from the source code
pub fn new(source: String) -> Self {
fn lex(source: &str) -> Vec<Token> {
lexer()
.tokenize(source)
.into_iter()
.scan(0usize, |start_offset, t| {
let s: rowan::SmolStr = source[*start_offset..*start_offset + t.len].into();
let offset = *start_offset;
*start_offset += t.len;
Some(Token {
kind: SyntaxKind::try_from(t.kind.0).unwrap(),
text: s,
offset,
..Default::default()
})
})
.collect()
}
let mut parser = Self::from(lex(&source));
parser.diags.source = Some(source);
parser
}
fn current_token(&self) -> Token {
self.tokens.get(self.cursor).cloned().unwrap_or_default()
}
/// Consume all the whitespace
pub fn consume_ws(&mut self) {
while matches!(self.current_token().kind, SyntaxKind::Whitespace | SyntaxKind::Comment) {
self.consume()
}
}
}
impl Parser for DefaultParser {
fn start_node_impl(
&mut self,
kind: SyntaxKind,
checkpoint: Option<Self::Checkpoint>,
_: NodeToken,
) {
match checkpoint {
None => self.builder.start_node(kind.into()),
Some(cp) => self.builder.start_node_at(cp, kind.into()),
}
}
fn finish_node_impl(&mut self, _: NodeToken) {
self.builder.finish_node();
}
fn peek(&mut self) -> Token {
self.consume_ws();
self.current_token()
}
/// Peek the n'th token, not including whitespaces and comments
fn nth(&mut self, mut n: usize) -> SyntaxKind {
self.consume_ws();
let mut c = self.cursor;
while n > 0 {
n -= 1;
c += 1;
while c < self.tokens.len()
&& matches!(self.tokens[c].kind, SyntaxKind::Whitespace | SyntaxKind::Comment)
{
c += 1;
}
}
self.tokens.get(c).map_or(SyntaxKind::Eof, |x| x.kind)
}
/// Consume the current token
fn consume(&mut self) {
let t = self.current_token();
self.builder.token(t.kind.into(), t.text);
self.cursor += 1;
}
/// Reports an error at the current token location
fn error(&mut self, e: impl Into<String>) {
let current_token = self.current_token();
#[allow(unused_mut)]
let mut span = crate::diagnostics::Span::new(current_token.offset);
#[cfg(feature = "proc_macro_span")]
{
span.span = current_token.span;
}
self.diags.push_error(e.into(), span);
}
type Checkpoint = rowan::Checkpoint;
fn checkpoint(&mut self) -> Self::Checkpoint {
self.builder.checkpoint()
}
}
/// Type-level tag for the sixtyfps language: an enum without variants, so it has no
/// values. It is used as the type parameter of `rowan::SyntaxNode` and
/// `rowan::SyntaxToken` in this file.
#[derive(Clone, Copy, Debug, Eq, Ord, Hash, PartialEq, PartialOrd)]
pub enum Language {}
/// Maps between our `SyntaxKind` and the raw kind stored by `rowan`.
impl rowan::Language for Language {
    type Kind = SyntaxKind;

    /// Panics if the raw value does not correspond to any `SyntaxKind`.
    fn kind_from_raw(raw: rowan::SyntaxKind) -> Self::Kind {
        let rowan::SyntaxKind(value) = raw;
        SyntaxKind::try_from(value).unwrap()
    }

    fn kind_to_raw(kind: Self::Kind) -> rowan::SyntaxKind {
        rowan::SyntaxKind::from(kind)
    }
}
/// A node of the syntax tree (a `rowan::SyntaxNode` specialized for `Language`)
pub type SyntaxNode = rowan::SyntaxNode<Language>;
/// A token of the syntax tree (a `rowan::SyntaxToken` specialized for `Language`)
pub type SyntaxToken = rowan::SyntaxToken<Language>;
/// Helper functions to easily get the children of a given kind.
/// This trait is only supposed to be implemented on SyntaxNode
pub trait SyntaxNodeEx {
/// Returns a child node of the given kind, if any
fn child_node(&self, kind: SyntaxKind) -> Option<SyntaxNode>;
/// Returns a child token of the given kind, if any
fn child_token(&self, kind: SyntaxKind) -> Option<SyntaxToken>;
/// Returns the text of a child token of the given kind, if any
fn child_text(&self, kind: SyntaxKind) -> Option<String>;
}
impl SyntaxNodeEx for SyntaxNode {
    /// Returns the first child node whose kind is `kind`.
    fn child_node(&self, kind: SyntaxKind) -> Option<SyntaxNode> {
        for child in self.children() {
            if child.kind() == kind {
                return Some(child);
            }
        }
        None
    }

    /// Returns the first child (node or token) whose kind is `kind`, provided
    /// that it is a token.
    fn child_token(&self, kind: SyntaxKind) -> Option<SyntaxToken> {
        let element = self.children_with_tokens().find(|el| el.kind() == kind)?;
        element.into_token()
    }

    /// Returns the text of the first child (node or token) whose kind is `kind`,
    /// provided that it is a token.
    fn child_text(&self, kind: SyntaxKind) -> Option<String> {
        let element = self.children_with_tokens().find(|el| el.kind() == kind)?;
        let token = element.as_token()?;
        Some(token.text().to_string())
    }
}
/// Returns a span. This is implemented for tokens and nodes
pub trait Spanned {
/// The span of this item, to be used when reporting diagnostics about it
fn span(&self) -> crate::diagnostics::Span;
}
impl Spanned for SyntaxNode {
    /// The span is built from the start offset of the node's text range.
    fn span(&self) -> crate::diagnostics::Span {
        let start = self.text_range().start();
        crate::diagnostics::Span::new(start.into())
    }
}
impl Spanned for SyntaxToken {
    /// The span is built from the start offset of the token's text range.
    fn span(&self) -> crate::diagnostics::Span {
        let start = self.text_range().start();
        crate::diagnostics::Span::new(start.into())
    }
}
// Actual parser
pub fn parse(source: String, path: Option<&std::path::Path>) -> (SyntaxNode, FileDiagnostics) {
let mut p = DefaultParser::new(source);
document::parse_document(&mut p);
if let Some(path) = path {
p.diags.current_path = path.to_path_buf();
}
(SyntaxNode::new_root(p.builder.finish()), p.diags)
}
/// Reads the file at `path` and parses its content with `parse`.
///
/// # Errors
///
/// Returns the I/O error if the file cannot be read as a string.
pub fn parse_file<P: AsRef<std::path::Path>>(
    path: P,
) -> std::io::Result<(SyntaxNode, FileDiagnostics)> {
    let file_path = path.as_ref();
    let source = std::fs::read_to_string(file_path)?;
    Ok(parse(source, Some(file_path)))
}
#[allow(dead_code)]
pub fn parse_tokens(tokens: Vec<Token>) -> (SyntaxNode, FileDiagnostics) {
let mut p = DefaultParser::from(tokens);
document::parse_document(&mut p);
(SyntaxNode::new_root(p.builder.finish()), p.diags)
}