/*! The sixtyfps language parser This module is responsible to parse a string onto a syntax tree. The core of it is the `DefaultParser` class that holds a list of token and generates a `rowan::GreenNode` This module has different sub modules with the actual parser functions */ use crate::diagnostics::Diagnostics; pub use rowan::SmolStr; use std::convert::TryFrom; mod document; mod expressions; mod statements; /// Each parser submodule would simply do `use super::prelude::*` to import typically used items mod prelude { #[cfg(test)] pub use super::{syntax_nodes, SyntaxNode, SyntaxNodeVerify}; pub use super::{DefaultParser, Parser, SyntaxKind}; #[cfg(test)] pub use parser_test_macro::parser_test; } #[cfg(test)] pub trait SyntaxNodeVerify { /// The SyntaxKind corresponding to this type const KIND: SyntaxKind; /// Asserts that the node is of the given SyntaxKind and that it has the expected children /// Panic if this is not the case fn verify(node: SyntaxNode) { assert_eq!(node.kind(), Self::KIND) } } /// Check that a node has the assumed children #[cfg(test)] macro_rules! verify_node { // nothing to verify ($node:ident, _) => {}; // Some combination of children ($node:ident, [ $($t1:tt $($t2:ident)?),* ]) => { // Check that every children is there $(verify_node!(@check_has_children $node, $t1 $($t2)* );)* // check that there are not too many nodes for c in $node.children() { assert!( false $(|| c.kind() == verify_node!(@extract_kind $t1 $($t2)*))*, format!("Node is none of [{}]\n{:?}", stringify!($($t1 $($t2)*),*) ,c)); } // recurse $( for _c in $node.children().filter(|n| n.kind() == verify_node!(@extract_kind $t1 $($t2)*)) { ::verify(_c) } )* }; // Any number of this kind. (@check_has_children $node:ident, * $kind:ident) => {}; // 1 or 0 (@check_has_children $node:ident, ? $kind:ident) => { let count = $node.children_with_tokens().filter(|n| n.kind() == SyntaxKind::$kind).count(); assert!(count <= 1, "Expecting one or zero sub-node of type {}, found {}\n{:?}", stringify!($kind), count, $node); }; // Exactly one (@check_has_children $node:ident, $kind:ident) => { let count = $node.children_with_tokens().filter(|n| n.kind() == SyntaxKind::$kind).count(); assert_eq!(count, 1, "Expecting exactly one sub-node of type {}\n{:?}", stringify!($kind), $node); }; // Exact number (@check_has_children $node:ident, $count:literal $kind:ident) => { let count = $node.children_with_tokens().filter(|n| n.kind() == SyntaxKind::$kind).count(); assert_eq!(count, $count, "Expecting {} sub-node of type {}, found {}\n{:?}", $count, stringify!($kind), count, $node); }; (@extract_kind * $kind:ident) => {SyntaxKind::$kind}; (@extract_kind ? $kind:ident) => {SyntaxKind::$kind}; (@extract_kind $count:literal $kind:ident) => {SyntaxKind::$kind}; (@extract_kind $kind:ident) => {SyntaxKind::$kind}; (@extract_type * $kind:ident) => {$crate::parser::syntax_nodes::$kind}; (@extract_type ? $kind:ident) => {$crate::parser::syntax_nodes::$kind}; (@extract_type $count:literal $kind:ident) => {$crate::parser::syntax_nodes::$kind}; (@extract_type $kind:ident) => {$crate::parser::syntax_nodes::$kind}; } macro_rules! node_accessors { // nothing (_) => {}; // Some combination of children ([ $($t1:tt $($t2:ident)?),* ]) => { $(node_accessors!{@ $t1 $($t2)*} )* }; (@ * $kind:ident) => { #[allow(non_snake_case)] pub fn $kind(&self) -> impl Iterator { self.0.children().filter(|n| n.kind() ==SyntaxKind::$kind).map(Into::into) } }; (@ ? $kind:ident) => { #[allow(non_snake_case)] pub fn $kind(&self) -> Option<$kind> { self.0.child_node(SyntaxKind::$kind).map(Into::into) } }; (@ 2 $kind:ident) => { #[allow(non_snake_case)] pub fn $kind(&self) -> ($kind, $kind) { let mut it = self.0.children().filter(|n| n.kind() == SyntaxKind::$kind); let a = it.next().unwrap(); let b = it.next().unwrap(); debug_assert!(it.next().is_none()); (a.into(), b.into()) } }; (@ 3 $kind:ident) => { #[allow(non_snake_case)] pub fn $kind(&self) -> ($kind, $kind, $kind) { let mut it = self.0.children().filter(|n| n.kind() == SyntaxKind::$kind); let a = it.next().unwrap(); let b = it.next().unwrap(); let c = it.next().unwrap(); debug_assert!(it.next().is_none()); (a.into(), b.into(), c.into()) } }; (@ $kind:ident) => { #[allow(non_snake_case)] pub fn $kind(&self) -> $kind { self.0.child_node(SyntaxKind::$kind).unwrap().into() } }; } /// This macro is invoked once, to declare all the token and syntax kind. /// The purpose of this macro is to declare the token with its regexp at the same place, /// and the nodes with their contents. macro_rules! declare_syntax { ({ $($token:ident -> $rx:expr ,)* } { $( $(#[$attr:meta])* $nodekind:ident -> $children:tt ,)* }) => { #[repr(u16)] #[derive(Debug, Copy, Clone, Eq, PartialEq, num_enum::IntoPrimitive, num_enum::TryFromPrimitive)] pub enum SyntaxKind { Error, Eof, // Tokens: $( /// Token matching this regexp: /// ```text #[doc = $rx] /// ``` $token, )* // Nodes: $( $(#[$attr])* $nodekind, )* } fn lexer() -> m_lexer::Lexer { m_lexer::LexerBuilder::new() .error_token(m_lexer::TokenKind(SyntaxKind::Error.into())) .tokens(&[ $((m_lexer::TokenKind(SyntaxKind::$token.into()), $rx)),* ]) .build() } pub mod syntax_nodes { use super::*; use derive_more::*; $( #[derive(Debug, Clone, From, Deref, DerefMut, Into)] pub struct $nodekind(pub SyntaxNode); #[cfg(test)] impl SyntaxNodeVerify for $nodekind { const KIND: SyntaxKind = SyntaxKind::$nodekind; fn verify(node: SyntaxNode) { assert_eq!(node.kind(), Self::KIND); verify_node!(node, $children); } } impl $nodekind { node_accessors!{$children} } )* } } } declare_syntax! { // Tokens. // WARNING: when changing this, do not forget to update the tokenizer in the sixtyfps-rs-macro crate! { Whitespace -> r"\s+", Comment -> r"//.*\n|(?sU)/\*.*\*/", // FIXME: comments within comments StringLiteral -> r#""[^"]*""#, // FIXME: escapes NumberLiteral -> r"[\d]+(\.[\d]*)?", ColorLiteral -> r"#[\w]+", Identifier -> r"[\w]+", LBrace -> r"\{", RBrace -> r"\}", LParent -> r"\(", RParent -> r"\)", LAngle -> r"<", RAngle -> r">", LBracket -> r"\[", RBracket -> r"\]", Plus -> r"\+", Minus -> r"-", Star -> r"\*", Div -> r"/", PlusEqual -> r"\+=", MinusEqual -> r"-=", StarEqual -> r"\*=", DivEqual -> r"/=", ColonEqual -> r":=", FatArrow -> r"=>", Equal -> r"=", Colon -> r":", Comma -> r",", Semicolon -> r";", Bang -> r"!", Dot -> r"\.", Question -> r"\?", } // syntax kind { Document -> [ *Component ], Component -> [ Element ], /// Note: This is in fact the same as Component as far as the parser is concerned SubElement -> [ Element ], Element -> [ QualifiedName, *PropertyDeclaration, *Binding, *SignalConnection, *SignalDeclaration, *SubElement, *RepeatedElement ], RepeatedElement -> [ ?DeclaredIdentifier, ?RepeatedIndex, Expression , Element], RepeatedIndex -> [], SignalDeclaration -> [ DeclaredIdentifier ], SignalConnection -> [ CodeBlock ], PropertyDeclaration-> [ QualifiedName , DeclaredIdentifier, ?BindingExpression ], /// wraps Identifiers, like `Rectangle` or `SomeModule.SomeType` QualifiedName-> [], /// Wraps single identifier (to disambiguate when there are other identifiar in the production) DeclaredIdentifier -> [], Binding-> [ BindingExpression ], /// the right-hand-side of a binding // Fixme: the test should be a or BindingExpression-> [ ?CodeBlock, ?Expression ], CodeBlock-> [ *Expression ], // FIXME: the test should test that as alternative rather than several of them (but it can also be a literal) Expression-> [ ?Expression, ?BangExpression, ?FunctionCallExpression, ?SelfAssignment, ?ConditionalExpression, ?QualifiedName, ?BinaryExpression, ?Array, ?ObjectLiteral], /// `foo!bar` BangExpression -> [Expression], /// expression() FunctionCallExpression -> [Expression], /// `expression += expression` SelfAssignment -> [2 Expression], /// `condition ? first : second` ConditionalExpression -> [3 Expression], /// `expr + expr` BinaryExpression -> [2 Expression], /// `[ ... ]` Array -> [ *Expression ], /// `{ foo: bar }` ObjectLiteral -> [ *ObjectMember ], /// `foo: bar` inside an ObjectLiteral ObjectMember -> [ Expression ], } } impl From for rowan::SyntaxKind { fn from(v: SyntaxKind) -> Self { rowan::SyntaxKind(v.into()) } } #[derive(Clone, Debug)] pub struct Token { pub kind: SyntaxKind, pub text: SmolStr, pub offset: usize, #[cfg(feature = "proc_macro_span")] pub span: Option, } impl Default for Token { fn default() -> Self { Token { kind: SyntaxKind::Eof, text: Default::default(), offset: 0, #[cfg(feature = "proc_macro_span")] span: None, } } } impl Token { pub fn as_str(&self) -> &str { self.text.as_str() } } mod parser_trait { //! module allowing to keep implementation details of the node private use super::*; pub trait Parser: Sized { type Checkpoint: Clone; /// Enter a new node. The node is going to be finished when /// The return value of this function is drop'ed /// /// (do not re-implement this function, re-implement /// start_node_impl and finish_node_impl) #[must_use = "The node will be finished when it is dropped"] fn start_node(&mut self, kind: SyntaxKind) -> Node { self.start_node_impl(kind, None, NodeToken(())); Node(self) } #[must_use = "use start_node_at to use this checkpoint"] fn checkpoint(&mut self) -> Self::Checkpoint; #[must_use = "The node will be finished when it is dropped"] fn start_node_at(&mut self, checkpoint: Self::Checkpoint, kind: SyntaxKind) -> Node { self.start_node_impl(kind, Some(checkpoint), NodeToken(())); Node(self) } /// Can only be called by Node::drop fn finish_node_impl(&mut self, token: NodeToken); /// Can only be called by Self::start_node fn start_node_impl( &mut self, kind: SyntaxKind, checkpoint: Option, token: NodeToken, ); fn peek(&mut self) -> Token; /// Peek the n'th token, not including whitespaces and comments fn nth(&mut self, n: usize) -> SyntaxKind; fn consume(&mut self); fn error(&mut self, e: impl Into); /// Consume the token if it has the right kind, otherwise report a syntax error. /// Returns true if the token was consumed. fn expect(&mut self, kind: SyntaxKind) -> bool { if !self.test(kind) { self.error(format!("Syntax error: expected {:?}", kind)); return false; } return true; } /// If the token if of this type, consume it and return true, otherwise return false fn test(&mut self, kind: SyntaxKind) -> bool { if self.nth(0) != kind { return false; } self.consume(); return true; } /// consume everyting until reaching a token of this kind fn until(&mut self, kind: SyntaxKind) { // FIXME! match {} () [] while { let k = self.nth(0); k != kind && k != SyntaxKind::Eof } { self.consume(); } self.expect(kind); } } /// A token to proof that start_node_impl and finish_node_impl are only /// called from the Node implementation /// /// Since the constructor is private, it cannot be produced by anything else. pub struct NodeToken(()); /// The return value of `DefaultParser::start_node`. This borrows the parser /// and finishes the node on Drop #[derive(derive_more::DerefMut)] pub struct Node<'a, P: Parser>(&'a mut P); impl<'a, P: Parser> Drop for Node<'a, P> { fn drop(&mut self) { self.0.finish_node_impl(NodeToken(())); } } impl<'a, P: Parser> core::ops::Deref for Node<'a, P> { type Target = P; fn deref(&self) -> &Self::Target { self.0 } } } #[doc(inline)] pub use parser_trait::*; pub struct DefaultParser { builder: rowan::GreenNodeBuilder<'static>, tokens: Vec, cursor: usize, diags: Diagnostics, } impl From> for DefaultParser { fn from(tokens: Vec) -> Self { Self { builder: Default::default(), tokens, cursor: 0, diags: Default::default() } } } impl DefaultParser { /// Constructor that create a parser from the source code pub fn new(source: &str) -> Self { fn lex(source: &str) -> Vec { lexer() .tokenize(source) .into_iter() .scan(0usize, |start_offset, t| { let s: rowan::SmolStr = source[*start_offset..*start_offset + t.len].into(); let offset = *start_offset; *start_offset += t.len; Some(Token { kind: SyntaxKind::try_from(t.kind.0).unwrap(), text: s, offset, ..Default::default() }) }) .collect() } Self::from(lex(source)) } fn current_token(&self) -> Token { self.tokens.get(self.cursor).cloned().unwrap_or_default() } /// Consume all the whitespace pub fn consume_ws(&mut self) { while matches!(self.current_token().kind, SyntaxKind::Whitespace | SyntaxKind::Comment) { self.consume() } } } impl Parser for DefaultParser { fn start_node_impl( &mut self, kind: SyntaxKind, checkpoint: Option, _: NodeToken, ) { match checkpoint { None => self.builder.start_node(kind.into()), Some(cp) => self.builder.start_node_at(cp, kind.into()), } } fn finish_node_impl(&mut self, _: NodeToken) { self.builder.finish_node(); } fn peek(&mut self) -> Token { self.consume_ws(); self.current_token() } /// Peek the n'th token, not including whitespaces and comments fn nth(&mut self, mut n: usize) -> SyntaxKind { self.consume_ws(); let mut c = self.cursor; while n > 0 { n -= 1; c += 1; while c < self.tokens.len() && matches!(self.tokens[c].kind, SyntaxKind::Whitespace | SyntaxKind::Comment) { c += 1; } } self.tokens.get(c).map_or(SyntaxKind::Eof, |x| x.kind) } /// Consume the current token fn consume(&mut self) { let t = self.current_token(); self.builder.token(t.kind.into(), t.text); self.cursor += 1; } /// Reports an error at the current token location fn error(&mut self, e: impl Into) { let current_token = self.current_token(); #[allow(unused_mut)] let mut span = crate::diagnostics::Span::new(current_token.offset); #[cfg(feature = "proc_macro_span")] { span.span = current_token.span; } self.diags.push_error(e.into(), span); } type Checkpoint = rowan::Checkpoint; fn checkpoint(&mut self) -> Self::Checkpoint { self.builder.checkpoint() } } #[derive(Clone, Copy, Debug, Eq, Ord, Hash, PartialEq, PartialOrd)] pub enum Language {} impl rowan::Language for Language { type Kind = SyntaxKind; fn kind_from_raw(raw: rowan::SyntaxKind) -> Self::Kind { SyntaxKind::try_from(raw.0).unwrap() } fn kind_to_raw(kind: Self::Kind) -> rowan::SyntaxKind { kind.into() } } pub type SyntaxNode = rowan::SyntaxNode; pub type SyntaxToken = rowan::SyntaxToken; /// Helper functions to easily get the children of a given kind. /// This traits is only supposed to be implemented on SyntaxNope pub trait SyntaxNodeEx { fn child_node(&self, kind: SyntaxKind) -> Option; fn child_token(&self, kind: SyntaxKind) -> Option; fn child_text(&self, kind: SyntaxKind) -> Option; } impl SyntaxNodeEx for SyntaxNode { fn child_node(&self, kind: SyntaxKind) -> Option { self.children().find(|n| n.kind() == kind) } fn child_token(&self, kind: SyntaxKind) -> Option { self.children_with_tokens().find(|n| n.kind() == kind).and_then(|x| x.into_token()) } fn child_text(&self, kind: SyntaxKind) -> Option { self.children_with_tokens() .find(|n| n.kind() == kind) .and_then(|x| x.as_token().map(|x| x.text().to_string())) } } /// Returns a span. This is implemented for tokens and nodes pub trait Spanned { fn span(&self) -> crate::diagnostics::Span; } impl Spanned for SyntaxNode { fn span(&self) -> crate::diagnostics::Span { crate::diagnostics::Span::new(self.text_range().start().into()) } } impl Spanned for SyntaxToken { fn span(&self) -> crate::diagnostics::Span { crate::diagnostics::Span::new(self.text_range().start().into()) } } // Actual parser pub fn parse(source: &str) -> (SyntaxNode, Diagnostics) { let mut p = DefaultParser::new(source); document::parse_document(&mut p); (SyntaxNode::new_root(p.builder.finish()), p.diags) } #[allow(dead_code)] pub fn parse_tokens(tokens: Vec) -> (SyntaxNode, Diagnostics) { let mut p = DefaultParser::from(tokens); document::parse_document(&mut p); (SyntaxNode::new_root(p.builder.finish()), p.diags) }