mirror of
https://github.com/slint-ui/slint.git
synced 2025-09-29 13:24:48 +00:00
592 lines
19 KiB
Rust
592 lines
19 KiB
Rust
/*! The sixtyfps language parser

This module is responsible for parsing a string into a syntax tree.

The core of it is the `DefaultParser` struct, which holds a list of tokens and
generates a `rowan::GreenNode`.

This module has several submodules that contain the actual parser functions.
*/
|
|
|
|
use crate::diagnostics::Diagnostics;
|
|
pub use rowan::SmolStr;
|
|
use std::convert::TryFrom;
|
|
|
|
mod document;
|
|
mod expressions;
|
|
mod statements;
|
|
|
|
/// Each parser submodule would simply do `use super::prelude::*` to import typically used items
|
|
mod prelude {
|
|
#[cfg(test)]
|
|
pub use super::{syntax_nodes, SyntaxNode, SyntaxNodeVerify};
|
|
pub use super::{DefaultParser, Parser, SyntaxKind};
|
|
#[cfg(test)]
|
|
pub use parser_test_macro::parser_test;
|
|
}
|
|
|
|
/// Test-only trait used to check the shape of a parsed node.
#[cfg(test)]
pub trait SyntaxNodeVerify {
    /// The `SyntaxKind` corresponding to the implementing type.
    const KIND: SyntaxKind;

    /// Asserts that `node` has the kind `Self::KIND`.
    ///
    /// This default only checks the kind. Implementations may override it to
    /// also check the children of the node.
    ///
    /// # Panics
    /// Panics if the kind of `node` differs from `Self::KIND`.
    fn verify(node: SyntaxNode) {
        let actual = node.kind();
        assert_eq!(actual, Self::KIND);
    }
}
|
|
|
|
/// Check that a node has the assumed children.
///
/// Invoked as `verify_node!(node, _)` (nothing to check) or as
/// `verify_node!(node, [ spec, ... ])`, where each `spec` is one of:
///
/// * `Kind`   - exactly one child of that kind,
/// * `?Kind`  - zero or one child of that kind,
/// * `*Kind`  - any number of children of that kind,
/// * `N Kind` - exactly `N` children of that kind (`N` is an integer literal).
///
/// Besides the counts, the macro asserts that every child node has one of the
/// listed kinds, and then calls `verify` on the matching `syntax_nodes` type
/// for each child.
///
/// The rules starting with `@` are internal helpers.
#[cfg(test)]
macro_rules! verify_node {
    // nothing to verify
    ($node:ident, _) => {};
    // Some combination of children
    ($node:ident, [ $($t1:tt $($t2:ident)?),* ]) => {
        // Check that every expected child is there
        $(verify_node!(@check_has_children $node, $t1 $($t2)* );)*

        // Check that there is no child of an unexpected kind.
        // The message is given as a format string with arguments (and not as a
        // pre-built `format!(..)` value) so that the assertion is accepted by
        // every Rust edition.
        for c in $node.children() {
            assert!(
                false $(|| c.kind() == verify_node!(@extract_kind $t1 $($t2)*))*,
                "Node is none of [{}]\n{:?}",
                stringify!($($t1 $($t2)*),*),
                c
            );
        }

        // recurse
        $(
            for _c in $node.children().filter(|n| n.kind() == verify_node!(@extract_kind $t1 $($t2)*)) {
                <verify_node!(@extract_type $t1 $($t2)*)>::verify(_c)
            }
        )*
    };

    // Any number of this kind.
    (@check_has_children $node:ident, * $kind:ident) => {};
    // 1 or 0
    (@check_has_children $node:ident, ? $kind:ident) => {
        let count = $node.children_with_tokens().filter(|n| n.kind() == SyntaxKind::$kind).count();
        assert!(count <= 1, "Expecting one or zero sub-node of type {}, found {}\n{:?}", stringify!($kind), count, $node);
    };
    // Exactly one
    (@check_has_children $node:ident, $kind:ident) => {
        let count = $node.children_with_tokens().filter(|n| n.kind() == SyntaxKind::$kind).count();
        assert_eq!(count, 1, "Expecting exactly one sub-node of type {}\n{:?}", stringify!($kind), $node);
    };
    // Exact number
    (@check_has_children $node:ident, $count:literal $kind:ident) => {
        let count = $node.children_with_tokens().filter(|n| n.kind() == SyntaxKind::$kind).count();
        assert_eq!(count, $count, "Expecting {} sub-node of type {}, found {}\n{:?}", $count, stringify!($kind), count, $node);
    };

    // Map a child specification to its `SyntaxKind` variant
    (@extract_kind * $kind:ident) => {SyntaxKind::$kind};
    (@extract_kind ? $kind:ident) => {SyntaxKind::$kind};
    (@extract_kind $count:literal $kind:ident) => {SyntaxKind::$kind};
    (@extract_kind $kind:ident) => {SyntaxKind::$kind};

    // Map a child specification to its typed node in `syntax_nodes`
    (@extract_type * $kind:ident) => {$crate::parser::syntax_nodes::$kind};
    (@extract_type ? $kind:ident) => {$crate::parser::syntax_nodes::$kind};
    (@extract_type $count:literal $kind:ident) => {$crate::parser::syntax_nodes::$kind};
    (@extract_type $kind:ident) => {$crate::parser::syntax_nodes::$kind};
}
|
|
|
|
/// Generates accessor methods for the children of a typed syntax node.
///
/// It takes the same child specification as the node declarations in
/// `declare_syntax!`: either `_` (no accessor) or a list `[ spec, ... ]`.
/// Each accessor is named after the child kind, and its return type depends
/// on the specification:
///
/// * `*Kind`  - an iterator over all children of that kind,
/// * `?Kind`  - an `Option` holding the first child of that kind,
/// * `2 Kind` - a pair; panics if there are fewer than two such children,
/// * `3 Kind` - a triple; panics if there are fewer than three such children,
/// * `Kind`   - the first child of that kind; panics if there is none.
macro_rules! node_accessors {
    // No children declared: nothing to generate
    (_) => {};
    // A list of children: one accessor per entry
    ([ $($t1:tt $($t2:ident)?),* ]) => {
        $(node_accessors!{@ $t1 $($t2)*} )*
    };

    // Any number of children
    (@ * $kind:ident) => {
        #[allow(non_snake_case)]
        pub fn $kind(&self) -> impl Iterator<Item = $kind> {
            self.0
                .children()
                .filter(|child| child.kind() == SyntaxKind::$kind)
                .map(Into::into)
        }
    };
    // Optional child
    (@ ? $kind:ident) => {
        #[allow(non_snake_case)]
        pub fn $kind(&self) -> Option<$kind> {
            let found = self.0.child_node(SyntaxKind::$kind);
            found.map(Into::into)
        }
    };
    // Exactly two children
    (@ 2 $kind:ident) => {
        #[allow(non_snake_case)]
        pub fn $kind(&self) -> ($kind, $kind) {
            let mut matching =
                self.0.children().filter(|child| child.kind() == SyntaxKind::$kind);
            let first = matching.next().unwrap();
            let second = matching.next().unwrap();
            // More than two is only detected in debug builds
            debug_assert!(matching.next().is_none());
            (first.into(), second.into())
        }
    };
    // Exactly three children
    (@ 3 $kind:ident) => {
        #[allow(non_snake_case)]
        pub fn $kind(&self) -> ($kind, $kind, $kind) {
            let mut matching =
                self.0.children().filter(|child| child.kind() == SyntaxKind::$kind);
            let first = matching.next().unwrap();
            let second = matching.next().unwrap();
            let third = matching.next().unwrap();
            // More than three is only detected in debug builds
            debug_assert!(matching.next().is_none());
            (first.into(), second.into(), third.into())
        }
    };
    // Exactly one child
    (@ $kind:ident) => {
        #[allow(non_snake_case)]
        pub fn $kind(&self) -> $kind {
            let child = self.0.child_node(SyntaxKind::$kind).unwrap();
            child.into()
        }
    };
}
|
|
|
|
/// Declares every token kind and every syntax node kind in one place.
///
/// This macro is invoked once. Its input is two brace-delimited lists:
///
/// 1. `Token -> regexp,` entries: each token with the regular expression that
///    the lexer uses to recognize it;
/// 2. `NodeKind -> children,` entries: each node kind with a description of
///    its children (see `node_accessors!` for the syntax).
///
/// From these it generates:
///
/// * the `SyntaxKind` enum (`Error`, `Eof`, then the tokens, then the nodes),
/// * the `lexer()` function that builds the `m_lexer::Lexer` from the regexps,
/// * the `syntax_nodes` module, with one wrapper struct around `SyntaxNode`
///   per node kind, its child accessors and (in test builds) a
///   `SyntaxNodeVerify` implementation.
macro_rules! declare_syntax {
    (
        {
            $($token:ident -> $rx:expr ,)*
        }
        {
            $( $(#[$attr:meta])* $nodekind:ident -> $children:tt ,)*
        }
    ) => {
        #[repr(u16)]
        #[derive(Debug, Copy, Clone, Eq, PartialEq, num_enum::IntoPrimitive, num_enum::TryFromPrimitive)]
        pub enum SyntaxKind {
            Error,
            Eof,

            // Tokens:
            $(
                /// Token matching this regexp:
                /// ```text
                #[doc = $rx]
                /// ```
                $token,
            )*

            // Nodes:
            $(
                $(#[$attr])*
                $nodekind,
            )*
        }

        /// Builds the lexer that recognizes every declared token.
        fn lexer() -> m_lexer::Lexer {
            m_lexer::LexerBuilder::new()
                .error_token(m_lexer::TokenKind(SyntaxKind::Error.into()))
                .tokens(&[
                    $((m_lexer::TokenKind(SyntaxKind::$token.into()), $rx)),*
                ])
                .build()
        }

        /// One typed wrapper around `SyntaxNode` per declared node kind.
        pub mod syntax_nodes {
            use super::*;
            use derive_more::*;

            $(
                #[derive(Debug, Clone, From, Deref, DerefMut, Into)]
                pub struct $nodekind(pub SyntaxNode);

                #[cfg(test)]
                impl SyntaxNodeVerify for $nodekind {
                    const KIND: SyntaxKind = SyntaxKind::$nodekind;

                    fn verify(node: SyntaxNode) {
                        assert_eq!(node.kind(), Self::KIND);
                        verify_node!(node, $children);
                    }
                }

                impl $nodekind {
                    node_accessors!{$children}
                }
            )*
        }
    }
}
|
|
declare_syntax! {
    // Tokens.
    // WARNING: when changing this, do not forget to update the tokenizer in the sixtyfps-rs-macro crate!
    {
        Whitespace    -> r"\s+",
        Comment       -> r"//.*\n|(?sU)/\*.*\*/", // FIXME: comments within comments
        StringLiteral -> r#""[^"]*""#, // FIXME: escapes
        NumberLiteral -> r"[\d]+(\.[\d]*)?",
        ColorLiteral  -> r"#[\w]+",
        Identifier    -> r"[\w]+",
        LBrace        -> r"\{",
        RBrace        -> r"\}",
        LParent       -> r"\(",
        RParent       -> r"\)",
        LAngle        -> r"<",
        RAngle        -> r">",
        LBracket      -> r"\[",
        RBracket      -> r"\]",
        Plus          -> r"\+",
        Minus         -> r"-",
        Star          -> r"\*",
        Div           -> r"/",
        PlusEqual     -> r"\+=",
        MinusEqual    -> r"-=",
        StarEqual     -> r"\*=",
        DivEqual      -> r"/=",
        ColonEqual    -> r":=",
        FatArrow      -> r"=>",
        Equal         -> r"=",
        Colon         -> r":",
        Comma         -> r",",
        Semicolon     -> r";",
        Bang          -> r"!",
        Dot           -> r"\.",
        Question      -> r"\?",
    }
    // Syntax node kinds, each with the children it is expected to have.
    {
        Document -> [ *Component ],
        Component -> [ Element ],
        /// Note: as far as the parser is concerned, this is in fact the same as Component
        SubElement -> [ Element ],
        Element -> [
            QualifiedName,
            *PropertyDeclaration,
            *Binding,
            *SignalConnection,
            *SignalDeclaration,
            *SubElement,
            *RepeatedElement
        ],
        RepeatedElement -> [ ?DeclaredIdentifier, ?RepeatedIndex, Expression, Element ],
        RepeatedIndex -> [],
        SignalDeclaration -> [ DeclaredIdentifier ],
        SignalConnection -> [ CodeBlock ],
        PropertyDeclaration -> [ QualifiedName, DeclaredIdentifier, ?BindingExpression ],
        /// Wraps identifiers, like `Rectangle` or `SomeModule.SomeType`
        QualifiedName -> [],
        /// Wraps a single identifier (to disambiguate when there are other identifiers in the production)
        DeclaredIdentifier -> [],
        Binding -> [ BindingExpression ],
        /// The right-hand side of a binding
        // FIXME: the test should check for one or the other
        BindingExpression -> [ ?CodeBlock, ?Expression ],
        CodeBlock -> [ *Expression ],
        // FIXME: the test should check these as alternatives rather than allow several of them
        // (but it can also be a literal)
        Expression -> [
            ?Expression,
            ?BangExpression,
            ?FunctionCallExpression,
            ?SelfAssignment,
            ?ConditionalExpression,
            ?QualifiedName,
            ?BinaryExpression,
            ?Array,
            ?ObjectLiteral
        ],
        /// `foo!bar`
        BangExpression -> [ Expression ],
        /// `expression()`
        FunctionCallExpression -> [ Expression ],
        /// `expression += expression`
        SelfAssignment -> [ 2 Expression ],
        /// `condition ? first : second`
        ConditionalExpression -> [ 3 Expression ],
        /// `expr + expr`
        BinaryExpression -> [ 2 Expression ],
        /// `[ ... ]`
        Array -> [ *Expression ],
        /// `{ foo: bar }`
        ObjectLiteral -> [ *ObjectMember ],
        /// `foo: bar` inside an ObjectLiteral
        ObjectMember -> [ Expression ],
    }
}
|
|
|
|
/// Converts our `SyntaxKind` into the raw kind stored by rowan.
impl From<SyntaxKind> for rowan::SyntaxKind {
    fn from(v: SyntaxKind) -> Self {
        let raw = v.into();
        rowan::SyntaxKind(raw)
    }
}
|
|
|
|
#[derive(Clone, Debug)]
|
|
pub struct Token {
|
|
pub kind: SyntaxKind,
|
|
pub text: SmolStr,
|
|
pub offset: usize,
|
|
#[cfg(feature = "proc_macro_span")]
|
|
pub span: Option<proc_macro::Span>,
|
|
}
|
|
|
|
impl Default for Token {
|
|
fn default() -> Self {
|
|
Token {
|
|
kind: SyntaxKind::Eof,
|
|
text: Default::default(),
|
|
offset: 0,
|
|
#[cfg(feature = "proc_macro_span")]
|
|
span: None,
|
|
}
|
|
}
|
|
}
|
|
|
|
impl Token {
|
|
pub fn as_str(&self) -> &str {
|
|
self.text.as_str()
|
|
}
|
|
}
|
|
|
|
mod parser_trait {
|
|
//! module allowing to keep implementation details of the node private
|
|
use super::*;
|
|
|
|
pub trait Parser: Sized {
|
|
type Checkpoint: Clone;
|
|
|
|
/// Enter a new node. The node is going to be finished when
|
|
/// The return value of this function is drop'ed
|
|
///
|
|
/// (do not re-implement this function, re-implement
|
|
/// start_node_impl and finish_node_impl)
|
|
#[must_use = "The node will be finished when it is dropped"]
|
|
fn start_node(&mut self, kind: SyntaxKind) -> Node<Self> {
|
|
self.start_node_impl(kind, None, NodeToken(()));
|
|
Node(self)
|
|
}
|
|
#[must_use = "use start_node_at to use this checkpoint"]
|
|
fn checkpoint(&mut self) -> Self::Checkpoint;
|
|
#[must_use = "The node will be finished when it is dropped"]
|
|
fn start_node_at(&mut self, checkpoint: Self::Checkpoint, kind: SyntaxKind) -> Node<Self> {
|
|
self.start_node_impl(kind, Some(checkpoint), NodeToken(()));
|
|
Node(self)
|
|
}
|
|
|
|
/// Can only be called by Node::drop
|
|
fn finish_node_impl(&mut self, token: NodeToken);
|
|
/// Can only be called by Self::start_node
|
|
fn start_node_impl(
|
|
&mut self,
|
|
kind: SyntaxKind,
|
|
checkpoint: Option<Self::Checkpoint>,
|
|
token: NodeToken,
|
|
);
|
|
fn peek(&mut self) -> Token;
|
|
/// Peek the n'th token, not including whitespaces and comments
|
|
fn nth(&mut self, n: usize) -> SyntaxKind;
|
|
fn consume(&mut self);
|
|
fn error(&mut self, e: impl Into<String>);
|
|
|
|
/// Consume the token if it has the right kind, otherwise report a syntax error.
|
|
/// Returns true if the token was consumed.
|
|
fn expect(&mut self, kind: SyntaxKind) -> bool {
|
|
if !self.test(kind) {
|
|
self.error(format!("Syntax error: expected {:?}", kind));
|
|
return false;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
/// If the token if of this type, consume it and return true, otherwise return false
|
|
fn test(&mut self, kind: SyntaxKind) -> bool {
|
|
if self.nth(0) != kind {
|
|
return false;
|
|
}
|
|
self.consume();
|
|
return true;
|
|
}
|
|
|
|
/// consume everyting until reaching a token of this kind
|
|
fn until(&mut self, kind: SyntaxKind) {
|
|
// FIXME! match {} () []
|
|
while {
|
|
let k = self.nth(0);
|
|
k != kind && k != SyntaxKind::Eof
|
|
} {
|
|
self.consume();
|
|
}
|
|
self.expect(kind);
|
|
}
|
|
}
|
|
|
|
/// A token to proof that start_node_impl and finish_node_impl are only
|
|
/// called from the Node implementation
|
|
///
|
|
/// Since the constructor is private, it cannot be produced by anything else.
|
|
pub struct NodeToken(());
|
|
/// The return value of `DefaultParser::start_node`. This borrows the parser
|
|
/// and finishes the node on Drop
|
|
#[derive(derive_more::DerefMut)]
|
|
pub struct Node<'a, P: Parser>(&'a mut P);
|
|
impl<'a, P: Parser> Drop for Node<'a, P> {
|
|
fn drop(&mut self) {
|
|
self.0.finish_node_impl(NodeToken(()));
|
|
}
|
|
}
|
|
impl<'a, P: Parser> core::ops::Deref for Node<'a, P> {
|
|
type Target = P;
|
|
fn deref(&self) -> &Self::Target {
|
|
self.0
|
|
}
|
|
}
|
|
}
|
|
#[doc(inline)]
|
|
pub use parser_trait::*;
|
|
|
|
pub struct DefaultParser {
|
|
builder: rowan::GreenNodeBuilder<'static>,
|
|
tokens: Vec<Token>,
|
|
cursor: usize,
|
|
diags: Diagnostics,
|
|
}
|
|
|
|
/// Creates a parser positioned at the first of the given tokens.
impl From<Vec<Token>> for DefaultParser {
    fn from(tokens: Vec<Token>) -> Self {
        Self {
            builder: Default::default(),
            tokens,
            cursor: 0,
            diags: Default::default(),
        }
    }
}
|
|
|
|
impl DefaultParser {
|
|
/// Constructor that create a parser from the source code
|
|
pub fn new(source: &str) -> Self {
|
|
fn lex(source: &str) -> Vec<Token> {
|
|
lexer()
|
|
.tokenize(source)
|
|
.into_iter()
|
|
.scan(0usize, |start_offset, t| {
|
|
let s: rowan::SmolStr = source[*start_offset..*start_offset + t.len].into();
|
|
let offset = *start_offset;
|
|
*start_offset += t.len;
|
|
Some(Token {
|
|
kind: SyntaxKind::try_from(t.kind.0).unwrap(),
|
|
text: s,
|
|
offset,
|
|
..Default::default()
|
|
})
|
|
})
|
|
.collect()
|
|
}
|
|
Self::from(lex(source))
|
|
}
|
|
|
|
fn current_token(&self) -> Token {
|
|
self.tokens.get(self.cursor).cloned().unwrap_or_default()
|
|
}
|
|
|
|
/// Consume all the whitespace
|
|
pub fn consume_ws(&mut self) {
|
|
while matches!(self.current_token().kind, SyntaxKind::Whitespace | SyntaxKind::Comment) {
|
|
self.consume()
|
|
}
|
|
}
|
|
}
|
|
|
|
impl Parser for DefaultParser {
|
|
fn start_node_impl(
|
|
&mut self,
|
|
kind: SyntaxKind,
|
|
checkpoint: Option<Self::Checkpoint>,
|
|
_: NodeToken,
|
|
) {
|
|
match checkpoint {
|
|
None => self.builder.start_node(kind.into()),
|
|
Some(cp) => self.builder.start_node_at(cp, kind.into()),
|
|
}
|
|
}
|
|
|
|
fn finish_node_impl(&mut self, _: NodeToken) {
|
|
self.builder.finish_node();
|
|
}
|
|
|
|
fn peek(&mut self) -> Token {
|
|
self.consume_ws();
|
|
self.current_token()
|
|
}
|
|
|
|
/// Peek the n'th token, not including whitespaces and comments
|
|
fn nth(&mut self, mut n: usize) -> SyntaxKind {
|
|
self.consume_ws();
|
|
let mut c = self.cursor;
|
|
while n > 0 {
|
|
n -= 1;
|
|
c += 1;
|
|
while c < self.tokens.len()
|
|
&& matches!(self.tokens[c].kind, SyntaxKind::Whitespace | SyntaxKind::Comment)
|
|
{
|
|
c += 1;
|
|
}
|
|
}
|
|
self.tokens.get(c).map_or(SyntaxKind::Eof, |x| x.kind)
|
|
}
|
|
|
|
/// Consume the current token
|
|
fn consume(&mut self) {
|
|
let t = self.current_token();
|
|
self.builder.token(t.kind.into(), t.text);
|
|
self.cursor += 1;
|
|
}
|
|
|
|
/// Reports an error at the current token location
|
|
fn error(&mut self, e: impl Into<String>) {
|
|
let current_token = self.current_token();
|
|
#[allow(unused_mut)]
|
|
let mut span = crate::diagnostics::Span::new(current_token.offset);
|
|
#[cfg(feature = "proc_macro_span")]
|
|
{
|
|
span.span = current_token.span;
|
|
}
|
|
self.diags.push_error(e.into(), span);
|
|
}
|
|
|
|
type Checkpoint = rowan::Checkpoint;
|
|
fn checkpoint(&mut self) -> Self::Checkpoint {
|
|
self.builder.checkpoint()
|
|
}
|
|
}
|
|
|
|
/// Marker type that implements `rowan::Language` for this parser.
///
/// It has no variants: it is only used as a type parameter (see `SyntaxNode`
/// and `SyntaxToken`).
#[derive(Clone, Copy, Debug, Eq, Ord, Hash, PartialEq, PartialOrd)]
pub enum Language {}
|
|
impl rowan::Language for Language {
|
|
type Kind = SyntaxKind;
|
|
fn kind_from_raw(raw: rowan::SyntaxKind) -> Self::Kind {
|
|
SyntaxKind::try_from(raw.0).unwrap()
|
|
}
|
|
fn kind_to_raw(kind: Self::Kind) -> rowan::SyntaxKind {
|
|
kind.into()
|
|
}
|
|
}
|
|
|
|
pub type SyntaxNode = rowan::SyntaxNode<Language>;
|
|
pub type SyntaxToken = rowan::SyntaxToken<Language>;
|
|
|
|
/// Helper functions to easily get the children of a given kind.
|
|
/// This traits is only supposed to be implemented on SyntaxNope
|
|
pub trait SyntaxNodeEx {
|
|
fn child_node(&self, kind: SyntaxKind) -> Option<SyntaxNode>;
|
|
fn child_token(&self, kind: SyntaxKind) -> Option<SyntaxToken>;
|
|
fn child_text(&self, kind: SyntaxKind) -> Option<String>;
|
|
}
|
|
|
|
impl SyntaxNodeEx for SyntaxNode {
|
|
fn child_node(&self, kind: SyntaxKind) -> Option<SyntaxNode> {
|
|
self.children().find(|n| n.kind() == kind)
|
|
}
|
|
fn child_token(&self, kind: SyntaxKind) -> Option<SyntaxToken> {
|
|
self.children_with_tokens().find(|n| n.kind() == kind).and_then(|x| x.into_token())
|
|
}
|
|
fn child_text(&self, kind: SyntaxKind) -> Option<String> {
|
|
self.children_with_tokens()
|
|
.find(|n| n.kind() == kind)
|
|
.and_then(|x| x.as_token().map(|x| x.text().to_string()))
|
|
}
|
|
}
|
|
|
|
/// Returns a span. This is implemented for tokens and nodes
|
|
pub trait Spanned {
|
|
fn span(&self) -> crate::diagnostics::Span;
|
|
}
|
|
|
|
impl Spanned for SyntaxNode {
|
|
fn span(&self) -> crate::diagnostics::Span {
|
|
crate::diagnostics::Span::new(self.text_range().start().into())
|
|
}
|
|
}
|
|
|
|
impl Spanned for SyntaxToken {
|
|
fn span(&self) -> crate::diagnostics::Span {
|
|
crate::diagnostics::Span::new(self.text_range().start().into())
|
|
}
|
|
}
|
|
|
|
// Actual parser
|
|
pub fn parse(source: &str) -> (SyntaxNode, Diagnostics) {
|
|
let mut p = DefaultParser::new(source);
|
|
document::parse_document(&mut p);
|
|
(SyntaxNode::new_root(p.builder.finish()), p.diags)
|
|
}
|
|
|
|
#[allow(dead_code)]
|
|
pub fn parse_tokens(tokens: Vec<Token>) -> (SyntaxNode, Diagnostics) {
|
|
let mut p = DefaultParser::from(tokens);
|
|
document::parse_document(&mut p);
|
|
(SyntaxNode::new_root(p.builder.finish()), p.diags)
|
|
}
|