biome/xtask/codegen/html.ungram
Emanuele Stoppa 1a27277a48
feat(html/parser): text expressions (#6802)
Co-authored-by: mdevils <176898+mdevils@users.noreply.github.com>
Co-authored-by: dyc3 <1808807+dyc3@users.noreply.github.com>
2025-07-09 21:30:06 +01:00

176 lines
3.8 KiB
Text

// HTML Un-Grammar.
//
// This grammar specifies the structure of HTML's concrete syntax tree.
// It does not specify parsing rules (ambiguities, precedence, etc are out of scope).
// Tokens are processed -- contextual keywords are recognised, compound operators glued.
//
// Legend:
//
// // -- comment
// Name = -- non-terminal definition
// 'ident' -- token (terminal)
// A B -- sequence
// A | B -- alternation
// A* -- zero or more repetition
// (A (',' A)* ','?) -- repetition of node A separated by ',' and allowing a trailing comma
// (A (',' A)*) -- repetition of node A separated by ',' without a trailing comma
// A? -- zero or one repetition
// (A) -- same as A
// label:A -- suggested name for field of AST node
// NOTES
//
// - SyntaxNode, SyntaxToken and SyntaxElement will be stripped from the codegen
// - Bogus nodes are special nodes used to keep track of broken code; they are
// not part of the grammar but they will appear inside the green tree
///////////////
// BOGUS NODES
///////////////
// SyntaxElement is a generic data structure that tracks both nodes and tokens,
// for the cases where we care about both kinds.
//
// Bogus* nodes need to yield both tokens and nodes without discrimination,
// and their children need to yield nodes and tokens as well.
// For this reason the rule is declared as the self-reference
// SyntaxElement = SyntaxElement. As stated in the notes at the top of this file,
// SyntaxElement is stripped from the codegen.
SyntaxElement = SyntaxElement
// Bogus nodes. Each one is an untyped sequence of SyntaxElement, i.e. any mix
// of nodes and tokens, used to keep track of broken code.
//
// Generic bogus node; no other rule in this file refers to it.
HtmlBogus = SyntaxElement*
// Bogus alternative listed in AnyHtmlElement.
HtmlBogusElement = SyntaxElement*
// Bogus alternative listed in AnyHtmlAttribute.
HtmlBogusAttribute = SyntaxElement*
// Bogus alternative listed in AnyAstroFrontmatterElement.
AstroBogusFrontmatter = SyntaxElement*
// Root node of a document. Its slots, in order:
//   bom         -- optional 'UNICODE_BOM' token
//   frontmatter -- optional Astro frontmatter (well-formed or bogus)
//   directive   -- optional HtmlDirective (e.g. <!DOCTYPE html>)
//   html        -- list of top-level elements; may be empty
//   eof         -- mandatory 'EOF' token
HtmlRoot =
bom: 'UNICODE_BOM'?
frontmatter: AnyAstroFrontmatterElement?
directive: HtmlDirective?
html: HtmlElementList
eof: 'EOF'
// Document type declaration.
//
// <!DOCTYPE html>
// ^^^^^^^^^^^^^^^
//
// Only '<', '!', the 'doctype' keyword and '>' are mandatory; every other slot
// is optional.
// NOTE(review): quirk, public_id and system_id presumably cover the legacy
// DOCTYPE forms (e.g. PUBLIC "..." "..."); confirm against the parser.
// public_id and system_id use the same token kind, so only their position
// tells them apart.
HtmlDirective =
'<'
'!'
doctype: 'doctype'
html: 'html'?
quirk: 'html_literal'?
public_id: 'html_string_literal'?
system_id: 'html_string_literal'?
'>'
// ==================================
// Elements (AKA tags)
// ==================================
// Zero or more elements, with no separator between them.
// Used for the top level of the document (HtmlRoot.html) and for the children
// of an HtmlElement.
HtmlElementList = AnyHtmlElement*
// Everything that can appear as an item of an HtmlElementList: tags
// (self-closing or with a closing tag), content (plain content or text
// expressions), comments, CDATA sections, and the bogus node for broken code.
AnyHtmlElement =
HtmlSelfClosingElement
| HtmlElement
| AnyHtmlContent
| HtmlComment
| HtmlCdataSection
| HtmlBogusElement
// Content found between tags: either plain content (HtmlContent) or one of the
// text expression forms.
AnyHtmlContent =
HtmlContent
| AnyHtmlTextExpression
// The supported text expression forms: double curly braces
// (HtmlTextExpression) or single curly braces (SvelteTextExpression).
AnyHtmlTextExpression =
HtmlTextExpression
| SvelteTextExpression
// Text expression delimited by double curly braces.
//
// {{ expression }}
// ^^^^^^^^^^^^^^^^
//
// The expression is kept as a single mandatory 'html_literal' token; this
// grammar gives it no further structure.
HtmlTextExpression =
'{{'
expression: 'html_literal'
'}}'
// Text expression delimited by single curly braces.
//
// {expression}
// ^^^^^^^^^^^^
//
// As in HtmlTextExpression, the expression is a single mandatory
// 'html_literal' token.
SvelteTextExpression =
'{'
expression: 'html_literal'
'}'
// Element made of a single tag, with no children and no closing tag.
//
// <a />
//
// The '/' token is optional, so this rule also covers a tag written without it.
// NOTE(review): presumably that is how void elements such as <br> are
// represented; confirm against the parser.
HtmlSelfClosingElement =
'<'
name: HtmlTagName
attributes: HtmlAttributeList
'/'?
'>'
// Element with an opening tag, children and a closing tag.
// All three slots are mandatory in the grammar. The children are themselves an
// HtmlElementList, which is what makes elements nest.
HtmlElement =
opening_element: HtmlOpeningElement
children: HtmlElementList
closing_element: HtmlClosingElement
// Opening tag of an HtmlElement: '<', the tag name, the attribute list
// (possibly empty) and '>'.
//
// <a href="">
// ^^        ^
HtmlOpeningElement =
'<'
name: HtmlTagName
attributes: HtmlAttributeList
'>'
// Closing tag of an HtmlElement. '<' and '/' are two separate tokens; the tag
// takes a name but no attributes.
//
// </a>
HtmlClosingElement =
'<'
'/'
name: HtmlTagName
'>'
// Comment. The text between the delimiters is a single mandatory
// 'html_literal' token.
//
// <!-- comment -->
HtmlComment =
'<!--'
content: 'html_literal'
'-->'
// CDATA section. The text between the delimiters is a single mandatory
// 'html_literal' token.
//
// <![CDATA[example]]>
// Reference: https://html.spec.whatwg.org/multipage/syntax.html#cdata-sections
HtmlCdataSection =
'<![CDATA['
content: 'html_literal'
']]>'
// Astro frontmatter as referenced by HtmlRoot.frontmatter: either the
// well-formed node or its bogus counterpart for broken code.
AnyAstroFrontmatterElement =
AstroFrontmatterElement
| AstroBogusFrontmatter
// Astro frontmatter block: an opening '---' fence, an optional content token
// and a closing '---' fence.
//
// ---
// content
// ---
//
// The content is optional, so a block holding only the two fences still
// matches this rule.
AstroFrontmatterElement =
l_fence: '---'
content: 'html_literal'?
r_fence: '---'
// ==================================
// Attributes
// ==================================
// Zero or more attributes, with no separator token between them.
// Used by HtmlSelfClosingElement and HtmlOpeningElement.
HtmlAttributeList = AnyHtmlAttribute*
// An item of an HtmlAttributeList: either a well-formed attribute or the bogus
// node for broken code.
AnyHtmlAttribute =
HtmlAttribute
| HtmlBogusAttribute
// A single attribute: a mandatory name followed by an optional initializer
// ('=' plus a value). An attribute written as a bare name has no initializer.
HtmlAttribute =
name: HtmlAttributeName
initializer: HtmlAttributeInitializerClause?
// The '=' token and the value that follow an attribute name.
//
// <a href="">
//        ^^^
//
// The value is always an HtmlString, i.e. an 'html_string_literal' token.
HtmlAttributeInitializerClause =
'='
value: HtmlString
// Single-token wrapper nodes. Each one exposes its token through a `value` field.
//
// Attribute value; the only wrapper over 'html_string_literal'.
HtmlString = value: 'html_string_literal'
// Tag name, used by the self-closing, opening and closing element rules.
HtmlTagName = value: 'html_literal'
// Attribute name, used by HtmlAttribute.
HtmlAttributeName = value: 'html_literal'
// Plain content, one of the AnyHtmlContent alternatives.
HtmlContent = value: 'html_literal'