biome/xtask/codegen/markdown.ungram
2025-03-13 16:13:06 +00:00

240 lines
4.5 KiB
Text

// Markdown Un-Grammar.
//
// This grammar specifies the structure of Markdown's concrete syntax tree.
// It does not specify parsing rules (ambiguities, precedence, etc are out of scope).
// Tokens are processed -- contextual keywords are recognised, compound operators glued.
//
// Legend:
//
// // -- comment
// Name = -- non-terminal definition
// 'ident' -- token (terminal)
// A B -- sequence
// A | B -- alternation
// A* -- zero or more repetition
// (A (',' A)* ','?) -- repetition of node A separated by ',' and allowing a trailing comma
// (A (',' A)*) -- repetition of node A separated by ',' without a trailing comma
// A? -- zero or one repetition
// (A) -- same as A
// label:A -- suggested name for field of AST node
// NOTES
//
// - SyntaxNode, SyntaxToken and SyntaxElement will be stripped from the codegen
// - Bogus nodes are special nodes used to keep track of broken code; they are
// not part of the grammar but they will appear inside the green tree
///////////////
// BOGUS NODES
///////////////
// SyntaxElement is a generic data structure that tracks both nodes and tokens,
// for the cases where we care about both kinds.
//
// Bogus* nodes need to yield both tokens and nodes without discrimination,
// and their children need to yield nodes and tokens as well.
// For this reason, SyntaxElement is declared in terms of itself:
SyntaxElement = SyntaxElement
// Needed by the infrastructure.
// Bogus node for broken code: zero or more elements (nodes or tokens) of any kind.
MdBogus = SyntaxElement*
// A whole document: an optional Unicode byte-order-mark token, the list of
// blocks, and the end-of-file token.
MdDocument =
bom: 'UNICODE_BOM'?
value: MdBlockList
eof: 'EOF'
// Zero or more blocks.
MdBlockList = AnyMdBlock*
// A block is either a leaf block or a container block.
AnyMdBlock =
AnyLeafBlock
| AnyContainerBlock
// Leaf blocks: thematic breaks, headers ('#' form and setext form), code blocks,
// HTML blocks, link blocks and paragraphs.
AnyLeafBlock =
MdThematicBreakBlock
| MdHeader
| MdSetextHeader
| AnyCodeBlock
| MdHtmlBlock
| MdLinkBlock
| MdParagraph
// Container blocks: quotes, bullet list items and ordered list items.
AnyContainerBlock =
MdQuote
| MdBulletListItem
| MdOrderListItem
// Header written with '#' markers.
// `before` holds the hashes in front of the content, `after` the hashes behind it;
// as far as this grammar is concerned both lists may be empty and the content is optional.
// ## Lorem
// ^^^^^^^^
MdHeader =
before: MdHashList
content: MdParagraph?
after: MdHashList
// Zero or more MdHash nodes.
MdHashList = MdHash*
// A single '#' token.
MdHash = '#'
// Setext header: a line of text underlined with '=' or '-', e.g.
// foo
// ===
// bar
// ---
// NOTE(review): the rule only wraps MdParagraph; the underline has no field of its own here.
MdSetextHeader = MdParagraph
// A code block is either an indented code block or a fenced code block.
AnyCodeBlock =
MdIndentCodeBlock
| MdFencedCodeBlock
// Indented code block: a list of indented lines.
//     code
// ^^^^^^^^
// The spaces before "code" are intentional: they are the indentation that makes the line code.
MdIndentCodeBlock =
lines: MdIndentedCodeLineList
// Zero or more indented code lines.
MdIndentedCodeLineList = MdIndentedCodeLine*
// One line of an indented code block: its indentation followed by its text.
MdIndentedCodeLine =
indentation: MdIndent
content: MdTextual
// Fenced code block:
// ```shell
//
// ```
// Fields, in order: the opening fence, the list of names that follows it
// (`shell` in the example), a hard line, the code text, another hard line,
// and the closing fence. Only the backtick fence token appears in this rule.
MdFencedCodeBlock =
l_fence: '```'
code_list: MdCodeNameList
l_hard_line: MdHardLine
content: MdTextual
r_hard_line: MdHardLine
r_fence: '```'
// MdTextual nodes separated by ',' with no trailing comma (see the legend at the top of the file).
MdCodeNameList = (MdTextual (',' MdTextual)*)
// HTML block, represented as a single MdTextual node.
MdHtmlBlock = MdTextual
// Link block made of a label, a URL and an optional title, each an MdTextual.
// NOTE(review): presumably models a link reference definition such as
// `[label]: url "title"` — confirm; no bracket or colon tokens appear in this rule.
MdLinkBlock =
label: MdTextual
url: MdTextual
title: MdTextual?
// Quote wrapping a single block.
// NOTE(review): no quote-marker token (e.g. '>') is part of this rule.
MdQuote = AnyMdBlock
// A bullet list item wraps a bullet list.
MdBulletListItem = MdBulletList
// NOTE(review): an ordered list item wraps MdBulletList as well, while the
// MdOrderList rule defined in this file is not referenced by any rule visible here —
// confirm whether this should point at an ordered-list node instead.
MdOrderListItem = MdBulletList
// Zero or more bullets.
MdBulletList = MdBullet*
// A single bullet: the marker token ('-' or '*'), a textual token labelled
// `space`, then the inline content.
// - Hey!
// ^^^^^^
// NOTE(review): `space` uses the general 'md_textual_literal' token — presumably the
// whitespace after the marker; confirm.
// NOTE(review): CommonMark also allows '+' as a bullet list marker; it is not accepted here.
MdBullet =
bullet: ('-' | '*')
space: 'md_textual_literal'
content: MdInlineItemList
// NOTE(review): despite its name this is a list of code blocks (AnyCodeBlock*), and no
// rule visible in this file refers to it — looks like a placeholder; confirm before relying on it.
MdOrderList = AnyCodeBlock*
// A paragraph: a list of inline items followed by a (required) hard line.
// Example with two paragraphs:
//
// Any block paragraph
//
// Another block paragraph
MdParagraph =
list: MdInlineItemList
hard_line: MdHardLine
// Zero or more inline items.
MdInlineItemList = AnyMdInline*
// Everything that can appear inside an inline item list: inline code, emphasis,
// italic, links, images, HTML, hard lines, soft breaks and plain text.
// NOTE(review): MdHtmlBlock is listed here and is also an alternative of AnyLeafBlock.
AnyMdInline =
MdInlineCode
| MdInlineEmphasis
| MdInlineItalic
| MdInlineLink
| MdInlineImage
| MdHtmlBlock
| MdHardLine
| MdSoftBreak
| MdTextual
// Italic text: inline content between single '*' or '_' delimiters.
// The grammar does not require the opening and closing delimiter to be the same character.
// *italic*
// ^^^^^^^^
MdInlineItalic =
l_fence: ('*' | '_')
content: MdInlineItemList
r_fence: ('*' | '_')
// Bold text: inline content between double '**' or '__' delimiters.
// The grammar does not require the opening and closing delimiter to be the same token.
// **bold**
// ^^^^^^^^
MdInlineEmphasis =
l_fence: ('**' | '__')
content: MdInlineItemList
r_fence: ('**' | '__')
// Inline code: inline content between two single-backtick tokens.
// `code`
// ^^^^^^
MdInlineCode =
l_tick: '`'
content: MdInlineItemList
r_tick: '`'
// Inline link: the link text in square brackets followed by the link source in parentheses.
// Both parts are inline item lists; the bracket and parenthesis tokens are unlabelled.
// [text](href)
// ^^^^^^^^^^^^
MdInlineLink =
'['
text: MdInlineItemList
']'
'('
source: MdInlineItemList
')'
// Image wrapped in an outer pair of square brackets, optionally followed by a link part.
// Inside the brackets: '!', the alt part and the source part.
// NOTE(review): the outer '[' and ']' are required by this rule, so a bare
// `![alt](image)` does not fit this shape as written — confirm how it is represented.
// [![alt](image)](link)
// ^^^^^^^^^^^^^^^^^^^^^
MdInlineImage =
'['
'!'
alt: MdInlineImageAlt
source: MdInlineImageSource
']'
link: MdInlineImageLink?
// Alt part of an image: inline content in square brackets.
// [![alt](image)](link)
//   ^^^^^
MdInlineImageAlt =
'['
content: MdInlineItemList
']'
// Source part of an image: inline content in parentheses.
// [![alt](image)](link)
//        ^^^^^^^
MdInlineImageSource =
'('
content: MdInlineItemList
')'
// Link part that may follow an image: inline content in parentheses.
// [![alt](image)](link)
//                ^^^^^^
MdInlineImageLink =
'('
content: MdInlineItemList
')'
// Thematic break, wrapping a single 'md_thematic_break_literal' token. Examples:
// ***
// ---
// ___
// https://spec.commonmark.org/0.31.2/#thematic-breaks
// https://spec.commonmark.org/0.31.2/#container-blocks-and-leaf-blocks
MdThematicBreakBlock = value: 'md_thematic_break_literal'
// Single-token nodes: each one wraps exactly one literal token in a `value` field.
// Hard line token.
MdHardLine = value: 'md_hard_line_literal'
// Soft break token.
MdSoftBreak = value: 'md_soft_break_literal'
// Textual token.
MdTextual = value: 'md_textual_literal'
// Indentation chunk token (used by MdIndentedCodeLine).
MdIndent = value: 'md_indent_chunk_literal'