dev: add link support of typlite (#452)

* dev: extend ast to work

* dev: support link

* dev: reimplement it

* dev: add nest testing

* dev: rearrange code

* dev: rearrange code
This commit is contained in:
Myriad-Dreamin 2024-07-23 20:40:55 +08:00 committed by GitHub
parent f8c4218534
commit 379359a19f
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 471 additions and 119 deletions

2
Cargo.lock generated
View file

@ -4129,6 +4129,8 @@ checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825"
name = "typlite"
version = "0.11.16"
dependencies = [
"comemo 0.4.0",
"ecow 0.2.2",
"insta",
"typst-syntax 0.11.1",
]

View file

@ -12,6 +12,8 @@ repository.workspace = true
[dependencies]
typst-syntax.workspace = true
ecow.workspace = true
comemo.workspace = true
[dev-dependencies]
insta.workspace = true

View file

@ -1,8 +1,16 @@
//! # Typlite
mod library;
pub mod scopes;
mod value;
use scopes::Scopes;
use value::{ArgGetter, Value};
use std::borrow::Cow;
use std::fmt::Write;
use ecow::{eco_format, EcoString};
use typst_syntax::{
ast::{self, AstNode},
Source, SyntaxKind, SyntaxNode,
@ -13,7 +21,10 @@ type Result<T, Err = Cow<'static, str>> = std::result::Result<T, Err>;
/// Task builder for converting a typst document to Markdown.
#[derive(Debug, Clone)]
pub struct Typlite {
root: SyntaxNode,
/// The document to convert.
main: Source,
/// Whether to enable GFM (GitHub Flavored Markdown) features.
gfm: bool,
}
impl Typlite {
@ -23,31 +34,48 @@ impl Typlite {
/// use typlite::Typlite;
/// let content = "= Hello, World";
/// let res = Typlite::new_with_content(content).convert();
/// assert_eq!(res, Ok("# Hello, World".to_string()));
/// assert_eq!(res, Ok("# Hello, World".into()));
/// ```
pub fn new_with_content(content: &str) -> Self {
let root = typst_syntax::parse(content);
Self { root }
let main = Source::detached(content);
Self { main, gfm: false }
}
/// Create a new Typlite instance from a [`Source`].
///
/// This is useful when you have a [`Source`] instance and you can avoid
/// reparsing the content.
pub fn new_with_src(src: Source) -> Self {
let root = src.root().clone();
Self { root }
pub fn new_with_src(main: Source) -> Self {
Self { main, gfm: false }
}
/// Convert the content to a markdown string.
pub fn convert(self) -> Result<String> {
let mut res = String::new();
Self::convert_to(&self.root, &mut res)?;
pub fn convert(self) -> Result<EcoString> {
let mut res = EcoString::new();
let mut worker = TypliteWorker {
gfm: self.gfm,
scopes: library::library(),
};
worker.convert_to(self.main.root(), &mut res)?;
Ok(res)
}
}
/// Mutable state carried through one conversion of a syntax tree to Markdown.
struct TypliteWorker {
/// Copied from `Typlite::gfm`; when set, `link` emits the node text unchanged
/// instead of wrapping it in `[..](..)`.
gfm: bool,
/// Scopes consulted by `func_call` to resolve the callee identifier.
scopes: Scopes<Value>,
}
impl TypliteWorker {
/// Convert `node` into a freshly allocated Markdown string.
///
/// This is the allocating counterpart of `convert_to`: it creates an empty
/// buffer, lets `convert_to` fill it, and hands the buffer back on success.
pub fn convert(&mut self, node: &SyntaxNode) -> Result<EcoString> {
    let mut markdown = EcoString::new();
    self.convert_to(node, &mut markdown).map(|()| markdown)
}
/// Convert the content to a markdown string.
pub fn convert_to(node: &SyntaxNode, s: &mut String) -> Result<()> {
pub fn convert_to(&mut self, node: &SyntaxNode, s: &mut EcoString) -> Result<()> {
use SyntaxKind::*;
match node.kind() {
RawLang | RawDelim | RawTrimmed => Err("converting clause")?,
@ -57,12 +85,22 @@ impl Typlite {
Eof | None => Ok(()),
// Non-leaf nodes
Math => Self::reduce(node, s),
Markup => Self::reduce(node, s),
Code => Self::reduce(node, s),
CodeBlock => Self::reduce(node, s),
ContentBlock => Self::reduce(node, s),
Parenthesized => Self::reduce(node, s),
Math => self.reduce(node, s),
Markup => self.reduce(node, s),
Code => self.reduce(node, s),
CodeBlock => {
let code_block: ast::CodeBlock = node.cast().unwrap();
self.convert_to(code_block.body().to_untyped(), s)
}
ContentBlock => {
let content_block: ast::ContentBlock = node.cast().unwrap();
self.convert_to(content_block.body().to_untyped(), s)
}
Parenthesized => {
let parenthesized: ast::Parenthesized = node.cast().unwrap();
self.convert_to(parenthesized.expr().to_untyped(), s)
}
// Text nodes
Text | Space | Linebreak | Parbreak => Self::str(node, s),
@ -71,20 +109,20 @@ impl Typlite {
Escape => Self::escape(node, s),
Shorthand => Self::shorthand(node, s),
SmartQuote => Self::str(node, s),
Strong => Self::strong(node, s),
Emph => Self::emph(node, s),
Strong => self.strong(node, s),
Emph => self.emph(node, s),
Raw => Self::raw(node, s),
Link => Self::link(node, s),
Link => self.link(node, s),
Label => Self::label(node, s),
Ref => Self::label_ref(node, s),
RefMarker => Self::ref_marker(node, s),
Heading => Self::heading(node, s),
Heading => self.heading(node, s),
HeadingMarker => Self::str(node, s),
ListItem => Self::list_item(node, s),
ListItem => self.list_item(node, s),
ListMarker => Self::str(node, s),
EnumItem => Self::enum_item(node, s),
EnumItem => self.enum_item(node, s),
EnumMarker => Self::str(node, s),
TermItem => Self::term_item(node, s),
TermItem => self.term_item(node, s),
TermMarker => Self::str(node, s),
Equation => Self::equation(node, s),
MathIdent => Self::str(node, s),
@ -154,11 +192,22 @@ impl Typlite {
Include => Self::str(node, s),
As => Self::str(node, s),
LetBinding => self.let_binding(node, s),
FieldAccess => self.field_access(node, s),
FuncCall => Self::absorb(self.func_call(node), s),
Contextual => self.contextual(node, s),
// Clause nodes
Named => Ok(()),
Keyed => Ok(()),
Unary => Ok(()),
Binary => Ok(()),
Spread => Ok(()),
ImportItems => Ok(()),
RenamedImportItem => Ok(()),
Closure => Ok(()),
Args => Ok(()),
Params => Ok(()),
// Ignored code expressions
Ident => Ok(()),
@ -171,28 +220,20 @@ impl Typlite {
Dict => Ok(()),
// Ignored code expressions
FieldAccess => Ok(()),
FuncCall => Ok(()),
Args => Ok(()),
Spread => Ok(()),
Closure => Ok(()),
Params => Ok(()),
LetBinding => Ok(()),
SetRule => Ok(()),
ShowRule => Ok(()),
Contextual => Ok(()),
Destructuring => Ok(()),
DestructAssignment => Ok(()),
Conditional => Ok(()),
WhileLoop => Ok(()),
ForLoop => Ok(()),
ModuleImport => Ok(()),
ImportItems => Ok(()),
RenamedImportItem => Ok(()),
ModuleInclude => Ok(()),
LoopBreak => Ok(()),
LoopContinue => Ok(()),
FuncReturn => Ok(()),
Destructuring => Ok(()),
DestructAssignment => Ok(()),
ModuleImport => Ok(()),
ModuleInclude => Ok(()),
// Ignored comments
LineComment => Ok(()),
@ -200,61 +241,74 @@ impl Typlite {
}
}
fn reduce(node: &SyntaxNode, s: &mut String) -> Result<()> {
fn reduce(&mut self, node: &SyntaxNode, s: &mut EcoString) -> Result<()> {
for child in node.children() {
Self::convert_to(child, s)?;
self.convert_to(child, s)?;
}
Ok(())
}
fn char(arg: char, s: &mut String) -> Result<()> {
/// Append the text carried by `u` to `v`.
///
/// If `u` is an error, nothing is appended and the error is returned as-is.
fn absorb(u: Result<EcoString>, v: &mut EcoString) -> Result<()> {
    u.map(|text| v.push_str(text.as_str()))
}
fn char(arg: char, s: &mut EcoString) -> Result<()> {
s.push(arg);
Ok(())
}
fn str(node: &SyntaxNode, s: &mut String) -> Result<()> {
fn str(node: &SyntaxNode, s: &mut EcoString) -> Result<()> {
s.push_str(node.clone().into_text().as_str());
Ok(())
}
fn escape(node: &SyntaxNode, s: &mut String) -> Result<()> {
/// Turn an evaluated value into the text that is written to the output.
///
/// `Value::Content` yields its text unchanged; any other value is rendered
/// through its `Debug` representation.
fn value(res: Value) -> EcoString {
    match res {
        Value::Content(text) => text,
        other => eco_format!("{other:?}"),
    }
}
fn escape(node: &SyntaxNode, s: &mut EcoString) -> Result<()> {
// todo: escape characters
Self::str(node, s)
}
fn shorthand(node: &SyntaxNode, s: &mut String) -> Result<()> {
fn shorthand(node: &SyntaxNode, s: &mut EcoString) -> Result<()> {
// todo: shorthands
Self::str(node, s)
}
fn strong(node: &SyntaxNode, s: &mut String) -> Result<()> {
fn strong(&mut self, node: &SyntaxNode, s: &mut EcoString) -> Result<()> {
let strong = node.cast::<ast::Strong>().unwrap();
s.push_str("**");
Self::convert_to(strong.body().to_untyped(), s)?;
self.convert_to(strong.body().to_untyped(), s)?;
s.push_str("**");
Ok(())
}
fn emph(node: &SyntaxNode, s: &mut String) -> Result<()> {
fn emph(&mut self, node: &SyntaxNode, s: &mut EcoString) -> Result<()> {
let emph = node.cast::<ast::Emph>().unwrap();
s.push('_');
Self::convert_to(emph.body().to_untyped(), s)?;
self.convert_to(emph.body().to_untyped(), s)?;
s.push('_');
Ok(())
}
fn heading(node: &SyntaxNode, s: &mut String) -> Result<()> {
fn heading(&mut self, node: &SyntaxNode, s: &mut EcoString) -> Result<()> {
let heading = node.cast::<ast::Heading>().unwrap();
let level = heading.depth();
for _ in 0..level.get() {
s.push('#');
}
s.push(' ');
Self::convert_to(heading.body().to_untyped(), s)
self.convert_to(heading.body().to_untyped(), s)
}
fn raw(node: &SyntaxNode, s: &mut String) -> Result<()> {
fn raw(node: &SyntaxNode, s: &mut EcoString) -> Result<()> {
let raw = node.cast::<ast::Raw>().unwrap();
if raw.block() {
return Self::str(node, s);
@ -267,42 +321,53 @@ impl Typlite {
Ok(())
}
fn link(node: &SyntaxNode, s: &mut String) -> Result<()> {
fn link(&mut self, node: &SyntaxNode, s: &mut EcoString) -> Result<()> {
// GFM supports autolinks
if self.gfm {
return Self::str(node, s);
}
s.push('[');
Self::str(node, s)?;
s.push(']');
s.push('(');
Self::str(node, s)?;
s.push(')');
Ok(())
}
fn label(node: &SyntaxNode, s: &mut EcoString) -> Result<()> {
Self::str(node, s)
}
fn label(node: &SyntaxNode, s: &mut String) -> Result<()> {
fn label_ref(node: &SyntaxNode, s: &mut EcoString) -> Result<()> {
Self::str(node, s)
}
fn label_ref(node: &SyntaxNode, s: &mut String) -> Result<()> {
fn ref_marker(node: &SyntaxNode, s: &mut EcoString) -> Result<()> {
Self::str(node, s)
}
fn ref_marker(node: &SyntaxNode, s: &mut String) -> Result<()> {
Self::str(node, s)
fn list_item(&mut self, node: &SyntaxNode, s: &mut EcoString) -> Result<()> {
self.reduce(node, s)
}
fn list_item(node: &SyntaxNode, s: &mut String) -> Result<()> {
Self::reduce(node, s)
}
fn enum_item(node: &SyntaxNode, s: &mut String) -> Result<()> {
fn enum_item(&mut self, node: &SyntaxNode, s: &mut EcoString) -> Result<()> {
let enum_item = node.cast::<ast::EnumItem>().unwrap();
if let Some(num) = enum_item.number() {
write!(s, "{num}. ").map_err(|_| "cannot write enum item number")?;
} else {
s.push_str("1. ");
}
Self::convert_to(enum_item.body().to_untyped(), s)
self.convert_to(enum_item.body().to_untyped(), s)
}
fn term_item(node: &SyntaxNode, s: &mut String) -> Result<()> {
Self::reduce(node, s)
fn term_item(&mut self, node: &SyntaxNode, s: &mut EcoString) -> Result<()> {
self.reduce(node, s)
}
#[cfg(not(feature = "texmath"))]
fn equation(node: &SyntaxNode, s: &mut String) -> Result<()> {
fn equation(node: &SyntaxNode, s: &mut EcoString) -> Result<()> {
let equation = node.cast::<ast::Equation>().unwrap();
#[rustfmt::skip]
@ -315,60 +380,44 @@ impl Typlite {
Ok(())
}
/// Placeholder for `let` bindings: nothing is evaluated and nothing is written.
fn let_binding(&self, node: &SyntaxNode, s: &mut EcoString) -> Result<()> {
    // Both arguments are intentionally unused for now.
    let _ = (node, s);
    Ok(())
}
/// Placeholder for field accesses: nothing is evaluated and nothing is written.
fn field_access(&self, node: &SyntaxNode, s: &mut EcoString) -> Result<()> {
    // Both arguments are intentionally unused for now.
    let _ = (node, s);
    Ok(())
}
/// Evaluate a function call node and return the text it produces.
///
/// Only calls whose callee is a plain identifier are evaluated: the
/// identifier is looked up in `self.scopes` and must resolve to a
/// `Value::RawFunc`, which is then invoked with an `ArgGetter` over the
/// call's arguments. Every other callee expression (field accesses included)
/// currently yields an empty string.
///
/// # Errors
/// Propagates the lookup error from `self.scopes.get`, fails with
/// "callee is not a function" when the resolved value is not a
/// `Value::RawFunc`, and propagates any error returned by the function.
///
/// # Panics
/// Panics if `node` cannot be cast to `ast::FuncCall`.
fn func_call(&mut self, node: &SyntaxNode) -> Result<EcoString> {
let c: ast::FuncCall = node.cast().unwrap();
// Resolve the callee; unsupported callee shapes return early with no output.
let callee = match c.callee() {
ast::Expr::Ident(callee) => self.scopes.get(callee.get()),
ast::Expr::FieldAccess(..) => return Ok(EcoString::new()),
_ => return Ok(EcoString::new()),
}?;
let Value::RawFunc(func) = callee else {
return Err("callee is not a function")?;
};
// The function receives the worker itself so it can convert nested content.
Ok(Self::value(func(ArgGetter::new(self, c.args()))?))
}
/// Placeholder for `context` expressions: nothing is evaluated and nothing is written.
fn contextual(&self, node: &SyntaxNode, s: &mut EcoString) -> Result<()> {
    // Both arguments are intentionally unused for now.
    let _ = (node, s);
    Ok(())
}
}
#[cfg(test)]
mod tests {
use super::*;
fn conv(s: &str) -> String {
Typlite::new_with_content(s.trim()).convert().unwrap()
}
#[test]
fn test_converted() {
insta::assert_snapshot!(conv(r###"
= Hello, World!
This is a typst document.
"###), @r###"
# Hello, World!
This is a typst document.
"###);
insta::assert_snapshot!(conv(r###"
Some inlined raw `a`, ```c b```
"###), @"Some inlined raw `a`, `b`");
insta::assert_snapshot!(conv(r###"
- Some *item*
- Another _item_
"###), @r###"
- Some **item**
- Another _item_
"###);
insta::assert_snapshot!(conv(r###"
+ A
+ B
"###), @r###"
1. A
1. B
"###);
insta::assert_snapshot!(conv(r###"
2. A
+ B
"###), @r###"
2. A
1. B
"###);
#[cfg(not(feature = "texmath"))]
insta::assert_snapshot!(conv(r###"
$
1/2 + 1/3 = 5/6
$
"###), @r###"
```typ
$
1/2 + 1/3 = 5/6
$
```
"###);
}
}
mod tests;

View file

@ -0,0 +1,17 @@
use super::*;
use ecow::eco_format;
use value::*;
/// Build the scopes holding the built-in functions.
///
/// Currently only `link` is registered.
pub fn library() -> Scopes<Value> {
    let mut builtins = Scopes::new();
    // Coerce the function item to the `RawFunc` pointer type that `Value` converts from.
    let link_fn: RawFunc = link;
    builtins.define("link", link_fn);
    builtins
}
/// Evaluate a `link` call into a Markdown inline link.
///
/// Reads the `dest` argument as a string and then the `body` argument as
/// content (each either named or the next positional argument) and renders
/// them as `[body](dest)`.
pub fn link(mut args: ArgGetter) -> Result<Value> {
    // `dest` must be fetched first so it takes the first positional argument.
    let target = get_pos_named!(args, dest: EcoString);
    let label = get_pos_named!(args, body: Content);
    let markdown = eco_format!("[{label}]({target})");
    Ok(Value::Content(markdown))
}

View file

@ -0,0 +1,105 @@
//! Variable scopes.
use super::Result;
use std::{borrow::Cow, collections::HashMap};
/// One level of variable bindings, keyed by variable name.
#[derive(Debug, Clone)]
pub struct Scope<T> {
    map: HashMap<String, T>,
}

impl<T> Default for Scope<T> {
    /// An empty scope.
    fn default() -> Self {
        Scope {
            map: HashMap::new(),
        }
    }
}

impl<T> Scope<T> {
    /// Create a scope without any bindings.
    pub fn new() -> Self {
        Self::default()
    }

    /// Bind `name` to `val`, replacing any value previously bound to that name.
    pub fn define(&mut self, name: String, val: T) {
        let _replaced = self.map.insert(name, val);
    }

    /// Look up `var` and borrow its value, if it is bound in this scope.
    pub fn get(&self, var: &str) -> Option<&T> {
        let Self { map } = self;
        map.get(var)
    }

    /// Look up `var` and mutably borrow its value, if it is bound in this scope.
    pub fn get_mut(&mut self, var: &str) -> Option<&mut T> {
        let Self { map } = self;
        map.get_mut(var)
    }
}
/// A stack of scopes.
///
/// `top` is the active (innermost) scope. `scopes` holds the scopes that were
/// active before each `enter`, oldest first, so lookups walk it in reverse.
#[derive(Debug, Clone)]
pub struct Scopes<T> {
    /// The active scope.
    pub top: Scope<T>,
    /// The stack of lower scopes.
    pub scopes: Vec<Scope<T>>,
}

// Implemented by hand: `#[derive(Default)]` would add a `T: Default` bound,
// although an empty stack of scopes never needs to construct a `T`.
impl<T> Default for Scopes<T> {
    fn default() -> Self {
        Self::new()
    }
}

impl<T> Scopes<T> {
    /// Create a new, empty hierarchy of scopes.
    pub fn new() -> Self {
        Self {
            top: Scope::new(),
            scopes: vec![],
        }
    }

    /// Enter a new scope.
    ///
    /// The current top scope is pushed onto the stack and replaced by an
    /// empty one.
    pub fn enter(&mut self) {
        self.scopes.push(std::mem::take(&mut self.top));
    }

    /// Exit the topmost scope, discarding its bindings.
    ///
    /// # Panics
    /// Panics if no scope was entered.
    pub fn exit(&mut self) {
        self.top = self.scopes.pop().expect("no pushed scope");
    }

    /// Try to access a variable immutably.
    ///
    /// The top scope is searched first, then the lower scopes from the most
    /// recently entered to the oldest.
    ///
    /// # Errors
    /// Returns an "unknown variable" error if no scope binds `var`.
    pub fn get(&self, var: &str) -> Result<&T> {
        std::iter::once(&self.top)
            .chain(self.scopes.iter().rev())
            .find_map(|scope| scope.get(var))
            .ok_or_else(|| unknown_variable(var))
    }

    /// Try to access a variable immutably in math.
    ///
    /// Math currently has no lookup rules of its own, so this resolves
    /// exactly like [`Self::get`].
    pub fn get_in_math(&self, var: &str) -> Result<&T> {
        self.get(var)
    }

    /// Try to access a variable mutably.
    ///
    /// Uses the same search order as [`Self::get`].
    ///
    /// # Errors
    /// Returns an "unknown variable" error if no scope binds `var`.
    pub fn get_mut(&mut self, var: &str) -> Result<&mut T> {
        std::iter::once(&mut self.top)
            .chain(self.scopes.iter_mut().rev())
            .find_map(|scope| scope.get_mut(var))
            .ok_or_else(|| unknown_variable(var))
    }

    /// Define a variable in the current (top) scope.
    pub fn define(&mut self, arg: &str, v: impl Into<T>) {
        self.top.define(arg.to_string(), v.into());
    }
}
/// Build the error reported when a lookup finds no binding for `var`.
fn unknown_variable(var: &str) -> Cow<'static, str> {
    let message = format!("unknown variable: {var}");
    message.into()
}

View file

@ -0,0 +1,54 @@
mod link;
use super::*;
/// Convert a typst snippet to Markdown, panicking if the conversion fails.
///
/// Leading and trailing whitespace is trimmed from the input first.
fn conv(s: &str) -> EcoString {
    let source = s.trim();
    let task = Typlite::new_with_content(source);
    task.convert().unwrap()
}
/// Snapshot tests for basic markup; each case compares the converted output
/// against the inline snapshot following the `@`.
#[test]
fn test_converted() {
// Heading followed by plain text.
insta::assert_snapshot!(conv(r###"
= Hello, World!
This is a typst document.
"###), @r###"
# Hello, World!
This is a typst document.
"###);
// Inline raw text, with and without a language tag.
insta::assert_snapshot!(conv(r###"
Some inlined raw `a`, ```c b```
"###), @"Some inlined raw `a`, `b`");
// Bullet list containing strong and emphasized text.
insta::assert_snapshot!(conv(r###"
- Some *item*
- Another _item_
"###), @r###"
- Some **item**
- Another _item_
"###);
// Auto-numbered enum items are all written as `1.`.
insta::assert_snapshot!(conv(r###"
+ A
+ B
"###), @r###"
1. A
1. B
"###);
// An explicit enum number is kept; the auto-numbered item still becomes `1.`.
insta::assert_snapshot!(conv(r###"
2. A
+ B
"###), @r###"
2. A
1. B
"###);
// Without the `texmath` feature, equations are emitted as a `typ` code fence.
#[cfg(not(feature = "texmath"))]
insta::assert_snapshot!(conv(r###"
$
1/2 + 1/3 = 5/6
$
"###), @r###"
```typ
$
1/2 + 1/3 = 5/6
$
```
"###);
}

View file

@ -0,0 +1,18 @@
use super::*;
/// Links without nested markup: a bare URL and a `#link` call with a content body.
#[test]
fn test_simple() {
// A bare URL is expected to use the URL as both link text and destination.
insta::assert_snapshot!(conv(r###"
https://example.com
"###), @"[https://example.com](https://example.com)");
// `#link(dest)[body]` is expected to render as `[body](dest)`.
insta::assert_snapshot!(conv(r###"
#link("https://example.com")[Content]
"###), @"[Content](https://example.com)");
}
/// Markup nested inside a link body is converted as well (here `*..*` to `**..**`).
#[test]
fn test_nested() {
insta::assert_snapshot!(conv(r###"
#link("https://example.com")[Reverse *the World*]
"###), @"[Reverse **the World**](https://example.com)");
}

105
crates/typlite/src/value.rs Normal file
View file

@ -0,0 +1,105 @@
use core::fmt;
use crate::*;
/// Signature of a built-in function: it pulls its arguments from an
/// [`ArgGetter`] and returns the resulting [`Value`].
pub type RawFunc = fn(ArgGetter) -> Result<Value>;
/// A value stored in the scopes or returned by a built-in function.
#[derive(Debug)]
pub enum Value {
    /// A built-in function.
    RawFunc(RawFunc),
    /// Text content.
    Content(EcoString),
}

impl From<RawFunc> for Value {
    /// Wrap a built-in function pointer as a value.
    fn from(func: RawFunc) -> Self {
        Value::RawFunc(func)
    }
}
/// Text wrapper used as an argument type: evaluating a node as `Content`
/// stores the worker's conversion of that node.
pub struct Content(pub EcoString);

impl fmt::Display for Content {
    /// Write the wrapped text verbatim.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let Content(text) = self;
        f.write_str(text.as_str())
    }
}
/// Gives a built-in function access to the arguments of the call being evaluated.
pub struct ArgGetter<'a> {
    /// The worker evaluating the call; used to evaluate argument nodes.
    pub worker: &'a mut TypliteWorker,
    /// The argument list of the call.
    pub args: ast::Args<'a>,
    /// Positional arguments not yet consumed, stored in reverse order so that
    /// `pop` yields them first to last.
    pub pos: Vec<&'a SyntaxNode>,
}

impl<'a> ArgGetter<'a> {
    /// Collect the positional arguments of `args` and pair them with `worker`.
    pub fn new(worker: &'a mut TypliteWorker, args: ast::Args<'a>) -> Self {
        let mut pos: Vec<&'a SyntaxNode> = Vec::new();
        for item in args.items() {
            if let ast::Arg::Pos(expr) = item {
                pos.push(expr.to_untyped());
            }
        }
        // Reverse so the first positional argument sits at the end, ready to be popped.
        pos.reverse();
        Self { worker, args, pos }
    }

    /// Fetch the syntax node of the argument called `key`.
    ///
    /// A named argument with that name wins; otherwise the next unconsumed
    /// positional argument is taken.
    ///
    /// # Errors
    /// Fails if there is neither a matching named argument nor a remaining
    /// positional one.
    pub fn get(&mut self, key: &str) -> Result<&'a SyntaxNode> {
        // find named
        let by_name = self.args.items().find_map(|item| match item {
            ast::Arg::Named(named) if named.name().get() == key => {
                Some(named.expr().to_untyped())
            }
            _ => None,
        });
        if let Some(node) = by_name {
            return Ok(node);
        }

        // find positional
        self.pos
            .pop()
            .ok_or_else(|| format!("missing positional arguments: {key}").into())
    }

    /// Evaluate `node` as a `T` using the worker.
    pub fn parse<T: Eval<'a>>(&mut self, node: &'a SyntaxNode) -> Result<T> {
        <T as Eval<'a>>::eval(node, self.worker)
    }
}
// [attr] key: ty
//
// Fetch the argument `key` from an `ArgGetter` and evaluate it as `ty`.
//
// The identifier `key` is stringified and passed to `ArgGetter::get`; the
// returned node is then handed to `ArgGetter::parse::<ty>`. Both steps use
// `?`, so the macro can only be used inside a function whose return type
// accepts those errors.
macro_rules! get_pos_named {
(
$args:expr,
$key:ident: $ty:ty
) => {{
let raw = $args.get(stringify!($key))?;
$args.parse::<$ty>(raw)?
}};
}
// Re-export so the macro can be imported by path from this module.
pub(crate) use get_pos_named;
/// Evaluate an expression.
///
/// Implemented by every type that a function argument can be read as.
pub trait Eval<'a>: Sized {
/// Evaluate the expression to the output value.
///
/// `node` is the argument's syntax node; `vm` is the worker performing the
/// conversion, available for evaluating nested content.
fn eval(node: &'a SyntaxNode, vm: &mut TypliteWorker) -> Result<Self>;
}
/// Identity evaluation: hands back the raw syntax node without touching the worker.
impl<'a> Eval<'a> for &'a SyntaxNode {
fn eval(node: &'a SyntaxNode, _vm: &mut TypliteWorker) -> Result<Self> {
Ok(node)
}
}
/// Evaluates a string literal node to the string it denotes.
impl<'a> Eval<'a> for EcoString {
    /// # Errors
    /// Fails with "expected string, found ..." if `node` is not a string literal.
    fn eval(node: &'a SyntaxNode, _vm: &mut TypliteWorker) -> Result<Self> {
        let Some(literal) = node.cast::<ast::Str>() else {
            return Err(format!("expected string, found {:?}", node.kind()).into());
        };
        Ok(literal.get())
    }
}
impl<'a> Eval<'a> for Content {
fn eval(node: &'a SyntaxNode, vm: &mut TypliteWorker) -> Result<Self> {
Ok(Self(vm.convert(node)?))
}
}