From f35d1056ffa909b7fe834e7faa8aa91888d49a00 Mon Sep 17 00:00:00 2001 From: Myriad-Dreamin <35292584+Myriad-Dreamin@users.noreply.github.com> Date: Wed, 30 Apr 2025 22:12:25 +0800 Subject: [PATCH] feat: reimplement typlite by html export (#1684) * dev: init markdown file * dev: typlite element derive * feat: pass tests refactor lib.rs to separated files (#1692) feat(typlite): Docx export and export markdown in cmark-writer (#1698) * feat: docx export support * refactor: simplify DocxConverter structure and improve content handling * tests: add binary insta for docx * feat: add MathBlock style and improve frame rendering in DocxConverter * fix: enhance paragraph creation(silly method) * fix: enhance math equation rendering * use md5 instead of docx binary * feat: enhance list numbering and paragraph handling in DocxConverter * feat: add all_elements test * refactor * reimpl md export in cmark-writer * feat: add support for highlight tag in MarkdownConverter * feat: refactor LaTeXConverter to improve element processing and add new methods * fmt * Refactor DOCX converter to improve list handling and document structure - Introduced separate methods for creating ordered and unordered list numbering. - Enhanced list management by tracking next numbering IDs. - Consolidated paragraph and run management within the DocxConverter. - Improved image processing with better error handling and placeholder support. - Streamlined the handling of various HTML elements, including headings, lists, and images. - Added functionality for processing captions and preformatted blocks. - Updated methods for processing inline styles and links. 
* feat: update cmark-writer to version 0.2.0 * feat: refactor code block handling in DOCX converter for improved readability * refactor: refactor DOCX converter to enhance document structure * refactor docx to separated files * update instas * fmt * chore: update cmark-writer version to 0.3.0 * fix: ol custom value * feat: table and grid processing * use cmark-writer's ast node for consistency * fix: update snapshot hashes for document generation tests * fix: add preamble * update snapshot hashes * refactor DOCX conversion: Split writer functionality into separate module, enhance image processing, and clean up utility functions * update comments in LaTeX and Markdown converters for clarity and consistency * fmt * delete utils * feat: support figure node by custom node in cmark-writer * fix * fix: frame * feat: enhance table conversion logic in MarkdownConverter * refactor: simplify FigureNode implementation by removing CustomNode trait * chore: update cmark-writer to version 0.5.0 * fix: update figure and raw inline snapshots for consistency * fix: update snapshot hashes and correct caption reference in markdown.typ * refactor proj structure * feat: update CompileArgs to support multiple output files and remove debug option * docs: update README to clarify usage of multiple output formats and comment out feature section * remove DocxConverter module * feat: impl assets-path feature and add ExternalFrameNode for handling external frames and update writers to support it * feat: enhance HTML element conversion to include attributes and children handling * feat: update cmark-writer to 0.6.1 and refactor related code * fix: update snapshots for figure caption, list, outline, and docx generation * feat: refactor HTML element conversion to use create_html_element method and enhance table processing * fix * feat: add HighlightNode for highlighted text and integrate with HTML to AST parser and LaTeX writer * refactor * update tests Co-Authored-By: Hong Jiarong * feat: 
revert latex/docx conversions * fix: warnings * bad: convert docs * build: remove other cargo deps * build: update cargo.lock * test: update snapshot * chore: remove useless parser trait * feat: annotate v1 * feat: annotate v2 * test: update snapshot * question: is it a bug? * test: update bad snapshot --------- Co-authored-by: Hong Jiarong --- Cargo.lock | 23 + crates/tinymist-analysis/src/docs/tidy.rs | 18 +- crates/tinymist-derive/src/lib.rs | 54 + .../fixtures/docs/snaps/docs@blocky2.typ.snap | 3 +- .../docs/snaps/docs@multiple_line.typ.snap | 3 +- .../snaps/test@annotate_dict_param.typ.snap | 3 +- .../snaps/test@annotate_dict_param2.typ.snap | 3 +- .../snaps/test@annotate_docs_error.typ.snap | 2 +- .../hover/snaps/test@module_alias.typ.snap | 3 +- .../hover/snaps/test@module_path.typ.snap | 3 +- .../hover/snaps/test@module_var.typ.snap | 3 +- .../hover/snaps/test@render_equation.typ.snap | 2 +- .../test@render_equation_no_html.typ.snap | 2 +- crates/tinymist-query/src/tests.rs | 7 +- crates/typlite/Cargo.toml | 3 + crates/typlite/README.md | 8 +- crates/typlite/src/attributes.rs | 121 +++ crates/typlite/src/common.rs | 110 ++ crates/typlite/src/error.rs | 31 +- .../typlite/src/fixtures/docs/nest_list.typ | 10 +- .../snaps/convert_docs@nest_list.typ.snap | 28 +- .../docs/snaps/convert_docs@tidy.typ.snap | 32 +- crates/typlite/src/fixtures/docs/tidy.typ | 24 +- .../integration/snaps/convert@base.typ.snap | 13 +- .../integration/snaps/convert@enum.typ.snap | 12 +- .../integration/snaps/convert@enum2.typ.snap | 12 +- .../snaps/convert@figure_caption.typ.snap | 14 +- .../snaps/convert@figure_image.typ.snap | 10 + .../snaps/convert@figure_image_alt.typ.snap | 10 + .../integration/snaps/convert@image.typ.snap | 10 + .../snaps/convert@image_alt.typ.snap | 10 + .../integration/snaps/convert@link.typ.snap | 10 + .../integration/snaps/convert@link2.typ.snap | 10 + .../integration/snaps/convert@link3.typ.snap | 10 + .../integration/snaps/convert@list.typ.snap | 10 + 
.../snaps/convert@math_block.typ.snap | 12 +- .../snaps/convert@math_block2.typ.snap | 12 +- .../snaps/convert@math_inline.typ.snap | 12 +- .../snaps/convert@outline.typ.snap | 75 +- .../snaps/convert@raw_inline.typ.snap | 10 + .../integration/snaps/convert@table.typ.snap | 21 +- crates/typlite/src/lib.rs | 946 +++--------------- crates/typlite/src/library.rs | 2 + crates/typlite/src/main.rs | 100 +- crates/typlite/src/markdown-typst.toml | 5 + crates/typlite/src/markdown.typ | 151 +++ crates/typlite/src/parser/core.rs | 293 ++++++ crates/typlite/src/parser/inline.rs | 98 ++ crates/typlite/src/parser/list.rs | 88 ++ crates/typlite/src/parser/media.rs | 74 ++ crates/typlite/src/parser/mod.rs | 9 + crates/typlite/src/parser/table.rs | 190 ++++ crates/typlite/src/tags.rs | 43 + crates/typlite/src/tests.rs | 51 +- crates/typlite/src/value.rs | 6 + crates/typlite/src/worker.rs | 824 +++++++++++++++ crates/typlite/src/writer/markdown.rs | 31 + crates/typlite/src/writer/mod.rs | 25 + 58 files changed, 2755 insertions(+), 950 deletions(-) create mode 100644 crates/typlite/src/attributes.rs create mode 100644 crates/typlite/src/common.rs create mode 100644 crates/typlite/src/markdown-typst.toml create mode 100644 crates/typlite/src/markdown.typ create mode 100644 crates/typlite/src/parser/core.rs create mode 100644 crates/typlite/src/parser/inline.rs create mode 100644 crates/typlite/src/parser/list.rs create mode 100644 crates/typlite/src/parser/media.rs create mode 100644 crates/typlite/src/parser/mod.rs create mode 100644 crates/typlite/src/parser/table.rs create mode 100644 crates/typlite/src/tags.rs create mode 100644 crates/typlite/src/worker.rs create mode 100644 crates/typlite/src/writer/markdown.rs create mode 100644 crates/typlite/src/writer/mod.rs diff --git a/Cargo.lock b/Cargo.lock index 2858f102..cbd2243f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -579,6 +579,26 @@ dependencies = [ "roff", ] +[[package]] +name = "cmark-writer" +version = "0.6.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "79ac3be73d18979a75f69c6f73bae74d2ff9fa61e9c3539732040429ad2659b8" +dependencies = [ + "cmark-writer-macros", +] + +[[package]] +name = "cmark-writer-macros" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "23ad8476b03e933c3ff38eefa7c96a9c155e4402621918852f27cf67cb637b99" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.100", +] + [[package]] name = "cobs" version = "0.2.3" @@ -4743,15 +4763,18 @@ version = "0.13.12" dependencies = [ "base64", "clap", + "cmark-writer", "comemo", "ecow", "insta", "regex", "tinymist-analysis", + "tinymist-derive", "tinymist-project", "tinymist-std", "tinymist-tests", "typst", + "typst-html", "typst-svg", "typst-syntax", ] diff --git a/crates/tinymist-analysis/src/docs/tidy.rs b/crates/tinymist-analysis/src/docs/tidy.rs index 6cad9bfa..3e8eaac0 100644 --- a/crates/tinymist-analysis/src/docs/tidy.rs +++ b/crates/tinymist-analysis/src/docs/tidy.rs @@ -47,7 +47,11 @@ pub fn identify_pat_docs(converted: &str) -> StrResult { loop { if matching_return_ty { matching_return_ty = false; - let Some(w) = line.trim_start().strip_prefix("->") else { + let line = line.trim_start(); + let type_line = line + .strip_prefix("-\\>") + .or_else(|| line.strip_prefix("->")); + let Some(w) = type_line else { // break_line = Some(i); continue; }; @@ -204,7 +208,7 @@ mod tests { - `types` (optional): A list of accepted argument types. - `default` (optional): Default value for this argument. -See @@show-module() for outputting the results of this function. +See show-module() for outputting the results of this function. - content (string): Content of `.typ` file to analyze for docstrings. - name (string): The name for the module. @@ -223,7 +227,7 @@ See @@show-module() for outputting the results of this function. - `types` (optional): A list of accepted argument types. - `default` (optional): Default value for this argument. 
- See @@show-module() for outputting the results of this function. + See show-module() for outputting the results of this function. << docs >>return string @@ -258,7 +262,7 @@ See @@show-module() for outputting the results of this function. insta::assert_snapshot!(func(r###"These again are dictionaries with the keys - `description` (optional): The description for the argument. -See @@show-module() for outputting the results of this function. +See show-module() for outputting the results of this function. - name (string): The name for the module. - label-prefix (auto, string): The label-prefix for internal function @@ -270,7 +274,7 @@ See @@show-module() for outputting the results of this function. These again are dictionaries with the keys - `description` (optional): The description for the argument. - See @@show-module() for outputting the results of this function. + See show-module() for outputting the results of this function. << docs >>return string @@ -289,10 +293,10 @@ See @@show-module() for outputting the results of this function. #[test] fn test_identify_tidy_docs3() { - insta::assert_snapshot!(var(r###"See @@show-module() for outputting the results of this function. + insta::assert_snapshot!(var(r###"See show-module() for outputting the results of this function. -> string"###), @r" >> docs: - See @@show-module() for outputting the results of this function. + See show-module() for outputting the results of this function. 
<< docs >>return string diff --git a/crates/tinymist-derive/src/lib.rs b/crates/tinymist-derive/src/lib.rs index 91928ea7..fac13f12 100644 --- a/crates/tinymist-derive/src/lib.rs +++ b/crates/tinymist-derive/src/lib.rs @@ -96,3 +96,57 @@ pub fn gen_decl_enum(input: TokenStream) -> TokenStream { TokenStream::from(expanded) } + +#[proc_macro_derive(TypliteAttr)] +pub fn gen_typlite_element(input: TokenStream) -> TokenStream { + // Parse the input tokens into a syntax tree + let input = parse_macro_input!(input as DeriveInput); + + // extract the fields from the struct + let field_parsers = match &input.data { + syn::Data::Struct(data) => match &data.fields { + syn::Fields::Named(fields) => fields + .named + .iter() + .map(|f| { + let name = f.ident.as_ref().unwrap(); + + let ty = &f.ty; + + quote! { + md_attr::#name => { + let value = <#ty>::parse_attr(content)?; + result.#name = value; + } + } + }) + .collect::>(), + syn::Fields::Unnamed(_) => panic!("unnamed fields are not supported"), + syn::Fields::Unit => panic!("unit structs are not supported"), + }, + _ => panic!("only structs are supported"), + }; + + let input_name = &input.ident; + + // generate parse trait + let expanded = quote! 
{ + impl TypliteAttrsParser for #input_name { + fn parse(attrs: &HtmlAttrs) -> Result { + let mut result = Self::default(); + for (name, content) in attrs.0.iter() { + match *name { + #(#field_parsers)* + _ => { + return Err(format!("unknown attribute: {name}").into()); + } + } + } + + Ok(result) + } + } + }; + + TokenStream::from(expanded) +} diff --git a/crates/tinymist-query/src/fixtures/docs/snaps/docs@blocky2.typ.snap b/crates/tinymist-query/src/fixtures/docs/snaps/docs@blocky2.typ.snap index 6cbe835f..3147b7d6 100644 --- a/crates/tinymist-query/src/fixtures/docs/snaps/docs@blocky2.typ.snap +++ b/crates/tinymist-query/src/fixtures/docs/snaps/docs@blocky2.typ.snap @@ -2,7 +2,6 @@ source: crates/tinymist-query/src/analysis.rs expression: "snap.join(\"\\n\")" input_file: crates/tinymist-query/src/fixtures/docs/blocky2.typ -snapshot_kind: text --- = docstings -Pattern(..)@41..42 in /s0.typ -> DocString { docs: Some("This is X\nNote: This is not Y"), var_bounds: {}, vars: {}, res_ty: None } +Pattern(..)@41..42 in /s0.typ -> DocString { docs: Some("This is X Note: This is not Y"), var_bounds: {}, vars: {}, res_ty: None } diff --git a/crates/tinymist-query/src/fixtures/docs/snaps/docs@multiple_line.typ.snap b/crates/tinymist-query/src/fixtures/docs/snaps/docs@multiple_line.typ.snap index a078ab50..f2e20e37 100644 --- a/crates/tinymist-query/src/fixtures/docs/snaps/docs@multiple_line.typ.snap +++ b/crates/tinymist-query/src/fixtures/docs/snaps/docs@multiple_line.typ.snap @@ -2,7 +2,6 @@ source: crates/tinymist-query/src/analysis.rs expression: "snap.join(\"\\n\")" input_file: crates/tinymist-query/src/fixtures/docs/multiple_line.typ -snapshot_kind: text --- = docstings -Pattern(..)@45..46 in /s0.typ -> DocString { docs: Some("This is X.\nNote: this is not Y."), var_bounds: {}, vars: {}, res_ty: None } +Pattern(..)@45..46 in /s0.typ -> DocString { docs: Some("This is X. 
Note: this is not Y."), var_bounds: {}, vars: {}, res_ty: None } diff --git a/crates/tinymist-query/src/fixtures/hover/snaps/test@annotate_dict_param.typ.snap b/crates/tinymist-query/src/fixtures/hover/snaps/test@annotate_dict_param.typ.snap index 629ecee9..a6279102 100644 --- a/crates/tinymist-query/src/fixtures/hover/snaps/test@annotate_dict_param.typ.snap +++ b/crates/tinymist-query/src/fixtures/hover/snaps/test@annotate_dict_param.typ.snap @@ -2,9 +2,8 @@ source: crates/tinymist-query/src/hover.rs expression: "JsonRepr::new_redacted(result, &REDACT_LOC)" input_file: crates/tinymist-query/src/fixtures/hover/annotate_dict_param.typ -snapshot_kind: text --- { - "contents": "```typc\nlet show-example(\n ..options: arguments,\n inherited-scope: dictionary = (:),\n) = none;\n```\n\n---\n\n- inherited-scope (dictionary): Definitions that are made available to the entire parsed\n module. This parameter is only used internally.\n\n# Rest Parameters\n\n## options\n\n```typc\ntype: arguments\n```\n\n\n\n# Named Parameters\n\n## inherited-scope\n\n```typc\ntype: dictionary\n```\n\nDefinitions that are made available to the entire parsed\n module. This parameter is only used internally.", + "contents": "```typc\nlet show-example(\n ..options: arguments,\n inherited-scope: dictionary = (:),\n) = none;\n```\n\n---\n\n- inherited-scope (dictionary): Definitions that are made available to the entire parsed module. This parameter is only used internally.\n\n# Rest Parameters\n\n## options\n\n```typc\ntype: arguments\n```\n\n\n\n# Named Parameters\n\n## inherited-scope\n\n```typc\ntype: dictionary\n```\n\nDefinitions that are made available to the entire parsed module. 
This parameter is only used internally.", "range": "7:20:7:32" } diff --git a/crates/tinymist-query/src/fixtures/hover/snaps/test@annotate_dict_param2.typ.snap b/crates/tinymist-query/src/fixtures/hover/snaps/test@annotate_dict_param2.typ.snap index 8ec77837..160e0d9f 100644 --- a/crates/tinymist-query/src/fixtures/hover/snaps/test@annotate_dict_param2.typ.snap +++ b/crates/tinymist-query/src/fixtures/hover/snaps/test@annotate_dict_param2.typ.snap @@ -2,9 +2,8 @@ source: crates/tinymist-query/src/hover.rs expression: "JsonRepr::new_redacted(result, &REDACT_LOC)" input_file: crates/tinymist-query/src/fixtures/hover/annotate_dict_param2.typ -snapshot_kind: text --- { - "contents": "```typc\nlet inherited-scope = dictionary;\n```\n\n---\n\nDefinitions that are made available to the entire parsed\n module. This parameter is only used internally.", + "contents": "```typc\nlet inherited-scope = dictionary;\n```\n\n---\n\nDefinitions that are made available to the entire parsed module. This parameter is only used internally.", "range": "6:21:6:36" } diff --git a/crates/tinymist-query/src/fixtures/hover/snaps/test@annotate_docs_error.typ.snap b/crates/tinymist-query/src/fixtures/hover/snaps/test@annotate_docs_error.typ.snap index a8cfcc48..7ceda3da 100644 --- a/crates/tinymist-query/src/fixtures/hover/snaps/test@annotate_docs_error.typ.snap +++ b/crates/tinymist-query/src/fixtures/hover/snaps/test@annotate_docs_error.typ.snap @@ -4,6 +4,6 @@ expression: "JsonRepr::new_redacted(result, &REDACT_LOC)" input_file: crates/tinymist-query/src/fixtures/hover/annotate_docs_error.typ --- { - "contents": "```typc\nlet speaker-note(\n note: any,\n mode: str = \"typ\",\n setting: (any) => any = Closure(..),\n) = none;\n```\n\n---\n\nSpeaker notes are a way to add additional information to your slides that is not visible to the audience. 
This can be useful for providing additional context or reminders to yourself.\n\n## Example\n\n```typ\n#speaker-note[This is a speaker note]\n\n```\n```\nRender Error\ncompiling node: error: unknown variable: speaker-note at \"/__render__.typ\":214..226\nHint: if you meant to use subtraction, try adding spaces around the minus sign: \\`speaker - note\\`\n\n```\n\n# Positional Parameters\n\n## note\n\n```typc\ntype: \n```\n\n\n\n# Named Parameters\n\n## mode\n\n```typc\ntype: \"typ\"\n```\n\n\n\n## setting (named)\n\n```typc\ntype: (any) => any\n```\n\n", + "contents": "```typc\nlet speaker-note(\n note: any,\n mode: str = \"typ\",\n setting: (any) => any = Closure(..),\n) = none;\n```\n\n---\n\n```\nfailed to parse docs: failed to convert to markdown: convert source for main file: [SourceDiagnostic { severity: Error, span: Span(..), message: \"unknown variable: example\", trace: [Import], hints: [] }]\n```\n\n````typ\nSpeaker notes are a way to add additional information to your slides that is not visible to the audience. 
This can be useful for providing additional context or reminders to yourself.\n\n== Example\n\n#example(```typ\n#speaker-note[This is a speaker note]\n```)\n````\n\n# Positional Parameters\n\n## note\n\n```typc\ntype: \n```\n\n\n\n# Named Parameters\n\n## mode\n\n```typc\ntype: \"typ\"\n```\n\n\n\n## setting (named)\n\n```typc\ntype: (any) => any\n```\n\n", "range": "11:20:11:32" } diff --git a/crates/tinymist-query/src/fixtures/hover/snaps/test@module_alias.typ.snap b/crates/tinymist-query/src/fixtures/hover/snaps/test@module_alias.typ.snap index e3774a20..cb5aeebd 100644 --- a/crates/tinymist-query/src/fixtures/hover/snaps/test@module_alias.typ.snap +++ b/crates/tinymist-query/src/fixtures/hover/snaps/test@module_alias.typ.snap @@ -2,9 +2,8 @@ source: crates/tinymist-query/src/hover.rs expression: "JsonRepr::new_redacted(result, &REDACT_LOC)" input_file: crates/tinymist-query/src/fixtures/hover/module_alias.typ -snapshot_kind: text --- { - "contents": "### Sampled Values\n```typc\n\n```\n\n---\n\n# The Module (Alias)", + "contents": "### Sampled Values\n```typc\n\n```\n\n---\n\n## The Module (Alias)\n", "range": "2:24:2:31" } diff --git a/crates/tinymist-query/src/fixtures/hover/snaps/test@module_path.typ.snap b/crates/tinymist-query/src/fixtures/hover/snaps/test@module_path.typ.snap index 602ff313..a1c48dad 100644 --- a/crates/tinymist-query/src/fixtures/hover/snaps/test@module_path.typ.snap +++ b/crates/tinymist-query/src/fixtures/hover/snaps/test@module_path.typ.snap @@ -2,9 +2,8 @@ source: crates/tinymist-query/src/hover.rs expression: "JsonRepr::new_redacted(result, &REDACT_LOC)" input_file: crates/tinymist-query/src/fixtures/hover/module_path.typ -snapshot_kind: text --- { - "contents": "# Some Module", + "contents": "## Some Module\n", "range": "0:29:0:46" } diff --git a/crates/tinymist-query/src/fixtures/hover/snaps/test@module_var.typ.snap b/crates/tinymist-query/src/fixtures/hover/snaps/test@module_var.typ.snap index 294c1a06..c780cbab 100644 --- 
a/crates/tinymist-query/src/fixtures/hover/snaps/test@module_var.typ.snap +++ b/crates/tinymist-query/src/fixtures/hover/snaps/test@module_var.typ.snap @@ -2,9 +2,8 @@ source: crates/tinymist-query/src/hover.rs expression: "JsonRepr::new_redacted(result, &REDACT_LOC)" input_file: crates/tinymist-query/src/fixtures/hover/module_var.typ -snapshot_kind: text --- { - "contents": "### Sampled Values\n```typc\n\n```\n\n---\n\n# The Module", + "contents": "### Sampled Values\n```typc\n\n```\n\n---\n\n## The Module\n", "range": "2:24:2:30" } diff --git a/crates/tinymist-query/src/fixtures/hover/snaps/test@render_equation.typ.snap b/crates/tinymist-query/src/fixtures/hover/snaps/test@render_equation.typ.snap index 734d3142..fe09ea12 100644 --- a/crates/tinymist-query/src/fixtures/hover/snaps/test@render_equation.typ.snap +++ b/crates/tinymist-query/src/fixtures/hover/snaps/test@render_equation.typ.snap @@ -4,6 +4,6 @@ expression: "JsonRepr::new_redacted(result, &REDACT_LOC)" input_file: crates/tinymist-query/src/fixtures/hover/render_equation.typ --- { - "contents": "```typc\nlet lam(\n A: type,\n B: type,\n) = dictionary;\n```\n\n---\n\nLambda constructor.\n\nTyping Rule:\n\n

\"typst-block\"

\n\n# Positional Parameters\n\n## A\n\n```typc\ntype: type\n```\n\nThe type of the argument.\n - It can be also regarded as the condition of the proposition.\n\n## B (positional)\n\n```typc\ntype: type\n```\n\nThe type of the body.\n - It can be also regarded as the conclusion of the proposition.", + "contents": "```typc\nlet lam(\n A: any,\n B: any,\n) = dictionary;\n```\n\n---\n\nLambda constructor.\n\nTyping Rule:\n\n\"typst-block\"\n\n- A (type): The type of the argument.\n \n - It can be also regarded as the condition of the proposition.\n \n \n- B (type): The type of the body.\n \n - It can be also regarded as the conclusion of the proposition.\n \n \n\n# Positional Parameters\n\n## A\n\n```typc\ntype: \n```\n\n\n\n## B (positional)\n\n```typc\ntype: \n```\n\n", "range": "12:20:12:23" } diff --git a/crates/tinymist-query/src/fixtures/hover/snaps/test@render_equation_no_html.typ.snap b/crates/tinymist-query/src/fixtures/hover/snaps/test@render_equation_no_html.typ.snap index cad05d87..30374354 100644 --- a/crates/tinymist-query/src/fixtures/hover/snaps/test@render_equation_no_html.typ.snap +++ b/crates/tinymist-query/src/fixtures/hover/snaps/test@render_equation_no_html.typ.snap @@ -4,6 +4,6 @@ expression: "JsonRepr::new_redacted(result, &REDACT_LOC)" input_file: crates/tinymist-query/src/fixtures/hover/render_equation_no_html.typ --- { - "contents": "```typc\nlet lam(\n A: type,\n B: type,\n) = dictionary;\n```\n\n---\n\nLambda constructor.\n\nTyping Rule:\n\n```typc\n$ (Γ , x : A ⊢ M : B #h(2em) Γ ⊢ a:B)/(Γ ⊢ λ (x : A) → M : π (x : A) → B) $\n```\n\n# Positional Parameters\n\n## A\n\n```typc\ntype: type\n```\n\nThe type of the argument.\n - It can be also regarded as the condition of the proposition.\n\n## B (positional)\n\n```typc\ntype: type\n```\n\nThe type of the body.\n - It can be also regarded as the conclusion of the proposition.", + "contents": "```typc\nlet lam(\n A: any,\n B: any,\n) = dictionary;\n```\n\n---\n\nLambda constructor.\n\nTyping 
Rule:\n\n\"typst-block\"\n\n- A (type): The type of the argument.\n \n - It can be also regarded as the condition of the proposition.\n \n \n- B (type): The type of the body.\n \n - It can be also regarded as the conclusion of the proposition.\n\n# Positional Parameters\n\n## A\n\n```typc\ntype: \n```\n\n\n\n## B (positional)\n\n```typc\ntype: \n```\n\n", "range": "14:20:14:23" } diff --git a/crates/tinymist-query/src/tests.rs b/crates/tinymist-query/src/tests.rs index 02b54ee7..1f9af72d 100644 --- a/crates/tinymist-query/src/tests.rs +++ b/crates/tinymist-query/src/tests.rs @@ -317,7 +317,12 @@ impl fmt::Display for JsonRepr { let mut ser = Serializer::with_formatter(w, PrettyFormatter::with_indent(b" ")); self.0.serialize(&mut ser).unwrap(); - f.write_str(&String::from_utf8(ser.into_inner().into_inner().unwrap()).unwrap()) + let res = String::from_utf8(ser.into_inner().into_inner().unwrap()).unwrap(); + // replace Span(number) to Span(..) + static REG: LazyLock = + LazyLock::new(|| regex::Regex::new(r#"Span\((\d+)\)"#).unwrap()); + let res = REG.replace_all(&res, "Span(..)"); + f.write_str(&res) } } diff --git a/crates/typlite/Cargo.toml b/crates/typlite/Cargo.toml index e70f1555..541dbc10 100644 --- a/crates/typlite/Cargo.toml +++ b/crates/typlite/Cargo.toml @@ -27,10 +27,13 @@ comemo.workspace = true ecow.workspace = true tinymist-analysis.workspace = true tinymist-std.workspace = true +tinymist-derive.workspace = true tinymist-project = { workspace = true, features = ["lsp"] } typst.workspace = true typst-svg.workspace = true typst-syntax.workspace = true +typst-html.workspace = true +cmark-writer = { version = "0.6.1", features = ["gfm"] } [dev-dependencies] insta.workspace = true diff --git a/crates/typlite/README.md b/crates/typlite/README.md index 272b6ec3..97dcad52 100644 --- a/crates/typlite/README.md +++ b/crates/typlite/README.md @@ -9,9 +9,13 @@ Converts a subset of typst to markdown. 
typlite main.typ # specify output typlite main.typ output.md +# multiple output formats +typlite main.typ output.md output.tex output.docx ``` -## Feature + diff --git a/crates/typlite/src/attributes.rs b/crates/typlite/src/attributes.rs new file mode 100644 index 00000000..f4f01764 --- /dev/null +++ b/crates/typlite/src/attributes.rs @@ -0,0 +1,121 @@ +//! Attributes for HTML elements and parsing + +use ecow::EcoString; +use tinymist_derive::TypliteAttr; +use typst::html::HtmlAttrs; + +use crate::Result; + +/// Tag attributes defined for HTML elements. +pub mod md_attr { + use typst::html::HtmlAttr; + + macro_rules! attrs { + ($($attr:ident -> $name:ident)*) => { + $(#[allow(non_upper_case_globals)] + pub const $attr: HtmlAttr = HtmlAttr::constant( + stringify!($name) + );)* + } + } + + attrs! { + src -> src + alt -> alt + level -> level + dest -> dest + lang -> lang + block -> block + text -> text + value -> value + caption -> caption + } +} + +#[derive(TypliteAttr, Default)] +pub struct HeadingAttr { + pub level: usize, +} + +#[derive(TypliteAttr, Default)] +pub struct ImageAttr { + pub src: EcoString, + pub alt: EcoString, +} + +#[derive(TypliteAttr, Default)] +pub struct FigureAttr { + pub caption: EcoString, +} + +#[derive(TypliteAttr, Default)] +pub struct LinkAttr { + pub dest: EcoString, +} + +#[derive(TypliteAttr, Default)] +pub struct RawAttr { + pub lang: EcoString, + pub block: bool, + pub text: EcoString, +} + +#[derive(TypliteAttr, Default)] +pub struct ListItemAttr { + pub value: Option, +} + +pub trait TypliteAttrsParser { + fn parse(attrs: &HtmlAttrs) -> Result + where + Self: Sized; +} + +pub trait TypliteAttrParser { + fn parse_attr(content: &EcoString) -> Result + where + Self: Sized; +} + +impl TypliteAttrParser for usize { + fn parse_attr(content: &EcoString) -> Result { + Ok(content + .parse::() + .map_err(|_| format!("cannot parse {} as usize", content))?) 
+ } +} + +impl TypliteAttrParser for u32 { + fn parse_attr(content: &EcoString) -> Result { + Ok(content + .parse::() + .map_err(|_| format!("cannot parse {} as u32", content))?) + } +} + +impl TypliteAttrParser for bool { + fn parse_attr(content: &EcoString) -> Result { + Ok(content + .parse::() + .map_err(|_| format!("cannot parse {} as bool", content))?) + } +} + +impl TypliteAttrParser for EcoString { + fn parse_attr(content: &EcoString) -> Result { + Ok(content.clone()) + } +} + +impl TypliteAttrParser for Option +where + T: TypliteAttrParser, +{ + fn parse_attr(content: &EcoString) -> Result { + if content.is_empty() { + Ok(None) + } else { + T::parse_attr(content).map(Some) + } + } +} diff --git a/crates/typlite/src/common.rs b/crates/typlite/src/common.rs new file mode 100644 index 00000000..9b0ec316 --- /dev/null +++ b/crates/typlite/src/common.rs @@ -0,0 +1,110 @@ +//! Common types and interfaces for the conversion system + +use cmark_writer::ast::{CustomNodeWriter, Node}; +use cmark_writer::custom_node; +use cmark_writer::WriteResult; +use ecow::EcoString; +use std::path::PathBuf; + +use crate::Result; + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum ListState { + Ordered, + Unordered, +} + +/// Valid formats for the conversion. 
+#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum Format { + Md, + LaTeX, + Docx, +} + +/// Figure node implementation for all formats +#[derive(Debug, PartialEq, Clone)] +#[custom_node] +pub struct FigureNode { + /// The main content of the figure, can be any block node + pub body: Box, + /// The caption text for the figure + pub caption: String, +} + +impl FigureNode { + fn write_custom(&self, writer: &mut dyn CustomNodeWriter) -> WriteResult<()> { + let mut temp_writer = cmark_writer::writer::CommonMarkWriter::new(); + temp_writer.write(&self.body)?; + let content = temp_writer.into_string(); + writer.write_str(&content)?; + writer.write_str("\n")?; + writer.write_str(&self.caption)?; + Ok(()) + } + + fn is_block_custom(&self) -> bool { + true + } +} + +/// External Frame node for handling frames stored as external files +#[derive(Debug, PartialEq, Clone)] +#[custom_node] +pub struct ExternalFrameNode { + /// The path to the external file containing the frame + pub file_path: PathBuf, + /// Alternative text for the frame + pub alt_text: String, + /// Original SVG data (needed for DOCX that still embeds images) + pub svg_data: String, +} + +impl ExternalFrameNode { + fn write_custom(&self, writer: &mut dyn CustomNodeWriter) -> WriteResult<()> { + // The actual handling is implemented in format-specific writers + writer.write_str(&format!( + "![{}]({})", + self.alt_text, + self.file_path.display() + ))?; + Ok(()) + } + + fn is_block_custom(&self) -> bool { + true + } +} + +/// Highlight node for highlighted text +#[derive(Debug, PartialEq, Clone)] +#[custom_node] +pub struct HighlightNode { + /// The content to be highlighted + pub content: Vec, +} + +impl HighlightNode { + fn write_custom(&self, writer: &mut dyn CustomNodeWriter) -> WriteResult<()> { + let mut temp_writer = cmark_writer::writer::CommonMarkWriter::new(); + for node in &self.content { + temp_writer.write(node)?; + } + let content = temp_writer.into_string(); + 
writer.write_str(&format!("=={}==", content))?; + Ok(()) + } + + fn is_block_custom(&self) -> bool { + false + } +} + +/// Common writer interface for different formats +pub trait FormatWriter { + /// Write AST document to output format + fn write_eco(&mut self, document: &Node, output: &mut EcoString) -> Result<()>; + + /// Write AST document to vector + fn write_vec(&mut self, document: &Node) -> Result>; +} diff --git a/crates/typlite/src/error.rs b/crates/typlite/src/error.rs index 0ecffaa3..6442878b 100644 --- a/crates/typlite/src/error.rs +++ b/crates/typlite/src/error.rs @@ -25,11 +25,32 @@ impl fmt::Debug for Error { } } -impl From for Error -where - T: Into>, -{ - fn from(s: T) -> Self { +impl From for Error { + fn from(e: std::io::Error) -> Self { + Error(Box::new(Repr::Msg(e.to_string().into()))) + } +} + +impl From for Error { + fn from(e: fmt::Error) -> Self { + Error(Box::new(Repr::Msg(e.to_string().into()))) + } +} + +impl From<&'static str> for Error { + fn from(s: &'static str) -> Self { Error(Box::new(Repr::Msg(s.into()))) } } + +impl From for Error { + fn from(s: String) -> Self { + Error(Box::new(Repr::Msg(s.into()))) + } +} + +impl From> for Error { + fn from(s: Cow<'static, str>) -> Self { + Error(Box::new(Repr::Msg(s))) + } +} diff --git a/crates/typlite/src/fixtures/docs/nest_list.typ b/crates/typlite/src/fixtures/docs/nest_list.typ index 1109cc24..bf29a748 100644 --- a/crates/typlite/src/fixtures/docs/nest_list.typ +++ b/crates/typlite/src/fixtures/docs/nest_list.typ @@ -1,11 +1,11 @@ These again are dictionaries with the keys - `description` (optional): The description for the argument. -See @@show-module() for outputting the results of this function. +See show-module() for outputting the results of this function. -- name (string): The name for the module. -- label-prefix (auto, string): The label-prefix for internal function - references. If `auto`, the label-prefix name will be the module name. +- name (string): The name for the module. 
+- label-prefix (auto, string): The label-prefix for internal function + references. If `auto`, the label-prefix name will be the module name. - nested something - nested something 2 --> string \ No newline at end of file +-> string diff --git a/crates/typlite/src/fixtures/docs/snaps/convert_docs@nest_list.typ.snap b/crates/typlite/src/fixtures/docs/snaps/convert_docs@nest_list.typ.snap index eb1f3253..011932c5 100644 --- a/crates/typlite/src/fixtures/docs/snaps/convert_docs@nest_list.typ.snap +++ b/crates/typlite/src/fixtures/docs/snaps/convert_docs@nest_list.typ.snap @@ -3,14 +3,28 @@ source: crates/typlite/src/tests.rs expression: "conv(world, true)" input_file: crates/typlite/src/fixtures/docs/nest_list.typ --- + + + + + + +

These again are dictionaries with the keys

  • (optional): The description for the argument.

See show-module() for outputting the results of this function.

  • name (string): The name for the module.
  • label-prefix (auto, string): The label-prefix for internal function references. If , the label-prefix name will be the module name.

    • nested something
    • nested something 2

-> string

+ + +===== These again are dictionaries with the keys + - `description` (optional): The description for the argument. -See @@show-module() for outputting the results of this function. +See show-module() for outputting the results of this function. -- name (string): The name for the module. -- label-prefix (auto, string): The label-prefix for internal function - references. If `auto`, the label-prefix name will be the module name. - - nested something - - nested something 2 --> string +- name (string): The name for the module. +- label-prefix (auto, string): The label-prefix for internal function references. If `auto`, the label-prefix name will be the module name. + + - nested something + - nested something 2 + + + +-\> string diff --git a/crates/typlite/src/fixtures/docs/snaps/convert_docs@tidy.typ.snap b/crates/typlite/src/fixtures/docs/snaps/convert_docs@tidy.typ.snap index 3e2bb5a9..f7d84859 100644 --- a/crates/typlite/src/fixtures/docs/snaps/convert_docs@tidy.typ.snap +++ b/crates/typlite/src/fixtures/docs/snaps/convert_docs@tidy.typ.snap @@ -3,21 +3,29 @@ source: crates/typlite/src/tests.rs expression: "conv(world, true)" input_file: crates/typlite/src/fixtures/docs/tidy.typ --- + + + + + + +

These again are dictionaries with the keys

  • (optional): The description for the argument.
  • (optional): A list of accepted argument types.
  • (optional): Default value for this argument.

See show-module() for outputting the results of this function.

  • content (string): Content of file to analyze for docstrings.
  • name (string): The name for the module.
  • label-prefix (auto, string): The label-prefix for internal function references. If , the label-prefix name will be the module name.
  • require-all-parameters (boolean): Require that all parameters of a functions are documented and fail if some are not.
  • scope (dictionary): A dictionary of definitions that are then available in all function and parameter descriptions.
  • preamble (string): Code to prepend to all code snippets shown with . This can for instance be used to import something from the scope.

-> string

+ + +===== These again are dictionaries with the keys + - `description` (optional): The description for the argument. -- `types` (optional): A list of accepted argument types. +- `types` (optional): A list of accepted argument types. - `default` (optional): Default value for this argument. -See @@show-module() for outputting the results of this function. +See show-module() for outputting the results of this function. - content (string): Content of `.typ` file to analyze for docstrings. -- name (string): The name for the module. -- label-prefix (auto, string): The label-prefix for internal function - references. If `auto`, the label-prefix name will be the module name. -- require-all-parameters (boolean): Require that all parameters of a - functions are documented and fail if some are not. -- scope (dictionary): A dictionary of definitions that are then available - in all function and parameter descriptions. -- preamble (string): Code to prepend to all code snippets shown with `#example()`. - This can for instance be used to import something from the scope. --> string +- name (string): The name for the module. +- label-prefix (auto, string): The label-prefix for internal function references. If `auto`, the label-prefix name will be the module name. +- require-all-parameters (boolean): Require that all parameters of a functions are documented and fail if some are not. +- scope (dictionary): A dictionary of definitions that are then available in all function and parameter descriptions. +- preamble (string): Code to prepend to all code snippets shown with `#example()`. This can for instance be used to import something from the scope. 
+ +-\> string diff --git a/crates/typlite/src/fixtures/docs/tidy.typ b/crates/typlite/src/fixtures/docs/tidy.typ index d715ef8d..496a2648 100644 --- a/crates/typlite/src/fixtures/docs/tidy.typ +++ b/crates/typlite/src/fixtures/docs/tidy.typ @@ -1,18 +1,18 @@ These again are dictionaries with the keys - `description` (optional): The description for the argument. -- `types` (optional): A list of accepted argument types. +- `types` (optional): A list of accepted argument types. - `default` (optional): Default value for this argument. -See @@show-module() for outputting the results of this function. +See show-module() for outputting the results of this function. - content (string): Content of `.typ` file to analyze for docstrings. -- name (string): The name for the module. -- label-prefix (auto, string): The label-prefix for internal function - references. If `auto`, the label-prefix name will be the module name. -- require-all-parameters (boolean): Require that all parameters of a - functions are documented and fail if some are not. -- scope (dictionary): A dictionary of definitions that are then available - in all function and parameter descriptions. -- preamble (string): Code to prepend to all code snippets shown with `#example()`. - This can for instance be used to import something from the scope. --> string \ No newline at end of file +- name (string): The name for the module. +- label-prefix (auto, string): The label-prefix for internal function + references. If `auto`, the label-prefix name will be the module name. +- require-all-parameters (boolean): Require that all parameters of a + functions are documented and fail if some are not. +- scope (dictionary): A dictionary of definitions that are then available + in all function and parameter descriptions. +- preamble (string): Code to prepend to all code snippets shown with `#example()`. + This can for instance be used to import something from the scope. 
+-> string diff --git a/crates/typlite/src/fixtures/integration/snaps/convert@base.typ.snap b/crates/typlite/src/fixtures/integration/snaps/convert@base.typ.snap index cb197d62..3f52f19d 100644 --- a/crates/typlite/src/fixtures/integration/snaps/convert@base.typ.snap +++ b/crates/typlite/src/fixtures/integration/snaps/convert@base.typ.snap @@ -3,5 +3,16 @@ source: crates/typlite/src/tests.rs expression: "conv(world, false)" input_file: crates/typlite/src/fixtures/integration/base.typ --- -# Hello, World! + + + + + + + Hello, World!

This is a typst document.

+ + +===== +## Hello, World! + This is a typst document. diff --git a/crates/typlite/src/fixtures/integration/snaps/convert@enum.typ.snap b/crates/typlite/src/fixtures/integration/snaps/convert@enum.typ.snap index 09e4dc51..58a690e7 100644 --- a/crates/typlite/src/fixtures/integration/snaps/convert@enum.typ.snap +++ b/crates/typlite/src/fixtures/integration/snaps/convert@enum.typ.snap @@ -3,5 +3,15 @@ source: crates/typlite/src/tests.rs expression: "conv(world, false)" input_file: crates/typlite/src/fixtures/integration/enum.typ --- + + + + + + +
  1. A
  2. B
+ + +===== 1. A -1. B +2. B diff --git a/crates/typlite/src/fixtures/integration/snaps/convert@enum2.typ.snap b/crates/typlite/src/fixtures/integration/snaps/convert@enum2.typ.snap index 48bac7c2..3c71212b 100644 --- a/crates/typlite/src/fixtures/integration/snaps/convert@enum2.typ.snap +++ b/crates/typlite/src/fixtures/integration/snaps/convert@enum2.typ.snap @@ -3,5 +3,15 @@ source: crates/typlite/src/tests.rs expression: "conv(world, false)" input_file: crates/typlite/src/fixtures/integration/enum2.typ --- + + + + + + +
  1. A
  2. B
+ + +===== 2. A -1. B +3. B diff --git a/crates/typlite/src/fixtures/integration/snaps/convert@figure_caption.typ.snap b/crates/typlite/src/fixtures/integration/snaps/convert@figure_caption.typ.snap index 6518a9c3..ac7d02c6 100644 --- a/crates/typlite/src/fixtures/integration/snaps/convert@figure_caption.typ.snap +++ b/crates/typlite/src/fixtures/integration/snaps/convert@figure_caption.typ.snap @@ -3,4 +3,16 @@ source: crates/typlite/src/tests.rs expression: "conv(world, false)" input_file: crates/typlite/src/fixtures/integration/figure_caption.typ --- -![Caption, Content](./fig.svg) + + + + + + + + + +===== +![Content](./fig.svg) + +Caption diff --git a/crates/typlite/src/fixtures/integration/snaps/convert@figure_image.typ.snap b/crates/typlite/src/fixtures/integration/snaps/convert@figure_image.typ.snap index 6ac917b5..c5847e13 100644 --- a/crates/typlite/src/fixtures/integration/snaps/convert@figure_image.typ.snap +++ b/crates/typlite/src/fixtures/integration/snaps/convert@figure_image.typ.snap @@ -3,4 +3,14 @@ source: crates/typlite/src/tests.rs expression: "conv(world, false)" input_file: crates/typlite/src/fixtures/integration/figure_image.typ --- + + + + + + + + + +===== ![](./fig.svg) diff --git a/crates/typlite/src/fixtures/integration/snaps/convert@figure_image_alt.typ.snap b/crates/typlite/src/fixtures/integration/snaps/convert@figure_image_alt.typ.snap index 22fe654c..91fb0321 100644 --- a/crates/typlite/src/fixtures/integration/snaps/convert@figure_image_alt.typ.snap +++ b/crates/typlite/src/fixtures/integration/snaps/convert@figure_image_alt.typ.snap @@ -3,4 +3,14 @@ source: crates/typlite/src/tests.rs expression: "conv(world, false)" input_file: crates/typlite/src/fixtures/integration/figure_image_alt.typ --- + + + + + + + + + +===== ![Content](./fig.svg) diff --git a/crates/typlite/src/fixtures/integration/snaps/convert@image.typ.snap b/crates/typlite/src/fixtures/integration/snaps/convert@image.typ.snap index 72585593..0f8ef76e 100644 --- 
a/crates/typlite/src/fixtures/integration/snaps/convert@image.typ.snap +++ b/crates/typlite/src/fixtures/integration/snaps/convert@image.typ.snap @@ -3,4 +3,14 @@ source: crates/typlite/src/tests.rs expression: "conv(world, false)" input_file: crates/typlite/src/fixtures/integration/image.typ --- + + + + + + + + + +===== ![](./fig.svg) diff --git a/crates/typlite/src/fixtures/integration/snaps/convert@image_alt.typ.snap b/crates/typlite/src/fixtures/integration/snaps/convert@image_alt.typ.snap index d5a32419..c3532e18 100644 --- a/crates/typlite/src/fixtures/integration/snaps/convert@image_alt.typ.snap +++ b/crates/typlite/src/fixtures/integration/snaps/convert@image_alt.typ.snap @@ -3,4 +3,14 @@ source: crates/typlite/src/tests.rs expression: "conv(world, false)" input_file: crates/typlite/src/fixtures/integration/image_alt.typ --- + + + + + + + + + +===== ![Content](./fig.svg) diff --git a/crates/typlite/src/fixtures/integration/snaps/convert@link.typ.snap b/crates/typlite/src/fixtures/integration/snaps/convert@link.typ.snap index bdd27f92..8ae77130 100644 --- a/crates/typlite/src/fixtures/integration/snaps/convert@link.typ.snap +++ b/crates/typlite/src/fixtures/integration/snaps/convert@link.typ.snap @@ -3,4 +3,14 @@ source: crates/typlite/src/tests.rs expression: "conv(world, false)" input_file: crates/typlite/src/fixtures/integration/link.typ --- + + + + + + + https://example.com + + +===== [https://example.com](https://example.com) diff --git a/crates/typlite/src/fixtures/integration/snaps/convert@link2.typ.snap b/crates/typlite/src/fixtures/integration/snaps/convert@link2.typ.snap index ff71d944..d4945ac6 100644 --- a/crates/typlite/src/fixtures/integration/snaps/convert@link2.typ.snap +++ b/crates/typlite/src/fixtures/integration/snaps/convert@link2.typ.snap @@ -3,4 +3,14 @@ source: crates/typlite/src/tests.rs expression: "conv(world, false)" input_file: crates/typlite/src/fixtures/integration/link2.typ --- + + + + + + + Content + + +===== 
[Content](https://example.com) diff --git a/crates/typlite/src/fixtures/integration/snaps/convert@link3.typ.snap b/crates/typlite/src/fixtures/integration/snaps/convert@link3.typ.snap index 064c655d..6d9c8d3d 100644 --- a/crates/typlite/src/fixtures/integration/snaps/convert@link3.typ.snap +++ b/crates/typlite/src/fixtures/integration/snaps/convert@link3.typ.snap @@ -3,4 +3,14 @@ source: crates/typlite/src/tests.rs expression: "conv(world, false)" input_file: crates/typlite/src/fixtures/integration/link3.typ --- + + + + + + + Reverse the World + + +===== [Reverse **the World**](https://example.com) diff --git a/crates/typlite/src/fixtures/integration/snaps/convert@list.typ.snap b/crates/typlite/src/fixtures/integration/snaps/convert@list.typ.snap index c83b3f70..111a9e63 100644 --- a/crates/typlite/src/fixtures/integration/snaps/convert@list.typ.snap +++ b/crates/typlite/src/fixtures/integration/snaps/convert@list.typ.snap @@ -3,5 +3,15 @@ source: crates/typlite/src/tests.rs expression: "conv(world, false)" input_file: crates/typlite/src/fixtures/integration/list.typ --- + + + + + + +
  • Some item
  • Another item
+ + +===== - Some **item** - Another _item_ diff --git a/crates/typlite/src/fixtures/integration/snaps/convert@math_block.typ.snap b/crates/typlite/src/fixtures/integration/snaps/convert@math_block.typ.snap index 24a242eb..14e22234 100644 --- a/crates/typlite/src/fixtures/integration/snaps/convert@math_block.typ.snap +++ b/crates/typlite/src/fixtures/integration/snaps/convert@math_block.typ.snap @@ -3,4 +3,14 @@ source: crates/typlite/src/tests.rs expression: "conv(world, false)" input_file: crates/typlite/src/fixtures/integration/math_block.typ --- -

typst-block

+ + + + + + + redacted-frame + + +===== +typst-block diff --git a/crates/typlite/src/fixtures/integration/snaps/convert@math_block2.typ.snap b/crates/typlite/src/fixtures/integration/snaps/convert@math_block2.typ.snap index e8a342ba..fabadd09 100644 --- a/crates/typlite/src/fixtures/integration/snaps/convert@math_block2.typ.snap +++ b/crates/typlite/src/fixtures/integration/snaps/convert@math_block2.typ.snap @@ -3,4 +3,14 @@ source: crates/typlite/src/tests.rs expression: "conv(world, false)" input_file: crates/typlite/src/fixtures/integration/math_block2.typ --- -

typst-block

+ + + + + + + redacted-frame + + +===== +typst-block diff --git a/crates/typlite/src/fixtures/integration/snaps/convert@math_inline.typ.snap b/crates/typlite/src/fixtures/integration/snaps/convert@math_inline.typ.snap index 6b29b844..6b5971c4 100644 --- a/crates/typlite/src/fixtures/integration/snaps/convert@math_inline.typ.snap +++ b/crates/typlite/src/fixtures/integration/snaps/convert@math_inline.typ.snap @@ -3,4 +3,14 @@ source: crates/typlite/src/tests.rs expression: "conv(world, false)" input_file: crates/typlite/src/fixtures/integration/math_inline.typ --- -typst-block + + + + + + + redacted-frame + + +===== +typst-block diff --git a/crates/typlite/src/fixtures/integration/snaps/convert@outline.typ.snap b/crates/typlite/src/fixtures/integration/snaps/convert@outline.typ.snap index c7d86952..b0501a2b 100644 --- a/crates/typlite/src/fixtures/integration/snaps/convert@outline.typ.snap +++ b/crates/typlite/src/fixtures/integration/snaps/convert@outline.typ.snap @@ -3,4 +3,77 @@ source: crates/typlite/src/tests.rs expression: "conv(world, false)" input_file: crates/typlite/src/fixtures/integration/outline.typ --- -failed to convert to markdown: unknown variable: outline + + + + + + + ContentsHeading 1Heading 2Heading 1Heading 2

This is a link to example.com

Inline has it.

Math inline:

redacted-frame

and block:

redacted-frame
  • First item
  • Second item

    1. First sub-item
    2. Second sub-item

      • First sub-sub-item
First term
First definition
012345678910111213141516171819
+ + +===== +## Contents + +### Heading 1 + +#### Heading 2 + +### Heading 1 + +#### Heading 2 + +[This is a link to example.com](https://example.com) + +Inline `code` has `back-ticks around` it. + +```cs +using System.IO.Compression; + +#pragma warning disable 414, 3021 + +namespace MyApplication +{ + [Obsolete("...")] + class Program : IInterface + { + public static List JustDoIt(int count) + { + Console.WriteLine($"Hello {Name}!"); + return new List(new int[] { 1, 2, 3 }) + } + } +} +``` + +Math inline:typst-blockand block: + +typst-block + +- First item +- Second item + + 1. First sub-item + 2. Second sub-item + + - First sub-sub-item + +
First term
First definition
+ +| 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | +| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | diff --git a/crates/typlite/src/fixtures/integration/snaps/convert@raw_inline.typ.snap b/crates/typlite/src/fixtures/integration/snaps/convert@raw_inline.typ.snap index 5430e139..b2ae43d8 100644 --- a/crates/typlite/src/fixtures/integration/snaps/convert@raw_inline.typ.snap +++ b/crates/typlite/src/fixtures/integration/snaps/convert@raw_inline.typ.snap @@ -3,4 +3,14 @@ source: crates/typlite/src/tests.rs expression: "conv(world, false)" input_file: crates/typlite/src/fixtures/integration/raw_inline.typ --- + + + + + + + Some inlined raw , + + +===== Some inlined raw `a`, `b` diff --git a/crates/typlite/src/fixtures/integration/snaps/convert@table.typ.snap b/crates/typlite/src/fixtures/integration/snaps/convert@table.typ.snap index d9388eee..0dbd35c6 100644 --- a/crates/typlite/src/fixtures/integration/snaps/convert@table.typ.snap +++ b/crates/typlite/src/fixtures/integration/snaps/convert@table.typ.snap @@ -3,4 +3,23 @@ source: crates/typlite/src/tests.rs expression: "conv(world, false)" input_file: crates/typlite/src/fixtures/integration/table.typ --- -failed to convert to markdown: invalid columns argument of type Binary + + + + + + +
012345678910111213141516171819
012345
67891011
121314151617
1819
012345
67891011
121314151617
181901
234
567
8910
111213
141516
171819
+ + +===== +| 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | +| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | + +| 0 | 1 | 2 | 3 | 4 | 5 | +| --- | --- | --- | --- | --- | --- | +| 6 | 7 | 8 | 9 | 10 | 11 | +| 12 | 13 | 14 | 15 | 16 | 17 | +| 18 | 19 | + +
012345
67891011
121314151617
181901
234
567
8910
111213
141516
171819
diff --git a/crates/typlite/src/lib.rs b/crates/typlite/src/lib.rs index 0ff7b079..4008e074 100644 --- a/crates/typlite/src/lib.rs +++ b/crates/typlite/src/lib.rs @@ -1,45 +1,73 @@ //! # Typlite +pub mod attributes; +pub mod common; mod error; pub mod library; +pub mod parser; pub mod scopes; +pub mod tags; pub mod value; +pub mod worker; +pub mod writer; -use core::fmt; -use std::path::{Path, PathBuf}; +use std::cell::RefCell; +use std::path::PathBuf; +use std::str::FromStr; use std::sync::Arc; -use std::{fmt::Write, sync::LazyLock}; pub use error::*; -use base64::Engine; -use scopes::Scopes; +use cmark_writer::ast::Node; +use tinymist_project::base::ShadowApi; use tinymist_project::vfs::WorkspaceResolver; -use tinymist_project::{base::ShadowApi, EntryReader, LspWorld}; -use tinymist_std::path::unix_slash; -use typst::foundations::IntoValue; -use typst::WorldExt; -use typst::{ - foundations::{Bytes, Dict}, - layout::Abs, - utils::LazyHash, - World, -}; -use value::{Args, Value}; +use tinymist_project::{EntryReader, LspWorld, TaskInputs}; +use typst::foundations::Bytes; +use typst::html::HtmlDocument; +use typst_syntax::VirtualPath; -use crate::SyntaxKind::Text; -use ecow::{eco_format, EcoString}; -use typst_syntax::{ - ast::{self, AstNode}, - FileId, Source, SyntaxKind, SyntaxNode, -}; - -pub use typst_syntax as syntax; +use crate::common::Format; +use crate::parser::HtmlToAstParser; +use crate::writer::WriterFactory; +use typst_syntax::FileId; /// The result type for typlite. pub type Result = std::result::Result; pub use tinymist_project::CompileOnceArgs; +pub use tinymist_std; + +#[derive(Debug, Clone)] +pub struct MarkdownDocument { + pub base: HtmlDocument, + feat: TypliteFeat, + ast_cache: RefCell>, +} + +impl MarkdownDocument { + /// Parse HTML document with AST cache. 
+ fn parse(&self) -> Result { + if let Some(ast) = self.ast_cache.borrow().as_ref() { + return Ok(ast.clone()); + } + let parser = HtmlToAstParser::new(self.feat.clone()); + let ast = parser.parse(&self.base.root)?; + *self.ast_cache.borrow_mut() = Some(ast.clone()); + + Ok(ast) + } + + /// Convert the content to a markdown string. + pub fn to_md_string(&self) -> Result { + let mut output = ecow::EcoString::new(); + let ast = self.parse()?; + + let mut writer = WriterFactory::create(Format::Md); + writer.write_eco(&ast, &mut output)?; + + Ok(output) + } +} /// A color theme for rendering the content. The valid values can be checked in [color-scheme](https://developer.mozilla.org/en-US/docs/Web/CSS/color-scheme). #[derive(Debug, Default, Clone, Copy)] @@ -55,8 +83,6 @@ pub struct TypliteFeat { pub color_theme: Option, /// The path of external assets directory. pub assets_path: Option, - /// The path of external assets' source code directory. - pub assets_src_path: Option, /// Allows GFM (GitHub Flavored Markdown) markups. pub gfm: bool, /// Annotate the elements for identification. @@ -72,9 +98,11 @@ pub struct Typlite { /// The universe to use for the conversion. world: Arc, /// library to use for the conversion. - library: Option>>, + library: Option>>, /// Features for the conversion. feat: TypliteFeat, + /// The format to use for the conversion. + format: Format, } impl Typlite { @@ -87,11 +115,12 @@ impl Typlite { world, library: None, feat: Default::default(), + format: Format::Md, } } /// Set library to use for the conversion. - pub fn with_library(mut self, library: Arc>) -> Self { + pub fn with_library(mut self, library: Arc>) -> Self { self.library = Some(library); self } @@ -102,817 +131,80 @@ impl Typlite { self } - /// Convert the content to a markdown string. 
- pub fn convert(self) -> Result { - static DEFAULT_LIB: std::sync::LazyLock>> = - std::sync::LazyLock::new(|| Arc::new(library::library())); + pub fn with_format(mut self, format: Format) -> Self { + self.format = format; + self + } - let main = self.world.entry_state().main(); + /// Convert the content to a markdown string. + pub fn convert(self) -> Result { + match self.format { + Format::Md => self.convert_doc()?.to_md_string(), + _ => Err("format is not supported".into()), + } + } + + /// Convert the content to a markdown document. + pub fn convert_doc(self) -> Result { + let entry = self.world.entry_state(); + let main = entry.main(); let current = main.ok_or("no main file in workspace")?; let world = self.world; - let main = world - .source(current) + if WorkspaceResolver::is_package_file(current) { + return Err("package file is not supported".into()); + } + + let wrap_main_id = current.join("__wrap_md_main.typ"); + let wrap_main_path = world + .path_for_id(wrap_main_id) .map_err(|err| format!("getting source for main file: {err:?}"))?; - let worker = TypliteWorker { - current, - feat: self.feat, - list_depth: 0, - prepend_code: EcoString::new(), - assets_numbering: 0, - scopes: self - .library - .as_ref() - .unwrap_or_else(|| &*DEFAULT_LIB) - .clone(), - world, - }; - - worker.sub_file(main) - } -} - -/// Typlite worker -#[derive(Clone)] -pub struct TypliteWorker { - current: FileId, - scopes: Arc>, - world: Arc, - list_depth: usize, - prepend_code: EcoString, - assets_numbering: usize, - /// Features for the conversion. - pub feat: TypliteFeat, -} - -impl TypliteWorker { - /// Convert the content to a markdown string. 
- pub fn convert(&mut self, node: &SyntaxNode) -> Result { - Ok(Self::value(self.eval(node)?)) - } - - /// Eval the content - pub fn eval(&mut self, node: &SyntaxNode) -> Result { - use SyntaxKind::*; - let res = match node.kind() { - RawLang | RawDelim | RawTrimmed => Err("converting clause")?, - - Math | MathIdent | MathAlignPoint | MathDelimited | MathAttach | MathPrimes - | MathFrac | MathRoot | MathShorthand | MathText => Err("converting math node")?, - - // Error nodes - Error => Err(node.clone().into_text().to_string())?, - None | End => Ok(Value::None), - - // Non-leaf nodes - Markup => self.reduce(node), - Code => self.reduce(node), - Equation => self.equation(node), - CodeBlock => { - let code_block: ast::CodeBlock = node.cast().unwrap(); - self.eval(code_block.body().to_untyped()) - } - ContentBlock => { - let content_block: ast::ContentBlock = node.cast().unwrap(); - self.eval(content_block.body().to_untyped()) - } - Parenthesized => { - let parenthesized: ast::Parenthesized = node.cast().unwrap(); - // self.convert_to(parenthesized.expr().to_untyped(), ) - self.eval(parenthesized.expr().to_untyped()) - } - - // Text nodes - Text | Space | Parbreak => Self::str(node), - Linebreak => Self::char('\n'), - - // Semantic nodes - Escape => Self::escape(node), - Shorthand => Self::shorthand(node), - SmartQuote => Self::str(node), - Strong => self.strong(node), - Emph => self.emph(node), - Raw => Self::raw(node), - Link => self.link(node), - Label => Self::label(node), - Ref => Self::label_ref(node), - RefMarker => Self::ref_marker(node), - Heading => self.heading(node), - HeadingMarker => Self::str(node), - ListItem => self.list_item(node), - ListMarker => Self::str(node), - EnumItem => self.enum_item(node), - EnumMarker => Self::str(node), - TermItem => self.term_item(node), - TermMarker => Self::str(node), - - // Punctuation - // Hash => Self::char('#'), - Hash => Ok(Value::None), - LeftBrace => Self::char('{'), - RightBrace => Self::char('}'), - LeftBracket 
=> Self::char('['), - RightBracket => Self::char(']'), - LeftParen => Self::char('('), - RightParen => Self::char(')'), - Comma => Self::char(','), - Semicolon => Ok(Value::None), - Colon => Self::char(':'), - Star => Self::char('*'), - Underscore => Self::char('_'), - Dollar => Self::char('$'), - Plus => Self::char('+'), - Minus => Self::char('-'), - Slash => Self::char('/'), - Hat => Self::char('^'), - Prime => Self::char('\''), - Dot => Self::char('.'), - Eq => Self::char('='), - Lt => Self::char('<'), - Gt => Self::char('>'), - - // Compound punctuation - EqEq => Self::str(node), - ExclEq => Self::str(node), - LtEq => Self::str(node), - GtEq => Self::str(node), - PlusEq => Self::str(node), - HyphEq => Self::str(node), - StarEq => Self::str(node), - SlashEq => Self::str(node), - Dots => Self::str(node), - Arrow => Self::str(node), - Root => Self::str(node), - - // Keywords - Auto => Self::str(node), - Not => Self::str(node), - And => Self::str(node), - Or => Self::str(node), - Let => Self::str(node), - Set => Self::str(node), - Show => Self::str(node), - Context => Self::str(node), - If => Self::str(node), - Else => Self::str(node), - For => Self::str(node), - In => Self::str(node), - While => Self::str(node), - Break => Self::str(node), - Continue => Self::str(node), - Return => Self::str(node), - Import => Self::str(node), - Include => Self::str(node), - As => Self::str(node), - - LetBinding => self.let_binding(node), - FieldAccess => self.field_access(node), - FuncCall => self.func_call(node), - Contextual => self.contextual(node), - - // Clause nodes - Named => Ok(Value::None), - Keyed => Ok(Value::None), - Unary => Ok(Value::None), - Binary => Ok(Value::None), - Spread => Ok(Value::None), - ImportItems => Ok(Value::None), - ImportItemPath => Ok(Value::None), - RenamedImportItem => Ok(Value::None), - Closure => Ok(Value::None), - Args => Ok(Value::None), - Params => Ok(Value::None), - - // Ignored code expressions - Ident => Ok(Value::None), - Bool => 
Ok(Value::None), - Int => Ok(Value::None), - Float => Ok(Value::None), - Numeric => Ok(Value::None), - Str => Ok(Value::Str({ - let s: ast::Str = node.cast().unwrap(); - s.get() - })), - Array => Ok(Value::None), - Dict => Ok(Value::None), - - // Ignored code expressions - SetRule => Ok(Value::None), - ShowRule => Ok(Value::None), - Destructuring => Ok(Value::None), - DestructAssignment => Ok(Value::None), - - Conditional => Ok(Value::None), - WhileLoop => Ok(Value::None), - ForLoop => Ok(Value::None), - LoopBreak => Ok(Value::None), - LoopContinue => Ok(Value::None), - FuncReturn => Ok(Value::None), - - ModuleImport => Ok(Value::None), - ModuleInclude => self.include(node), - - // Ignored comments - LineComment => Ok(Value::None), - BlockComment => Ok(Value::None), - Shebang => Ok(Value::None), - }; - if res.clone()? == Value::None - && !matches!( - node.kind(), - Ident | Bool | Int | Float | Numeric | Str | Array | Dict - ) - { - self.prepend_code += node.clone().into_text(); - if node.kind() != Hash { - self.prepend_code += "\n" - }; - } - res - } - - fn reduce(&mut self, node: &SyntaxNode) -> Result { - let mut s = EcoString::new(); - - for child in node.children() { - // self.convert_to(child)?; - s.push_str(&Self::value(self.eval(child)?)); - } - - Ok(Value::Content(s)) - } - - pub fn to_raw_block(&mut self, node: &SyntaxNode, inline: bool) -> Result { - let content = node.clone().into_text(); - - let s = if inline { - let mut s = EcoString::with_capacity(content.len() + 2); - s.push_str("`"); - s.push_str(&content); - s.push_str("`"); - s - } else { - let mut s = EcoString::with_capacity(content.len() + 15); - s.push_str("```"); - let lang = match node.cast::() { - Some(ast::Expr::Text(..) | ast::Expr::Space(..)) => "typ", - Some(..) 
=> "typc", - None => "typ", - }; - s.push_str(lang); - s.push('\n'); - s.push_str(&content); - s.push('\n'); - s.push_str("```"); - s - }; - - Ok(Value::Content(s)) - } - - pub fn render( - &mut self, - prepend_node: &SyntaxNode, - node: &SyntaxNode, - inline: bool, - ) -> Result { - self.assets_numbering += 1; - let prepend_code = prepend_node.clone().into_text(); - let code = node.clone().into_text(); - if let Some(assets_src_path) = &self.feat.assets_src_path { - let file_name = assets_src_path - .join(self.assets_numbering.to_string()) - .with_extension("typ"); - if let Err(e) = std::fs::write(&file_name, format!("#{{\n// render_code\n{}\n}}", code)) - { - return Err(format!("failed to write code to file: {}", e).into()); - } - } - self.render_code(&prepend_code, &code, false, "center", "", inline) - } - - pub fn render_code( - &mut self, - prepend_code: &str, - code: &str, - is_markup: bool, - align: &str, - extra_attrs: &str, - inline: bool, - ) -> Result { - let theme = self.feat.color_theme; - - let code_file_name = if let Some(assets_src_path) = &self.feat.assets_src_path { - Some( - assets_src_path - .join(self.assets_numbering.to_string()) - .with_extension("typ"), - ) - } else { - None - }; - - let mut render = |theme| self.render_inner(prepend_code, code, is_markup, theme); - - let mut content = EcoString::new(); - - let inline_attrs = if inline { - r#" style="vertical-align: -0.35em""# - } else { - "" - }; - - let write_error = |content: &mut EcoString, err: &str| { - let err = err.replace("`", r#"\`"#); - let _ = write!(content, "```\nRender Error\n{err}\n```"); - }; - - let write_image = |content: &mut EcoString, - file_name: &std::path::Path, - code_file_name: Option<&PathBuf>, - inline_attrs: &str, - extra_attrs: &str| { - if let Some(code_file_name) = code_file_name { - let _ = write!( - content, - r#""#, - code_file_name.display(), - file_name.display() - ); - } else { - let _ = write!( - content, - r#""#, - file_name.display() - ); - } - }; - - 
let write_picture = |content: &mut EcoString, - dark_file_name: &std::path::Path, - light_file_name: &std::path::Path, - code_file_name: Option<&PathBuf>, - inline_attrs: &str, - extra_attrs: &str| { - if let Some(code_file_name) = code_file_name { - let _ = write!( - content, - r#""#, - code_file_name.display(), - dark_file_name.display(), - light_file_name.display() - ); - } else { - let _ = write!( - content, - r#""#, - dark_file_name.display(), - light_file_name.display() - ); - } - }; - - match theme { - Some(theme) => { - let data = match render(theme) { - Ok(data) => data, - Err(err) if self.feat.soft_error => { - write_error(&mut content, &err.to_string()); - return Ok(Value::Content(content)); - } - Err(err) => return Err(err), - }; - - if !inline { - let _ = write!(content, r#"

"#); - } - if let Some(assets_path) = &self.feat.assets_path { - let file_name = - assets_path.join(format!("{}_{:?}.svg", self.assets_numbering, theme)); - std::fs::write(&file_name, &data) - .map_err(|e| format!("failed to write SVG to file: {}", e))?; - - write_image( - &mut content, - &file_name, - code_file_name.as_ref(), - inline_attrs, - extra_attrs, - ); - } else { - let _ = write!( - content, - r#""# - ); - } - if !inline { - content.push_str("

"); - } - } - None => { - let dark = match render(ColorTheme::Dark) { - Ok(d) => d, - Err(err) if self.feat.soft_error => { - write_error(&mut content, &err.to_string()); - return Ok(Value::Content(content)); - } - Err(err) => return Err(err), - }; - let light = match render(ColorTheme::Light) { - Ok(l) => l, - Err(err) if self.feat.soft_error => { - write_error(&mut content, &err.to_string()); - return Ok(Value::Content(content)); - } - Err(err) => return Err(err), - }; - - if !inline { - let _ = write!(content, r#"

"#); - } - if let Some(assets_path) = &self.feat.assets_path { - let dark_file_name = assets_path.join(format!( - "{}_{:?}.svg", - self.assets_numbering, - ColorTheme::Dark - )); - let light_file_name = assets_path.join(format!( - "{}_{:?}.svg", - self.assets_numbering, - ColorTheme::Light - )); - - write_picture( - &mut content, - &dark_file_name, - &light_file_name, - code_file_name.as_ref(), - inline_attrs, - extra_attrs, - ); - } else { - let _ = write!( - content, - r#""# - ); - } - if !inline { - content.push_str("

"); - } - } - } - - Ok(Value::Content(content)) - } - - fn render_inner( - &mut self, - prepend_code: &str, - code: &str, - is_markup: bool, - theme: ColorTheme, - ) -> Result { - static DARK_THEME_INPUT: LazyLock>> = LazyLock::new(|| { - Arc::new(LazyHash::new(Dict::from_iter(std::iter::once(( - "x-color-theme".into(), - "dark".into_value(), - ))))) + let mut world = world.html_task().task(TaskInputs { + entry: Some(entry.select_in_workspace(wrap_main_id.vpath().as_rooted_path())), + inputs: None, }); - let code = WrapCode(code, is_markup); - // let inputs = is_dark.then(|| DARK_THEME_INPUT.clone()); - let inputs = match theme { - ColorTheme::Dark => Some(DARK_THEME_INPUT.clone()), - ColorTheme::Light => None, - }; - let code = eco_format!( - r##"{prepend_code} - #set page(width: auto, height: auto, margin: (y: 0.45em, rest: 0em), fill: none); - #set text(fill: rgb("#c0caf5")) if sys.inputs.at("x-color-theme", default: none) == "dark"; - {code}"## + let markdown_id = FileId::new( + Some(typst_syntax::package::PackageSpec::from_str("@local/markdown:0.1.0").unwrap()), + VirtualPath::new("lib.typ"), ); - let main = Bytes::new(code.as_bytes().to_owned()); - // let world = LiteWorld::new(main); - let path = Path::new("__render__.typ"); - let entry = self.world.entry_state().select_in_workspace(path); - let mut world = self.world.task(tinymist_project::TaskInputs { - entry: Some(entry), - inputs, - }); - world.take_db(); - world.map_shadow_by_id(world.main(), main).unwrap(); + world + .map_shadow_by_id( + markdown_id.join("typst.toml"), + Bytes::from_string(include_str!("markdown-typst.toml")), + ) + .map_err(|err| format!("cannot map markdown-typst.toml: {err:?}"))?; + world + .map_shadow_by_id( + markdown_id, + Bytes::from_string(include_str!("markdown.typ")), + ) + .map_err(|err| format!("cannot map markdown.typ: {err:?}"))?; - let document = typst::compile(&world).output; - let document = document.map_err(|diagnostics| { - let mut err = String::new(); - let _ = 
write!(err, "compiling node: "); - let write_span = |span: typst_syntax::Span, err: &mut String| { - let file = span.id().map(|id| match id.package() { - Some(package) if WorkspaceResolver::is_package_file(id) => { - format!("{package}:{}", unix_slash(id.vpath().as_rooted_path())) - } - Some(_) | None => unix_slash(id.vpath().as_rooted_path()), - }); - let range = world.range(span); - match (file, range) { - (Some(file), Some(range)) => { - let _ = write!(err, "{file:?}:{range:?}"); - } - (Some(file), None) => { - let _ = write!(err, "{file:?}"); - } - (None, Some(range)) => { - let _ = write!(err, "{range:?}"); - } - _ => { - let _ = write!(err, "unknown location"); - } - } - }; + world + .map_shadow( + wrap_main_path.as_path(), + Bytes::from_string(format!( + r#" + #import "@local/markdown:0.1.0": md-doc + #show: md-doc + #include {:?} + "#, + current.vpath().as_rooted_path(), + )), + ) + .map_err(|err| format!("cannot map source for main file: {err:?}"))?; - for s in diagnostics.iter() { - match s.severity { - typst::diag::Severity::Error => { - let _ = write!(err, "error: "); - } - typst::diag::Severity::Warning => { - let _ = write!(err, "warning: "); - } - } - - err.push_str(&s.message); - err.push_str(" at "); - write_span(s.span, &mut err); - - for hint in s.hints.iter() { - err.push_str("\nHint: "); - err.push_str(hint); - } - - for trace in &s.trace { - write!(err, "\nTrace: {} at ", trace.v).unwrap(); - write_span(trace.span, &mut err); - } - - err.push('\n'); - } - - err - })?; - - let svg_payload = typst_svg::svg_merged(&document, Abs::zero()); - - if let Some(assets_path) = &self.feat.assets_path { - let file_name = assets_path.join(format!("{}_{:?}.svg", self.assets_numbering, theme)); - if let Err(e) = std::fs::write(&file_name, &svg_payload) { - return Err(format!("failed to write SVG to file: {}", e).into()); - } - Ok(file_name.to_string_lossy().to_string()) - } else { - Ok(base64::engine::general_purpose::STANDARD.encode(svg_payload)) - } - } - - 
fn char(arg: char) -> Result { - Ok(Value::Content(arg.into())) - } - - fn str(node: &SyntaxNode) -> Result { - Ok(Value::Content(node.clone().into_text())) - } - - pub fn value(res: Value) -> EcoString { - match res { - Value::None => EcoString::new(), - Value::Content(content) => content, - Value::Str(s) => s, - Value::Image { path, alt } => eco_format!("![{alt}]({path})"), - _ => eco_format!("{res:?}"), - } - } - - fn escape(node: &SyntaxNode) -> Result { - // todo: escape characters - Self::str(node) - } - - fn shorthand(node: &SyntaxNode) -> Result { - // todo: shorthands - Self::str(node) - } - - fn strong(&mut self, node: &SyntaxNode) -> Result { - let mut s = EcoString::new(); - - let strong = node.cast::().unwrap(); - s.push_str("**"); - s.push_str(&Self::value(self.eval(strong.body().to_untyped())?)); - s.push_str("**"); - - Ok(Value::Content(s)) - } - - fn emph(&mut self, node: &SyntaxNode) -> Result { - let mut s = EcoString::new(); - let emph = node.cast::().unwrap(); - s.push('_'); - s.push_str(&Self::value(self.eval(emph.body().to_untyped())?)); - s.push('_'); - Ok(Value::Content(s)) - } - - fn heading(&mut self, node: &SyntaxNode) -> Result { - let mut s = EcoString::new(); - let heading = node.cast::().unwrap(); - let level = heading.depth(); - for _ in 0..level.get() { - s.push('#'); - } - s.push(' '); - s.push_str(&Self::value(self.eval(heading.body().to_untyped())?)); - Ok(Value::Content(s)) - } - - fn raw(node: &SyntaxNode) -> Result { - let mut s = EcoString::new(); - let raw = node.cast::().unwrap(); - - // Raw codes with typlite language will not be treated as a code block but - // directly output into the Markdown result. 
- if let Some(lang) = raw.lang() { - if &EcoString::from("typlite") == lang.get() { - for line in raw.lines() { - s.push_str(&Self::value(Self::str(line.to_untyped())?)); - s.push('\n'); - } - return Ok(Value::Content(s)); - } - } - - if raw.block() { - s.push_str(&Self::value(Self::str(node)?)); - return Ok(Value::Content(s)); - } - s.push('`'); - for line in raw.lines() { - s.push_str(&Self::value(Self::str(line.to_untyped())?)); - } - s.push('`'); - Ok(Value::Content(s)) - } - - fn link(&mut self, node: &SyntaxNode) -> Result { - // GFM supports autolinks - if self.feat.gfm { - // return Self::str(node, s); - return Self::str(node); - } - let mut s = EcoString::new(); - s.push('['); - s.push_str(&Self::value(Self::str(node)?)); - s.push(']'); - s.push('('); - s.push_str(&Self::value(Self::str(node)?)); - s.push(')'); - - Ok(Value::Content(s)) - } - - fn label(_node: &SyntaxNode) -> Result { - Result::Ok(Value::None) - } - - fn label_ref(node: &SyntaxNode) -> Result { - Self::str(node) - } - - fn ref_marker(node: &SyntaxNode) -> Result { - Self::str(node) - } - - fn list_item(&mut self, node: &SyntaxNode) -> Result { - let mut s = EcoString::new(); - - let list_item = node.cast::().unwrap(); - - s.push_str("- "); - if self.feat.annotate_elem { - let _ = write!(s, "", self.list_depth); - self.list_depth += 1; - } - s.push_str(&Self::value(self.eval(list_item.body().to_untyped())?)); - if self.feat.annotate_elem { - self.list_depth -= 1; - let _ = write!(s, "", self.list_depth); - } - - Ok(Value::Content(s)) - } - - fn enum_item(&mut self, node: &SyntaxNode) -> Result { - let enum_item = node.cast::().unwrap(); - - let body = Self::value(self.eval(enum_item.body().to_untyped())?); - - let s = if let Some(num) = enum_item.number() { - eco_format!("{num}. ") - } else { - "1. 
".into() - }; - - Ok(Value::Content(eco_format!("{s}{body}"))) - } - - fn term_item(&mut self, node: &SyntaxNode) -> Result { - self.reduce(node) - } - - fn equation(&mut self, node: &SyntaxNode) -> Result { - let equation: ast::Equation = node.cast().unwrap(); - - if self.feat.remove_html { - return self.to_raw_block(node, !equation.block()); - } - - self.render(&SyntaxNode::leaf(Text, ""), node, !equation.block()) - } - - fn let_binding(&self, node: &SyntaxNode) -> Result { - let _ = node; - - Ok(Value::None) - } - - fn field_access(&self, node: &SyntaxNode) -> Result { - let _ = node; - - Ok(Value::None) - } - - fn func_call(&mut self, node: &SyntaxNode) -> Result { - let c: ast::FuncCall = node.cast().unwrap(); - - let callee = match c.callee() { - ast::Expr::Ident(callee) => self.scopes.get(callee.get()), - ast::Expr::FieldAccess(..) => return Ok(Value::None), - _ => return Ok(Value::None), - }?; - - let Value::RawFunc(func) = callee else { - return Err("callee is not a function")?; - }; - - func(Args::new(self, c.args())) - } - - fn contextual(&mut self, node: &SyntaxNode) -> Result { - if self.feat.remove_html { - return self.to_raw_block(node, false); - } - // Trim the last `#` in the prepend code. 
(#context) - self.prepend_code = self.prepend_code.trim_end_matches('#').into(); - self.render( - &SyntaxNode::leaf(node.kind(), self.prepend_code.clone()), - node, - false, - ) - } - - fn include(&self, node: &SyntaxNode) -> Result { - let include: ast::ModuleInclude = node.cast().unwrap(); - - let path = include.source(); - let src = - tinymist_analysis::syntax::find_source_by_expr(self.world.as_ref(), self.current, path) - .ok_or_else(|| format!("failed to find source on path {path:?}"))?; - - self.clone().sub_file(src).map(Value::Content) - } - - fn sub_file(mut self, src: Source) -> Result { - self.current = src.id(); - self.convert(src.root()) - } -} - -struct WrapCode<'a>(&'a str, bool); - -impl fmt::Display for WrapCode<'_> { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let is_markup = self.1; - if is_markup { - f.write_str("#[")?; - } else { - f.write_str("#{")?; - } - f.write_str(self.0)?; - if is_markup { - f.write_str("]") - } else { - f.write_str("}") - } + let base = typst::compile(&world) + .output + .map_err(|err| format!("convert source for main file: {err:?}"))?; + Ok(MarkdownDocument { + base, + feat: self.feat, + ast_cache: RefCell::new(None), + }) } } diff --git a/crates/typlite/src/library.rs b/crates/typlite/src/library.rs index f914b98c..fb9fc342 100644 --- a/crates/typlite/src/library.rs +++ b/crates/typlite/src/library.rs @@ -1,5 +1,7 @@ //! 
# Typlite Library +use crate::{scopes::Scopes, tinymist_std::typst::diag::EcoString, worker::TypliteWorker}; + use super::*; use ecow::eco_format; use typst_syntax::{ast, SyntaxKind, SyntaxNode}; diff --git a/crates/typlite/src/main.rs b/crates/typlite/src/main.rs index 7e0ea6b3..89e21cb6 100644 --- a/crates/typlite/src/main.rs +++ b/crates/typlite/src/main.rs @@ -1,6 +1,7 @@ #![doc = include_str!("../README.md")] use std::{ + io::Write, path::{Path, PathBuf}, sync::Arc, }; @@ -8,7 +9,7 @@ use std::{ use clap::Parser; use ecow::{eco_format, EcoString}; use tinymist_project::WorldProvider; -use typlite::{value::*, TypliteFeat}; +use typlite::{common::Format, value::*, TypliteFeat}; use typlite::{CompileOnceArgs, Typlite}; /// Common arguments of compile, watch, and query. @@ -17,19 +18,13 @@ pub struct CompileArgs { #[clap(flatten)] pub compile: CompileOnceArgs, - /// Path to output file - #[clap(value_name = "OUTPUT")] - pub output: Option, + /// Path to output file(s) + #[clap(value_name = "OUTPUT", action = clap::ArgAction::Append)] + pub outputs: Vec, /// Configures the path of assets directory #[clap(long, default_value = None, value_name = "ASSETS_PATH")] pub assets_path: Option, - - /// Configure the path to the assets' corresponding source code directory. - /// When the path is specified, typlite adds a href to jump to the source - /// code in the exported asset. 
- #[clap(long, default_value = None, value_name = "ASSETS_SRC_PATH")] - pub assets_src_path: Option, } fn main() -> typlite::Result<()> { @@ -41,11 +36,16 @@ fn main() -> typlite::Result<()> { .input .as_ref() .ok_or("Missing required argument: INPUT")?; - let output = match args.output { - Some(stdout_path) if stdout_path == "-" => None, - Some(output_path) => Some(PathBuf::from(output_path)), - None => Some(Path::new(input).with_extension("md")), + + let outputs = if args.outputs.is_empty() { + vec![Path::new(input) + .with_extension("md") + .to_string_lossy() + .to_string()] + } else { + args.outputs.clone() }; + let assets_path = match args.assets_path { Some(assets_path) => { let path = PathBuf::from(assets_path); @@ -58,18 +58,6 @@ fn main() -> typlite::Result<()> { } None => None, }; - let assets_src_path = match args.assets_src_path { - Some(assets_src_path) => { - let path = PathBuf::from(assets_src_path); - if !path.exists() { - if let Err(e) = std::fs::create_dir_all(&path) { - return Err(format!("failed to create assets' src directory: {}", e).into()); - } - } - Some(path) - } - None => None, - }; let universe = args.compile.resolve().map_err(|err| format!("{err:?}"))?; let world = universe.snapshot(); @@ -77,18 +65,60 @@ fn main() -> typlite::Result<()> { let converter = Typlite::new(Arc::new(world)) .with_library(lib()) .with_feature(TypliteFeat { - assets_path, - assets_src_path, + assets_path: assets_path.clone(), ..Default::default() }); - let conv = converter.convert(); + let doc = match converter.convert_doc() { + Ok(doc) => doc, + Err(err) => return Err(format!("failed to convert document: {err}").into()), + }; - match (conv, output) { - (Ok(conv), None) => println!("{}", conv), - (Ok(conv), Some(output)) => std::fs::write(output, conv.as_str()).unwrap(), - (Err(err), ..) 
=> { - eprintln!("{err}"); - std::process::exit(1); + for output_path in &outputs { + let is_stdout = output_path == "-"; + let output = if is_stdout { + None + } else { + Some(PathBuf::from(output_path)) + }; + + let format = match &output { + Some(output) if output.extension() == Some(std::ffi::OsStr::new("tex")) => { + Format::LaTeX + } + Some(output) if output.extension() == Some(std::ffi::OsStr::new("docx")) => { + Format::Docx + } + _ => Format::Md, + }; + + match format { + Format::Docx => todo!(), + Format::LaTeX => todo!(), + Format::Md => { + let result = doc.to_md_string(); + match (result, output) { + (Ok(content), None) => { + std::io::stdout() + .write_all(content.as_str().as_bytes()) + .unwrap(); + } + (Ok(content), Some(output)) => { + if let Err(err) = std::fs::write(&output, content.as_str()) { + eprintln!( + "failed to write Markdown file {}: {}", + output.display(), + err + ); + continue; + } + println!("Generated Markdown file: {}", output.display()); + } + (Err(err), _) => { + eprintln!("Error converting to Markdown for {}: {}", output_path, err); + continue; + } + } + } } } diff --git a/crates/typlite/src/markdown-typst.toml b/crates/typlite/src/markdown-typst.toml new file mode 100644 index 00000000..d82c0bb6 --- /dev/null +++ b/crates/typlite/src/markdown-typst.toml @@ -0,0 +1,5 @@ +[package] +name = "markdown" +version = "0.1.0" +entrypoint = "lib.typ" +description = "Markdown support for typst." diff --git a/crates/typlite/src/markdown.typ b/crates/typlite/src/markdown.typ new file mode 100644 index 00000000..2fe9def6 --- /dev/null +++ b/crates/typlite/src/markdown.typ @@ -0,0 +1,151 @@ +#let bool-str(x) = { + if x { + "true" + } else { + "false" + } +} + +// typst doesn't allow things like `typParbreak`. 
+#let md-parbreak = html.elem("m1parbreak", "") +#let md-linebreak = html.elem("m1linebreak", "") +#let md-strong(body, delta: 0) = html.elem("span", html.elem("m1strong", body)) +#let md-emph(body) = html.elem("span", html.elem("m1emph", body)) +#let md-highlight(body) = html.elem("span", html.elem("m1highlight", body)) +#let md-strike(body) = html.elem("span", html.elem("m1strike", body)) +#let md-raw(lang: none, block: false, text) = { + let body = html.elem( + "m1raw", + attrs: ( + lang: if lang == none { + "" + } else { + lang + }, + block: bool-str(block), + text: text, + ), + "", + ) + + if block { + return body + } else { + html.elem("span", body) + } +} +#let md-link(dest: none, body) = html.elem( + "span", + html.elem( + "m1link", + attrs: (dest: dest), + body, + ), +) +#let md-label(dest: none, body) = html.elem( + "m1label", + attrs: (dest: dest), + body, +) +#let md-ref(body) = html.elem( + "span", + html.elem( + "m1ref", + body, + ), +) +#let md-heading(level: int, body) = html.elem( + "m1heading", + attrs: (level: str(level)), + box(body), +) +#let md-outline = html.elem.with("m1outline") +#let md-outline-entry(level: int, body) = html.elem( + "m1outentry", + attrs: (level: str(level)), + body, +) +#let md-quote(attribution: none, body) = html.elem( + "m1quote", + attrs: (attribution: attribution), + body, +) +#let md-table(it) = html.elem( + "m1table", + it, +) +#let md-grid(columns: auto, ..children) = html.elem( + "m1grid", + table(columns: columns, ..children.pos().map(it => table.cell(it))), +) +#let md-image(src: "", alt: none) = html.elem( + "m1image", + attrs: ( + src: src, + alt: if alt == none { + "" + } else { + alt + }, + ), + "", +) +#let md-figure(body, caption: none) = html.elem( + "m1figure", + attrs: ( + caption: if caption == none { + "" + } else { + if caption.body.func() == text { + caption.body.text + } else { + "" + } + }, + ), + body, +) + +#let if-not-paged(it, act) = { + if target() == "html" { + act + } else { + it + } +} + 
+#let md-doc(body) = context { + // distinguish parbreak from

tag + show parbreak: it => if-not-paged(it, md-parbreak) + show strong: it => if-not-paged(it, md-strong(it.body, delta: it.delta)) + show emph: it => if-not-paged(it, md-emph(it.body)) + show highlight: it => if-not-paged(it, md-highlight(it)) + show strike: it => if-not-paged(it, md-strike(it)) + // todo: icc? + show image: it => if-not-paged(it, md-image(src: it.source, alt: it.alt)) + + show raw: it => if-not-paged(it, md-raw(lang: it.lang, block: it.block, it.text)) + show link: it => if-not-paged(it, md-link(dest: it.dest, it.body)) + show ref: it => if-not-paged(it, md-ref(it)) + + show heading: it => if-not-paged(it, md-heading(level: it.level, it.body)) + show outline: it => if-not-paged(it, md-outline(it)) + show outline.entry: it => if-not-paged(it, md-outline-entry(level: it.level, it.element)) + show quote: it => if-not-paged(it, md-quote(attribution: it.attribution, it.body)) + show table: it => if-not-paged(it, md-table(it)) + show grid: it => if-not-paged(it, md-grid(columns: it.columns, ..it.children)) + + show math.equation.where(block: false): it => if-not-paged( + it, + html.elem("m1eqinline", html.frame(box(inset: 0.5em, it))), + ) + show math.equation.where(block: true): it => if-not-paged( + it, + html.elem("m1eqblock", html.frame(block(inset: 0.5em, it))), + ) + + show linebreak: it => if-not-paged(it, md-linebreak) + show figure: it => if-not-paged(it, md-figure(it.body, caption: it.caption)) + + html.elem("m1document", body) +} diff --git a/crates/typlite/src/parser/core.rs b/crates/typlite/src/parser/core.rs new file mode 100644 index 00000000..8f48972b --- /dev/null +++ b/crates/typlite/src/parser/core.rs @@ -0,0 +1,293 @@ +//! 
HTML parser core, containing main structures and general parsing logic + +use cmark_writer::ast::{HtmlAttribute, HtmlElement as CmarkHtmlElement, Node}; +use cmark_writer::CustomNode; +use typst::html::{tag, HtmlElement, HtmlNode}; + +use crate::attributes::{HeadingAttr, RawAttr, TypliteAttrsParser}; +use crate::common::ListState; +use crate::tags::md_tag; +use crate::Result; +use crate::TypliteFeat; + +use super::{inline::InlineParser, list::ListParser, media::MediaParser, table::TableParser}; + +/// HTML to AST parser implementation +pub struct HtmlToAstParser { + pub feat: TypliteFeat, + pub list_state: Option, + pub list_level: usize, + pub blocks: Vec, + pub inline_buffer: Vec, +} + +impl HtmlToAstParser { + pub fn new(feat: TypliteFeat) -> Self { + Self { + feat, + list_level: 0, + list_state: None, + blocks: Vec::new(), + inline_buffer: Vec::new(), + } + } + + pub fn convert_element(&mut self, element: &HtmlElement) -> Result<()> { + match element.tag { + tag::head => Ok(()), + + tag::html | tag::body | md_tag::doc => { + self.convert_children(element)?; + Ok(()) + } + + md_tag::parbreak => { + self.flush_inline_buffer(); + Ok(()) + } + + md_tag::heading => { + self.flush_inline_buffer(); + let attrs = HeadingAttr::parse(&element.attrs)?; + self.convert_children(element)?; + self.flush_inline_buffer_as_block(|content| { + Node::heading(attrs.level as u8 + 1, content) + }); + Ok(()) + } + + tag::ol => { + self.flush_inline_buffer(); + self.list_level += 1; + let items = ListParser::convert_list(self, element); + self.list_level -= 1; + self.blocks.push(Node::OrderedList { + start: 1, + items: items?, + }); + Ok(()) + } + + tag::ul => { + self.flush_inline_buffer(); + self.list_level += 1; + let items = ListParser::convert_list(self, element); + self.list_level -= 1; + self.blocks.push(Node::UnorderedList(items?)); + Ok(()) + } + + md_tag::raw => { + let attrs = RawAttr::parse(&element.attrs)?; + if attrs.block { + self.flush_inline_buffer(); + self.blocks + 
.push(Node::code_block(Some(attrs.lang.into()), attrs.text.into())); + } else { + self.inline_buffer.push(Node::InlineCode(attrs.text.into())); + } + Ok(()) + } + + md_tag::quote => { + self.flush_inline_buffer(); + self.convert_children(element)?; + self.flush_inline_buffer_as_block(|content| { + Node::BlockQuote(vec![Node::Paragraph(content)]) + }); + Ok(()) + } + + md_tag::figure => InlineParser::convert_figure(self, element), + + tag::p | tag::span => { + self.convert_children(element)?; + Ok(()) + } + + tag::strong | md_tag::strong => InlineParser::convert_strong(self, element), + + tag::em | md_tag::emph => InlineParser::convert_emphasis(self, element), + + md_tag::highlight => InlineParser::convert_highlight(self, element), + + md_tag::strike => InlineParser::convert_strikethrough(self, element), + + md_tag::link => InlineParser::convert_link(self, element), + + md_tag::image => InlineParser::convert_image(self, element), + + md_tag::linebreak => { + self.inline_buffer.push(Node::HardBreak); + Ok(()) + } + + md_tag::table | md_tag::grid => { + self.flush_inline_buffer(); + if let Some(table) = TableParser::convert_table(self, element)? 
{ + self.blocks.push(table); + } + Ok(()) + } + + md_tag::math_equation_inline | md_tag::math_equation_block => { + if element.tag == md_tag::math_equation_block { + self.flush_inline_buffer(); + } + self.convert_children(element)?; + if element.tag == md_tag::math_equation_block { + self.flush_inline_buffer(); + } + Ok(()) + } + + _ => { + let tag_name = element.tag.resolve().to_string(); + + if !tag_name.starts_with("m1") { + let html_element = self.create_html_element(element)?; + self.inline_buffer.push(html_element); + } else { + self.convert_children(element)?; + } + Ok(()) + } + } + } + + /// Create a CommonMark HTML element from the given HTML element + pub(crate) fn create_html_element(&mut self, element: &HtmlElement) -> Result { + let attributes = element + .attrs + .0 + .iter() + .map(|(name, value)| HtmlAttribute { + name: name.to_string(), + value: value.to_string(), + }) + .collect(); + + let mut children = Vec::new(); + self.convert_children_into(&mut children, element)?; + + Ok(Node::HtmlElement(CmarkHtmlElement { + tag: element.tag.resolve().to_string(), + attributes, + children, + self_closing: element.children.is_empty(), + })) + } + + pub fn flush_inline_buffer(&mut self) { + if !self.inline_buffer.is_empty() { + self.blocks + .push(Node::Paragraph(std::mem::take(&mut self.inline_buffer))); + } + } + + pub fn flush_inline_buffer_as_block(&mut self, make_block: impl FnOnce(Vec) -> Node) { + if !self.inline_buffer.is_empty() { + self.blocks + .push(make_block(std::mem::take(&mut self.inline_buffer))); + } + } + + pub fn convert_children(&mut self, element: &HtmlElement) -> Result<()> { + for child in &element.children { + match child { + HtmlNode::Text(text, _) => { + self.inline_buffer + .push(Node::Text(text.as_str().to_string())); + } + HtmlNode::Element(element) => { + self.convert_element(element)?; + } + HtmlNode::Frame(frame) => { + self.inline_buffer + .push(MediaParser::convert_frame(self, frame)); + } + _ => {} + } + } + Ok(()) + } + + 
pub fn convert_children_into( + &mut self, + target: &mut Vec, + element: &HtmlElement, + ) -> Result<()> { + let prev_buffer = std::mem::take(&mut self.inline_buffer); + self.convert_children(element)?; + target.append(&mut self.inline_buffer); + self.inline_buffer = prev_buffer; + Ok(()) + } + + pub(crate) fn begin_list(&mut self) { + if self.feat.annotate_elem { + self.inline_buffer + .push(Node::Custom(Box::new(Comment(format!( + "typlite:begin:list-item {}", + self.list_level - 1 + ))))) + } + } + + pub(crate) fn end_list(&mut self) { + if self.feat.annotate_elem { + self.inline_buffer + .push(Node::Custom(Box::new(Comment(format!( + "typlite:end:list-item {}", + self.list_level - 1 + ))))) + } + } +} + +#[derive(Debug, Clone)] +struct Comment(String); + +impl CustomNode for Comment { + fn as_any(&self) -> &dyn std::any::Any { + self + } + + fn write( + &self, + writer: &mut dyn cmark_writer::CustomNodeWriter, + ) -> cmark_writer::WriteResult<()> { + writer.write_str("")?; + Ok(()) + } + + fn clone_box(&self) -> Box { + Box::new(self.clone()) + } + + fn eq_box(&self, other: &dyn CustomNode) -> bool { + if let Some(other) = other.as_any().downcast_ref::() { + self.0 == other.0 + } else { + false + } + } + + fn is_block(&self) -> bool { + false + } +} + +impl HtmlToAstParser { + pub fn parse(mut self, root: &HtmlElement) -> Result { + self.blocks.clear(); + self.inline_buffer.clear(); + + self.convert_element(root)?; + self.flush_inline_buffer(); + + Ok(Node::Document(self.blocks)) + } +} diff --git a/crates/typlite/src/parser/inline.rs b/crates/typlite/src/parser/inline.rs new file mode 100644 index 00000000..af3ae8d0 --- /dev/null +++ b/crates/typlite/src/parser/inline.rs @@ -0,0 +1,98 @@ +//! 
Inline element processing module, handles text and inline style elements + +use cmark_writer::ast::Node; +use typst::html::HtmlElement; + +use crate::attributes::{FigureAttr, ImageAttr, LinkAttr, TypliteAttrsParser}; +use crate::common::{FigureNode, HighlightNode}; +use crate::Result; + +use super::core::HtmlToAstParser; + +/// Inline style element parser +pub struct InlineParser; + +impl InlineParser { + /// Convert strong emphasis element + pub fn convert_strong(parser: &mut HtmlToAstParser, element: &HtmlElement) -> Result<()> { + let mut content = Vec::new(); + parser.convert_children_into(&mut content, element)?; + parser.inline_buffer.push(Node::Strong(content)); + Ok(()) + } + + /// Convert emphasis element + pub fn convert_emphasis(parser: &mut HtmlToAstParser, element: &HtmlElement) -> Result<()> { + let mut content = Vec::new(); + parser.convert_children_into(&mut content, element)?; + parser.inline_buffer.push(Node::Emphasis(content)); + Ok(()) + } + + /// Convert highlight element + pub fn convert_highlight(parser: &mut HtmlToAstParser, element: &HtmlElement) -> Result<()> { + let mut content = Vec::new(); + parser.convert_children_into(&mut content, element)?; + parser + .inline_buffer + .push(Node::Custom(Box::new(HighlightNode { content }))); + Ok(()) + } + + /// Convert strikethrough element + pub fn convert_strikethrough( + parser: &mut HtmlToAstParser, + element: &HtmlElement, + ) -> Result<()> { + let mut content = Vec::new(); + parser.convert_children_into(&mut content, element)?; + parser.inline_buffer.push(Node::Strikethrough(content)); + Ok(()) + } + + /// Convert link element + pub fn convert_link(parser: &mut HtmlToAstParser, element: &HtmlElement) -> Result<()> { + let attrs = LinkAttr::parse(&element.attrs)?; + let mut content = Vec::new(); + parser.convert_children_into(&mut content, element)?; + parser.inline_buffer.push(Node::Link { + url: attrs.dest.into(), + title: None, + content, + }); + Ok(()) + } + + /// Convert image element + 
pub fn convert_image(parser: &mut HtmlToAstParser, element: &HtmlElement) -> Result<()> { + let attrs = ImageAttr::parse(&element.attrs)?; + let src = attrs.src.as_str(); + parser.inline_buffer.push(Node::Image { + url: src.to_string(), + title: None, + alt: vec![Node::Text(attrs.alt.into())], + }); + Ok(()) + } + + /// Convert figure element + pub fn convert_figure(parser: &mut HtmlToAstParser, element: &HtmlElement) -> Result<()> { + parser.flush_inline_buffer(); + + // Parse figure attributes to extract caption + let attrs = FigureAttr::parse(&element.attrs)?; + let caption = attrs.caption.to_string(); + + // Find image and body content + let mut body_content = Vec::new(); + parser.convert_children_into(&mut body_content, element)?; + let body = Box::new(Node::Paragraph(body_content)); + + // Create figure node using generic definition + parser + .blocks + .push(Node::Custom(Box::new(FigureNode { body, caption }))); + + Ok(()) + } +} diff --git a/crates/typlite/src/parser/list.rs b/crates/typlite/src/parser/list.rs new file mode 100644 index 00000000..37eb9c56 --- /dev/null +++ b/crates/typlite/src/parser/list.rs @@ -0,0 +1,88 @@ +//! 
HTML list parsing module, handling conversion of ordered and unordered lists + +use cmark_writer::ast::{ListItem, Node}; +use typst::html::{tag, HtmlElement, HtmlNode}; + +use crate::attributes::{ListItemAttr, TypliteAttrsParser}; +use crate::Result; + +use super::core::HtmlToAstParser; + +/// List parser +pub struct ListParser; + +impl ListParser { + /// Convert HTML list to ListItem vector + pub fn convert_list( + parser: &mut HtmlToAstParser, + element: &HtmlElement, + ) -> Result> { + let mut all_items = Vec::new(); + let prev_buffer = std::mem::take(&mut parser.inline_buffer); + let is_ordered = element.tag == tag::ol; + + for child in &element.children { + if let HtmlNode::Element(li) = child { + if li.tag == tag::li { + let attrs = ListItemAttr::parse(&li.attrs)?; + let mut item_content = Vec::new(); + + parser.begin_list(); + + for li_child in &li.children { + match li_child { + HtmlNode::Text(text, _) => { + parser + .inline_buffer + .push(Node::Text(text.as_str().to_string())); + } + HtmlNode::Element(child_elem) => { + if child_elem.tag == tag::ul || child_elem.tag == tag::ol { + // Handle nested lists + if !parser.inline_buffer.is_empty() { + item_content.push(Node::Paragraph(std::mem::take( + &mut parser.inline_buffer, + ))); + } + + let items = Self::convert_list(parser, child_elem)?; + if child_elem.tag == tag::ul { + item_content.push(Node::UnorderedList(items)); + } else { + item_content.push(Node::OrderedList { start: 1, items }); + } + } else { + parser.convert_element(child_elem)?; + } + } + _ => {} + } + } + + parser.end_list(); + + if !parser.inline_buffer.is_empty() { + item_content + .push(Node::Paragraph(std::mem::take(&mut parser.inline_buffer))); + } + + if !item_content.is_empty() { + if is_ordered { + all_items.push(ListItem::Ordered { + number: attrs.value, + content: item_content, + }); + } else { + all_items.push(ListItem::Unordered { + content: item_content, + }); + } + } + } + } + } + + parser.inline_buffer = prev_buffer; + 
Ok(all_items) + } +} diff --git a/crates/typlite/src/parser/media.rs b/crates/typlite/src/parser/media.rs new file mode 100644 index 00000000..8a8d823b --- /dev/null +++ b/crates/typlite/src/parser/media.rs @@ -0,0 +1,74 @@ +//! Media processing module, handles images, SVG and Frame media elements + +use base64::Engine; +use cmark_writer::ast::{HtmlAttribute, HtmlElement as CmarkHtmlElement, Node}; +use std::sync::atomic::{AtomicUsize, Ordering}; +use typst::layout::Frame; + +use crate::common::ExternalFrameNode; + +use super::core::HtmlToAstParser; + +/// Media content parser +pub struct MediaParser; + +impl MediaParser { + /// Convert Typst frame to CommonMark node + pub fn convert_frame(parser: &HtmlToAstParser, frame: &Frame) -> Node { + let svg = typst_svg::svg_frame(frame); + let data = base64::engine::general_purpose::STANDARD.encode(svg.as_bytes()); + + if let Some(assets_path) = &parser.feat.assets_path { + // Use a unique static counter to generate filenames + static FRAME_COUNTER: AtomicUsize = AtomicUsize::new(0); + let file_id = FRAME_COUNTER.fetch_add(1, Ordering::Relaxed); + let file_name = format!("frame_{}.svg", file_id); + let file_path = assets_path.join(&file_name); + + if let Err(e) = std::fs::write(&file_path, svg.as_bytes()) { + if parser.feat.soft_error { + return Self::create_embedded_frame(&data); + } else { + // Construct error node + return Node::HtmlElement(CmarkHtmlElement { + tag: "div".to_string(), + attributes: vec![HtmlAttribute { + name: "class".to_string(), + value: "error".to_string(), + }], + children: vec![Node::Text(format!("Error writing frame to file: {}", e))], + self_closing: false, + }); + } + } + + return Node::Custom(Box::new(ExternalFrameNode { + file_path, + alt_text: "typst-frame".to_string(), + svg_data: data, + })); + } + + // Fall back to embedded mode if no external asset path is specified + Self::create_embedded_frame(&data) + } + + /// Create embedded frame node + fn create_embedded_frame(data: &str) -> Node { 
+ Node::HtmlElement(CmarkHtmlElement { + tag: "img".to_string(), + attributes: vec![ + HtmlAttribute { + name: "alt".to_string(), + value: "typst-block".to_string(), + }, + HtmlAttribute { + name: "src".to_string(), + value: format!("data:image/svg+xml;base64,{data}"), + }, + ], + children: vec![], + self_closing: true, + }) + } +} diff --git a/crates/typlite/src/parser/mod.rs b/crates/typlite/src/parser/mod.rs new file mode 100644 index 00000000..f26b2157 --- /dev/null +++ b/crates/typlite/src/parser/mod.rs @@ -0,0 +1,9 @@ +//! Parser implementation for Typst HTML to CommonMark AST + +mod core; +mod inline; +mod list; +mod media; +mod table; + +pub use core::HtmlToAstParser; diff --git a/crates/typlite/src/parser/table.rs b/crates/typlite/src/parser/table.rs new file mode 100644 index 00000000..ada30daf --- /dev/null +++ b/crates/typlite/src/parser/table.rs @@ -0,0 +1,190 @@ +//! HTML table parsing module, processes the conversion of table elements + +use cmark_writer::ast::Node; +use cmark_writer::gfm::TableAlignment; +use typst::html::{tag, HtmlElement, HtmlNode}; +use typst::utils::PicoStr; + +use crate::tags::md_tag; +use crate::Result; + +use super::core::HtmlToAstParser; + +/// Table parser +pub struct TableParser; + +impl TableParser { + /// Convert HTML table to CommonMark AST + pub fn convert_table( + parser: &mut HtmlToAstParser, + element: &HtmlElement, + ) -> Result> { + // Find the real table element + let real_table_elem = Self::find_real_table_element(element); + + // Process the table (if found) + if let Some(table) = real_table_elem { + // Check if the table contains rowspan or colspan attributes + // If it does, fall back to using HtmlElement + if Self::table_has_complex_cells(table) { + if let Ok(html_node) = parser.create_html_element(table) { + return Ok(Some(html_node)); + } + return Ok(None); + } + + let mut headers = Vec::new(); + let mut rows = Vec::new(); + let mut is_header = true; + + Self::extract_table_content(parser, table, &mut 
headers, &mut rows, &mut is_header)?; + return Self::create_table_node(headers, rows); + } + + Ok(None) + } + + /// Find the real table element in the HTML structure + fn find_real_table_element(element: &HtmlElement) -> Option<&HtmlElement> { + if element.tag == md_tag::grid { + // For grid: grid -> table -> table + Self::find_table_in_grid(element) + } else { + // For m1table -> table + Self::find_table_direct(element) + } + } + + fn find_table_in_grid(grid_element: &HtmlElement) -> Option<&HtmlElement> { + for child in &grid_element.children { + if let HtmlNode::Element(table_elem) = child { + if table_elem.tag == md_tag::table { + // Find table tag within m1table + for inner_child in &table_elem.children { + if let HtmlNode::Element(inner) = inner_child { + if inner.tag == tag::table { + return Some(inner); + } + } + } + } + } + } + None + } + + fn find_table_direct(element: &HtmlElement) -> Option<&HtmlElement> { + for child in &element.children { + if let HtmlNode::Element(table_elem) = child { + if table_elem.tag == tag::table { + return Some(table_elem); + } + } + } + None + } + + // Extract table content from the table element + fn extract_table_content( + parser: &mut HtmlToAstParser, + table: &HtmlElement, + headers: &mut Vec>, + rows: &mut Vec>>, + is_header: &mut bool, + ) -> Result<()> { + // Process rows in the table + for row_node in &table.children { + if let HtmlNode::Element(row_elem) = row_node { + if row_elem.tag == tag::tr { + let current_row = + Self::process_table_row(parser, row_elem, *is_header, headers)?; + + // After the first row, treat remaining rows as data rows + if *is_header { + *is_header = false; + } else if !current_row.is_empty() { + rows.push(current_row); + } + } + } + } + Ok(()) + } + + fn process_table_row( + parser: &mut HtmlToAstParser, + row_elem: &HtmlElement, + is_header: bool, + headers: &mut Vec>, + ) -> Result>> { + let mut current_row = Vec::new(); + + // Process cells in this row + for cell_node in 
&row_elem.children { + if let HtmlNode::Element(cell) = cell_node { + if cell.tag == tag::td { + let mut cell_content = Vec::new(); + parser.convert_children_into(&mut cell_content, cell)?; + + // Add to appropriate section + if is_header { + headers.push(cell_content); + } else { + current_row.push(cell_content); + } + } + } + } + + Ok(current_row) + } + + /// Check if the table has complex cells (rowspan/colspan) + fn table_has_complex_cells(table: &HtmlElement) -> bool { + for row_node in &table.children { + if let HtmlNode::Element(row_elem) = row_node { + if row_elem.tag == tag::tr { + for cell_node in &row_elem.children { + if let HtmlNode::Element(cell) = cell_node { + if (cell.tag == tag::td || cell.tag == tag::th) + && cell.attrs.0.iter().any(|(name, _)| { + let name = name.into_inner(); + name == PicoStr::constant("colspan") + || name == PicoStr::constant("rowspan") + }) + { + return true; + } + } + } + } + } + } + false + } + + fn create_table_node( + headers: Vec>, + rows: Vec>>, + ) -> Result> { + // Create alignment array (default to None for all columns) + let alignments = vec![TableAlignment::None; headers.len().max(1)]; + + // If there is content, add the table to blocks + if !headers.is_empty() || !rows.is_empty() { + let flattened_headers = headers.into_iter().flatten().collect(); + let flattened_rows: Vec<_> = rows + .into_iter() + .map(|row| row.into_iter().flatten().collect()) + .collect(); + + return Ok(Some(Node::Table { + headers: flattened_headers, + rows: flattened_rows, + alignments, + })); + } + + Ok(None) + } +} diff --git a/crates/typlite/src/tags.rs b/crates/typlite/src/tags.rs new file mode 100644 index 00000000..b25c8a70 --- /dev/null +++ b/crates/typlite/src/tags.rs @@ -0,0 +1,43 @@ +//! Custom HTML tags used by Typlite + +/// Tag definitions specific to markdown conversion +pub mod md_tag { + use typst::html::HtmlTag; + + macro_rules! 
tags { + ($($tag:ident -> $name:ident)*) => { + $(#[allow(non_upper_case_globals)] + pub const $tag: HtmlTag = HtmlTag::constant( + stringify!($name) + );)* + } + } + + tags! { + parbreak -> m1parbreak + linebreak -> m1linebreak + image -> m1image + strong -> m1strong + emph -> m1emph + highlight -> m1highlight + strike -> m1strike + raw -> m1raw + label -> m1label + reference -> m1ref + heading -> m1heading + outline -> m1outline + outline_entry -> m1outentry + quote -> m1quote + table -> m1table + // table_cell -> m1tablecell + grid -> m1grid + // grid_cell -> m1gridcell + figure -> m1figure + + math_equation_inline -> m1eqinline + math_equation_block -> m1eqblock + + doc -> m1document + link -> m1link + } +} diff --git a/crates/typlite/src/tests.rs b/crates/typlite/src/tests.rs index c23ba491..c32b1730 100644 --- a/crates/typlite/src/tests.rs +++ b/crates/typlite/src/tests.rs @@ -1,6 +1,8 @@ use std::sync::OnceLock; use regex::Regex; +use typst::html::{HtmlNode, HtmlTag}; +use typst_syntax::Span; use super::*; @@ -24,22 +26,49 @@ fn convert_docs() { }); } -fn conv(world: LspWorld, for_docs: bool) -> EcoString { +fn conv(world: LspWorld, for_docs: bool) -> String { let converter = Typlite::new(Arc::new(world)).with_feature(TypliteFeat { annotate_elem: for_docs, ..Default::default() }); - match converter.convert() { - Ok(conv) => { - static REG: OnceLock = OnceLock::new(); - let reg = - REG.get_or_init(|| Regex::new(r#"data:image/svg\+xml;base64,([^"]+)"#).unwrap()); - let res = reg.replace_all(&conv, |_captures: ®ex::Captures| { - "data:image-hash/svg+xml;base64,redacted" - }); + let doc = match converter.convert_doc() { + Ok(doc) => doc, + Err(err) => return format!("failed to convert to markdown: {err}"), + }; - res.into() + let repr = typst_html::html(&redact(doc.base.clone())).unwrap(); + let res = doc.to_md_string().unwrap(); + static REG: OnceLock = OnceLock::new(); + let reg = REG.get_or_init(|| 
Regex::new(r#"data:image/svg\+xml;base64,([^"]+)"#).unwrap()); + let res = reg.replace_all(&res, |_captures: ®ex::Captures| { + "data:image-hash/svg+xml;base64,redacted" + }); + + [repr.as_str(), res.as_ref()].join("\n=====\n") +} + +fn redact(doc: HtmlDocument) -> HtmlDocument { + let mut doc = doc; + for node in doc.root.children.iter_mut() { + redact_node(node); + } + doc +} + +fn redact_node(node: &mut HtmlNode) { + match node { + HtmlNode::Element(elem) => { + if elem.tag == HtmlTag::constant("svg") { + elem.children = vec![]; + } else { + for child in elem.children.iter_mut() { + redact_node(child); + } + } } - Err(err) => format!("failed to convert to markdown: {err}").into(), + HtmlNode::Frame(_) => { + *node = HtmlNode::Text("redacted-frame".into(), Span::detached()); + } + _ => {} } } diff --git a/crates/typlite/src/value.rs b/crates/typlite/src/value.rs index 1c20751a..675f014c 100644 --- a/crates/typlite/src/value.rs +++ b/crates/typlite/src/value.rs @@ -1,6 +1,12 @@ //! # Typlite Values +use crate::tinymist_std::typst::diag::EcoString; +use crate::worker::TypliteWorker; use core::fmt; +use typst_syntax::{ + ast::{self, AstNode}, + SyntaxNode, +}; use crate::*; diff --git a/crates/typlite/src/worker.rs b/crates/typlite/src/worker.rs new file mode 100644 index 00000000..d79e0f56 --- /dev/null +++ b/crates/typlite/src/worker.rs @@ -0,0 +1,824 @@ +use std::fmt::{self, Write}; +use std::path::{Path, PathBuf}; +use std::sync::{Arc, LazyLock}; + +use base64::Engine; +use ecow::{eco_format, EcoString}; +use tinymist_analysis; +use tinymist_project::base::ShadowApi; +use tinymist_project::{EntryReader, LspWorld}; +use typst::foundations::{Bytes, Dict, IntoValue}; +use typst::layout::Abs; +use typst::syntax::{FileId, Source, SyntaxKind, SyntaxNode}; +use typst::utils::LazyHash; +use typst::World; +use typst::WorldExt; +use typst_syntax::ast::{self, AstNode}; +use typst_syntax::ast::{Emph, Equation, Heading, Raw, Strong}; + +use crate::scopes::Scopes; +use 
crate::tinymist_std::path::unix_slash; +use crate::value::{Args, Value}; +use crate::worker::SyntaxKind::Text; +use crate::Result; +use crate::TypliteFeat; +use crate::WorkspaceResolver; + +/// Typlite worker for converting syntax nodes to markdown +#[derive(Clone)] +pub struct TypliteWorker { + pub current: FileId, + pub scopes: Arc>, + pub world: Arc, + pub list_depth: usize, + pub prepend_code: EcoString, + pub assets_numbering: usize, + /// Features for the conversion. + pub feat: TypliteFeat, +} + +impl TypliteWorker { + /// Convert the content to a markdown string. + pub fn convert(&mut self, node: &SyntaxNode) -> Result { + Ok(Self::value(self.eval(node)?)) + } + + /// Eval the content + pub fn eval(&mut self, node: &SyntaxNode) -> Result { + use SyntaxKind::*; + let res = match node.kind() { + RawLang | RawDelim | RawTrimmed => Err("converting clause")?, + + Math | MathIdent | MathAlignPoint | MathDelimited | MathAttach | MathPrimes + | MathFrac | MathRoot | MathShorthand | MathText => Err("converting math node")?, + + // Error nodes + Error => Err(node.clone().into_text().to_string())?, + None | End => Ok(Value::None), + + // Non-leaf nodes + Markup => self.reduce(node), + Code => self.reduce(node), + Equation => self.equation(node), + CodeBlock => { + let code_block: ast::CodeBlock = node.cast().unwrap(); + self.eval(code_block.body().to_untyped()) + } + ContentBlock => { + let content_block: ast::ContentBlock = node.cast().unwrap(); + self.eval(content_block.body().to_untyped()) + } + Parenthesized => { + let parenthesized: ast::Parenthesized = node.cast().unwrap(); + self.eval(parenthesized.expr().to_untyped()) + } + + // Text nodes + Text | Space | Parbreak => Self::str(node), + Linebreak => Self::char('\n'), + + // Semantic nodes + Escape => Self::escape(node), + Shorthand => Self::shorthand(node), + SmartQuote => Self::str(node), + Strong => self.strong(node), + Emph => self.emph(node), + Raw => Self::raw(node), + Link => self.link(node), + Label => 
Self::label(node), + Ref => Self::label_ref(node), + RefMarker => Self::ref_marker(node), + Heading => self.heading(node), + HeadingMarker => Self::str(node), + ListItem => self.list_item(node), + ListMarker => Self::str(node), + EnumItem => self.enum_item(node), + EnumMarker => Self::str(node), + TermItem => self.term_item(node), + TermMarker => Self::str(node), + + // Punctuation + // Hash => Self::char('#'), + Hash => Ok(Value::None), + LeftBrace => Self::char('{'), + RightBrace => Self::char('}'), + LeftBracket => Self::char('['), + RightBracket => Self::char(']'), + LeftParen => Self::char('('), + RightParen => Self::char(')'), + Comma => Self::char(','), + Semicolon => Ok(Value::None), + Colon => Self::char(':'), + Star => Self::char('*'), + Underscore => Self::char('_'), + Dollar => Self::char('$'), + Plus => Self::char('+'), + Minus => Self::char('-'), + Slash => Self::char('/'), + Hat => Self::char('^'), + Prime => Self::char('\''), + Dot => Self::char('.'), + Eq => Self::char('='), + Lt => Self::char('<'), + Gt => Self::char('>'), + + // Compound punctuation + EqEq => Self::str(node), + ExclEq => Self::str(node), + LtEq => Self::str(node), + GtEq => Self::str(node), + PlusEq => Self::str(node), + HyphEq => Self::str(node), + StarEq => Self::str(node), + SlashEq => Self::str(node), + Dots => Self::str(node), + Arrow => Self::str(node), + Root => Self::str(node), + + // Keywords + Auto => Self::str(node), + Not => Self::str(node), + And => Self::str(node), + Or => Self::str(node), + Let => Self::str(node), + Set => Self::str(node), + Show => Self::str(node), + Context => Self::str(node), + If => Self::str(node), + Else => Self::str(node), + For => Self::str(node), + In => Self::str(node), + While => Self::str(node), + Break => Self::str(node), + Continue => Self::str(node), + Return => Self::str(node), + Import => Self::str(node), + Include => Self::str(node), + As => Self::str(node), + + LetBinding => self.let_binding(node), + FieldAccess => 
self.field_access(node), + FuncCall => self.func_call(node), + Contextual => self.contextual(node), + + // Clause nodes + Named => Ok(Value::None), + Keyed => Ok(Value::None), + Unary => Ok(Value::None), + Binary => Ok(Value::None), + Spread => Ok(Value::None), + ImportItems => Ok(Value::None), + ImportItemPath => Ok(Value::None), + RenamedImportItem => Ok(Value::None), + Closure => Ok(Value::None), + Args => Ok(Value::None), + Params => Ok(Value::None), + + // Ignored code expressions + Ident => Ok(Value::None), + Bool => Ok(Value::None), + Int => Ok(Value::None), + Float => Ok(Value::None), + Numeric => Ok(Value::None), + Str => Ok(Value::Str({ + let s: ast::Str = node.cast().unwrap(); + s.get() + })), + Array => Ok(Value::None), + Dict => Ok(Value::None), + + // Ignored code expressions + SetRule => Ok(Value::None), + ShowRule => Ok(Value::None), + Destructuring => Ok(Value::None), + DestructAssignment => Ok(Value::None), + + Conditional => Ok(Value::None), + WhileLoop => Ok(Value::None), + ForLoop => Ok(Value::None), + LoopBreak => Ok(Value::None), + LoopContinue => Ok(Value::None), + FuncReturn => Ok(Value::None), + + ModuleImport => Ok(Value::None), + ModuleInclude => self.include(node), + + // Ignored comments + LineComment => Ok(Value::None), + BlockComment => Ok(Value::None), + Shebang => Ok(Value::None), + }; + if res.clone()? 
== Value::None + && !matches!( + node.kind(), + Ident | Bool | Int | Float | Numeric | Str | Array | Dict + ) + { + self.prepend_code += node.clone().into_text(); + if node.kind() != Hash { + self.prepend_code += "\n" + }; + } + res + } + + fn reduce(&mut self, node: &SyntaxNode) -> Result { + let mut s = EcoString::new(); + + for child in node.children() { + s.push_str(&Self::value(self.eval(child)?)); + } + + Ok(Value::Content(s)) + } + + pub fn to_raw_block(&mut self, node: &SyntaxNode, inline: bool) -> Result { + let content = node.clone().into_text(); + + let s = if inline { + let mut s = EcoString::with_capacity(content.len() + 2); + s.push_str("`"); + s.push_str(&content); + s.push_str("`"); + s + } else { + let mut s = EcoString::with_capacity(content.len() + 15); + s.push_str("```"); + let lang = match node.cast::() { + Some(ast::Expr::Text(..) | ast::Expr::Space(..)) => "typ", + Some(..) => "typc", + None => "typ", + }; + s.push_str(lang); + s.push('\n'); + s.push_str(&content); + s.push('\n'); + s.push_str("```"); + s + }; + + Ok(Value::Content(s)) + } + + pub fn render( + &mut self, + prepend_node: &SyntaxNode, + node: &SyntaxNode, + inline: bool, + ) -> Result { + self.assets_numbering += 1; + let prepend_code = prepend_node.clone().into_text(); + let code = node.clone().into_text(); + // if let Some(assets_src_path) = &self.feat.assets_src_path { + // let file_name = assets_src_path + // .join(self.assets_numbering.to_string()) + // .with_extension("typ"); + // if let Err(e) = std::fs::write(&file_name, format!("#{{\n// render_code\n{}\n}}", code)) + // { + // return Err(format!("failed to write code to file: {}", e).into()); + // } + // } + self.render_code(&prepend_code, &code, false, "center", "", inline) + } + + pub fn render_code( + &mut self, + prepend_code: &str, + code: &str, + is_markup: bool, + align: &str, + extra_attrs: &str, + inline: bool, + ) -> Result { + let theme = self.feat.color_theme; + + // let code_file_name = if let 
Some(assets_src_path) = &self.feat.assets_src_path { + // Some( + // assets_src_path + // .join(self.assets_numbering.to_string()) + // .with_extension("typ"), + // ) + // } else { + // None + // }; + + let code_file_name = None; + + let mut render = |theme| self.render_inner(prepend_code, code, is_markup, theme); + + let mut content = EcoString::new(); + + let inline_attrs = if inline { + r#" style="vertical-align: -0.35em""# + } else { + "" + }; + + let write_error = |content: &mut EcoString, err: &str| { + let err = err.replace("`", r#"\`"#); + let _ = write!(content, "```\nRender Error\n{err}\n```"); + }; + + let write_image = |content: &mut EcoString, + file_name: &std::path::Path, + code_file_name: Option<&PathBuf>, + inline_attrs: &str, + extra_attrs: &str| { + if let Some(code_file_name) = code_file_name { + let _ = write!( + content, + r#""#, + code_file_name.display(), + file_name.display() + ); + } else { + let _ = write!( + content, + r#""#, + file_name.display() + ); + } + }; + + let write_picture = |content: &mut EcoString, + dark_file_name: &std::path::Path, + light_file_name: &std::path::Path, + code_file_name: Option<&PathBuf>, + inline_attrs: &str, + extra_attrs: &str| { + if let Some(code_file_name) = code_file_name { + let _ = write!( + content, + r#""#, + code_file_name.display(), + dark_file_name.display(), + light_file_name.display() + ); + } else { + let _ = write!( + content, + r#""#, + dark_file_name.display(), + light_file_name.display() + ); + } + }; + + match theme { + Some(theme) => { + let data = match render(theme) { + Ok(data) => data, + Err(err) if self.feat.soft_error => { + write_error(&mut content, &err.to_string()); + return Ok(Value::Content(content)); + } + Err(err) => return Err(err), + }; + + if !inline { + let _ = write!(content, r#"

"#); + } + if let Some(assets_path) = &self.feat.assets_path { + let file_name = + assets_path.join(format!("{}_{:?}.svg", self.assets_numbering, theme)); + std::fs::write(&file_name, &data) + .map_err(|e| format!("failed to write SVG to file: {}", e))?; + + write_image( + &mut content, + &file_name, + code_file_name.as_ref(), + inline_attrs, + extra_attrs, + ); + } else { + let _ = write!( + content, + r#""# + ); + } + if !inline { + content.push_str("

"); + } + } + None => { + let dark = match render(crate::ColorTheme::Dark) { + Ok(d) => d, + Err(err) if self.feat.soft_error => { + write_error(&mut content, &err.to_string()); + return Ok(Value::Content(content)); + } + Err(err) => return Err(err), + }; + let light = match render(crate::ColorTheme::Light) { + Ok(l) => l, + Err(err) if self.feat.soft_error => { + write_error(&mut content, &err.to_string()); + return Ok(Value::Content(content)); + } + Err(err) => return Err(err), + }; + + if !inline { + let _ = write!(content, r#"

"#); + } + if let Some(assets_path) = &self.feat.assets_path { + let dark_file_name = assets_path.join(format!( + "{}_{:?}.svg", + self.assets_numbering, + crate::ColorTheme::Dark + )); + let light_file_name = assets_path.join(format!( + "{}_{:?}.svg", + self.assets_numbering, + crate::ColorTheme::Light + )); + + write_picture( + &mut content, + &dark_file_name, + &light_file_name, + code_file_name.as_ref(), + inline_attrs, + extra_attrs, + ); + } else { + let _ = write!( + content, + r#""# + ); + } + if !inline { + content.push_str("

"); + } + } + } + + Ok(Value::Content(content)) + } + + fn render_inner( + &mut self, + prepend_code: &str, + code: &str, + is_markup: bool, + theme: crate::ColorTheme, + ) -> Result { + static DARK_THEME_INPUT: LazyLock>> = LazyLock::new(|| { + Arc::new(LazyHash::new(Dict::from_iter(std::iter::once(( + "x-color-theme".into(), + "dark".into_value(), + ))))) + }); + + let code = WrapCode(code, is_markup); + let inputs = match theme { + crate::ColorTheme::Dark => Some(DARK_THEME_INPUT.clone()), + crate::ColorTheme::Light => None, + }; + let code = eco_format!( + r##"{prepend_code} + #set page(width: auto, height: auto, margin: (y: 0.45em, rest: 0em), fill: none); + #set text(fill: rgb("#c0caf5")) if sys.inputs.at("x-color-theme", default: none) == "dark"; + {code}"## + ); + let main = Bytes::new(code.as_bytes().to_owned()); + + let path = Path::new("__render__.typ"); + let entry = self.world.entry_state().select_in_workspace(path); + let mut world = self.world.task(tinymist_project::TaskInputs { + entry: Some(entry), + inputs, + }); + world.take_db(); + world.map_shadow_by_id(world.main(), main).unwrap(); + + let document = typst::compile(&world).output; + let document = document.map_err(|diagnostics| { + let mut err = String::new(); + let _ = write!(err, "compiling node: "); + let write_span = |span: typst_syntax::Span, err: &mut String| { + let file = span.id().map(|id| match id.package() { + Some(package) if WorkspaceResolver::is_package_file(id) => { + format!("{package}:{}", unix_slash(id.vpath().as_rooted_path())) + } + Some(_) | None => unix_slash(id.vpath().as_rooted_path()), + }); + let range = world.range(span); + match (file, range) { + (Some(file), Some(range)) => { + let _ = write!(err, "{file:?}:{range:?}"); + } + (Some(file), None) => { + let _ = write!(err, "{file:?}"); + } + (None, Some(range)) => { + let _ = write!(err, "{range:?}"); + } + _ => { + let _ = write!(err, "unknown location"); + } + } + }; + + for s in diagnostics.iter() { + match 
s.severity { + typst::diag::Severity::Error => { + let _ = write!(err, "error: "); + } + typst::diag::Severity::Warning => { + let _ = write!(err, "warning: "); + } + } + + err.push_str(&s.message); + err.push_str(" at "); + write_span(s.span, &mut err); + + for hint in s.hints.iter() { + err.push_str("\nHint: "); + err.push_str(hint); + } + + for trace in &s.trace { + write!(err, "\nTrace: {} at ", trace.v).unwrap(); + write_span(trace.span, &mut err); + } + + err.push('\n'); + } + + err + })?; + + let svg_payload = typst_svg::svg_merged(&document, Abs::zero()); + + if let Some(assets_path) = &self.feat.assets_path { + let file_name = assets_path.join(format!("{}_{:?}.svg", self.assets_numbering, theme)); + if let Err(e) = std::fs::write(&file_name, &svg_payload) { + return Err(format!("failed to write SVG to file: {}", e).into()); + } + Ok(file_name.to_string_lossy().to_string()) + } else { + Ok(base64::engine::general_purpose::STANDARD.encode(svg_payload)) + } + } + + fn char(arg: char) -> Result { + Ok(Value::Content(arg.into())) + } + + fn str(node: &SyntaxNode) -> Result { + Ok(Value::Content(node.clone().into_text())) + } + + pub fn value(res: Value) -> EcoString { + match res { + Value::None => EcoString::new(), + Value::Content(content) => content, + Value::Str(s) => s, + Value::Image { path, alt } => eco_format!("![{alt}]({path})"), + _ => eco_format!("{res:?}"), + } + } + + fn escape(node: &SyntaxNode) -> Result { + // todo: escape characters + Self::str(node) + } + + fn shorthand(node: &SyntaxNode) -> Result { + // todo: shorthands + Self::str(node) + } + + fn strong(&mut self, node: &SyntaxNode) -> Result { + let mut s = EcoString::new(); + + let strong = node.cast::().unwrap(); + s.push_str("**"); + s.push_str(&Self::value(self.eval(strong.body().to_untyped())?)); + s.push_str("**"); + + Ok(Value::Content(s)) + } + + fn emph(&mut self, node: &SyntaxNode) -> Result { + let mut s = EcoString::new(); + let emph = node.cast::().unwrap(); + s.push('_'); + 
s.push_str(&Self::value(self.eval(emph.body().to_untyped())?)); + s.push('_'); + Ok(Value::Content(s)) + } + + fn heading(&mut self, node: &SyntaxNode) -> Result { + let mut s = EcoString::new(); + let heading = node.cast::().unwrap(); + let level = heading.depth(); + for _ in 0..level.get() { + s.push('#'); + } + s.push(' '); + s.push_str(&Self::value(self.eval(heading.body().to_untyped())?)); + Ok(Value::Content(s)) + } + + fn raw(node: &SyntaxNode) -> Result { + let mut s = EcoString::new(); + let raw = node.cast::().unwrap(); + + // Raw codes with typlite language will not be treated as a code block but + // directly output into the Markdown result. + if let Some(lang) = raw.lang() { + if &EcoString::from("typlite") == lang.get() { + for line in raw.lines() { + s.push_str(&Self::value(Self::str(line.to_untyped())?)); + s.push('\n'); + } + return Ok(Value::Content(s)); + } + } + + if raw.block() { + s.push_str(&Self::value(Self::str(node)?)); + return Ok(Value::Content(s)); + } + s.push('`'); + for line in raw.lines() { + s.push_str(&Self::value(Self::str(line.to_untyped())?)); + } + s.push('`'); + Ok(Value::Content(s)) + } + + fn link(&mut self, node: &SyntaxNode) -> Result { + // GFM supports autolinks + if self.feat.gfm { + return Self::str(node); + } + let mut s = EcoString::new(); + s.push('['); + s.push_str(&Self::value(Self::str(node)?)); + s.push(']'); + s.push('('); + s.push_str(&Self::value(Self::str(node)?)); + s.push(')'); + + Ok(Value::Content(s)) + } + + fn label(_node: &SyntaxNode) -> Result { + Result::Ok(Value::None) + } + + fn label_ref(node: &SyntaxNode) -> Result { + Self::str(node) + } + + fn ref_marker(node: &SyntaxNode) -> Result { + Self::str(node) + } + + fn list_item(&mut self, node: &SyntaxNode) -> Result { + let mut s = EcoString::new(); + + let list_item = node.cast::().unwrap(); + + for _ in 0..self.list_depth { + s.push_str(" "); + } + + s.push_str("- "); + if self.feat.annotate_elem { + let _ = write!(s, "", self.list_depth); + 
self.list_depth += 1; + } + s.push_str(&Self::value(self.eval(list_item.body().to_untyped())?)); + if self.feat.annotate_elem { + self.list_depth -= 1; + let _ = write!(s, "", self.list_depth); + } + + Ok(Value::Content(s)) + } + + fn enum_item(&mut self, node: &SyntaxNode) -> Result { + let enum_item = node.cast::().unwrap(); + let mut s = EcoString::new(); + + for _ in 0..self.list_depth { + s.push_str(" "); + } + + if self.feat.annotate_elem { + let _ = write!(s, "", self.list_depth); + self.list_depth += 1; + } + + if let Some(num) = enum_item.number() { + s.push_str(&format!("{}. ", num)); + } else { + s.push_str("1. "); + } + + s.push_str(&Self::value(self.eval(enum_item.body().to_untyped())?)); + + if self.feat.annotate_elem { + self.list_depth -= 1; + let _ = write!(s, "", self.list_depth); + } + + Ok(Value::Content(s)) + } + + fn term_item(&mut self, node: &SyntaxNode) -> Result { + self.reduce(node) + } + + fn equation(&mut self, node: &SyntaxNode) -> Result { + let equation: Equation = node.cast().unwrap(); + + if self.feat.remove_html { + return self.to_raw_block(node, !equation.block()); + } + + self.render(&SyntaxNode::leaf(Text, ""), node, !equation.block()) + } + + fn let_binding(&self, node: &SyntaxNode) -> Result { + let _ = node; + + Ok(Value::None) + } + + fn field_access(&self, node: &SyntaxNode) -> Result { + let _ = node; + + Ok(Value::None) + } + + fn func_call(&mut self, node: &SyntaxNode) -> Result { + let c: ast::FuncCall = node.cast().unwrap(); + + let callee = match c.callee() { + ast::Expr::Ident(callee) => self.scopes.get(callee.get()), + ast::Expr::FieldAccess(..) => return Ok(Value::None), + _ => return Ok(Value::None), + }?; + + let Value::RawFunc(func) = callee else { + return Err("callee is not a function")?; + }; + + func(Args::new(self, c.args())) + } + + fn contextual(&mut self, node: &SyntaxNode) -> Result { + if self.feat.remove_html { + return self.to_raw_block(node, false); + } + // Trim the last `#` in the prepend code. 
(#context) + self.prepend_code = self.prepend_code.trim_end_matches('#').into(); + self.render( + &SyntaxNode::leaf(node.kind(), self.prepend_code.clone()), + node, + false, + ) + } + + fn include(&self, node: &SyntaxNode) -> Result { + let include: ast::ModuleInclude = node.cast().unwrap(); + + let path = include.source(); + let src = + tinymist_analysis::syntax::find_source_by_expr(self.world.as_ref(), self.current, path) + .ok_or_else(|| format!("failed to find source on path {path:?}"))?; + + self.clone().sub_file(src).map(Value::Content) + } + + fn sub_file(mut self, src: Source) -> Result { + self.current = src.id(); + self.convert(src.root()) + } +} + +struct WrapCode<'a>(&'a str, bool); + +impl fmt::Display for WrapCode<'_> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let is_markup = self.1; + if is_markup { + f.write_str("#[")?; + } else { + f.write_str("#{")?; + } + f.write_str(self.0)?; + if is_markup { + f.write_str("]") + } else { + f.write_str("}") + } + } +} diff --git a/crates/typlite/src/writer/markdown.rs b/crates/typlite/src/writer/markdown.rs new file mode 100644 index 00000000..9d89d663 --- /dev/null +++ b/crates/typlite/src/writer/markdown.rs @@ -0,0 +1,31 @@ +//! 
Markdown writer implementation + +use cmark_writer::ast::Node; +use cmark_writer::writer::CommonMarkWriter; +use ecow::EcoString; + +use crate::common::FormatWriter; +use crate::Result; + +/// Markdown writer implementation +#[derive(Default)] +pub struct MarkdownWriter {} + +impl MarkdownWriter { + pub fn new() -> Self { + Self {} + } +} + +impl FormatWriter for MarkdownWriter { + fn write_eco(&mut self, document: &Node, output: &mut EcoString) -> Result<()> { + let mut writer = CommonMarkWriter::new(); + writer.write(document).expect("Failed to write document"); + output.push_str(&writer.into_string()); + Ok(()) + } + + fn write_vec(&mut self, _document: &Node) -> Result> { + Err("Markdown writer does not support writing to Vec".into()) + } +} diff --git a/crates/typlite/src/writer/mod.rs b/crates/typlite/src/writer/mod.rs new file mode 100644 index 00000000..6f4d9edd --- /dev/null +++ b/crates/typlite/src/writer/mod.rs @@ -0,0 +1,25 @@ +//! Writer implementations for different output formats + +pub mod markdown; + +pub use markdown::MarkdownWriter; + +use crate::common::{Format, FormatWriter}; + +/// Create a writer instance based on the specified format +pub fn create_writer(format: Format) -> Box { + match format { + Format::Md => Box::new(markdown::MarkdownWriter::new()), + Format::LaTeX | Format::Docx => { + panic!("LaTeX and Docx writers are not implemented yet") + } + } +} + +pub struct WriterFactory; + +impl WriterFactory { + pub fn create(format: Format) -> Box { + create_writer(format) + } +}