From c0d5f0c800dadade17b2ca9686b0983fed41c8cc Mon Sep 17 00:00:00 2001 From: Hong Jiarong Date: Mon, 16 Jun 2025 06:20:09 +0800 Subject: [PATCH] refactor(typlite): use EcoString instead (#1815) * feat: update cmark-writer to version 0.7.6 and refactor string handling to use EcoString * dev: make use of eco string --------- Co-authored-by: Myriad-Dreamin --- Cargo.lock | 11 ++-- Cargo.toml | 2 +- crates/typlite/src/common.rs | 36 +++++----- crates/typlite/src/parser/core.rs | 16 ++--- crates/typlite/src/parser/inline.rs | 7 +- crates/typlite/src/parser/list.rs | 7 +- crates/typlite/src/parser/media.rs | 84 ++++++++++++------------ crates/typlite/src/parser/table.rs | 3 +- crates/typlite/src/writer/docx/writer.rs | 4 +- crates/typlite/src/writer/latex.rs | 4 +- 10 files changed, 89 insertions(+), 85 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 658777e55..3d4eb7373 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -581,11 +581,12 @@ dependencies = [ [[package]] name = "cmark-writer" -version = "0.7.5" +version = "0.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a132e7fc9fddc446c3930910894eab669241332af90981e53eb1b782bd70b77a" +checksum = "7f10f0b392cc21023a462ef0972118edbf4616f5eb8546f3434c53353ba12f9a" dependencies = [ "cmark-writer-macros", + "ecow", "env_logger", "html-escape", "log", @@ -593,9 +594,9 @@ dependencies = [ [[package]] name = "cmark-writer-macros" -version = "0.7.5" +version = "0.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d45366269a70c39928d4fd7d95045250757be31ac93b5e85e3a8469b2690d93a" +checksum = "be7dbe651b0dee1bfd8a1a93efe76a43d6bbdee978b802102a9c14d070e8221c" dependencies = [ "proc-macro2", "quote", @@ -5593,7 +5594,7 @@ version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb" dependencies = [ - "windows-sys 0.48.0", + "windows-sys 0.59.0", ] [[package]] diff --git 
a/Cargo.toml b/Cargo.toml index d1127929b..a11dc60c8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -97,7 +97,7 @@ rpds = "1" # Data/Text Format and Processing biblatex = "0.10" -cmark-writer = { version = "0.7.5", features = ["gfm"] } +cmark-writer = { version = "0.7.6", features = ["gfm"] } docx-rs = { version = "0.4.18-rc19", git = "https://github.com/Myriad-Dreamin/docx-rs", default-features = false, rev = "db49a729f68dbdb9e8e91857fbb1c3d414209871" } hayagriva = "0.8" hex = "0.4.3" diff --git a/crates/typlite/src/common.rs b/crates/typlite/src/common.rs index 94215c567..a5fc73a96 100644 --- a/crates/typlite/src/common.rs +++ b/crates/typlite/src/common.rs @@ -10,6 +10,7 @@ use cmark_writer::HtmlWriter; use cmark_writer::HtmlWriterOptions; use cmark_writer::WriteResult; use cmark_writer::WriterOptions; +use ecow::eco_format; use ecow::EcoString; use std::path::PathBuf; @@ -56,10 +57,10 @@ impl FigureNode { fn write_html_custom(&self, writer: &mut HtmlWriter) -> HtmlWriteResult<()> { let body = self.body.clone(); let node = Node::HtmlElement(HtmlElement { - tag: "figure".to_string(), + tag: EcoString::inline("figure"), attributes: vec![HtmlAttribute { - name: "class".to_string(), - value: "figure".to_string(), + name: EcoString::inline("class"), + value: EcoString::inline("figure"), }], children: vec![*body], self_closing: false, @@ -76,7 +77,7 @@ pub struct ExternalFrameNode { /// The path to the external file containing the frame pub file_path: PathBuf, /// Alternative text for the frame - pub alt_text: String, + pub alt_text: EcoString, /// Original SVG data (needed for DOCX that still embeds images) pub svg: String, } @@ -94,14 +95,14 @@ impl ExternalFrameNode { fn write_html_custom(&self, writer: &mut HtmlWriter) -> HtmlWriteResult<()> { let node = Node::HtmlElement(HtmlElement { - tag: "img".to_string(), + tag: EcoString::inline("img"), attributes: vec![ HtmlAttribute { - name: "src".to_string(), - value: self.file_path.display().to_string(), + name: 
EcoString::inline("src"), + value: self.file_path.display().to_string().into(), }, HtmlAttribute { - name: "alt".to_string(), + name: EcoString::inline("alt"), value: self.alt_text.clone(), }, ], @@ -134,7 +135,7 @@ impl HighlightNode { fn write_html_custom(&self, writer: &mut HtmlWriter) -> HtmlWriteResult<()> { let node = Node::HtmlElement(HtmlElement { - tag: "mark".to_string(), + tag: EcoString::inline("mark"), attributes: vec![], children: self.content.clone(), self_closing: false, @@ -156,10 +157,10 @@ impl CenterNode { pub fn new(children: Vec<Node>) -> Self { CenterNode { node: Node::HtmlElement(cmark_writer::ast::HtmlElement { - tag: "p".to_string(), + tag: EcoString::inline("p"), attributes: vec![cmark_writer::ast::HtmlAttribute { - name: "align".to_string(), - value: "center".to_string(), + name: EcoString::inline("align"), + value: EcoString::inline("center"), }], children, self_closing: false, @@ -225,10 +226,11 @@ pub struct AlertNode { impl AlertNode { fn write_custom(&self, writer: &mut CommonMarkWriter) -> WriteResult<()> { let quote = Node::BlockQuote(vec![ - Node::Paragraph(vec![Node::Text( - "[!".to_string() + &self.class.clone().to_string().to_ascii_uppercase() + "]", - )]), - Node::Paragraph(vec![Node::Text("".to_string())]), + Node::Paragraph(vec![Node::Text(eco_format!( + "[!{}]", + self.class.to_ascii_uppercase() + ))]), + Node::Paragraph(vec![Node::Text("".into())]), ]); let mut tmp_writer = CommonMarkWriter::with_options(WriterOptions { escape_special_chars: false, @@ -239,7 +241,7 @@ impl AlertNode { let quote = Node::BlockQuote(self.content.clone()); let mut tmp_writer = CommonMarkWriter::with_options(writer.options.clone()); tmp_writer.write(&quote)?; - content += &tmp_writer.into_string(); + content += tmp_writer.into_string(); writer.write_str(&content)?; Ok(()) } diff --git a/crates/typlite/src/parser/core.rs b/crates/typlite/src/parser/core.rs index 8d185ba3e..fe861d929 100644 --- a/crates/typlite/src/parser/core.rs +++ 
b/crates/typlite/src/parser/core.rs @@ -4,6 +4,7 @@ use std::sync::Arc; use cmark_writer::ast::{CustomNode, HtmlAttribute, HtmlElement as CmarkHtmlElement, Node}; use cmark_writer::{CommonMarkWriter, WriteResult}; +use ecow::EcoString; use tinymist_project::LspWorld; use typst::html::{tag, HtmlElement, HtmlNode}; @@ -98,9 +99,9 @@ impl HtmlToAstParser { if attrs.block { self.flush_inline_buffer(); self.blocks - .push(Node::code_block(Some(attrs.lang.into()), attrs.text.into())); + .push(Node::code_block(Some(attrs.lang), attrs.text)); } else { - self.inline_buffer.push(Node::InlineCode(attrs.text.into())); + self.inline_buffer.push(Node::InlineCode(attrs.text)); } Ok(()) } @@ -201,8 +202,8 @@ impl HtmlToAstParser { .0 .iter() .map(|(name, value)| HtmlAttribute { - name: name.to_string(), - value: value.to_string(), + name: name.resolve().to_string().into(), + value: value.clone(), }) .collect(); @@ -210,7 +211,7 @@ impl HtmlToAstParser { self.convert_children_into(&mut children, element)?; Ok(Node::HtmlElement(CmarkHtmlElement { - tag: element.tag.resolve().to_string(), + tag: element.tag.resolve().to_string().into(), attributes, children, self_closing: element.children.is_empty(), @@ -235,8 +236,7 @@ impl HtmlToAstParser { for child in &element.children { match child { HtmlNode::Text(text, _) => { - self.inline_buffer - .push(Node::Text(text.as_str().to_string())); + self.inline_buffer.push(Node::Text(text.clone())); } HtmlNode::Element(element) => { self.convert_element(element)?; @@ -265,7 +265,7 @@ impl HtmlToAstParser { } #[derive(Debug, Clone)] -pub(crate) struct Comment(pub String); +pub(crate) struct Comment(pub EcoString); impl CustomNode for Comment { fn as_any(&self) -> &dyn std::any::Any { diff --git a/crates/typlite/src/parser/inline.rs b/crates/typlite/src/parser/inline.rs index c86e06ff4..62446a7d7 100644 --- a/crates/typlite/src/parser/inline.rs +++ b/crates/typlite/src/parser/inline.rs @@ -49,7 +49,7 @@ impl HtmlToAstParser { let mut content = 
Vec::new(); self.convert_children_into(&mut content, element)?; self.inline_buffer.push(Node::Link { - url: attrs.dest.into(), + url: attrs.dest, title: None, content, }); @@ -59,11 +59,10 @@ impl HtmlToAstParser { /// Convert image element pub fn convert_image(&mut self, element: &HtmlElement) -> Result<()> { let attrs = ImageAttr::parse(&element.attrs)?; - let src = attrs.src.as_str(); self.inline_buffer.push(Node::Image { - url: src.to_string(), + url: attrs.src, title: None, - alt: vec![Node::Text(attrs.alt.into())], + alt: vec![Node::Text(attrs.alt)], }); Ok(()) } diff --git a/crates/typlite/src/parser/list.rs b/crates/typlite/src/parser/list.rs index 262370e16..71d016713 100644 --- a/crates/typlite/src/parser/list.rs +++ b/crates/typlite/src/parser/list.rs @@ -1,6 +1,7 @@ //! HTML list parsing module, handling conversion of ordered and unordered lists use cmark_writer::ast::{ListItem, Node}; +use ecow::eco_format; use typst::html::{tag, HtmlElement, HtmlNode}; use crate::attributes::{ListItemAttr, TypliteAttrsParser}; @@ -32,7 +33,7 @@ impl ListParser { let mut li_buffer = Vec::new(); if parser.feat.annotate_elem { - li_buffer.push(Node::Custom(Box::new(super::core::Comment(format!( + li_buffer.push(Node::Custom(Box::new(super::core::Comment(eco_format!( "typlite:begin:list-item {}", parser.list_level - 1 ))))); @@ -41,7 +42,7 @@ impl ListParser { for li_child in &li.children { match li_child { HtmlNode::Text(text, _) => { - li_buffer.push(Node::Text(text.as_str().to_string())); + li_buffer.push(Node::Text(text.clone())); } HtmlNode::Element(child_elem) => { let element_content = @@ -56,7 +57,7 @@ impl ListParser { } if parser.feat.annotate_elem { - li_buffer.push(Node::Custom(Box::new(super::core::Comment(format!( + li_buffer.push(Node::Custom(Box::new(super::core::Comment(eco_format!( "typlite:end:list-item {}", parser.list_level - 1 ))))); diff --git a/crates/typlite/src/parser/media.rs b/crates/typlite/src/parser/media.rs index daa9b8de5..e713a441e 100644 
--- a/crates/typlite/src/parser/media.rs +++ b/crates/typlite/src/parser/media.rs @@ -6,7 +6,7 @@ use std::sync::{Arc, LazyLock}; use base64::Engine; use cmark_writer::ast::{HtmlAttribute, HtmlElement as CmarkHtmlElement, Node}; -use ecow::eco_format; +use ecow::{eco_format, EcoString}; use tinymist_project::{base::ShadowApi, EntryReader, TaskInputs, MEMORY_MAIN_ENTRY}; use typst::{ foundations::{Bytes, Dict, IntoValue}, @@ -47,12 +47,12 @@ impl HtmlToAstParser { if element.children.len() != 1 { // Construct error node return Node::HtmlElement(CmarkHtmlElement { - tag: "div".to_string(), + tag: EcoString::inline("div"), attributes: vec![HtmlAttribute { - name: "class".to_string(), - value: "error".to_string(), + name: EcoString::inline("class"), + value: EcoString::inline("error"), }], - children: vec![Node::Text(format!( + children: vec![Node::Text(eco_format!( "source contains not only one child: {}, whose attrs: {:?}", element.children.len(), element.attrs @@ -64,12 +64,12 @@ impl HtmlToAstParser { let Some(HtmlNode::Frame(frame)) = element.children.first() else { // todo: utils to remove duplicated error construction return Node::HtmlElement(CmarkHtmlElement { - tag: "div".to_string(), + tag: EcoString::inline("div"), attributes: vec![HtmlAttribute { - name: "class".to_string(), - value: "error".to_string(), + name: EcoString::inline("class"), + value: EcoString::inline("error"), }], - children: vec![Node::Text(format!( + children: vec![Node::Text(eco_format!( "source contains not a frame, but: {:?}", element.children ))], @@ -83,12 +83,12 @@ impl HtmlToAstParser { Err(e) => { // Construct error node return Node::HtmlElement(CmarkHtmlElement { - tag: "div".to_string(), + tag: EcoString::inline("div"), attributes: vec![HtmlAttribute { - name: "class".to_string(), - value: "error".to_string(), + name: EcoString::inline("class"), + value: EcoString::inline("error"), }], - children: vec![Node::Text(format!("Error creating source URL: {e}"))], + children: 
vec![Node::Text(eco_format!("Error creating source URL: {e}"))], self_closing: false, }); } @@ -103,17 +103,15 @@ impl HtmlToAstParser { }); Node::HtmlElement(CmarkHtmlElement { - tag: "source".to_string(), + tag: EcoString::inline("source"), attributes: vec![ HtmlAttribute { - name: "media".to_string(), - value: media - .map(|m| m.to_string()) - .unwrap_or_else(|| "all".to_string()), + name: EcoString::inline("media"), + value: media.unwrap_or_else(|| "all".into()), }, HtmlAttribute { - name: "srcset".to_string(), - value: frame_url.to_string(), + name: EcoString::inline("srcset"), + value: frame_url.to_string().into(), }, ], children: vec![], @@ -125,7 +123,7 @@ impl HtmlToAstParser { pub fn convert_frame(&mut self, frame: &Frame) -> Node { if self.feat.remove_html { // todo: make error silent is not good. - return Node::Text(String::new()); + return Node::Text(EcoString::new()); } let svg = typst_svg::svg_frame(frame); @@ -139,7 +137,7 @@ impl HtmlToAstParser { Ok(url @ AssetUrl::Embedded(..)) => Self::create_embedded_frame(&url), Ok(AssetUrl::External(file_path)) => Node::Custom(Box::new(ExternalFrameNode { file_path, - alt_text: "typst-frame".to_string(), + alt_text: EcoString::inline("typst-frame"), svg, })), Err(e) => { @@ -149,12 +147,12 @@ impl HtmlToAstParser { } else { // Construct error node Node::HtmlElement(CmarkHtmlElement { - tag: "div".to_string(), + tag: EcoString::inline("div"), attributes: vec![HtmlAttribute { - name: "class".to_string(), - value: "error".to_string(), + name: EcoString::inline("class"), + value: EcoString::inline("error"), }], - children: vec![Node::Text(format!("Error creating frame URL: {}", e))], + children: vec![Node::Text(eco_format!("Error creating frame URL: {e}"))], self_closing: false, }) } @@ -165,15 +163,15 @@ impl HtmlToAstParser { /// Create embedded frame node fn create_embedded_frame(url: &AssetUrl) -> Node { Node::HtmlElement(CmarkHtmlElement { - tag: "img".to_string(), + tag: EcoString::inline("img"), 
attributes: vec![ HtmlAttribute { - name: "alt".to_string(), - value: "typst-block".to_string(), + name: EcoString::inline("alt"), + value: EcoString::inline("typst-block"), }, HtmlAttribute { - name: "src".to_string(), - value: url.to_string(), + name: EcoString::inline("src"), + value: url.to_string().into(), }, ], children: vec![], @@ -213,22 +211,24 @@ impl HtmlToAstParser { if self.feat.remove_html { eprintln!("Removing idoc element due to remove_html feature"); // todo: make error silent is not good. - return Node::Text(String::new()); + return Node::Text(EcoString::new()); } let attrs = match IdocAttr::parse(&element.attrs) { Ok(attrs) => attrs, Err(e) => { if self.feat.soft_error { - return Node::Text(format!("Error parsing idoc attributes: {e}")); + return Node::Text(eco_format!("Error parsing idoc attributes: {e}")); } else { // Construct error node return Node::HtmlElement(CmarkHtmlElement { - tag: "div".to_string(), + tag: EcoString::inline("div"), attributes: vec![HtmlAttribute { - name: "class".to_string(), - value: "error".to_string(), + name: EcoString::inline("class"), + value: EcoString::inline("error"), }], - children: vec![Node::Text(format!("Error parsing idoc attributes: {e}"))], + children: vec![Node::Text(eco_format!( + "Error parsing idoc attributes: {e}" + ))], self_closing: false, }); } @@ -275,16 +275,16 @@ impl HtmlToAstParser { Ok(doc) => doc, Err(e) => { if self.feat.soft_error { - return Node::Text(format!("Error compiling idoc: {e:?}")); + return Node::Text(eco_format!("Error compiling idoc: {e:?}")); } else { // Construct error node return Node::HtmlElement(CmarkHtmlElement { - tag: "div".to_string(), + tag: EcoString::inline("div"), attributes: vec![HtmlAttribute { - name: "class".to_string(), - value: "error".to_string(), + name: EcoString::inline("class"), + value: EcoString::inline("error"), }], - children: vec![Node::Text(format!("Error compiling idoc: {e:?}"))], + children: vec![Node::Text(eco_format!("Error compiling idoc: 
{e:?}"))], self_closing: false, }); } diff --git a/crates/typlite/src/parser/table.rs b/crates/typlite/src/parser/table.rs index 7f1df1cb0..2144b5ef9 100644 --- a/crates/typlite/src/parser/table.rs +++ b/crates/typlite/src/parser/table.rs @@ -2,6 +2,7 @@ use cmark_writer::ast::Node; use cmark_writer::gfm::TableAlignment; +use ecow::EcoString; use typst::html::{tag, HtmlElement, HtmlNode}; use typst::utils::PicoStr; @@ -181,7 +182,7 @@ impl TableParser { /// Merge cell content nodes into a single node fn merge_cell_content(content: Vec<Node>) -> Node { match content.len() { - 0 => Node::Text("".to_string()), + 0 => Node::Text(EcoString::new()), 1 => content.into_iter().next().unwrap(), _ => Node::Custom(Box::new(InlineNode { content })), } diff --git a/crates/typlite/src/writer/docx/writer.rs b/crates/typlite/src/writer/docx/writer.rs index 28eb6d22c..7375c27a7 100644 --- a/crates/typlite/src/writer/docx/writer.rs +++ b/crates/typlite/src/writer/docx/writer.rs @@ -81,7 +81,7 @@ impl DocxWriter { } = node { // Process the image - if let Ok(img_data) = fs::read(url) { + if let Ok(img_data) = fs::read(url.as_str()) { let alt_text = figure_node.caption.clone(); // Add the image with caption docx = self.image_processor.process_image_data( @@ -191,7 +191,7 @@ impl DocxWriter { title: _, alt: _, } => { - if let Ok(img_data) = fs::read(url) { + if let Ok(img_data) = fs::read(url.as_str()) { run = self.image_processor.process_inline_image(run, &img_data)?; } else { run = run.add_text(format!("[Image not found: {}]", url)); diff --git a/crates/typlite/src/writer/latex.rs b/crates/typlite/src/writer/latex.rs index db1b25fca..646ded63d 100644 --- a/crates/typlite/src/writer/latex.rs +++ b/crates/typlite/src/writer/latex.rs @@ -248,7 +248,7 @@ impl LaTeXWriter { } = node { // Path to the image file - let path = unix_slash(Path::new(url)); + let path = unix_slash(Path::new(url.as_str())); // Write includegraphics command output.push_str("\\includegraphics[width=0.8\\textwidth]{"); @@ 
-352,7 +352,7 @@ impl LaTeXWriter { "".into() }; - let path = unix_slash(Path::new(url)); + let path = unix_slash(Path::new(&url.as_str())); output.push_str("\\begin{figure}\n"); output.push_str("\\centering\n");