mirror of
https://github.com/Myriad-Dreamin/tinymist.git
synced 2025-08-04 18:28:02 +00:00
feat: typlite supports docx export (#1727)
* feat: docx impl Revert "build: remove other cargo deps" This reverts commit 059b480b1f635d1e8f5be7c191075efcf959b40b. feat(typlite): Docx export and export markdown in cmark-writer (#1698) * feat: docx export support * refactor: simplify DocxConverter structure and improve content handling * tests: add binary insta for docx * feat: add MathBlock style and improve frame rendering in DocxConverter * fix: enhance paragraph creation(silly method) * fix: enhance math equation rendering * use md5 instead of docx binary * feat: enhance list numbering and paragraph handling in DocxConverter * Refactor DOCX converter to improve list handling and document structure - Introduced separate methods for creating ordered and unordered list numbering. - Enhanced list management by tracking next numbering IDs. - Consolidated paragraph and run management within the DocxConverter. - Improved image processing with better error handling and placeholder support. - Streamlined the handling of various HTML elements, including headings, lists, and images. - Added functionality for processing captions and preformatted blocks. - Updated methods for processing inline styles and links. 
* feat: update cmark-writer to version 0.2.0 * feat: refactor code block handling in DOCX converter for improved readability * refactor: refactor DOCX converter to enhance document structure * refactor docx to separated files * chore: update cmark-writer version to 0.3.0 * fix: ol custom value * feat: table and grid processing * use cmark-writer's ast node for consistency * fix: update snapshot hashes for document generation tests * fix: add preamble * update snapshot hashes * refactor DOCX conversion: Split writer functionality into separate module, enhance image processing, and clean up utility functions * update comments in LaTeX and Markdown converters for clarity and consistency * fmt * delete utils * feat: support figure node by custom node in cmark-writer * fix * fix: frame * feat: enhance table conversion logic in MarkdownConverter * refactor: simplify FigureNode implementation by removing CustomNode trait * chore: update cmark-writer to version 0.5.0 * fix: update figure and raw inline snapshots for consistency * fix: update snapshot hashes and correct caption reference in markdown.typ * refactor proj structure * feat: update CompileArgs to support multiple output files and remove debug option * docs: update README to clarify usage of multiple output formats and comment out feature section * remove DocxConverter module * fix: update snapshots for figure caption, list, outline, and docx generation * update tests Co-Authored-By: Hong Jiarong <me@jrhim.com> * test: docx snapshots * fix: use old resvg * feat: make docx opt-in * fix: image process on our hand * dev: remove support to rarely used image formats * feat: use new base64 * test: update snapshot * fix: dim calc * fix: dim calc 2 * test: update snapshot --------- Co-authored-by: Hong Jiarong <me@jrhim.com>
This commit is contained in:
parent
3ba3211d1a
commit
6b7ca47f23
48 changed files with 1450 additions and 6 deletions
34
Cargo.lock
generated
34
Cargo.lock
generated
|
@ -1034,6 +1034,19 @@ dependencies = [
|
|||
"syn 2.0.100",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "docx-rs"
|
||||
version = "0.4.18-rc19"
|
||||
source = "git+https://github.com/Myriad-Dreamin/docx-rs?rev=db49a729f68dbdb9e8e91857fbb1c3d414209871#db49a729f68dbdb9e8e91857fbb1c3d414209871"
|
||||
dependencies = [
|
||||
"base64",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"thiserror 1.0.69",
|
||||
"xml-rs",
|
||||
"zip",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "downcast-rs"
|
||||
version = "1.2.1"
|
||||
|
@ -4765,9 +4778,12 @@ dependencies = [
|
|||
"clap",
|
||||
"cmark-writer",
|
||||
"comemo",
|
||||
"docx-rs",
|
||||
"ecow",
|
||||
"image",
|
||||
"insta",
|
||||
"regex",
|
||||
"resvg",
|
||||
"tinymist-analysis",
|
||||
"tinymist-derive",
|
||||
"tinymist-project",
|
||||
|
@ -5907,6 +5923,12 @@ dependencies = [
|
|||
"rustix",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "xml-rs"
|
||||
version = "0.8.26"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a62ce76d9b56901b19a74f19431b0d8b3bc7ca4ad685a746dfd78ca8f4fc6bda"
|
||||
|
||||
[[package]]
|
||||
name = "xmlparser"
|
||||
version = "0.13.6"
|
||||
|
@ -6071,6 +6093,18 @@ dependencies = [
|
|||
"syn 2.0.100",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "zip"
|
||||
version = "0.6.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "760394e246e4c28189f19d488c058bf16f564016aefac5d32bb1f3b51d5e9261"
|
||||
dependencies = [
|
||||
"byteorder",
|
||||
"crc32fast",
|
||||
"crossbeam-utils",
|
||||
"flate2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "zune-core"
|
||||
version = "0.4.12"
|
||||
|
|
|
@ -33,7 +33,16 @@ typst.workspace = true
|
|||
typst-svg.workspace = true
|
||||
typst-syntax.workspace = true
|
||||
typst-html.workspace = true
|
||||
regex.workspace = true
|
||||
cmark-writer = { version = "0.6.3", features = ["gfm"] }
|
||||
docx-rs = { git = "https://github.com/Myriad-Dreamin/docx-rs", default-features = false, rev = "db49a729f68dbdb9e8e91857fbb1c3d414209871", optional = true }
|
||||
# typst can only support these formats.
|
||||
image = { version = "0.25.6", default-features = false, features = [
|
||||
"png",
|
||||
"jpeg",
|
||||
"gif",
|
||||
], optional = true }
|
||||
resvg = { version = "0.43.0", optional = true }
|
||||
|
||||
[dev-dependencies]
|
||||
insta.workspace = true
|
||||
|
@ -41,10 +50,11 @@ regex.workspace = true
|
|||
tinymist-tests.workspace = true
|
||||
|
||||
[features]
|
||||
default = ["cli", "embed-fonts", "no-content-hint"]
|
||||
default = ["cli", "embed-fonts", "no-content-hint", "docx"]
|
||||
clap = ["dep:clap"]
|
||||
cli = ["clap", "clap/wrap_help"]
|
||||
no-content-hint = ["tinymist-project/no-content-hint"]
|
||||
docx = ["docx-rs", "image", "resvg"]
|
||||
|
||||
# Embeds Typst's default fonts for
|
||||
# - text (Linux Libertine),
|
||||
|
|
|
@ -19,6 +19,7 @@ pub enum ListState {
|
|||
pub enum Format {
|
||||
Md,
|
||||
LaTeX,
|
||||
#[cfg(feature = "docx")]
|
||||
Docx,
|
||||
}
|
||||
|
||||
|
|
|
@ -0,0 +1,6 @@
|
|||
---
|
||||
source: crates/typlite/src/tests.rs
|
||||
expression: hash
|
||||
input_file: crates/typlite/src/fixtures/integration/base.typ
|
||||
---
|
||||
siphash128_13:f242a739ddf7cdce8041455cd09bf221
|
|
@ -0,0 +1,6 @@
|
|||
---
|
||||
source: crates/typlite/src/tests.rs
|
||||
expression: hash
|
||||
input_file: crates/typlite/src/fixtures/integration/enum.typ
|
||||
---
|
||||
siphash128_13:120c2e9245d767d648fd52a8564c9efc
|
|
@ -0,0 +1,6 @@
|
|||
---
|
||||
source: crates/typlite/src/tests.rs
|
||||
expression: hash
|
||||
input_file: crates/typlite/src/fixtures/integration/enum2.typ
|
||||
---
|
||||
siphash128_13:120c2e9245d767d648fd52a8564c9efc
|
|
@ -0,0 +1,6 @@
|
|||
---
|
||||
source: crates/typlite/src/tests.rs
|
||||
expression: hash
|
||||
input_file: crates/typlite/src/fixtures/integration/figure_caption.typ
|
||||
---
|
||||
siphash128_13:17d544d88231b74b1119c35f627026b
|
|
@ -0,0 +1,6 @@
|
|||
---
|
||||
source: crates/typlite/src/tests.rs
|
||||
expression: hash
|
||||
input_file: crates/typlite/src/fixtures/integration/figure_image.typ
|
||||
---
|
||||
siphash128_13:89ee713812f00bde9ac174f72c81760
|
|
@ -0,0 +1,6 @@
|
|||
---
|
||||
source: crates/typlite/src/tests.rs
|
||||
expression: hash
|
||||
input_file: crates/typlite/src/fixtures/integration/figure_image_alt.typ
|
||||
---
|
||||
siphash128_13:89ee713812f00bde9ac174f72c81760
|
|
@ -0,0 +1,6 @@
|
|||
---
|
||||
source: crates/typlite/src/tests.rs
|
||||
expression: hash
|
||||
input_file: crates/typlite/src/fixtures/integration/image.typ
|
||||
---
|
||||
siphash128_13:89ee713812f00bde9ac174f72c81760
|
|
@ -0,0 +1,6 @@
|
|||
---
|
||||
source: crates/typlite/src/tests.rs
|
||||
expression: hash
|
||||
input_file: crates/typlite/src/fixtures/integration/image_alt.typ
|
||||
---
|
||||
siphash128_13:89ee713812f00bde9ac174f72c81760
|
|
@ -0,0 +1,6 @@
|
|||
---
|
||||
source: crates/typlite/src/tests.rs
|
||||
expression: hash
|
||||
input_file: crates/typlite/src/fixtures/integration/link.typ
|
||||
---
|
||||
siphash128_13:35e614ded7c81c7fb6781d77872add56
|
|
@ -0,0 +1,6 @@
|
|||
---
|
||||
source: crates/typlite/src/tests.rs
|
||||
expression: hash
|
||||
input_file: crates/typlite/src/fixtures/integration/link2.typ
|
||||
---
|
||||
siphash128_13:2374bfc8248e276ed1549f5d6a8b4a40
|
|
@ -0,0 +1,6 @@
|
|||
---
|
||||
source: crates/typlite/src/tests.rs
|
||||
expression: hash
|
||||
input_file: crates/typlite/src/fixtures/integration/link3.typ
|
||||
---
|
||||
siphash128_13:5d5f436195b9b0b0f206881bc4d810f8
|
|
@ -0,0 +1,6 @@
|
|||
---
|
||||
source: crates/typlite/src/tests.rs
|
||||
expression: hash
|
||||
input_file: crates/typlite/src/fixtures/integration/list.typ
|
||||
---
|
||||
siphash128_13:dd68d2d40ddf137ad77719e71c56a19e
|
|
@ -0,0 +1,6 @@
|
|||
---
|
||||
source: crates/typlite/src/tests.rs
|
||||
expression: hash
|
||||
input_file: crates/typlite/src/fixtures/integration/math_block.typ
|
||||
---
|
||||
siphash128_13:ca4f0e6c5b2afee90d9736cb2d3bd6ba
|
|
@ -0,0 +1,6 @@
|
|||
---
|
||||
source: crates/typlite/src/tests.rs
|
||||
expression: hash
|
||||
input_file: crates/typlite/src/fixtures/integration/math_block2.typ
|
||||
---
|
||||
siphash128_13:1c9f3489f7742ef572998ff2b4fd5abd
|
|
@ -0,0 +1,6 @@
|
|||
---
|
||||
source: crates/typlite/src/tests.rs
|
||||
expression: hash
|
||||
input_file: crates/typlite/src/fixtures/integration/math_inline.typ
|
||||
---
|
||||
siphash128_13:2ac3d241b41c4ee23a122b73e43c8063
|
|
@ -0,0 +1,6 @@
|
|||
---
|
||||
source: crates/typlite/src/tests.rs
|
||||
expression: hash
|
||||
input_file: crates/typlite/src/fixtures/integration/outline.typ
|
||||
---
|
||||
siphash128_13:549cf83e9b77d8ae061c95ceb4f93ef6
|
|
@ -0,0 +1,6 @@
|
|||
---
|
||||
source: crates/typlite/src/tests.rs
|
||||
expression: hash
|
||||
input_file: crates/typlite/src/fixtures/integration/raw_inline.typ
|
||||
---
|
||||
siphash128_13:fe468826fde99ac8a0e77767d4045199
|
|
@ -0,0 +1,6 @@
|
|||
---
|
||||
source: crates/typlite/src/tests.rs
|
||||
expression: hash
|
||||
input_file: crates/typlite/src/fixtures/integration/table.typ
|
||||
---
|
||||
siphash128_13:ce1b6f668016a12edf304ab7f38aea42
|
|
@ -0,0 +1,6 @@
|
|||
---
|
||||
source: crates/typlite/src/tests.rs
|
||||
expression: hash
|
||||
input_file: crates/typlite/src/fixtures/integration/base.typ
|
||||
---
|
||||
siphash128_13:f242a739ddf7cdce8041455cd09bf221
|
|
@ -0,0 +1,6 @@
|
|||
---
|
||||
source: crates/typlite/src/tests.rs
|
||||
expression: hash
|
||||
input_file: crates/typlite/src/fixtures/integration/enum.typ
|
||||
---
|
||||
siphash128_13:120c2e9245d767d648fd52a8564c9efc
|
|
@ -0,0 +1,6 @@
|
|||
---
|
||||
source: crates/typlite/src/tests.rs
|
||||
expression: hash
|
||||
input_file: crates/typlite/src/fixtures/integration/enum2.typ
|
||||
---
|
||||
siphash128_13:120c2e9245d767d648fd52a8564c9efc
|
|
@ -0,0 +1,6 @@
|
|||
---
|
||||
source: crates/typlite/src/tests.rs
|
||||
expression: hash
|
||||
input_file: crates/typlite/src/fixtures/integration/figure_caption.typ
|
||||
---
|
||||
siphash128_13:17d544d88231b74b1119c35f627026b
|
|
@ -0,0 +1,6 @@
|
|||
---
|
||||
source: crates/typlite/src/tests.rs
|
||||
expression: hash
|
||||
input_file: crates/typlite/src/fixtures/integration/figure_image.typ
|
||||
---
|
||||
siphash128_13:89ee713812f00bde9ac174f72c81760
|
|
@ -0,0 +1,6 @@
|
|||
---
|
||||
source: crates/typlite/src/tests.rs
|
||||
expression: hash
|
||||
input_file: crates/typlite/src/fixtures/integration/figure_image_alt.typ
|
||||
---
|
||||
siphash128_13:89ee713812f00bde9ac174f72c81760
|
|
@ -0,0 +1,6 @@
|
|||
---
|
||||
source: crates/typlite/src/tests.rs
|
||||
expression: hash
|
||||
input_file: crates/typlite/src/fixtures/integration/image.typ
|
||||
---
|
||||
siphash128_13:89ee713812f00bde9ac174f72c81760
|
|
@ -0,0 +1,6 @@
|
|||
---
|
||||
source: crates/typlite/src/tests.rs
|
||||
expression: hash
|
||||
input_file: crates/typlite/src/fixtures/integration/image_alt.typ
|
||||
---
|
||||
siphash128_13:89ee713812f00bde9ac174f72c81760
|
|
@ -0,0 +1,6 @@
|
|||
---
|
||||
source: crates/typlite/src/tests.rs
|
||||
expression: hash
|
||||
input_file: crates/typlite/src/fixtures/integration/link.typ
|
||||
---
|
||||
siphash128_13:35e614ded7c81c7fb6781d77872add56
|
|
@ -0,0 +1,6 @@
|
|||
---
|
||||
source: crates/typlite/src/tests.rs
|
||||
expression: hash
|
||||
input_file: crates/typlite/src/fixtures/integration/link2.typ
|
||||
---
|
||||
siphash128_13:2374bfc8248e276ed1549f5d6a8b4a40
|
|
@ -0,0 +1,6 @@
|
|||
---
|
||||
source: crates/typlite/src/tests.rs
|
||||
expression: hash
|
||||
input_file: crates/typlite/src/fixtures/integration/link3.typ
|
||||
---
|
||||
siphash128_13:5d5f436195b9b0b0f206881bc4d810f8
|
|
@ -0,0 +1,6 @@
|
|||
---
|
||||
source: crates/typlite/src/tests.rs
|
||||
expression: hash
|
||||
input_file: crates/typlite/src/fixtures/integration/list.typ
|
||||
---
|
||||
siphash128_13:dd68d2d40ddf137ad77719e71c56a19e
|
|
@ -0,0 +1,6 @@
|
|||
---
|
||||
source: crates/typlite/src/tests.rs
|
||||
expression: hash
|
||||
input_file: crates/typlite/src/fixtures/integration/math_block.typ
|
||||
---
|
||||
siphash128_13:ca4f0e6c5b2afee90d9736cb2d3bd6ba
|
|
@ -0,0 +1,6 @@
|
|||
---
|
||||
source: crates/typlite/src/tests.rs
|
||||
expression: hash
|
||||
input_file: crates/typlite/src/fixtures/integration/math_block2.typ
|
||||
---
|
||||
siphash128_13:1c9f3489f7742ef572998ff2b4fd5abd
|
|
@ -0,0 +1,6 @@
|
|||
---
|
||||
source: crates/typlite/src/tests.rs
|
||||
expression: hash
|
||||
input_file: crates/typlite/src/fixtures/integration/math_inline.typ
|
||||
---
|
||||
siphash128_13:2ac3d241b41c4ee23a122b73e43c8063
|
|
@ -0,0 +1,6 @@
|
|||
---
|
||||
source: crates/typlite/src/tests.rs
|
||||
expression: hash
|
||||
input_file: crates/typlite/src/fixtures/integration/outline.typ
|
||||
---
|
||||
siphash128_13:549cf83e9b77d8ae061c95ceb4f93ef6
|
|
@ -0,0 +1,6 @@
|
|||
---
|
||||
source: crates/typlite/src/tests.rs
|
||||
expression: hash
|
||||
input_file: crates/typlite/src/fixtures/integration/raw_inline.typ
|
||||
---
|
||||
siphash128_13:fe468826fde99ac8a0e77767d4045199
|
|
@ -0,0 +1,6 @@
|
|||
---
|
||||
source: crates/typlite/src/tests.rs
|
||||
expression: hash
|
||||
input_file: crates/typlite/src/fixtures/integration/table.typ
|
||||
---
|
||||
siphash128_13:ce1b6f668016a12edf304ab7f38aea42
|
|
@ -97,6 +97,15 @@ impl MarkdownDocument {
|
|||
|
||||
Ok(output)
|
||||
}
|
||||
|
||||
/// Convert the content to a DOCX document
|
||||
#[cfg(feature = "docx")]
|
||||
pub fn to_docx(&self) -> Result<Vec<u8>> {
|
||||
let ast = self.parse()?;
|
||||
|
||||
let mut writer = WriterFactory::create(Format::Docx);
|
||||
writer.write_vec(&ast)
|
||||
}
|
||||
}
|
||||
|
||||
/// A color theme for rendering the content. The valid values can be checked in [color-scheme](https://developer.mozilla.org/en-US/docs/Web/CSS/color-scheme).
|
||||
|
@ -171,10 +180,20 @@ impl Typlite {
|
|||
match self.format {
|
||||
Format::Md => self.convert_doc()?.to_md_string(),
|
||||
Format::LaTeX => self.convert_doc()?.to_tex_string(true),
|
||||
_ => Err("format is not supported".into()),
|
||||
#[cfg(feature = "docx")]
|
||||
Format::Docx => Err("docx format is not supported".into()),
|
||||
}
|
||||
}
|
||||
|
||||
/// Convert the content to a DOCX document
|
||||
#[cfg(feature = "docx")]
|
||||
pub fn to_docx(self) -> Result<Vec<u8>> {
|
||||
if self.format != Format::Docx {
|
||||
return Err("format is not DOCX".into());
|
||||
}
|
||||
self.convert_doc()?.to_docx()
|
||||
}
|
||||
|
||||
/// Convert the content to a markdown document.
|
||||
pub fn convert_doc(self) -> Result<MarkdownDocument> {
|
||||
let entry = self.world.entry_state();
|
||||
|
|
|
@ -85,6 +85,7 @@ fn main() -> typlite::Result<()> {
|
|||
Some(output) if output.extension() == Some(std::ffi::OsStr::new("tex")) => {
|
||||
Format::LaTeX
|
||||
}
|
||||
#[cfg(feature = "docx")]
|
||||
Some(output) if output.extension() == Some(std::ffi::OsStr::new("docx")) => {
|
||||
Format::Docx
|
||||
}
|
||||
|
@ -92,7 +93,30 @@ fn main() -> typlite::Result<()> {
|
|||
};
|
||||
|
||||
match format {
|
||||
Format::Docx => todo!(),
|
||||
#[cfg(feature = "docx")]
|
||||
Format::Docx => {
|
||||
let docx_data = match doc.to_docx() {
|
||||
Ok(data) => data,
|
||||
Err(err) => {
|
||||
eprintln!("Error generating DOCX for {}: {}", output_path, err);
|
||||
continue;
|
||||
}
|
||||
};
|
||||
|
||||
match output {
|
||||
None => {
|
||||
eprintln!("output file is required for DOCX format");
|
||||
continue;
|
||||
}
|
||||
Some(output) => {
|
||||
if let Err(err) = std::fs::write(&output, docx_data) {
|
||||
eprintln!("failed to write DOCX file {}: {}", output.display(), err);
|
||||
continue;
|
||||
}
|
||||
println!("Generated DOCX file: {}", output.display());
|
||||
}
|
||||
}
|
||||
}
|
||||
Format::LaTeX => {
|
||||
let result = doc.to_tex_string(true);
|
||||
match (result, output) {
|
||||
|
|
|
@ -33,6 +33,42 @@ fn convert_docs() {
|
|||
});
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[cfg(feature = "docx")]
|
||||
fn test_docx_generation() {
|
||||
snapshot_testing("integration", &|world, _path| {
|
||||
let converter = Typlite::new(Arc::new(world.clone()))
|
||||
.with_feature(TypliteFeat {
|
||||
..Default::default()
|
||||
})
|
||||
.with_format(Format::Docx);
|
||||
|
||||
let docx_data = match converter.to_docx() {
|
||||
Ok(data) => data,
|
||||
Err(err) => {
|
||||
panic!("Failed to generate DOCX: {}", err);
|
||||
}
|
||||
};
|
||||
|
||||
assert!(!docx_data.is_empty(), "DOCX data should not be empty");
|
||||
|
||||
assert_eq!(
|
||||
&docx_data[0..2],
|
||||
&[0x50, 0x4B],
|
||||
"DOCX data should start with PK signature"
|
||||
);
|
||||
|
||||
// insta::assert_binary_snapshot!("test_output.docx", docx_data);
|
||||
|
||||
let hash = format!(
|
||||
"siphash128_13:{:016x}",
|
||||
tinymist_std::hash::hash128(&docx_data)
|
||||
);
|
||||
insta::assert_snapshot!("docx_generation_hash", hash);
|
||||
// insta::assert_binary_snapshot!(".docx", docx_data);
|
||||
});
|
||||
}
|
||||
|
||||
enum ConvKind {
|
||||
Md { for_docs: bool },
|
||||
LaTeX,
|
||||
|
|
210
crates/typlite/src/writer/docx/image_processor.rs
Normal file
210
crates/typlite/src/writer/docx/image_processor.rs
Normal file
|
@ -0,0 +1,210 @@
|
|||
//! Image processing functionality for DOCX conversion
|
||||
|
||||
use base64::Engine;
|
||||
use docx_rs::*;
|
||||
use std::io::Cursor;
|
||||
|
||||
use crate::Result;
|
||||
|
||||
/// Image processor for DOCX documents
|
||||
pub struct DocxImageProcessor;
|
||||
|
||||
impl DocxImageProcessor {
|
||||
/// Create a new image processor
|
||||
pub fn new() -> Self {
|
||||
Self
|
||||
}
|
||||
|
||||
/// Convert SVG data to PNG format
|
||||
pub fn convert_svg_to_png(&self, svg_data: &[u8]) -> Result<Vec<u8>> {
|
||||
// Check if data is valid SVG
|
||||
let svg_str = match std::str::from_utf8(svg_data) {
|
||||
Ok(s) => s,
|
||||
Err(_) => return Err("Unable to parse input data as UTF-8 string".into()),
|
||||
};
|
||||
|
||||
let dpi = 300.0;
|
||||
let scale_factor = dpi / 96.0;
|
||||
|
||||
let opt = resvg::usvg::Options {
|
||||
dpi,
|
||||
..resvg::usvg::Options::default()
|
||||
};
|
||||
|
||||
// Parse SVG
|
||||
let rtree = match resvg::usvg::Tree::from_str(svg_str, &opt) {
|
||||
Ok(tree) => tree,
|
||||
Err(e) => return Err(format!("SVG parsing error: {:?}", e).into()),
|
||||
};
|
||||
|
||||
let size = rtree.size().to_int_size();
|
||||
let width = (size.width() as f32 * scale_factor) as u32;
|
||||
let height = (size.height() as f32 * scale_factor) as u32;
|
||||
|
||||
// Create pixel buffer
|
||||
let mut pixmap = match resvg::tiny_skia::Pixmap::new(width, height) {
|
||||
Some(pixmap) => pixmap,
|
||||
None => return Err("Unable to create pixel buffer".into()),
|
||||
};
|
||||
|
||||
// Render SVG to pixel buffer
|
||||
resvg::render(
|
||||
&rtree,
|
||||
resvg::tiny_skia::Transform::from_scale(scale_factor, scale_factor),
|
||||
&mut pixmap.as_mut(),
|
||||
);
|
||||
|
||||
// Encode as PNG
|
||||
pixmap
|
||||
.encode_png()
|
||||
.map_err(|e| format!("PNG encoding error: {:?}", e).into())
|
||||
}
|
||||
|
||||
/// Process image data and add to document
|
||||
pub fn process_image_data(
|
||||
&self,
|
||||
docx: Docx,
|
||||
data: &[u8],
|
||||
alt_text: Option<&str>,
|
||||
scale: Option<f32>,
|
||||
) -> Docx {
|
||||
// Add image format validation
|
||||
match image::guess_format(data) {
|
||||
Ok(..) => {
|
||||
// Process image data
|
||||
|
||||
// For other formats, try to convert to PNG
|
||||
let pic = match image::load_from_memory(data) {
|
||||
Ok(img) => {
|
||||
let (w, h) =
|
||||
Self::image_dim(::image::GenericImageView::dimensions(&img), scale);
|
||||
let mut buffer = Vec::new();
|
||||
if img
|
||||
.write_to(&mut Cursor::new(&mut buffer), image::ImageFormat::Png)
|
||||
.is_ok()
|
||||
{
|
||||
Pic::new_with_dimensions(buffer, w, h)
|
||||
} else {
|
||||
// If conversion fails, return original document (without image)
|
||||
let err_para = Paragraph::new().add_run(Run::new().add_text(
|
||||
"[Image processing error: Unable to convert to supported format]".to_string(),
|
||||
));
|
||||
return docx.add_paragraph(err_para);
|
||||
}
|
||||
}
|
||||
Err(_) => {
|
||||
// If unable to load image, return original document (without image)
|
||||
let err_para = Paragraph::new().add_run(Run::new().add_text(
|
||||
"[Image processing error: Unable to load image]".to_string(),
|
||||
));
|
||||
return docx.add_paragraph(err_para);
|
||||
}
|
||||
};
|
||||
|
||||
let img_para = Paragraph::new().add_run(Run::new().add_image(pic));
|
||||
let doc_with_img = docx.add_paragraph(img_para);
|
||||
|
||||
if let Some(alt) = alt_text {
|
||||
if !alt.is_empty() {
|
||||
let caption_para = Paragraph::new()
|
||||
.style("Caption")
|
||||
.add_run(Run::new().add_text(alt));
|
||||
doc_with_img.add_paragraph(caption_para)
|
||||
} else {
|
||||
doc_with_img
|
||||
}
|
||||
} else {
|
||||
doc_with_img
|
||||
}
|
||||
}
|
||||
Err(_) => {
|
||||
// If unable to determine image format, return original document (without image)
|
||||
let err_para =
|
||||
Paragraph::new()
|
||||
.add_run(Run::new().add_text(
|
||||
"[Image processing error: Unknown image format]".to_string(),
|
||||
));
|
||||
docx.add_paragraph(err_para)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Process inline image and add to Run
|
||||
pub fn process_inline_image(&self, mut run: Run, data: &[u8]) -> Result<Run> {
|
||||
match image::guess_format(data) {
|
||||
Ok(..) => {
|
||||
// Try to convert to PNG
|
||||
let pic = match image::load_from_memory(data) {
|
||||
Ok(img) => {
|
||||
let (w, h) = ::image::GenericImageView::dimensions(&img);
|
||||
let mut buffer = Vec::new();
|
||||
if img
|
||||
.write_to(&mut Cursor::new(&mut buffer), image::ImageFormat::Png)
|
||||
.is_ok()
|
||||
{
|
||||
Pic::new_with_dimensions(buffer, w, h)
|
||||
} else {
|
||||
run = run.add_text("[Image conversion error]");
|
||||
return Ok(run);
|
||||
}
|
||||
}
|
||||
Err(_) => {
|
||||
run = run.add_text("[Image loading error]");
|
||||
return Ok(run);
|
||||
}
|
||||
};
|
||||
|
||||
run = run.add_image(pic);
|
||||
Ok(run)
|
||||
}
|
||||
Err(_) => {
|
||||
run = run.add_text("[Unknown image format]");
|
||||
Ok(run)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Process data URL inline image
|
||||
pub fn process_data_url_image(&self, run: Run, src: &str, is_typst_block: bool) -> Result<Run> {
|
||||
if let Some(data_start) = src.find("base64,") {
|
||||
let base64_data = &src[data_start + 7..];
|
||||
if let Ok(img_data) = base64::engine::general_purpose::STANDARD.decode(base64_data) {
|
||||
// If it's a typst-block (SVG data), special handling is needed
|
||||
if is_typst_block {
|
||||
// Use resvg to convert SVG to PNG
|
||||
if let Ok(png_data) = self.convert_svg_to_png(&img_data) {
|
||||
let mut new_run = run;
|
||||
new_run = self.process_inline_image(new_run, &png_data)?;
|
||||
return Ok(new_run);
|
||||
} else {
|
||||
return Ok(run.add_text("[SVG conversion failed]"));
|
||||
}
|
||||
} else {
|
||||
// Normal image processing
|
||||
let mut new_run = run;
|
||||
new_run = self.process_inline_image(new_run, &img_data)?;
|
||||
return Ok(new_run);
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(run.add_text("[Invalid data URL]"))
|
||||
}
|
||||
|
||||
/// Calculate image dimensions for DOCX
|
||||
pub fn image_dim((w, h): (u32, u32), scale_factor: Option<f32>) -> (u32, u32) {
|
||||
let actual_scale = scale_factor.unwrap_or(1.0);
|
||||
|
||||
let max_width = 5486400;
|
||||
let scaled_w = (w as f32 * actual_scale) as u32;
|
||||
let scaled_h = (h as f32 * actual_scale) as u32;
|
||||
|
||||
if scaled_w > max_width {
|
||||
let ratio = scaled_h as f32 / scaled_w as f32;
|
||||
let new_width = max_width;
|
||||
let new_height = (max_width as f32 * ratio) as u32;
|
||||
(new_width, new_height)
|
||||
} else {
|
||||
(scaled_w, scaled_h)
|
||||
}
|
||||
}
|
||||
}
|
14
crates/typlite/src/writer/docx/mod.rs
Normal file
14
crates/typlite/src/writer/docx/mod.rs
Normal file
|
@ -0,0 +1,14 @@
|
|||
//! DOCX converter implementation using docx-rs
|
||||
//!
|
||||
//! This module is organized into several main components:
|
||||
//! - Writer: Functionality for rendering intermediate DocxNode structure to DOCX format
|
||||
//! - Styles: Document style management
|
||||
//! - Numbering: List numbering management
|
||||
//! - Node structures: DocxNode and DocxInline representing document structure
|
||||
|
||||
mod image_processor;
|
||||
mod numbering;
|
||||
mod styles;
|
||||
mod writer;
|
||||
|
||||
pub use writer::DocxWriter;
|
126
crates/typlite/src/writer/docx/numbering.rs
Normal file
126
crates/typlite/src/writer/docx/numbering.rs
Normal file
|
@ -0,0 +1,126 @@
|
|||
//! List numbering management for DOCX conversion
|
||||
|
||||
use docx_rs::*;
|
||||
|
||||
/// List numbering management for DOCX
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct DocxNumbering {
|
||||
initialized: bool,
|
||||
next_id: usize,
|
||||
}
|
||||
|
||||
impl DocxNumbering {
|
||||
/// Create a new numbering manager
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
initialized: false,
|
||||
next_id: 1,
|
||||
}
|
||||
}
|
||||
|
||||
/// Create a list level with the specified parameters
|
||||
pub fn create_list_level(id: usize, format: &str, text: &str, is_bullet: bool) -> Level {
|
||||
let indent_size = 720 * (id + 1) as i32;
|
||||
let hanging_indent = if is_bullet { 360 } else { 420 };
|
||||
|
||||
Level::new(
|
||||
id,
|
||||
Start::new(1),
|
||||
NumberFormat::new(format),
|
||||
LevelText::new(text),
|
||||
LevelJc::new("left"),
|
||||
)
|
||||
.indent(
|
||||
Some(indent_size),
|
||||
Some(SpecialIndentType::Hanging(hanging_indent)),
|
||||
None,
|
||||
None,
|
||||
)
|
||||
}
|
||||
|
||||
/// Initialize the numbering manager
|
||||
pub fn initialize_numbering(&mut self, docx: Docx) -> Docx {
|
||||
if self.initialized {
|
||||
return docx;
|
||||
}
|
||||
|
||||
self.initialized = true;
|
||||
docx
|
||||
}
|
||||
|
||||
/// Create a new ordered list numbering, including a new AbstractNumbering instance
|
||||
pub fn create_ordered_numbering(&mut self, docx: Docx) -> (Docx, usize) {
|
||||
let abstract_id = self.next_id;
|
||||
let numbering_id = self.next_id;
|
||||
self.next_id += 1;
|
||||
|
||||
let mut ordered_abstract = AbstractNumbering::new(abstract_id);
|
||||
|
||||
for i in 0..9 {
|
||||
let level_text = match i {
|
||||
0 => "%1.",
|
||||
1 => "%2.",
|
||||
2 => "%3.",
|
||||
3 => "%4.",
|
||||
4 => "%5.",
|
||||
5 => "%6.",
|
||||
_ => "%7.",
|
||||
};
|
||||
|
||||
let number_format = match i {
|
||||
0 => "decimal",
|
||||
1 => "lowerLetter",
|
||||
2 => "lowerRoman",
|
||||
3 => "upperRoman",
|
||||
4 => "decimal",
|
||||
5 => "lowerLetter",
|
||||
_ => "decimal",
|
||||
};
|
||||
|
||||
let mut ordered_level = Self::create_list_level(i, number_format, level_text, false);
|
||||
|
||||
if i > 0 {
|
||||
ordered_level = ordered_level.level_restart(0_u32);
|
||||
}
|
||||
|
||||
ordered_abstract = ordered_abstract.add_level(ordered_level);
|
||||
}
|
||||
|
||||
let docx = docx
|
||||
.add_abstract_numbering(ordered_abstract)
|
||||
.add_numbering(Numbering::new(numbering_id, abstract_id));
|
||||
|
||||
(docx, numbering_id)
|
||||
}
|
||||
|
||||
/// Create a new unordered list numbering, including a new AbstractNumbering instance
|
||||
pub fn create_unordered_numbering(&mut self, docx: Docx) -> (Docx, usize) {
|
||||
let abstract_id = self.next_id;
|
||||
let numbering_id = self.next_id;
|
||||
self.next_id += 1;
|
||||
|
||||
// Create AbstractNumbering for unordered list
|
||||
let mut unordered_abstract = AbstractNumbering::new(abstract_id);
|
||||
|
||||
// Add 9 levels of definition
|
||||
for i in 0..9 {
|
||||
let bullet_text = match i {
|
||||
0 => "•",
|
||||
1 => "○",
|
||||
2 => "▪",
|
||||
3 => "▫",
|
||||
4 => "◆",
|
||||
_ => "◇",
|
||||
};
|
||||
|
||||
let unordered_level = Self::create_list_level(i, "bullet", bullet_text, true);
|
||||
unordered_abstract = unordered_abstract.add_level(unordered_level);
|
||||
}
|
||||
|
||||
let docx = docx
|
||||
.add_abstract_numbering(unordered_abstract)
|
||||
.add_numbering(Numbering::new(numbering_id, abstract_id));
|
||||
|
||||
(docx, numbering_id)
|
||||
}
|
||||
}
|
107
crates/typlite/src/writer/docx/styles.rs
Normal file
107
crates/typlite/src/writer/docx/styles.rs
Normal file
|
@ -0,0 +1,107 @@
|
|||
//! Document style management for DOCX conversion
|
||||
|
||||
use docx_rs::*;
|
||||
|
||||
/// Document style management
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct DocxStyles {
|
||||
initialized: bool,
|
||||
}
|
||||
|
||||
impl DocxStyles {
|
||||
/// Create a new style manager
|
||||
pub fn new() -> Self {
|
||||
Self { initialized: false }
|
||||
}
|
||||
|
||||
/// Create a heading style with the specified parameters
|
||||
fn create_heading_style(name: &str, display_name: &str, size: usize) -> Style {
|
||||
Style::new(name, StyleType::Paragraph)
|
||||
.name(display_name)
|
||||
.size(size)
|
||||
.bold()
|
||||
}
|
||||
|
||||
/// Initialize all document styles
|
||||
pub fn initialize_styles(&self, docx: Docx) -> Docx {
|
||||
if self.initialized {
|
||||
return docx;
|
||||
}
|
||||
|
||||
let heading1 = Self::create_heading_style("Heading1", "Heading 1", 32);
|
||||
let heading2 = Self::create_heading_style("Heading2", "Heading 2", 28);
|
||||
let heading3 = Self::create_heading_style("Heading3", "Heading 3", 26);
|
||||
let heading4 = Self::create_heading_style("Heading4", "Heading 4", 24);
|
||||
let heading5 = Self::create_heading_style("Heading5", "Heading 5", 22);
|
||||
let heading6 = Self::create_heading_style("Heading6", "Heading 6", 20);
|
||||
|
||||
let courier_fonts = RunFonts::new()
|
||||
.ascii("Courier New")
|
||||
.hi_ansi("Courier New")
|
||||
.east_asia("Courier New")
|
||||
.cs("Courier New");
|
||||
|
||||
let code_block = Style::new("CodeBlock", StyleType::Paragraph)
|
||||
.name("Code Block")
|
||||
.fonts(courier_fonts.clone())
|
||||
.size(18);
|
||||
|
||||
let code_inline = Style::new("CodeInline", StyleType::Character)
|
||||
.name("Code Inline")
|
||||
.fonts(courier_fonts)
|
||||
.size(18);
|
||||
|
||||
let math_block = Style::new("MathBlock", StyleType::Paragraph)
|
||||
.name("Math Block")
|
||||
.align(AlignmentType::Center);
|
||||
|
||||
let emphasis = Style::new("Emphasis", StyleType::Character)
|
||||
.name("Emphasis")
|
||||
.italic();
|
||||
|
||||
let strong = Style::new("Strong", StyleType::Character)
|
||||
.name("Strong")
|
||||
.bold();
|
||||
|
||||
let highlight = Style::new("Highlight", StyleType::Character)
|
||||
.name("Highlight")
|
||||
.highlight("yellow");
|
||||
|
||||
let hyperlink = Style::new("Hyperlink", StyleType::Character)
|
||||
.name("Hyperlink")
|
||||
.color("0000FF")
|
||||
.underline("single");
|
||||
|
||||
let blockquote = Style::new("Blockquote", StyleType::Paragraph)
|
||||
.name("Block Quote")
|
||||
.indent(Some(720), None, None, None)
|
||||
.italic();
|
||||
|
||||
let caption = Style::new("Caption", StyleType::Paragraph)
|
||||
.name("Caption")
|
||||
.italic()
|
||||
.size(16)
|
||||
.align(AlignmentType::Center);
|
||||
|
||||
let table = Style::new("Table", StyleType::Table)
|
||||
.name("Table")
|
||||
.table_align(TableAlignmentType::Center);
|
||||
|
||||
docx.add_style(heading1)
|
||||
.add_style(heading2)
|
||||
.add_style(heading3)
|
||||
.add_style(heading4)
|
||||
.add_style(heading5)
|
||||
.add_style(heading6)
|
||||
.add_style(code_block)
|
||||
.add_style(code_inline)
|
||||
.add_style(math_block)
|
||||
.add_style(emphasis)
|
||||
.add_style(strong)
|
||||
.add_style(highlight)
|
||||
.add_style(hyperlink)
|
||||
.add_style(blockquote)
|
||||
.add_style(caption)
|
||||
.add_style(table)
|
||||
}
|
||||
}
|
644
crates/typlite/src/writer/docx/writer.rs
Normal file
644
crates/typlite/src/writer/docx/writer.rs
Normal file
|
@ -0,0 +1,644 @@
|
|||
//! DOCX document writer implementation
|
||||
|
||||
use base64::Engine;
|
||||
use cmark_writer::ast::{ListItem, Node};
|
||||
use docx_rs::*;
|
||||
use ecow::EcoString;
|
||||
use std::fs;
|
||||
use std::io::Cursor;
|
||||
|
||||
use crate::common::{FigureNode, FormatWriter};
|
||||
use crate::Result;
|
||||
|
||||
use super::image_processor::DocxImageProcessor;
|
||||
use super::numbering::DocxNumbering;
|
||||
use super::styles::DocxStyles;
|
||||
|
||||
/// DOCX writer that generates DOCX directly from AST (without intermediate representation)
pub struct DocxWriter {
    /// Style registry applied to the document before content is emitted.
    styles: DocxStyles,
    /// Factory for per-list numbering definitions.
    numbering: DocxNumbering,
    /// Current list nesting depth (0 = not inside a list).
    list_level: usize,
    /// NOTE(review): reset in `write_vec` but never read or incremented
    /// anywhere in this file — looks vestigial; confirm before removing.
    list_numbering_count: usize,
    /// Helper that decodes and embeds image data into the document.
    image_processor: DocxImageProcessor,
}
|
||||
|
||||
impl Default for DocxWriter {
    /// Delegate to [`DocxWriter::new`] so both construction paths stay in sync.
    fn default() -> Self {
        Self::new()
    }
}
|
||||
|
||||
impl DocxWriter {
|
||||
    /// Create a writer with fresh style, numbering and list-tracking state.
    pub fn new() -> Self {
        Self {
            styles: DocxStyles::new(),
            numbering: DocxNumbering::new(),
            list_level: 0,
            list_numbering_count: 0,
            image_processor: DocxImageProcessor::new(),
        }
    }
|
||||
|
||||
/// Process image node
|
||||
fn process_image(&self, docx: Docx, url: &str, alt_nodes: &[Node]) -> Result<Docx> {
|
||||
// Build alt text
|
||||
let alt_text = if !alt_nodes.is_empty() {
|
||||
let mut text = String::new();
|
||||
for node in alt_nodes {
|
||||
if let Node::Text(content) = node {
|
||||
text.push_str(content);
|
||||
}
|
||||
}
|
||||
Some(text)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
// Try reading image file
|
||||
if let Ok(img_data) = fs::read(url) {
|
||||
Ok(self
|
||||
.image_processor
|
||||
.process_image_data(docx, &img_data, alt_text.as_deref(), None))
|
||||
} else {
|
||||
let placeholder = format!("[Image not found: {}]", url);
|
||||
let para = Paragraph::new().add_run(Run::new().add_text(placeholder));
|
||||
Ok(docx.add_paragraph(para))
|
||||
}
|
||||
}
|
||||
|
||||
    /// Process figure node (image with caption)
    ///
    /// Renders the figure body, then appends the caption (when non-empty)
    /// as a separate "Caption"-styled paragraph.
    /// NOTE(review): in the `Paragraph` branch the caption is emitted inside
    /// the per-node loop, so a paragraph with several children produces one
    /// caption per child; also only the found-image path prefixes the
    /// caption with "Figure: " — confirm both are intentional.
    fn process_figure(&mut self, mut docx: Docx, figure_node: &FigureNode) -> Result<Docx> {
        // First handle the figure body (typically an image)
        match &*figure_node.body {
            Node::Paragraph(content) => {
                for node in content {
                    if let Node::Image {
                        url,
                        title: _,
                        alt: _,
                    } = node
                    {
                        // Process the image
                        if let Ok(img_data) = fs::read(url) {
                            // The figure caption doubles as the image alt text.
                            let alt_text = figure_node.caption.clone();
                            // Add the image with caption
                            docx = self.image_processor.process_image_data(
                                docx,
                                &img_data,
                                Some(&alt_text),
                                None,
                            );

                            // Add caption as a separate paragraph with Caption style
                            if !figure_node.caption.is_empty() {
                                let caption_text = format!("Figure: {}", figure_node.caption);
                                let caption_para = Paragraph::new()
                                    .style("Caption")
                                    .add_run(Run::new().add_text(caption_text));
                                docx = docx.add_paragraph(caption_para);
                            }
                        } else {
                            // Image not found, show placeholder
                            let placeholder = format!("[Image not found: {}]", url);
                            let para = Paragraph::new().add_run(Run::new().add_text(placeholder));
                            docx = docx.add_paragraph(para);

                            // Still add caption
                            if !figure_node.caption.is_empty() {
                                let caption_para = Paragraph::new()
                                    .style("Caption")
                                    .add_run(Run::new().add_text(&figure_node.caption));
                                docx = docx.add_paragraph(caption_para);
                            }
                        }
                    } else {
                        // Handle non-image content: render the inline node into
                        // its own paragraph, skipping runs that produced nothing.
                        let mut para = Paragraph::new();
                        let run = Run::new();
                        let run = self.process_inline_to_run(run, node)?;
                        if !run.children.is_empty() {
                            para = para.add_run(run);
                            docx = docx.add_paragraph(para);
                        }

                        // Add caption as a separate paragraph
                        if !figure_node.caption.is_empty() {
                            let caption_para = Paragraph::new()
                                .style("Caption")
                                .add_run(Run::new().add_text(&figure_node.caption));
                            docx = docx.add_paragraph(caption_para);
                        }
                    }
                }
            }
            // Handle other content types within figure
            _ => {
                // Process the content using standard node processing
                docx = self.process_node(docx, &figure_node.body)?;

                // Add caption as a separate paragraph
                if !figure_node.caption.is_empty() {
                    let caption_para = Paragraph::new()
                        .style("Caption")
                        .add_run(Run::new().add_text(&figure_node.caption));
                    docx = docx.add_paragraph(caption_para);
                }
            }
        }

        Ok(docx)
    }
|
||||
|
||||
    /// Process inline element and add to Run
    ///
    /// Appends the node's content to `run` and returns the (possibly
    /// restyled) run. Unknown inline node kinds are silently skipped.
    /// NOTE(review): `Run::style` assigns a single style to the whole run,
    /// so nested styled nodes (e.g. emphasis inside strong) overwrite the
    /// outer style rather than combining — confirm this is acceptable.
    fn process_inline_to_run(&self, mut run: Run, node: &Node) -> Result<Run> {
        match node {
            Node::Text(text) => {
                run = run.add_text(text);
            }
            Node::Strong(content) => {
                run = run.style("Strong");
                for child in content {
                    run = self.process_inline_to_run(run, child)?;
                }
            }
            Node::Emphasis(content) => {
                run = run.style("Emphasis");
                for child in content {
                    run = self.process_inline_to_run(run, child)?;
                }
            }
            Node::Strikethrough(content) => {
                run = run.strike();
                for child in content {
                    run = self.process_inline_to_run(run, child)?;
                }
            }
            Node::Link {
                url: _,
                title: _,
                content,
            } => {
                // Hyperlinks need to be processed at paragraph level, only handle content here
                run = run.style("Hyperlink");
                for child in content {
                    run = self.process_inline_to_run(run, child)?;
                }
            }
            Node::Image {
                url,
                title: _,
                alt: _,
            } => {
                // Inline images are read from disk; a missing file degrades
                // to a textual placeholder rather than an error.
                if let Ok(img_data) = fs::read(url) {
                    run = self.image_processor.process_inline_image(run, &img_data)?;
                } else {
                    run = run.add_text(format!("[Image not found: {}]", url));
                }
            }
            Node::HtmlElement(element) => {
                // Handle special HTML elements
                if element.tag == "mark" {
                    run = run.style("Highlight");
                    for child in &element.children {
                        run = self.process_inline_to_run(run, child)?;
                    }
                } else if element.tag == "img" && element.self_closing {
                    // alt="typst-block" marks images produced from typst
                    // frames; the image processor treats them specially.
                    let is_typst_block = element
                        .attributes
                        .iter()
                        .any(|a| a.name == "alt" && a.value == "typst-block");

                    let src = element
                        .attributes
                        .iter()
                        .find(|a| a.name == "src")
                        .map(|a| a.value.as_str())
                        .unwrap_or("");

                    // Only data-URL images are supported here; other src
                    // forms are silently ignored.
                    if src.starts_with("data:image/") {
                        run = self.image_processor.process_data_url_image(
                            run,
                            src,
                            is_typst_block,
                        )?;
                    }
                } else {
                    // Standard element content processing
                    for child in &element.children {
                        run = self.process_inline_to_run(run, child)?;
                    }
                }
            }
            Node::InlineCode(code) => {
                run = run.style("CodeInline").add_text(code);
            }
            Node::HardBreak => {
                run = run.add_break(BreakType::TextWrapping);
            }
            Node::SoftBreak => {
                // Soft breaks collapse to a single space, as in CommonMark.
                run = run.add_text(" ");
            }
            // Other inline element types
            _ => {}
        }

        Ok(run)
    }
|
||||
|
||||
    /// Process paragraph and add to document
    ///
    /// Builds one paragraph from `content`, optionally applying `style`.
    /// Links cannot live inside a plain `Run`, so when the paragraph
    /// contains `Node::Link`s the content is processed in segments:
    /// runs before each link, then the link as a `Hyperlink`, then the
    /// remainder. Paragraphs that end up empty are dropped entirely.
    fn process_paragraph(
        &self,
        mut docx: Docx,
        content: &[Node],
        style: Option<&str>,
    ) -> Result<Docx> {
        let mut para = Paragraph::new();

        // Apply style
        if let Some(style_name) = style {
            para = para.style(style_name);
        }

        // Extract all link nodes (index + target URL) up front.
        let mut links = Vec::new();
        for (i, node) in content.iter().enumerate() {
            if let Node::Link {
                url,
                title: _,
                content: _,
            } = node
            {
                links.push((i, url.clone()));
            }
        }

        // If no links, process paragraph normally
        if links.is_empty() {
            // Process paragraph content
            for node in content {
                let run = Run::new();
                let run = self.process_inline_to_run(run, node)?;
                if !run.children.is_empty() {
                    para = para.add_run(run);
                }
            }
        } else {
            // If links exist, we need to process in segments
            let mut last_idx = 0;
            for (idx, url) in links {
                // Process content before the link
                for item in content.iter().take(idx).skip(last_idx) {
                    let run = Run::new();
                    let run = self.process_inline_to_run(run, item)?;
                    if !run.children.is_empty() {
                        para = para.add_run(run);
                    }
                }

                // Process link
                if let Node::Link {
                    url: _,
                    title: _,
                    content: link_content,
                } = &content[idx]
                {
                    let mut hyperlink_run = Run::new().style("Hyperlink");
                    for child in link_content {
                        hyperlink_run = self.process_inline_to_run(hyperlink_run, child)?;
                    }

                    // Create and add hyperlink
                    if !hyperlink_run.children.is_empty() {
                        let hyperlink =
                            Hyperlink::new(&url, HyperlinkType::External).add_run(hyperlink_run);
                        para = para.add_hyperlink(hyperlink);
                    }
                }

                last_idx = idx + 1;
            }

            // Process content after the last link
            for item in content.iter().skip(last_idx) {
                let run = Run::new();
                let run = self.process_inline_to_run(run, item)?;
                if !run.children.is_empty() {
                    para = para.add_run(run);
                }
            }
        }

        // Only add when paragraph has content
        if !para.children.is_empty() {
            docx = docx.add_paragraph(para);
        }

        Ok(docx)
    }
|
||||
|
||||
    /// Process node and add to document
    ///
    /// Central block-level dispatcher: walks the cmark AST and appends the
    /// corresponding DOCX structures. Inline-only node kinds fall through
    /// the catch-all arm and are handled by `process_inline_to_run` via
    /// their enclosing paragraph instead.
    fn process_node(&mut self, mut docx: Docx, node: &Node) -> Result<Docx> {
        match node {
            Node::Document(blocks) => {
                for block in blocks {
                    docx = self.process_node(docx, block)?;
                }
            }
            Node::Paragraph(content) => {
                docx = self.process_paragraph(docx, content, None)?;
            }
            Node::Heading {
                level,
                content,
                heading_type: _,
            } => {
                // Determine heading style name; levels beyond 6 clamp to Heading6.
                let style_name = match level {
                    1 => "Heading1",
                    2 => "Heading2",
                    3 => "Heading3",
                    4 => "Heading4",
                    5 => "Heading5",
                    _ => "Heading6",
                };

                docx = self.process_paragraph(docx, content, Some(style_name))?;
            }
            Node::BlockQuote(content) => {
                // Paragraph children get the Blockquote style; anything else
                // (nested lists, code, ...) is dispatched normally.
                for block in content {
                    if let Node::Paragraph(inline) = block {
                        docx = self.process_paragraph(docx, inline, Some("Blockquote"))?;
                    } else {
                        docx = self.process_node(docx, block)?;
                    }
                }
            }
            Node::CodeBlock {
                language,
                content,
                block_type: _,
            } => {
                // Add language information as its own code-styled line.
                if let Some(lang) = language {
                    if !lang.is_empty() {
                        let lang_para = Paragraph::new()
                            .style("CodeBlock")
                            .add_run(Run::new().add_text(lang));
                        docx = docx.add_paragraph(lang_para);
                    }
                }

                // Process code line by line, preserving line breaks
                let lines: Vec<&str> = content.split('\n').collect();
                for line in lines {
                    let code_para = Paragraph::new()
                        .style("CodeBlock")
                        .add_run(Run::new().add_text(line));
                    docx = docx.add_paragraph(code_para);
                }
            }
            Node::OrderedList { start: _, items } => {
                docx = self.process_ordered_list(docx, items)?;
            }
            Node::UnorderedList(items) => {
                docx = self.process_unordered_list(docx, items)?;
            }
            Node::Table {
                headers,
                rows,
                alignments: _,
            } => {
                docx = self.process_table(docx, headers, rows)?;
            }
            Node::Image { url, title: _, alt } => {
                docx = self.process_image(docx, url, alt)?;
            }
            Node::Custom(custom_node) => {
                if let Some(figure_node) = custom_node.as_any().downcast_ref::<FigureNode>() {
                    // Process figure node with special handling
                    docx = self.process_figure(docx, figure_node)?;
                } else if let Some(external_frame) = custom_node
                    .as_any()
                    .downcast_ref::<crate::common::ExternalFrameNode>(
                ) {
                    // External frames carry base64-encoded SVG data.
                    let data = base64::engine::general_purpose::STANDARD
                        .decode(&external_frame.svg_data)
                        .map_err(|e| format!("Failed to decode SVG data: {}", e))?;

                    docx = self.image_processor.process_image_data(
                        docx,
                        &data,
                        Some(&external_frame.alt_text),
                        None,
                    );
                } else {
                    // Fallback for unknown custom nodes - ignore or add placeholder
                    let placeholder = "[Unknown custom content]";
                    let para = Paragraph::new().add_run(Run::new().add_text(placeholder));
                    docx = docx.add_paragraph(para);
                }
            }
            Node::ThematicBreak => {
                // Add horizontal line as specially formatted paragraph.
                // NOTE(review): make sure a "HorizontalLine" style is
                // registered by the style initializer, otherwise this
                // reference falls back to document defaults.
                let hr_para = Paragraph::new()
                    .style("HorizontalLine")
                    .add_run(Run::new().add_text(""));
                docx = docx.add_paragraph(hr_para);
            }
            // Inline elements should not be processed here individually
            _ => {}
        }

        Ok(docx)
    }
|
||||
|
||||
/// Process ordered list
|
||||
fn process_ordered_list(&mut self, mut docx: Docx, items: &[ListItem]) -> Result<Docx> {
|
||||
// Enter deeper list level
|
||||
self.list_level += 1;
|
||||
let current_level = self.list_level - 1;
|
||||
|
||||
// Create new ordered list numbering definition
|
||||
let (doc, num_id) = self.numbering.create_ordered_numbering(docx);
|
||||
docx = doc;
|
||||
|
||||
// Process list items
|
||||
for item in items {
|
||||
if let ListItem::Ordered { content, .. } = item {
|
||||
docx = self.process_list_item_content(docx, content, num_id, current_level)?;
|
||||
}
|
||||
}
|
||||
|
||||
// Exit list level
|
||||
self.list_level -= 1;
|
||||
Ok(docx)
|
||||
}
|
||||
|
||||
/// Process unordered list
|
||||
fn process_unordered_list(&mut self, mut docx: Docx, items: &[ListItem]) -> Result<Docx> {
|
||||
// Enter deeper list level
|
||||
self.list_level += 1;
|
||||
let current_level = self.list_level - 1;
|
||||
|
||||
// Create new unordered list numbering definition
|
||||
let (doc, num_id) = self.numbering.create_unordered_numbering(docx);
|
||||
docx = doc;
|
||||
|
||||
// Process list items
|
||||
for item in items {
|
||||
if let ListItem::Unordered { content } = item {
|
||||
docx = self.process_list_item_content(docx, content, num_id, current_level)?;
|
||||
}
|
||||
}
|
||||
|
||||
// Exit list level
|
||||
self.list_level -= 1;
|
||||
Ok(docx)
|
||||
}
|
||||
|
||||
    /// Helper function to process list item content
    ///
    /// Emits each paragraph of a list item as a numbered paragraph using
    /// `num_id`/`level`; nested lists and other blocks are dispatched back
    /// through `process_node` (nested lists allocate their own numbering).
    fn process_list_item_content(
        &mut self,
        mut docx: Docx,
        content: &[Node],
        num_id: usize,
        level: usize,
    ) -> Result<Docx> {
        // If content is empty, add empty paragraph so the bullet/number
        // still renders.
        if content.is_empty() {
            let empty_para = Paragraph::new()
                .numbering(NumberingId::new(num_id), IndentLevel::new(level))
                .add_run(Run::new().add_text(""));
            return Ok(docx.add_paragraph(empty_para));
        }

        // Process content
        for block in content {
            match block {
                Node::Paragraph(inline) => {
                    let mut para = Paragraph::new()
                        .numbering(NumberingId::new(num_id), IndentLevel::new(level));

                    // Process paragraph content, skipping runs that produced nothing.
                    for node in inline {
                        let run = Run::new();
                        let run = self.process_inline_to_run(run, node)?;
                        if !run.children.is_empty() {
                            para = para.add_run(run);
                        }
                    }

                    docx = docx.add_paragraph(para);
                }
                // Recursively process nested lists
                Node::OrderedList { start: _, items: _ } | Node::UnorderedList(_) => {
                    docx = self.process_node(docx, block)?;
                }
                _ => {
                    docx = self.process_node(docx, block)?;
                }
            }
        }

        Ok(docx)
    }
|
||||
|
||||
/// Process table
|
||||
fn process_table(&self, mut docx: Docx, headers: &[Node], rows: &[Vec<Node>]) -> Result<Docx> {
|
||||
let mut table = Table::new(vec![]).style("Table");
|
||||
|
||||
// Process table headers
|
||||
if !headers.is_empty() {
|
||||
let mut cells = Vec::new();
|
||||
|
||||
for header_node in headers {
|
||||
let mut table_cell = TableCell::new();
|
||||
let mut para = Paragraph::new();
|
||||
|
||||
let run = Run::new();
|
||||
let run = self.process_inline_to_run(run, header_node)?;
|
||||
if !run.children.is_empty() {
|
||||
para = para.add_run(run);
|
||||
}
|
||||
|
||||
if !para.children.is_empty() {
|
||||
table_cell = table_cell.add_paragraph(para);
|
||||
}
|
||||
|
||||
cells.push(table_cell);
|
||||
}
|
||||
|
||||
if !cells.is_empty() {
|
||||
let header_row = TableRow::new(cells);
|
||||
table = table.add_row(header_row);
|
||||
}
|
||||
}
|
||||
|
||||
// Process table rows
|
||||
for row in rows {
|
||||
let mut cells = Vec::new();
|
||||
|
||||
for cell_node in row {
|
||||
let mut table_cell = TableCell::new();
|
||||
let mut para = Paragraph::new();
|
||||
|
||||
let run = Run::new();
|
||||
let run = self.process_inline_to_run(run, cell_node)?;
|
||||
if !run.children.is_empty() {
|
||||
para = para.add_run(run);
|
||||
}
|
||||
|
||||
if !para.children.is_empty() {
|
||||
table_cell = table_cell.add_paragraph(para);
|
||||
}
|
||||
|
||||
cells.push(table_cell);
|
||||
}
|
||||
|
||||
if !cells.is_empty() {
|
||||
let data_row = TableRow::new(cells);
|
||||
table = table.add_row(data_row);
|
||||
}
|
||||
}
|
||||
|
||||
// Add table to document
|
||||
docx = docx.add_table(table);
|
||||
|
||||
Ok(docx)
|
||||
}
|
||||
|
||||
/// Generate DOCX document
|
||||
pub fn generate_docx(&mut self, doc: &Node) -> Result<Vec<u8>> {
|
||||
// Create DOCX document and initialize styles
|
||||
let mut docx = Docx::new();
|
||||
docx = self.styles.initialize_styles(docx);
|
||||
|
||||
// Process document content
|
||||
docx = self.process_node(docx, doc)?;
|
||||
|
||||
// Initialize numbering definitions
|
||||
docx = self.numbering.initialize_numbering(docx);
|
||||
|
||||
// Build and pack document
|
||||
let docx_built = docx.build();
|
||||
let mut buffer = Vec::new();
|
||||
docx_built
|
||||
.pack(&mut Cursor::new(&mut buffer))
|
||||
.map_err(|e| format!("Failed to pack DOCX: {}", e))?;
|
||||
|
||||
Ok(buffer)
|
||||
}
|
||||
}
|
||||
|
||||
impl FormatWriter for DocxWriter {
    /// Render `document` to DOCX bytes, resetting per-document list state
    /// first so a reused writer starts clean.
    fn write_vec(&mut self, document: &Node) -> Result<Vec<u8>> {
        self.list_level = 0;
        self.list_numbering_count = 0;
        self.generate_docx(document)
    }

    /// DOCX is a binary container, so there is no text rendering; always
    /// returns an error.
    fn write_eco(&mut self, _document: &Node, _output: &mut EcoString) -> Result<()> {
        Err("DOCX format does not support EcoString output".into())
    }
}
|
|
@ -1,8 +1,12 @@
|
|||
//! Writer implementations for different output formats
|
||||
|
||||
#[cfg(feature = "docx")]
|
||||
pub mod docx;
|
||||
pub mod latex;
|
||||
pub mod markdown;
|
||||
|
||||
#[cfg(feature = "docx")]
|
||||
pub use self::docx::DocxWriter;
|
||||
pub use latex::LaTeXWriter;
|
||||
pub use markdown::MarkdownWriter;
|
||||
|
||||
|
@ -13,9 +17,8 @@ pub fn create_writer(format: Format) -> Box<dyn FormatWriter> {
|
|||
match format {
|
||||
Format::Md => Box::new(markdown::MarkdownWriter::new()),
|
||||
Format::LaTeX => Box::new(latex::LaTeXWriter::new()),
|
||||
Format::Docx => {
|
||||
panic!("Docx writers are not implemented yet")
|
||||
}
|
||||
#[cfg(feature = "docx")]
|
||||
Format::Docx => Box::new(docx::DocxWriter::new()),
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue