feat(typlite): add plain text output support (#1731)

* feat: add plain text output support

* fix: correct task chaining in Typlite implementation

* feat: add process-math-eq function for improved math equation handling

* fix: simplify ordered and unordered list output formatting

* tests: update insta

* 更新 media.rs

* 更新 media.rs

* fix: expose a bit and remove unused deps

---------

Co-authored-by: Myriad-Dreamin <camiyoru@gmail.com>
This commit is contained in:
Hong Jiarong 2025-05-13 11:58:26 +08:00 committed by GitHub
parent 38974a3b5e
commit 58b5df97e1
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
9 changed files with 253 additions and 8 deletions

View file

@ -25,7 +25,6 @@ base64.workspace = true
clap = { workspace = true, optional = true }
comemo.workspace = true
ecow.workspace = true
tinymist-analysis.workspace = true
tinymist-std.workspace = true
tinymist-derive.workspace = true
tinymist-project = { workspace = true, features = ["lsp"] }

View file

@ -20,6 +20,7 @@ pub enum Format {
#[default]
Md,
LaTeX,
Text,
#[cfg(feature = "docx")]
Docx,
}

View file

@ -29,9 +29,13 @@ use crate::parser::HtmlToAstParser;
use crate::writer::WriterFactory;
use typst_syntax::FileId;
use crate::tinymist_std::typst::foundations::Value::Str;
use crate::tinymist_std::typst::{LazyHash, TypstDict};
/// The result type for typlite.
pub type Result<T, Err = Error> = std::result::Result<T, Err>;
pub use cmark_writer::ast;
pub use tinymist_project::CompileOnceArgs;
pub use tinymist_std;
@ -81,6 +85,17 @@ impl MarkdownDocument {
Ok(output)
}
/// Renders the parsed document as a plain text string.
///
/// Fails if parsing the document or writing the resulting AST fails.
pub fn to_text_string(&self) -> Result<ecow::EcoString> {
    let document = self.parse()?;
    let mut text = ecow::EcoString::new();
    WriterFactory::create(Format::Text).write_eco(&document, &mut text)?;
    Ok(text)
}
/// Convert the content to a LaTeX string.
pub fn to_tex_string(&self, prelude: bool) -> Result<ecow::EcoString> {
let mut output = ecow::EcoString::new();
@ -170,6 +185,7 @@ impl Typlite {
match self.format {
Format::Md => self.convert_doc(Format::Md)?.to_md_string(),
Format::LaTeX => self.convert_doc(Format::LaTeX)?.to_tex_string(true),
Format::Text => self.convert_doc(Format::Text)?.to_text_string(),
#[cfg(feature = "docx")]
Format::Docx => Err("docx format is not supported".into()),
}
@ -200,10 +216,18 @@ impl Typlite {
.path_for_id(wrap_main_id)
.map_err(|err| format!("getting source for main file: {err:?}"))?;
let mut world = world.html_task().task(TaskInputs {
let task_inputs = TaskInputs {
entry: Some(entry.select_in_workspace(wrap_main_id.vpath().as_rooted_path())),
inputs: None,
});
inputs: if format == Format::Text || self.feat.remove_html {
let mut dict = TypstDict::new();
dict.insert("x-remove-html".into(), Str("true".into()));
Some(Arc::new(LazyHash::new(dict)))
} else {
None
},
};
let mut world = world.task(task_inputs).html_task().into_owned();
let markdown_id = FileId::new(
Some(typst_syntax::package::PackageSpec::from_str("@local/markdown:0.1.0").unwrap()),

View file

@ -45,6 +45,7 @@ fn main() -> typlite::Result<()> {
let output_format = match output_path.extension() {
Some(ext) if ext == std::ffi::OsStr::new("tex") => Format::LaTeX,
Some(ext) if ext == std::ffi::OsStr::new("txt") => Format::Text,
#[cfg(feature = "docx")]
Some(ext) if ext == std::ffi::OsStr::new("docx") => Format::Docx,
_ => Format::Md,
@ -75,6 +76,7 @@ fn main() -> typlite::Result<()> {
let result = match output_format {
Format::Md => Bytes::from_string(doc.to_md_string()?),
Format::LaTeX => Bytes::from_string(doc.to_tex_string(true)?),
Format::Text => Bytes::from_string(doc.to_text_string()?),
#[cfg(feature = "docx")]
Format::Docx => Bytes::new(doc.to_docx()?),
};

View file

@ -116,6 +116,25 @@
#let example(code) = eval(code.text, mode: "markup")
// Recursively reduces a math equation body to an array of its textual pieces.
// - A string is returned unchanged.
// - An array is processed element by element and the results are flattened.
// - Any other value is expected to expose `fields()`; its field values that are
//   content or strings are gathered into an array and processed again.
#let process-math-eq(item) = {
if type(item) == str {
return item
}
if type(item) == array {
// NOTE(review): the check below compares a content element's function with the
// `str` type; presumably it is meant to detect text elements — confirm the intent.
if (
item.any(x => {
type(x) == content and x.func() == str
})
) {
item.flatten()
} else {
// Recurse into every element, then merge the nested results into one array.
item.map(x => process-math-eq(x)).flatten()
}
} else {
// Keep only the content/string field values and process them as an array.
process-math-eq(item.fields().values().flatten().filter(x => type(x) == content or type(x) == str))
}
}
#let md-doc(body) = context {
// distinguish parbreak from <p> tag
show parbreak: it => if-not-paged(it, md-parbreak)
@ -139,11 +158,21 @@
show math.equation.where(block: false): it => if-not-paged(
it,
html.elem("m1eqinline", html.frame(box(inset: 0.5em, it))),
html.elem(
"m1eqinline",
if sys.inputs.at("x-remove-html", default: none) != "true" { html.frame(box(inset: 0.5em, it)) } else {
process-math-eq(it.body).flatten().join()
},
),
)
show math.equation.where(block: true): it => if-not-paged(
it,
html.elem("m1eqblock", html.frame(block(inset: 0.5em, it))),
html.elem(
"m1eqblock",
if sys.inputs.at("x-remove-html", default: none) != "true" { html.frame(block(inset: 0.5em, it)) } else {
process-math-eq(it.body).flatten().join()
},
),
)
// show linebreak: it => if-not-paged(it, md-linebreak)

View file

@ -4,11 +4,13 @@
pub mod docx;
pub mod latex;
pub mod markdown;
pub mod text;
#[cfg(feature = "docx")]
pub use self::docx::DocxWriter;
pub use latex::LaTeXWriter;
pub use markdown::MarkdownWriter;
pub use text::TextWriter;
use crate::common::{Format, FormatWriter};
@ -17,6 +19,7 @@ pub fn create_writer(format: Format) -> Box<dyn FormatWriter> {
match format {
Format::Md => Box::new(markdown::MarkdownWriter::new()),
Format::LaTeX => Box::new(latex::LaTeXWriter::new()),
Format::Text => Box::new(text::TextWriter::new()),
#[cfg(feature = "docx")]
Format::Docx => Box::new(docx::DocxWriter::new()),
}

View file

@ -0,0 +1,187 @@
//! Text writer implementation - produces plain text output
use cmark_writer::ast::Node;
use ecow::EcoString;
use crate::common::{ExternalFrameNode, FigureNode, FormatWriter};
use crate::Result;
/// Writer that renders a document AST as plain text output.
///
/// The writer has no fields and therefore carries no state between calls.
#[derive(Debug, Clone, Default)]
pub struct TextWriter {}
impl TextWriter {
    /// Creates a new plain-text writer.
    pub fn new() -> Self {
        Self {}
    }

    /// Writes each node of `nodes` to `output` in order, stopping at the first error.
    fn write_nodes<'a>(
        nodes: impl IntoIterator<Item = &'a Node>,
        output: &mut EcoString,
    ) -> Result<()> {
        for node in nodes {
            Self::write_node(node, output)?;
        }
        Ok(())
    }

    /// Writes the block content of every list item, without bullets or numbering.
    ///
    /// Ordered and unordered items are rendered the same way; any other item
    /// kind contributes nothing to the output.
    fn write_list_items(
        items: &[cmark_writer::ast::ListItem],
        output: &mut EcoString,
    ) -> Result<()> {
        use cmark_writer::ast::ListItem;

        for item in items {
            match item {
                ListItem::Ordered { content, .. } | ListItem::Unordered { content } => {
                    Self::write_nodes(content, output)?;
                }
                _ => {}
            }
        }
        Ok(())
    }

    /// Writes one table row: every cell followed by a space, then a newline.
    fn write_table_row<'a>(
        cells: impl IntoIterator<Item = &'a Node>,
        output: &mut EcoString,
    ) -> Result<()> {
        for cell in cells {
            Self::write_node(cell, output)?;
            output.push(' ');
        }
        output.push('\n');
        Ok(())
    }

    /// Recursively appends the plain-text rendering of `node` to `output`.
    ///
    /// Markup is dropped and only textual content is kept: emphasis, links and
    /// HTML elements emit their children, images emit their alt content, and
    /// node kinds without a dedicated arm emit nothing.
    ///
    /// Returns the first error produced while writing a child node.
    fn write_node(node: &Node, output: &mut EcoString) -> Result<()> {
        match node {
            Node::Document(blocks) => {
                // Every top-level block is followed by a newline.
                for block in blocks {
                    Self::write_node(block, output)?;
                    output.push('\n');
                }
            }
            Node::Paragraph(inlines) => {
                Self::write_nodes(inlines, output)?;
                output.push('\n');
            }
            Node::Heading { content, .. } => {
                Self::write_nodes(content, output)?;
                output.push('\n');
            }
            Node::BlockQuote(content) => Self::write_nodes(content, output)?,
            Node::CodeBlock { content, .. } => {
                output.push_str(content);
                output.push_str("\n\n");
            }
            Node::OrderedList { items, .. } => Self::write_list_items(items, output)?,
            Node::UnorderedList(items) => Self::write_list_items(items, output)?,
            Node::Table { headers, rows, .. } => {
                Self::write_table_row(headers, output)?;
                for row in rows {
                    Self::write_table_row(row, output)?;
                }
                // Blank line after the table.
                output.push('\n');
            }
            Node::Text(text) => output.push_str(text),
            Node::Emphasis(content) | Node::Strong(content) | Node::Strikethrough(content) => {
                Self::write_nodes(content, output)?;
            }
            Node::Link { content, .. } => Self::write_nodes(content, output)?,
            // Iterating an empty `alt` writes nothing, so no emptiness check is needed.
            Node::Image { alt, .. } => Self::write_nodes(alt, output)?,
            Node::InlineCode(code) => output.push_str(code),
            Node::HardBreak | Node::ThematicBreak => output.push('\n'),
            Node::SoftBreak => output.push(' '),
            Node::HtmlElement(element) => Self::write_nodes(&element.children, output)?,
            other => {
                // Probe the supported custom node types in a fixed order; a node
                // matching none of them is ignored.
                if let Some(figure) = other.as_custom_type::<FigureNode>() {
                    Self::write_node(&figure.body, output)?;
                    if !figure.caption.is_empty() {
                        output.push('\n');
                        output.push_str(&figure.caption);
                    }
                } else if let Some(frame) = other.as_custom_type::<ExternalFrameNode>() {
                    if !frame.alt_text.is_empty() {
                        output.push_str(&frame.alt_text);
                    }
                } else if let Some(highlight) =
                    other.as_custom_type::<crate::common::HighlightNode>()
                {
                    Self::write_nodes(&highlight.content, output)?;
                }
            }
        }
        Ok(())
    }
}
impl FormatWriter for TextWriter {
fn write_eco(&mut self, document: &Node, output: &mut EcoString) -> Result<()> {
Self::write_node(document, output)
}
fn write_vec(&mut self, document: &Node) -> Result<Vec<u8>> {
let mut output = EcoString::new();
Self::write_node(document, &mut output)?;
Ok(output.as_str().as_bytes().to_vec())
}
}