mirror of
https://github.com/Myriad-Dreamin/tinymist.git
synced 2025-08-04 02:08:17 +00:00
feat(typlite): add plain text output support (#1731)
* feat: add plain text output support * fix: correct task chaining in Typlite implementation * feat: add process-math-eq function for improved math equation handling * fix: simplify ordered and unordered list output formatting * tests: update insta * 更新 media.rs * 更新 media.rs * fix: expose a bit and remove unused deps --------- Co-authored-by: Myriad-Dreamin <camiyoru@gmail.com>
This commit is contained in:
parent
38974a3b5e
commit
58b5df97e1
9 changed files with 253 additions and 8 deletions
|
@ -25,7 +25,6 @@ base64.workspace = true
|
|||
clap = { workspace = true, optional = true }
|
||||
comemo.workspace = true
|
||||
ecow.workspace = true
|
||||
tinymist-analysis.workspace = true
|
||||
tinymist-std.workspace = true
|
||||
tinymist-derive.workspace = true
|
||||
tinymist-project = { workspace = true, features = ["lsp"] }
|
||||
|
|
|
@ -20,6 +20,7 @@ pub enum Format {
|
|||
#[default]
|
||||
Md,
|
||||
LaTeX,
|
||||
Text,
|
||||
#[cfg(feature = "docx")]
|
||||
Docx,
|
||||
}
|
||||
|
|
|
@ -29,9 +29,13 @@ use crate::parser::HtmlToAstParser;
|
|||
use crate::writer::WriterFactory;
|
||||
use typst_syntax::FileId;
|
||||
|
||||
use crate::tinymist_std::typst::foundations::Value::Str;
|
||||
use crate::tinymist_std::typst::{LazyHash, TypstDict};
|
||||
|
||||
/// The result type for typlite.
|
||||
pub type Result<T, Err = Error> = std::result::Result<T, Err>;
|
||||
|
||||
pub use cmark_writer::ast;
|
||||
pub use tinymist_project::CompileOnceArgs;
|
||||
pub use tinymist_std;
|
||||
|
||||
|
@ -81,6 +85,17 @@ impl MarkdownDocument {
|
|||
Ok(output)
|
||||
}
|
||||
|
||||
/// Convert content to plain text string
|
||||
pub fn to_text_string(&self) -> Result<ecow::EcoString> {
|
||||
let mut output = ecow::EcoString::new();
|
||||
let ast = self.parse()?;
|
||||
|
||||
let mut writer = WriterFactory::create(Format::Text);
|
||||
writer.write_eco(&ast, &mut output)?;
|
||||
|
||||
Ok(output)
|
||||
}
|
||||
|
||||
/// Convert the content to a LaTeX string.
|
||||
pub fn to_tex_string(&self, prelude: bool) -> Result<ecow::EcoString> {
|
||||
let mut output = ecow::EcoString::new();
|
||||
|
@ -170,6 +185,7 @@ impl Typlite {
|
|||
match self.format {
|
||||
Format::Md => self.convert_doc(Format::Md)?.to_md_string(),
|
||||
Format::LaTeX => self.convert_doc(Format::LaTeX)?.to_tex_string(true),
|
||||
Format::Text => self.convert_doc(Format::Text)?.to_text_string(),
|
||||
#[cfg(feature = "docx")]
|
||||
Format::Docx => Err("docx format is not supported".into()),
|
||||
}
|
||||
|
@ -200,10 +216,18 @@ impl Typlite {
|
|||
.path_for_id(wrap_main_id)
|
||||
.map_err(|err| format!("getting source for main file: {err:?}"))?;
|
||||
|
||||
let mut world = world.html_task().task(TaskInputs {
|
||||
let task_inputs = TaskInputs {
|
||||
entry: Some(entry.select_in_workspace(wrap_main_id.vpath().as_rooted_path())),
|
||||
inputs: None,
|
||||
});
|
||||
inputs: if format == Format::Text || self.feat.remove_html {
|
||||
let mut dict = TypstDict::new();
|
||||
dict.insert("x-remove-html".into(), Str("true".into()));
|
||||
Some(Arc::new(LazyHash::new(dict)))
|
||||
} else {
|
||||
None
|
||||
},
|
||||
};
|
||||
|
||||
let mut world = world.task(task_inputs).html_task().into_owned();
|
||||
|
||||
let markdown_id = FileId::new(
|
||||
Some(typst_syntax::package::PackageSpec::from_str("@local/markdown:0.1.0").unwrap()),
|
||||
|
|
|
@ -45,6 +45,7 @@ fn main() -> typlite::Result<()> {
|
|||
|
||||
let output_format = match output_path.extension() {
|
||||
Some(ext) if ext == std::ffi::OsStr::new("tex") => Format::LaTeX,
|
||||
Some(ext) if ext == std::ffi::OsStr::new("txt") => Format::Text,
|
||||
#[cfg(feature = "docx")]
|
||||
Some(ext) if ext == std::ffi::OsStr::new("docx") => Format::Docx,
|
||||
_ => Format::Md,
|
||||
|
@ -75,6 +76,7 @@ fn main() -> typlite::Result<()> {
|
|||
let result = match output_format {
|
||||
Format::Md => Bytes::from_string(doc.to_md_string()?),
|
||||
Format::LaTeX => Bytes::from_string(doc.to_tex_string(true)?),
|
||||
Format::Text => Bytes::from_string(doc.to_text_string()?),
|
||||
#[cfg(feature = "docx")]
|
||||
Format::Docx => Bytes::new(doc.to_docx()?),
|
||||
};
|
||||
|
|
|
@ -116,6 +116,25 @@
|
|||
|
||||
#let example(code) = eval(code.text, mode: "markup")
|
||||
|
||||
// Reduces the body of a math equation to strings (or a flat array of values)
// so that it can be emitted without an HTML frame.
//
// - A string is returned unchanged.
// - An array is flattened; unless one of its elements is content whose
//   element function compares equal to `str`, every element is processed
//   recursively first.
// - Any other value is replaced by those of its field values that are content
//   or strings, and that array is processed recursively.
#let process-math-eq(item) = {
  if type(item) == str {
    return item
  }
  if type(item) != array {
    // Only content and string field values can contribute text.
    let parts = item.fields().values().flatten().filter(v => type(v) == content or type(v) == str)
    return process-math-eq(parts)
  }
  let has-str-content = item.any(v => {
    type(v) == content and v.func() == str
  })
  if has-str-content {
    item.flatten()
  } else {
    item.map(v => process-math-eq(v)).flatten()
  }
}
|
||||
|
||||
#let md-doc(body) = context {
|
||||
// distinguish parbreak from <p> tag
|
||||
show parbreak: it => if-not-paged(it, md-parbreak)
|
||||
|
@ -139,11 +158,21 @@
|
|||
|
||||
show math.equation.where(block: false): it => if-not-paged(
|
||||
it,
|
||||
html.elem("m1eqinline", html.frame(box(inset: 0.5em, it))),
|
||||
html.elem(
|
||||
"m1eqinline",
|
||||
if sys.inputs.at("x-remove-html", default: none) != "true" { html.frame(box(inset: 0.5em, it)) } else {
|
||||
process-math-eq(it.body).flatten().join()
|
||||
},
|
||||
),
|
||||
)
|
||||
show math.equation.where(block: true): it => if-not-paged(
|
||||
it,
|
||||
html.elem("m1eqblock", html.frame(block(inset: 0.5em, it))),
|
||||
html.elem(
|
||||
"m1eqblock",
|
||||
if sys.inputs.at("x-remove-html", default: none) != "true" { html.frame(block(inset: 0.5em, it)) } else {
|
||||
process-math-eq(it.body).flatten().join()
|
||||
},
|
||||
),
|
||||
)
|
||||
|
||||
// show linebreak: it => if-not-paged(it, md-linebreak)
|
||||
|
|
|
@ -4,11 +4,13 @@
|
|||
pub mod docx;
|
||||
pub mod latex;
|
||||
pub mod markdown;
|
||||
pub mod text;
|
||||
|
||||
#[cfg(feature = "docx")]
|
||||
pub use self::docx::DocxWriter;
|
||||
pub use latex::LaTeXWriter;
|
||||
pub use markdown::MarkdownWriter;
|
||||
pub use text::TextWriter;
|
||||
|
||||
use crate::common::{Format, FormatWriter};
|
||||
|
||||
|
@ -17,6 +19,7 @@ pub fn create_writer(format: Format) -> Box<dyn FormatWriter> {
|
|||
match format {
|
||||
Format::Md => Box::new(markdown::MarkdownWriter::new()),
|
||||
Format::LaTeX => Box::new(latex::LaTeXWriter::new()),
|
||||
Format::Text => Box::new(text::TextWriter::new()),
|
||||
#[cfg(feature = "docx")]
|
||||
Format::Docx => Box::new(docx::DocxWriter::new()),
|
||||
}
|
||||
|
|
187
crates/typlite/src/writer/text.rs
Normal file
187
crates/typlite/src/writer/text.rs
Normal file
|
@ -0,0 +1,187 @@
|
|||
//! Text writer implementation - produces plain text output
|
||||
|
||||
use cmark_writer::ast::Node;
|
||||
use ecow::EcoString;
|
||||
|
||||
use crate::common::{ExternalFrameNode, FigureNode, FormatWriter};
|
||||
use crate::Result;
|
||||
|
||||
/// Text writer implementation
|
||||
#[derive(Default)]
|
||||
pub struct TextWriter {}
|
||||
|
||||
impl TextWriter {
|
||||
pub fn new() -> Self {
|
||||
Self {}
|
||||
}
|
||||
|
||||
fn write_node(node: &Node, output: &mut EcoString) -> Result<()> {
|
||||
match node {
|
||||
Node::Document(blocks) => {
|
||||
for block in blocks {
|
||||
Self::write_node(block, output)?;
|
||||
output.push_str("\n");
|
||||
}
|
||||
}
|
||||
Node::Paragraph(inlines) => {
|
||||
for inline in inlines {
|
||||
Self::write_node(inline, output)?;
|
||||
}
|
||||
output.push_str("\n");
|
||||
}
|
||||
Node::Heading {
|
||||
level: _,
|
||||
content,
|
||||
heading_type: _,
|
||||
} => {
|
||||
for inline in content {
|
||||
Self::write_node(inline, output)?;
|
||||
}
|
||||
output.push_str("\n");
|
||||
}
|
||||
Node::BlockQuote(content) => {
|
||||
for block in content {
|
||||
Self::write_node(block, output)?;
|
||||
}
|
||||
}
|
||||
Node::CodeBlock {
|
||||
language: _,
|
||||
content,
|
||||
block_type: _,
|
||||
} => {
|
||||
output.push_str(content);
|
||||
output.push_str("\n\n");
|
||||
}
|
||||
Node::OrderedList { start: _, items } => {
|
||||
for item in items.iter() {
|
||||
match item {
|
||||
cmark_writer::ast::ListItem::Ordered { content, .. }
|
||||
| cmark_writer::ast::ListItem::Unordered { content } => {
|
||||
for block in content {
|
||||
Self::write_node(block, output)?;
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
}
|
||||
Node::UnorderedList(items) => {
|
||||
for item in items {
|
||||
match item {
|
||||
cmark_writer::ast::ListItem::Ordered { content, .. }
|
||||
| cmark_writer::ast::ListItem::Unordered { content } => {
|
||||
for block in content {
|
||||
Self::write_node(block, output)?;
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
}
|
||||
Node::Table {
|
||||
headers,
|
||||
rows,
|
||||
alignments: _,
|
||||
} => {
|
||||
// Write headers
|
||||
for header in headers {
|
||||
Self::write_node(header, output)?;
|
||||
output.push(' ');
|
||||
}
|
||||
output.push_str("\n");
|
||||
|
||||
// Write rows
|
||||
for row in rows {
|
||||
for cell in row {
|
||||
Self::write_node(cell, output)?;
|
||||
output.push(' ');
|
||||
}
|
||||
output.push_str("\n");
|
||||
}
|
||||
output.push_str("\n");
|
||||
}
|
||||
Node::Text(text) => {
|
||||
output.push_str(text);
|
||||
}
|
||||
Node::Emphasis(content) | Node::Strong(content) | Node::Strikethrough(content) => {
|
||||
for inline in content {
|
||||
Self::write_node(inline, output)?;
|
||||
}
|
||||
}
|
||||
Node::Link {
|
||||
url: _,
|
||||
title: _,
|
||||
content,
|
||||
} => {
|
||||
for inline in content {
|
||||
Self::write_node(inline, output)?;
|
||||
}
|
||||
}
|
||||
Node::Image {
|
||||
url: _,
|
||||
title: _,
|
||||
alt,
|
||||
} => {
|
||||
if !alt.is_empty() {
|
||||
for inline in alt {
|
||||
Self::write_node(inline, output)?;
|
||||
}
|
||||
}
|
||||
}
|
||||
Node::InlineCode(code) => {
|
||||
output.push_str(code);
|
||||
}
|
||||
Node::HardBreak => {
|
||||
output.push_str("\n");
|
||||
}
|
||||
Node::SoftBreak => {
|
||||
output.push(' ');
|
||||
}
|
||||
Node::ThematicBreak => {
|
||||
output.push_str("\n");
|
||||
}
|
||||
Node::HtmlElement(element) => {
|
||||
for child in &element.children {
|
||||
Self::write_node(child, output)?;
|
||||
}
|
||||
}
|
||||
node if node.is_custom_type::<FigureNode>() => {
|
||||
if let Some(figure_node) = node.as_custom_type::<FigureNode>() {
|
||||
Self::write_node(&figure_node.body, output)?;
|
||||
if !figure_node.caption.is_empty() {
|
||||
output.push_str("\n");
|
||||
output.push_str(&figure_node.caption);
|
||||
}
|
||||
}
|
||||
}
|
||||
node if node.is_custom_type::<ExternalFrameNode>() => {
|
||||
if let Some(external_frame) = node.as_custom_type::<ExternalFrameNode>() {
|
||||
if !external_frame.alt_text.is_empty() {
|
||||
output.push_str(&external_frame.alt_text);
|
||||
}
|
||||
}
|
||||
}
|
||||
node if node.is_custom_type::<crate::common::HighlightNode>() => {
|
||||
if let Some(highlight) = node.as_custom_type::<crate::common::HighlightNode>() {
|
||||
for child in &highlight.content {
|
||||
Self::write_node(child, output)?;
|
||||
}
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl FormatWriter for TextWriter {
|
||||
fn write_eco(&mut self, document: &Node, output: &mut EcoString) -> Result<()> {
|
||||
Self::write_node(document, output)
|
||||
}
|
||||
|
||||
fn write_vec(&mut self, document: &Node) -> Result<Vec<u8>> {
|
||||
let mut output = EcoString::new();
|
||||
Self::write_node(document, &mut output)?;
|
||||
Ok(output.as_str().as_bytes().to_vec())
|
||||
}
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue