feat: employ l10n to tinymist-cli and vscode extension (#1505)

* feat: runtime translation

* feat: poc of rust translation

* feat: clean up implementation

* feat: initialize correctly

* dev: remove dirty log

* dev: rename l10nMsg

* fix: desc

* feat: update assets building

* feat: update assets building

* build: update cargo.lock

* fix: warnings

* fix: warnings

* dev: expose api

* fix: compile error

* fix: compile errors in scripts
This commit is contained in:
Myriad-Dreamin 2025-03-15 10:38:07 +08:00 committed by GitHub
parent dc9013e253
commit 4cbe35a286
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
33 changed files with 615 additions and 62 deletions

View file

@ -14,3 +14,4 @@ include = ["src/**/*"]
[features]
typst-preview = []
l10n = []

View file

@ -1 +1,2 @@
typst-preview.html
typst-preview.html
tinymist-rt.toml

View file

@ -3,3 +3,10 @@
pub const TYPST_PREVIEW_HTML: &str = include_str!("typst-preview.html");
#[cfg(not(feature = "typst-preview"))]
pub const TYPST_PREVIEW_HTML: &str = "<html><body>Typst Preview needs to be built with the `embed-html` feature to work!</body></html>";
/// If this file is not found, please run `yarn extract:l10n:rs` to extract
/// the localization data.
#[cfg(feature = "l10n")]
pub const L10N_DATA: &str = include_str!("tinymist-rt.toml");
#[cfg(not(feature = "l10n"))]
pub const L10N_DATA: &str = "";

View file

@ -18,6 +18,3 @@ quote.workspace = true
[lib]
proc-macro = true
[features]
typst-preview = []

View file

@ -0,0 +1,41 @@
[package]
name = "tinymist-l10n"
description = "Localization support for tinymist and typst."
categories = ["compilers", "command-line-utilities"]
keywords = ["language", "typst"]
authors.workspace = true
version.workspace = true
license.workspace = true
edition.workspace = true
homepage.workspace = true
repository.workspace = true
rust-version.workspace = true
[[bin]]
name = "tinymist-l10n"
path = "src/main.rs"
required-features = ["cli"]
test = false
doctest = false
bench = false
doc = false
[dependencies]
anyhow.workspace = true
clap = { workspace = true, optional = true }
ecow.workspace = true
rayon.workspace = true
rustc-hash.workspace = true
serde_json.workspace = true
walkdir.workspace = true
[dev-dependencies]
insta.workspace = true
[features]
default = ["cli"]
cli = ["clap", "clap/wrap_help"]
[lints]
workspace = true

View file

@ -0,0 +1,3 @@
# tinymist-l10n
Tinymist's l10n tool.

View file

@ -0,0 +1,3 @@
[dist]
dist = false

View file

@ -0,0 +1,251 @@
//! Tinymist's localization library.
use core::panic;
use std::{
borrow::Cow,
collections::HashSet,
path::Path,
sync::{OnceLock, RwLock},
};
use rayon::{
iter::{IntoParallelRefMutIterator, ParallelIterator},
str::ParallelString,
};
use rustc_hash::FxHashMap;
/// A map of translations.
pub type TranslationMap = FxHashMap<String, String>;
/// A set of translation maps.
pub type TranslationMapSet = FxHashMap<String, TranslationMap>;
static ALL_TRANSLATIONS: OnceLock<TranslationMapSet> = OnceLock::new();
static LOCALE_TRANSLATIONS: RwLock<Option<&'static TranslationMap>> = RwLock::new(Option::None);
/// Installs the global translation set. It can only be called once.
///
/// Only the first call has any effect; later calls are rejected by the
/// `OnceLock` and reported on stderr instead of panicking.
pub fn set_translations(translations: TranslationMapSet) {
    if let Err(rejected) = ALL_TRANSLATIONS.set(translations) {
        eprintln!("cannot set translations: len = {}", rejected.len());
    }
}
/// Sets the current locale.
///
/// The lookup is case-insensitive. Returns `None` when translations have
/// not been loaded yet or no table matches the locale.
pub fn set_locale(locale: &str) -> Option<()> {
    let translations = ALL_TRANSLATIONS.get()?;
    let lower_locale = locale.to_lowercase();
    let locale = lower_locale.as_str();
    let translations = translations.get(locale).or_else(|| {
        // Falls back to a base language: accepts a registered key that is a
        // prefix of the locale followed by a hyphen, e.g. the key `zh`
        // matches the locale `zh-cn`.
        translations
            .iter()
            .find(|(k, _)| locale.starts_with(*k) && locale.chars().nth(k.len()) == Some('-'))
            .map(|(_, v)| v)
    })?;
    *LOCALE_TRANSLATIONS.write().unwrap() = Some(translations);
    Some(())
}
/// Loads a TOML string into a map of translations.
///
/// Values that look like JSON string literals (starting with `"`) are
/// unescaped via `serde_json`; all other values are kept verbatim.
///
/// # Panics
///
/// Panics if a quoted translation message is not valid JSON string syntax.
pub fn load_translations(input: &str) -> anyhow::Result<TranslationMapSet> {
    let mut translations = deserialize(input, false)?;
    // Unescapes every quoted message in place, in parallel.
    translations.par_iter_mut().for_each(|(_, v)| {
        v.par_iter_mut().for_each(|(_, v)| {
            if !v.starts_with('"') {
                return;
            }
            *v = serde_json::from_str::<String>(v)
                .unwrap_or_else(|e| panic!("cannot parse translation message: {e}, message: {v}"));
        });
    });
    Ok(translations)
}
/// Merges `key_values` into the translation file at `output`, rewriting it.
///
/// The existing file is loaded first (a missing file counts as an empty
/// set), entries whose keys no longer occur in `key_values` are dropped,
/// and the merged result is written back in sorted TOML form.
pub fn update_disk_translations(
    mut key_values: Vec<(String, String)>,
    output: &Path,
) -> anyhow::Result<()> {
    // Keeps the extracted keys in a deterministic order.
    key_values.sort_by(|lhs, rhs| lhs.0.cmp(&rhs.0));

    // Loads the current state from disk; `NotFound` simply means we start
    // from an empty translation set.
    let mut translations = match std::fs::read_to_string(output) {
        Ok(content) => deserialize(&content, true)?,
        Err(err) if err.kind() == std::io::ErrorKind::NotFound => TranslationMapSet::default(),
        Err(err) => return Err(err.into()),
    };

    // Merges new pairs and drops stale entries.
    update_translations(key_values, &mut translations);

    // Serializes and persists the merged set.
    std::fs::write(output, serialize_translations(translations))?;
    Ok(())
}
/// Updates a map of translations with new key-value pairs.
///
/// Entries whose keys do not occur in `key_values` are removed; every
/// provided pair is then stored under the `en` language.
pub fn update_translations(
    key_values: Vec<(String, String)>,
    translations: &mut TranslationMapSet,
) {
    // Drops translations whose keys are no longer referenced anywhere.
    let live_keys: HashSet<&String> = key_values.iter().map(|(key, _)| key).collect();
    translations.retain(|key, _| live_keys.contains(key));

    // Records each extracted message as the English translation.
    for (key, value) in key_values {
        translations
            .entry(key)
            .or_default()
            .insert("en".to_owned(), value);
    }
}
/// Writes a map of translations to a TOML string.
///
/// Sections are emitted in sorted key order; inside each section the `en`
/// entry comes first and the remaining languages follow alphabetically.
///
/// # Panics
///
/// Panics if any key lacks an `en` translation.
pub fn serialize_translations(translations: TranslationMapSet) -> String {
    let mut entries = translations.into_iter().collect::<Vec<_>>();
    entries.sort_by(|lhs, rhs| lhs.0.cmp(&rhs.0));

    let mut out = String::new();
    out.push_str("\n# The translations are partially generated by copilot\n");
    for (key, mut languages) in entries {
        out.push_str(&format!("\n[{key}]\n"));
        // `en` is the source language and is always emitted first.
        let en = languages.remove("en").expect("en translation is missing");
        out.push_str(&format!("en = {en}\n"));

        // The remaining languages are sorted for stable output.
        let mut rest = languages.into_iter().collect::<Vec<_>>();
        rest.sort_by(|lhs, rhs| lhs.0.cmp(&rhs.0));
        for (lang, value) in rest {
            out.push_str(&format!("{lang} = {value}\n"));
        }
    }
    out
}
/// Tries to translate a string to the current language.
///
/// The first argument is the translation key and the second is the default
/// (English) message. Any extra arguments are interpolated into `{0}`,
/// `{1}`, ... placeholders and must be `&Arg` values.
#[macro_export]
macro_rules! t {
    ($key:expr, $message:expr) => {
        $crate::t_without_args($key, $message)
    };
    ($key:expr, $message:expr, $($args:expr),*) => {
        $crate::t_with_args($key, $message, &[$($args),*])
    };
}
/// Looks up `key` in the active locale's table.
///
/// Falls back to the provided default `message` when no locale has been
/// set or the key is untranslated.
fn find_message(key: &'static str, message: &'static str) -> &'static str {
    let guard = LOCALE_TRANSLATIONS.read().unwrap();
    match *guard {
        Some(table) => table.get(key).map_or(message, String::as_str),
        None => message,
    }
}
/// Tries to translate a string to the current language.
///
/// Returns the translation registered for `key`, or `message` (the default
/// text) when no locale is set or the key is untranslated. No placeholder
/// interpolation is performed, so the result can always be borrowed.
pub fn t_without_args(key: &'static str, message: &'static str) -> Cow<'static, str> {
    Cow::Borrowed(find_message(key, message))
}
/// An argument for a translation.
///
/// Used by [`t_with_args`] to substitute `{n}` placeholders in a message.
pub enum Arg<'a> {
    /// A string argument.
    Str(&'a str),
    /// An integer argument.
    Int(i64),
    /// A float argument.
    Float(f64),
}
/// Tries to translate a string to the current language, interpolating
/// `{n}` placeholders with the corresponding `args[n]`.
///
/// A `{` without a matching `}` is copied verbatim. Fix over the previous
/// version: the old scanner measured the placeholder's position from the
/// start of the message instead of from the `{`, reused the same variable
/// as both char offset and argument index, and still copied the
/// placeholder's digits and `}` into the output — so any real message
/// panicked in `parse` or produced garbage.
///
/// # Panics
///
/// Panics if a placeholder is not a valid `usize` or indexes past the end
/// of `args`.
pub fn t_with_args(key: &'static str, message: &'static str, args: &[&Arg]) -> Cow<'static, str> {
    let message = find_message(key, message);
    let mut result = String::with_capacity(message.len());
    let mut rest = message;
    while let Some(open) = rest.find('{') {
        // Copies the literal text before the placeholder.
        result.push_str(&rest[..open]);
        let after_open = &rest[open + 1..];
        let Some(close) = after_open.find('}') else {
            // Unmatched `{`: keeps it verbatim and continues scanning.
            result.push('{');
            rest = after_open;
            continue;
        };
        let index: usize = after_open[..close]
            .parse()
            .unwrap_or_else(|e| panic!("invalid placeholder in message {message:?}: {e}"));
        match args[index] {
            Arg::Str(s) => result.push_str(s),
            Arg::Int(i) => result.push_str(&i.to_string()),
            Arg::Float(f) => result.push_str(&f.to_string()),
        }
        rest = &after_open[close + 1..];
    }
    result.push_str(rest);
    Cow::Owned(result)
}
/// Deserializes a TOML string into a map of translations.
///
/// Only the flat subset of TOML used by the translation files is parsed:
/// `[section]` headers and single-line `lang = value` pairs. Comment lines
/// (`#`) and blank lines are skipped; values are kept verbatim, including
/// any surrounding quotes.
///
/// When `key_first` is true the result maps `key -> lang -> value`;
/// otherwise it maps `lang -> key -> value`.
///
/// # Errors
///
/// Returns an error if a non-header line contains no `=` sign.
pub fn deserialize(input: &str, key_first: bool) -> anyhow::Result<TranslationMapSet> {
    // Trimming and filtering run in parallel; the parse below is
    // sequential because `[section]` headers carry state across lines.
    let lines = input
        .par_split('\n')
        .map(|line| line.trim())
        .filter(|line| !line.starts_with('#') && !line.is_empty())
        .collect::<Vec<_>>();
    let mut translations = FxHashMap::default();
    // The most recently seen `[section]` name; pairs before any header
    // land under the empty key.
    let mut key = String::new();
    for line in lines {
        if line.starts_with('[') {
            key = line[1..line.len() - 1].to_string();
        } else {
            let equal_index = line.find('=').map_or_else(
                || {
                    Err(anyhow::anyhow!(
                        "cannot find equal sign in translation line: {line}"
                    ))
                },
                Ok,
            )?;
            let lang = line[..equal_index].trim().to_string();
            let value = line[equal_index + 1..].trim().to_string();
            if key_first {
                translations
                    .entry(key.clone())
                    .or_insert_with(FxHashMap::default)
                    .insert(lang, value);
            } else {
                translations
                    .entry(lang)
                    .or_insert_with(FxHashMap::default)
                    .insert(key.clone(), value);
            }
        }
    }
    Ok(translations)
}

View file

@ -0,0 +1,125 @@
//! Fully parallelized l10n tool for Rust and TypeScript.
use std::path::Path;
use clap::Parser;
use rayon::{
iter::{ParallelBridge, ParallelIterator},
str::ParallelString,
};
use tinymist_l10n::update_disk_translations;
/// The CLI arguments of the tool.
#[derive(Debug, Clone, PartialEq, Eq, Parser)]
struct Args {
    /// The kind of file to process.
    ///
    /// It can be `rs` for Rust or `ts` for TypeScript.
    /// - `rs`: checks `tinymist_l10n::t!` macro in Rust files.
    /// - `ts`: checks `l10nMsg` function in TypeScript files.
    ///
    /// The value is also used as the file-extension filter when walking
    /// `dir`.
    #[clap(long)]
    kind: String,
    /// The directory to process recursively.
    #[clap(long)]
    dir: String,
    /// The output file to write the translations. The file will be in-place
    /// updated with new translations.
    #[clap(long)]
    output: String,
}
/// Entry point: walks `--dir`, extracts l10n call sites from matching
/// files in parallel, and merges them into the `--output` TOML file.
fn main() -> anyhow::Result<()> {
    let args = Args::parse();
    let is_rs = args.kind == "rs";

    // Collects `(key, default message)` pairs from every regular file
    // whose extension matches the requested kind.
    let extracted = walkdir::WalkDir::new(&args.dir)
        .into_iter()
        .filter_map(Result::ok)
        .filter(|entry| entry.file_type().is_file())
        .filter(|entry| {
            entry
                .path()
                .extension()
                .is_some_and(|ext| ext == args.kind.as_str())
        })
        .par_bridge()
        .flat_map(|entry| check_calls(entry, is_rs))
        .collect::<Vec<_>>();

    update_disk_translations(extracted, Path::new(&args.output))?;
    Ok(())
}
/// The TypeScript l10n call scanned for (`l10nMsg("key")`).
const L10N_FN_TS: &str = "l10nMsg";
/// The Rust l10n macro scanned for (`tinymist_l10n::t!("key", "msg")`).
const L10N_FN_RS: &str = "tinymist_l10n::t!";
/// Extracts `(key, quoted default message)` pairs from one source file.
///
/// The file content is scanned in parallel for the first character of the
/// relevant call (`t` for Rust, `l` for TypeScript); each candidate
/// position is then checked for the full call name and its string
/// arguments are parsed out. Unreadable files are reported on stderr and
/// yield no pairs.
///
/// NOTE(review): TS keys are stored wrapped in quotes while Rust keys are
/// stored bare — confirm this asymmetry is intended by the consumers.
fn check_calls(e: walkdir::DirEntry, is_rs: bool) -> Vec<(String, String)> {
    let path = e.path();
    let content = match std::fs::read_to_string(path) {
        Ok(content) => content,
        Err(err) => {
            eprintln!("failed to read file {path:?}: {err}");
            return Vec::new();
        }
    };
    content
        .as_str()
        .par_match_indices(if is_rs { 't' } else { 'l' })
        .flat_map(|e| {
            let s = &content[e.0..];
            if !is_rs && s.starts_with(L10N_FN_TS) {
                let suffix = &content[e.0 + L10N_FN_TS.len()..];
                return parse_l10n_args_ts(suffix);
                // Parses `("key")`; the key doubles as the default message.
                fn parse_l10n_args_ts(s: &str) -> Option<(String, String)> {
                    let s = parse_char(s, '(')?;
                    let (key, _s) = parse_str(s)?;
                    Some((format!("\"{key}\""), format!("\"{key}\"")))
                }
            }
            if is_rs && s.starts_with(L10N_FN_RS) {
                let suffix = &content[e.0 + L10N_FN_RS.len()..];
                return parse_l10n_args_rs(suffix);
                // Parses `("key", "message")`.
                fn parse_l10n_args_rs(s: &str) -> Option<(String, String)> {
                    let s = parse_char(s, '(')?;
                    let (key, s) = parse_str(s)?;
                    let s = parse_char(s, ',')?;
                    let (value, _s) = parse_str(s)?;
                    Some((key.to_string(), format!("\"{value}\"")))
                }
            }
            None
        })
        .collect::<Vec<_>>()
}
/// Consumes leading whitespace and then the expected character `ch`,
/// returning the remainder; `None` if `ch` is not next.
///
/// NOTE(review): the `1..` slice assumes `ch` is a single-byte (ASCII)
/// character, which holds for every current call site (`(`, `,`, `"`).
fn parse_char(s: &str, ch: char) -> Option<&str> {
    let trimmed = s.trim_start();
    trimmed.starts_with(ch).then(|| &trimmed[1..])
}
/// Parses a double-quoted string literal (after optional leading
/// whitespace), returning the raw contents (escape sequences are kept
/// unprocessed) and the text after the closing quote.
fn parse_str(s: &str) -> Option<(&str, &str)> {
    let body = parse_char(s, '"')?;
    let mut chars = body.char_indices();
    while let Some((i, ch)) = chars.next() {
        match ch {
            // A backslash escapes the next character, so skip it.
            '\\' => {
                chars.next();
            }
            '"' => return Some((&body[..i], &body[i + 1..])),
            _ => (),
        }
    }
    // Unterminated literal.
    None
}

View file

@ -60,6 +60,7 @@ tinymist-project = { workspace = true, features = ["lsp"] }
tinymist-analysis.workspace = true
tinymist-derive.workspace = true
tinymist-std.workspace = true
tinymist-l10n.workspace = true
[dev-dependencies]
once_cell.workspace = true

View file

@ -31,18 +31,33 @@ impl SemanticRequest for CodeLensRequest {
data: None,
};
res.push(doc_lens("Profile", vec!["profile".into()]));
res.push(doc_lens("Preview", vec!["preview".into()]));
res.push(doc_lens(
&tinymist_l10n::t!("tinymist-query.code-action.profile", "Profile"),
vec!["profile".into()],
));
res.push(doc_lens(
&tinymist_l10n::t!("tinymist-query.code-action.preview", "Preview"),
vec!["preview".into()],
));
let is_html = ctx.world.library.features.is_enabled(typst::Feature::Html);
if is_html {
res.push(doc_lens("Export HTML", vec!["export-html".into()]));
res.push(doc_lens(
&tinymist_l10n::t!("tinymist-query.code-action.exportHtml", "Export HTML"),
vec!["export-html".into()],
));
} else {
res.push(doc_lens("Export PDF", vec!["export-pdf".into()]));
res.push(doc_lens(
&tinymist_l10n::t!("tinymist-query.code-action.exportPdf", "Export PDF"),
vec!["export-pdf".into()],
));
}
res.push(doc_lens("More ..", vec!["more".into()]));
res.push(doc_lens(
&tinymist_l10n::t!("tinymist-query.code-action.more", "More .."),
vec!["more".into()],
));
Some(res)
}

View file

@ -83,9 +83,19 @@ typstfmt.workspace = true
typstyle-core.workspace = true
unicode-script.workspace = true
walkdir.workspace = true
tinymist-l10n.workspace = true
[features]
default = ["cli", "html", "pdf", "preview", "embed-fonts", "no-content-hint"]
default = [
"cli",
"html",
"pdf",
# enable it once we upload assets.
# "l10n",
"preview",
"embed-fonts",
"no-content-hint",
]
cli = ["sync-lsp/clap", "clap/wrap_help"]
@ -121,6 +131,9 @@ preview = [
"hyper-tungstenite",
]
# l10n = ["tinymist-assets/l10n"]
l10n = []
[dev-dependencies]
temp-env.workspace = true

View file

@ -73,6 +73,10 @@ impl Initializer for RegularInit {
fn initialize(self, params: InitializeParams) -> (ServerState, AnySchedulableResponse) {
let (config, err) = Config::from_params(params, self.font_opts);
if let Some(locale) = config.const_config.locale.as_ref() {
tinymist_l10n::set_locale(locale);
}
let super_init = SuperInit {
client: self.client,
exec_cmds: self.exec_cmds,
@ -524,6 +528,8 @@ pub struct ConstConfig {
pub doc_line_folding_only: bool,
/// Allow dynamic registration of document formatting.
pub doc_fmt_dynamic_registration: bool,
/// The locale of the editor.
pub locale: Option<String>,
}
impl Default for ConstConfig {
@ -555,6 +561,12 @@ impl From<&InitializeParams> for ConstConfig {
let fold = try_(|| doc?.folding_range.as_ref());
let format = try_(|| doc?.formatting.as_ref());
let locale = params
.initialization_options
.as_ref()
.and_then(|init| init.get("locale").and_then(|v| v.as_str()))
.or(params.locale.as_deref());
Self {
position_encoding,
cfg_change_registration: try_or(|| workspace?.configuration, false),
@ -564,6 +576,7 @@ impl From<&InitializeParams> for ConstConfig {
tokens_multiline_token_support: try_or(|| sema?.multiline_token_support, false),
doc_line_folding_only: try_or(|| fold?.line_folding_only, true),
doc_fmt_dynamic_registration: try_or(|| format?.dynamic_registration, false),
locale: locale.map(ToOwned::to_owned),
}
}
}

View file

@ -26,6 +26,9 @@ use tinymist_project::EntryResolver;
use tinymist_query::package::PackageInfo;
use tinymist_std::{bail, error::prelude::*};
#[cfg(feature = "l10n")]
use tinymist_l10n::{load_translations, set_translations};
use crate::args::*;
#[cfg(feature = "dhat-heap")]
@ -56,13 +59,21 @@ fn main() -> Result<()> {
#[cfg(feature = "dhat-heap")]
let _profiler = dhat::Profiler::new_heap();
// Parse command line arguments
// Parses command line arguments
let args = CliArguments::parse();
let is_transient_cmd = matches!(args.command, Some(Commands::Compile(..)));
// Probes soon to avoid other initializations causing errors
if matches!(args.command, Some(Commands::Probe)) {
return Ok(());
}
// Start logging
// Loads translations
#[cfg(feature = "l10n")]
set_translations(load_translations(tinymist_assets::L10N_DATA)?);
// Starts logging
let _ = {
let is_transient_cmd = matches!(args.command, Some(Commands::Compile(..)));
use log::LevelFilter::*;
let base_level = if is_transient_cmd { Warn } else { Info };