slint/internal/compiler/translations.rs
Simon Hausmann cc932fceb9
Improve font embedding for MCUs when bundling translations (#7875)
Include messages from bundled translations into the character set used for deciding which glyphs to embed.
2025-03-18 11:54:28 +01:00

456 lines
18 KiB
Rust

// Copyright © SixtyFPS GmbH <info@slint.dev>
// SPDX-License-Identifier: GPL-3.0-only OR LicenseRef-Slint-Royalty-free-2.0 OR LicenseRef-Slint-Software-3.0
use crate::llr::Expression;
use core::ops::Not;
use smol_str::{SmolStr, ToSmolStr};
use std::collections::hash_map::Entry;
use std::collections::HashMap;
use std::path::Path;
use std::rc::Rc;
/// The translation tables that get bundled with the compiled program.
///
/// Every table that is indexed by language uses the same language ids:
/// id 0 is the original (untranslated) text, and the ids after it follow
/// the order of `languages`.
#[derive(Clone, Debug)]
pub struct Translations {
/// All the translatable strings that do not have plural forms.
/// The outer vector index identifies the message; it is the index stored in the LLR.
/// The inner vector index is the language id (entry 0 is the original string).
/// An entry is `None` when no translation could be obtained for that language.
pub strings: Vec<Vec<Option<SmolStr>>>,
/// All the translatable strings that are used with plural forms.
/// The outer vector index identifies the message; it is the index stored in the LLR.
/// The middle vector index is the language id (entry 0 holds the original singular
/// and plural strings).
/// The innermost vector contains one string per plural form of that language.
pub plurals: Vec<Vec<Option<Vec<SmolStr>>>>,
/// One plural rule per language id.
/// Each expression is a function that maps its first and only argument (an integer)
/// to the plural form index (an integer).
/// It can only do basic mathematical operations.
/// The expression cannot reference properties or variables:
/// only builtin math functions and its first argument.
pub plural_rules: Vec<Option<Expression>>,
/// The "names" of the languages, indexed by language id
pub languages: Vec<SmolStr>,
}
/// Builds a [`Translations`] table incrementally: each translate call that gets
/// lowered registers its message once and receives the index of that message.
#[derive(Clone)]
pub struct TranslationsBuilder {
/// The tables built so far
result: Translations,
/// Maps (msgid, msgid_plural, msgctx) to the index in the result
/// (the index is in `strings` or in `plurals` depending on whether the message is plural)
map: HashMap<(SmolStr, SmolStr, SmolStr), usize>,
/// The catalogs containing the translations, in the order in which they were loaded
/// (the catalog at position `i` belongs to language id `i + 1`)
catalogs: Rc<Vec<polib::catalog::Catalog>>,
}
impl TranslationsBuilder {
/// Loads every `<path>/<lang>/LC_MESSAGES/<domain>.po` catalog found in the
/// sub-directories of `path`.
///
/// Language id 0 is reserved for the original (untranslated) strings: it has an
/// empty name and uses the plural rule `n!=1`. Each catalog found adds one
/// language, named after its `<lang>` directory.
///
/// # Errors
/// Returns an error when `path` cannot be read, when a `.po` file or its plural
/// rule cannot be parsed, or when no catalog at all was found.
pub fn load_translations(path: &Path, domain: &str) -> std::io::Result<Self> {
    let mut languages = vec!["".into()];
    let mut catalogs = Vec::new();
    let mut plural_rules = vec![Some(
        plural_rule_parser::parse_rule_expression("n!=1")
            .expect("the built-in plural rule `n!=1` is valid"),
    )];
    for entry in std::fs::read_dir(path)
        .map_err(|e| std::io::Error::other(format!("Error reading directory {path:?}: {e}")))?
    {
        let entry = entry?;
        let po_path = entry.path().join("LC_MESSAGES").join(format!("{domain}.po"));
        if !po_path.exists() {
            // Not a language directory for this domain.
            continue;
        }
        let catalog = polib::po_file::parse(&po_path).map_err(|e| {
            std::io::Error::other(format!("Error parsing {}: {e}", po_path.display()))
        })?;
        // Keep the parser's diagnostic so the user can see which part of the rule is wrong.
        let rule = plural_rule_parser::parse_rule_expression(&catalog.metadata.plural_rules.expr)
            .map_err(|e| {
                std::io::Error::other(format!(
                    "Error parsing plural rules in {}: {e:?}",
                    po_path.display()
                ))
            })?;
        // The three vectors must stay in sync: they are all indexed by language.
        languages.push(entry.file_name().to_string_lossy().into());
        plural_rules.push(Some(rule));
        catalogs.push(catalog);
    }
    if catalogs.is_empty() {
        return Err(std::io::Error::other(format!(
            "No translations found. We look for files in '{}/<lang>/LC_MESSAGES/{domain}.po'",
            path.display()
        )));
    }
    Ok(Self {
        result: Translations {
            strings: Vec::new(),
            plurals: Vec::new(),
            plural_rules,
            languages,
        },
        map: HashMap::new(),
        catalogs: Rc::new(catalogs),
    })
}
pub fn lower_translate_call(&mut self, args: Vec<Expression>) -> Expression {
let [original, contextid, _domain, format_args, n, plural] = args
.try_into()
.expect("The resolving pass should have ensured that the arguments are correct");
let original = get_string(original).expect("original must be a string");
let contextid = get_string(contextid).expect("contextid must be a string");
let plural = get_string(plural).expect("plural must be a string");
let is_plural =
!plural.is_empty() || !matches!(n, Expression::NumberLiteral(f) if f == 1.0);
match self.map.entry((original.clone(), plural.clone(), contextid.clone())) {
Entry::Occupied(entry) => Expression::TranslationReference {
format_args: format_args.into(),
string_index: *entry.get(),
plural: is_plural.then(|| n.into()),
},
Entry::Vacant(entry) => {
let messages = self.catalogs.iter().map(|catalog| {
catalog.find_message(
contextid.is_empty().not().then_some(contextid.as_str()),
&original,
is_plural.then_some(plural.as_str()),
)
});
let idx = if is_plural {
let messages = std::iter::once(Some(vec![original.clone(), plural.clone()]))
.chain(messages.map(|x| {
x.and_then(|x| {
Some(
x.msgstr_plural()
.ok()?
.iter()
.map(|x| x.to_smolstr())
.collect(),
)
})
}))
.collect();
self.result.plurals.push(messages);
self.result.plurals.len() - 1
} else {
let messages = std::iter::once(Some(original.clone()))
.chain(
messages
.map(|x| x.and_then(|x| x.msgstr().ok()).map(|x| x.to_smolstr())),
)
.collect::<Vec<_>>();
self.result.strings.push(messages);
self.result.strings.len() - 1
};
Expression::TranslationReference {
format_args: format_args.into(),
string_index: *entry.insert(idx),
plural: is_plural.then(|| n.into()),
}
}
}
}
pub fn result(self) -> Translations {
self.result
}
/// Adds to `characters_seen` every character of every translated string
/// (singular and plural forms alike) of every loaded catalog.
pub fn collect_characters_seen(&self, characters_seen: &mut impl Extend<char>) {
    let translated_texts = self
        .catalogs
        .iter()
        .flat_map(|catalog| catalog.messages())
        .flat_map(|msg| {
            // A message yields either its singular translation or its plural
            // forms; the accessor that does not apply returns an error and
            // contributes nothing.
            let singular = msg.msgstr().ok();
            let plural_forms = msg.msgstr_plural().ok();
            singular
                .into_iter()
                .chain(plural_forms.into_iter().flatten().map(|form| form.as_ref()))
        });
    characters_seen.extend(translated_texts.flat_map(|text| text.chars()));
}
}
/// Returns the text of `plural` when it is a string literal, and `None` for any
/// other kind of expression.
fn get_string(plural: Expression) -> Option<SmolStr> {
    if let Expression::StringLiteral(text) = plural {
        Some(text)
    } else {
        None
    }
}
mod plural_rule_parser {
use super::Expression;
/// Error produced while parsing a plural rule: a static description of the
/// problem, followed by the input that was still unparsed when it was detected.
pub struct ParseError<'a>(&'static str, &'a [u8]);
impl std::fmt::Debug for ParseError<'_> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Formatting an error must never panic, so decode the remaining bytes
        // lossily instead of unwrapping a UTF-8 conversion. The output is
        // unchanged whenever the bytes are valid UTF-8.
        write!(f, "ParseError({}, rest={:?})", self.0, String::from_utf8_lossy(self.1))
    }
}
pub fn parse_rule_expression(string: &str) -> Result<Expression, ParseError> {
let ascii = string.as_bytes();
let s = parse_expression(ascii)?;
if !s.rest.is_empty() {
return Err(ParseError("extra character in string", s.rest));
}
match s.ty {
Ty::Number => Ok(s.expr),
Ty::Boolean => Ok(Expression::Condition {
condition: s.expr.into(),
true_expr: Expression::NumberLiteral(1.).into(),
false_expr: Expression::NumberLiteral(0.).into(),
}),
}
}
/// The type of value a parsed (sub-)expression evaluates to.
/// The parser uses it to reject rules that mix numbers and booleans.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
enum Ty {
/// Result of `n`, of a numeric literal, or of the `%` operator
Number,
/// Result of a comparison, or of the `&&` / `||` operators
Boolean,
}
/// Result of one parsing step.
struct ParsingState<'a> {
    /// The expression built from the input consumed so far
    expr: Expression,
    /// The input that has not been consumed yet
    rest: &'a [u8],
    /// Whether `expr` evaluates to a number or to a boolean
    ty: Ty,
}
impl ParsingState<'_> {
    /// Returns the same state with the leading ASCII whitespace removed from `rest`.
    fn skip_whitespace(mut self) -> Self {
        self.rest = skip_whitespace(self.rest);
        self
    }
}
/// `<condition> ('?' <expr> : <expr> )?`
///
/// Parses a condition, optionally followed by the two branches of a ternary
/// operator. The `?` is only looked for when the condition is a boolean, and
/// both branches must have the same type, which becomes the type of the result.
fn parse_expression(string: &[u8]) -> Result<ParsingState, ParseError> {
    let condition = parse_condition(skip_whitespace(string))?.skip_whitespace();
    if condition.ty != Ty::Boolean {
        return Ok(condition);
    }
    let Some(after_question) = condition.rest.strip_prefix(b"?") else {
        return Ok(condition);
    };

    let when_true = parse_expression(after_question)?.skip_whitespace();
    let Some(after_colon) = when_true.rest.strip_prefix(b":") else {
        return Err(ParseError("expected ':'", when_true.rest));
    };
    let when_false = parse_expression(after_colon)?;
    if when_true.ty != when_false.ty {
        return Err(ParseError("incompatible types in ternary operator", when_false.rest));
    }

    let ParsingState { expr: false_expr, rest, ty } = when_false;
    Ok(ParsingState {
        expr: Expression::Condition {
            condition: condition.expr.into(),
            true_expr: when_true.expr.into(),
            false_expr: false_expr.into(),
        },
        rest: skip_whitespace(rest),
        ty,
    })
}
/// `<and_expr> ("||" <condition>)?`
///
/// Parses one or more `&&`-expressions joined by `||`. The right-hand side is
/// parsed recursively, so a chain nests to the right. Both operands of `||`
/// must be booleans.
fn parse_condition(string: &[u8]) -> Result<ParsingState, ParseError> {
    let lhs = parse_and_expr(skip_whitespace(string))?.skip_whitespace();
    // No `||` ahead (which includes the end of the input): nothing to combine.
    let Some(after_operator) = lhs.rest.strip_prefix(b"||") else {
        return Ok(lhs);
    };

    let rhs = parse_condition(after_operator)?;
    if lhs.ty != Ty::Boolean || rhs.ty != Ty::Boolean {
        return Err(ParseError("incompatible types in || operator", rhs.rest));
    }
    let rest = skip_whitespace(rhs.rest);
    Ok(ParsingState {
        expr: Expression::BinaryExpression {
            lhs: lhs.expr.into(),
            rhs: rhs.expr.into(),
            op: '|',
        },
        ty: Ty::Boolean,
        rest,
    })
}
/// `<cmp_expr> ("&&" <and_expr>)?`
///
/// Parses one or more comparisons joined by `&&`. The right-hand side is parsed
/// recursively, so a chain nests to the right. Both operands of `&&` must be
/// booleans, otherwise an error is returned.
fn parse_and_expr(string: &[u8]) -> Result<ParsingState, ParseError> {
    let string = skip_whitespace(string);
    let state = parse_cmp_expr(string)?.skip_whitespace();
    // No `&&` ahead (which includes the end of the input): nothing to combine.
    let Some(rest) = state.rest.strip_prefix(b"&&") else {
        return Ok(state);
    };
    let state2 = parse_and_expr(rest)?;
    if state.ty != Ty::Boolean || state2.ty != Ty::Boolean {
        // Name the operator that is actually being parsed here; the message used to
        // say `||`, which pointed at the wrong part of the rule.
        return Err(ParseError("incompatible types in && operator", state2.rest));
    }
    Ok(ParsingState {
        expr: Expression::BinaryExpression {
            lhs: state.expr.into(),
            rhs: state2.expr.into(),
            op: '&',
        },
        ty: Ty::Boolean,
        rest: skip_whitespace(state2.rest),
    })
}
/// `<value> ('=='|'!='|'<'|'>'|'<='|'>=' <cmp_expr>)?`
///
/// Parses a value, optionally compared with what follows. Both sides of a
/// comparison must be numbers and the result is a boolean.
fn parse_cmp_expr(string: &[u8]) -> Result<ParsingState, ParseError> {
    let lhs = parse_value(skip_whitespace(string))?.skip_whitespace();

    // Each token is paired with the single character used as the operator of the
    // resulting binary expression. The two-character tokens come first so that
    // `<=` is not read as `<` followed by a stray `=`.
    let operators = [
        (b"==" as &[u8], '='),
        (b"!=", '!'),
        (b"<=", '≤'),
        (b">=", '≥'),
        (b"<", '<'),
        (b">", '>'),
    ];
    let matched = operators
        .iter()
        .find_map(|&(token, op)| lhs.rest.strip_prefix(token).map(|after| (op, after)));
    let Some((op, after_operator)) = matched else {
        return Ok(lhs);
    };

    let rhs = parse_cmp_expr(after_operator)?;
    if lhs.ty != Ty::Number || rhs.ty != Ty::Number {
        return Err(ParseError("incompatible types in comparison", rhs.rest));
    }
    let rest = skip_whitespace(rhs.rest);
    Ok(ParsingState {
        expr: Expression::BinaryExpression { lhs: lhs.expr.into(), rhs: rhs.expr.into(), op },
        ty: Ty::Boolean,
        rest,
    })
}
/// `<term> ('%' <term>)?`
///
/// Parses a term, optionally followed by a single modulo operation, which is
/// lowered to a call to the builtin `Mod` function. Both operands must be numbers.
fn parse_value(string: &[u8]) -> Result<ParsingState, ParseError> {
    let dividend = parse_term(skip_whitespace(string))?.skip_whitespace();
    // No `%` ahead (which includes the end of the input): the term is the value.
    let Some(after_percent) = dividend.rest.strip_prefix(b"%") else {
        return Ok(dividend);
    };

    let divisor = parse_term(after_percent)?;
    if dividend.ty != Ty::Number || divisor.ty != Ty::Number {
        return Err(ParseError("incompatible types in % operator", divisor.rest));
    }
    let rest = skip_whitespace(divisor.rest);
    Ok(ParsingState {
        expr: Expression::BuiltinFunctionCall {
            function: crate::expression_tree::BuiltinFunction::Mod,
            arguments: vec![dividend.expr.into(), divisor.expr.into()],
        },
        ty: Ty::Number,
        rest,
    })
}
/// `'n' | '(' <expr> ')' | <number>`
///
/// Parses the smallest unit of a rule: the variable `n` (the function's first
/// argument), a parenthesized expression, or an unsigned integer literal.
fn parse_term(string: &[u8]) -> Result<ParsingState, ParseError> {
    let string = skip_whitespace(string);
    match string {
        [] => Err(ParseError("unexpected end of string", string)),
        [b'n', rest @ ..] => Ok(ParsingState {
            expr: Expression::FunctionParameterReference { index: 0 },
            rest,
            ty: Ty::Number,
        }),
        [b'(', inner @ ..] => {
            let inner_state = parse_expression(inner)?;
            match inner_state.rest.strip_prefix(b")") {
                Some(rest) => Ok(ParsingState { rest, ..inner_state }),
                None => Err(ParseError("expected ')'", inner_state.rest)),
            }
        }
        [first, ..] if first.is_ascii_digit() => {
            let (n, rest) = parse_number(string)?;
            Ok(ParsingState { expr: Expression::NumberLiteral(n as _), rest, ty: Ty::Number })
        }
        _ => Err(ParseError("unexpected token", string)),
    }
}
/// Parses the run of ASCII digits at the start of `string` as an `i32` and
/// returns it together with the remaining input.
///
/// Fails when the digits do not form a valid `i32`, which includes the case
/// where `string` does not start with a digit.
fn parse_number(string: &[u8]) -> Result<(i32, &[u8]), ParseError> {
    let digit_count = string.iter().take_while(|byte| byte.is_ascii_digit()).count();
    let (digits, rest) = string.split_at(digit_count);
    let value = std::str::from_utf8(digits)
        .expect("string is valid utf-8")
        .parse::<i32>()
        .map_err(|_| ParseError("can't parse number", string))?;
    Ok((value, rest))
}
/// Returns `string` without its leading ASCII whitespace.
fn skip_whitespace(mut string: &[u8]) -> &[u8] {
    // slice::trim_ascii_start when MSRV >= 1.80
    while let [first, rest @ ..] = string {
        if !first.is_ascii_whitespace() {
            break;
        }
        string = rest;
    }
    string
}
// Parses the plural rules of several languages and compares the pretty-printed
// form of the resulting expression with the expected text.
// In the expected text, `n` is printed as `arg_0`, numbers are printed as floats,
// `%` as a call to `Mod`, and each operator as a single character
// (`!=` as `!`, `==` as `=`, `>=` as `≥`, `<=` as `≤`, `&&` as `&`, `||` as `|`).
#[test]
fn test_parse_rule_expression() {
// Parses `string` as a plural rule and returns the pretty-printed expression.
// Panics (reported at the caller's location) when the rule does not parse.
#[track_caller]
fn p(string: &str) -> String {
// A context built around an empty compilation unit, which is enough for
// pretty-printing: the only thing declared is one argument of type Int32.
let ctx = crate::llr::EvaluationContext {
compilation_unit: &crate::llr::CompilationUnit {
public_components: Default::default(),
sub_components: Default::default(),
used_sub_components: Default::default(),
globals: Default::default(),
has_debug_info: false,
translations: None,
popup_menu: None,
},
current_sub_component: None,
current_global: None,
generator_state: (),
parent: None,
argument_types: &[crate::langtype::Type::Int32],
};
crate::llr::pretty_print::DisplayExpression(
&parse_rule_expression(string).expect("parse error"),
&ctx,
)
.to_string()
}
// en (English): a boolean rule gets wrapped into `? 1.0 : 0.0`
assert_eq!(p("n != 1"), "((arg_0 ! 1.0) ? 1.0 : 0.0)");
// fr (French)
assert_eq!(p("n > 1"), "((arg_0 > 1.0) ? 1.0 : 0.0)");
// ar (Arabic): chained ternaries nest to the right
assert_eq!(
p("(n==0 ? 0 : n==1 ? 1 : n==2 ? 2 : n%100>=3 && n%100<=10 ? 3 : n%100>=11 ? 4 : 5)"),
"((arg_0 = 0.0) ? 0.0 : ((arg_0 = 1.0) ? 1.0 : ((arg_0 = 2.0) ? 2.0 : (((Mod(arg_0, 100.0) ≥ 3.0) & (Mod(arg_0, 100.0) ≤ 10.0)) ? 3.0 : ((Mod(arg_0, 100.0) ≥ 11.0) ? 4.0 : 5.0)))))"
);
// ga (Irish): parenthesized conditions
assert_eq!(p("n==1 ? 0 : n==2 ? 1 : (n>2 && n<7) ? 2 :(n>6 && n<11) ? 3 : 4"), "((arg_0 = 1.0) ? 0.0 : ((arg_0 = 2.0) ? 1.0 : (((arg_0 > 2.0) & (arg_0 < 7.0)) ? 2.0 : (((arg_0 > 6.0) & (arg_0 < 11.0)) ? 3.0 : 4.0))))");
// ja (Japanese): a constant rule stays a plain number
assert_eq!(p("0"), "0.0");
// pl (Polish): `&&` chains nest to the right, with a parenthesized `||` inside
assert_eq!(
p("(n==1 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2)"),
"((arg_0 = 1.0) ? 0.0 : (((Mod(arg_0, 10.0) ≥ 2.0) & ((Mod(arg_0, 10.0) ≤ 4.0) & ((Mod(arg_0, 100.0) < 10.0) | (Mod(arg_0, 100.0) ≥ 20.0)))) ? 1.0 : 2.0))",
);
// ru (Russian)
assert_eq!(
p("(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2)"),
"(((Mod(arg_0, 10.0) = 1.0) & (Mod(arg_0, 100.0) ! 11.0)) ? 0.0 : (((Mod(arg_0, 10.0) ≥ 2.0) & ((Mod(arg_0, 10.0) ≤ 4.0) & ((Mod(arg_0, 100.0) < 10.0) | (Mod(arg_0, 100.0) ≥ 20.0)))) ? 1.0 : 2.0))",
);
}
}