l10n: when built indiv, only embed the relevant locale file

This commit is contained in:
Sylvestre Ledru 2025-08-01 17:34:51 +02:00
parent afa0f00e90
commit df2fca588c
6 changed files with 350 additions and 170 deletions

View file

@ -58,10 +58,10 @@ jobs:
brew install coreutils
;;
esac
- name: Build with l10n
- name: Build with platform features
shell: bash
run: |
## Build with l10n
## Build with platform-specific features to enable l10n functionality
cargo build --features ${{ matrix.job.features }}
- name: Test l10n functionality
shell: bash
@ -96,14 +96,14 @@ jobs:
# Check if any .ftl files exist
fluent_files=$(find . -name "*.ftl" -type f 2>/dev/null || true)
if [ -z "$fluent_files" ]; then
if [ -n "$fluent_files" ]; then
echo "Found Fluent files:"
echo "$fluent_files"
else
echo "::notice::No Fluent (.ftl) files found in the repository"
exit 0
fi
echo "Found Fluent files:"
echo "$fluent_files"
# Use Mozilla Fluent Linter for comprehensive validation
echo "Running Mozilla Fluent Linter..."
@ -952,3 +952,147 @@ jobs:
echo "✓ All locale-specific functionality tests passed"
env:
RUST_BACKTRACE: "1"
l10n_locale_embedding_regression_test:
name: L10n/Locale Embedding Regression Test
runs-on: ubuntu-latest
env:
SCCACHE_GHA_ENABLED: "true"
RUSTC_WRAPPER: "sccache"
steps:
- uses: actions/checkout@v4
with:
persist-credentials: false
- uses: dtolnay/rust-toolchain@stable
- uses: Swatinem/rust-cache@v2
- name: Run sccache-cache
uses: mozilla-actions/sccache-action@v0.0.9
- name: Install/setup prerequisites
shell: bash
run: |
## Install/setup prerequisites
sudo apt-get -y update ; sudo apt-get -y install libselinux1-dev build-essential
- name: Build binaries for locale embedding test
shell: bash
run: |
## Build individual utilities and multicall binary for locale embedding test
echo "Building binaries with different locale embedding configurations..."
mkdir -p target
# Build cat utility with targeted locale embedding
echo "Building cat utility with targeted locale embedding..."
echo "cat" > target/uucore_target_util.txt
cargo build -p uu_cat --release
# Build ls utility with targeted locale embedding
echo "Building ls utility with targeted locale embedding..."
echo "ls" > target/uucore_target_util.txt
cargo build -p uu_ls --release
# Build multicall binary (should have all locales)
echo "Building multicall binary (should have all locales)..."
echo "multicall" > target/uucore_target_util.txt
cargo build --release
echo "✓ All binaries built successfully"
env:
RUST_BACKTRACE: "1"
- name: Analyze embedded locale files
shell: bash
run: |
## Extract and analyze .ftl files embedded in each binary
echo "=== Embedded Locale File Analysis ==="
# Analyze cat binary
echo "--- cat binary embedded .ftl files ---"
cat_ftl_files=$(strings target/release/cat | grep -o "[a-z_][a-z_]*/en-US\.ftl" | sort | uniq)
cat_locales=$(echo "$cat_ftl_files" | wc -l)
if [ -n "$cat_ftl_files" ]; then
echo "$cat_ftl_files"
else
echo "(no locale keys found)"
fi
echo "Total: $cat_locales files"
echo
# Analyze ls binary
echo "--- ls binary embedded .ftl files ---"
ls_ftl_files=$(strings target/release/ls | grep -o "[a-z_][a-z_]*/en-US\.ftl" | sort | uniq)
ls_locales=$(echo "$ls_ftl_files" | wc -l)
if [ -n "$ls_ftl_files" ]; then
echo "$ls_ftl_files"
else
echo "(no locale keys found)"
fi
echo "Total: $ls_locales files"
echo
# Analyze multicall binary
echo "--- multicall binary embedded .ftl files (first 10) ---"
multi_ftl_files=$(strings target/release/coreutils | grep -o "[a-z_][a-z_]*/en-US\.ftl" | sort | uniq)
multi_locales=$(echo "$multi_ftl_files" | wc -l)
if [ -n "$multi_ftl_files" ]; then
echo "$multi_ftl_files" | head -10
echo "... (showing first 10 of $multi_locales total files)"
else
echo "(no locale keys found)"
fi
echo
# Store counts for validation step
echo "cat_locales=$cat_locales" >> $GITHUB_ENV
echo "ls_locales=$ls_locales" >> $GITHUB_ENV
echo "multi_locales=$multi_locales" >> $GITHUB_ENV
- name: Validate cat binary locale embedding
shell: bash
run: |
## Validate that cat binary only embeds its own locale files
echo "Validating cat binary locale embedding..."
if [ "$cat_locales" -le 5 ]; then
echo "✓ SUCCESS: cat binary uses targeted locale embedding ($cat_locales files)"
else
echo "✗ FAILURE: cat binary has too many embedded locale files ($cat_locales). Expected ≤ 5."
echo "This indicates LOCALE EMBEDDING REGRESSION - all locales are being embedded instead of just the target utility's locale."
echo "The optimization is not working correctly!"
exit 1
fi
- name: Validate ls binary locale embedding
shell: bash
run: |
## Validate that ls binary only embeds its own locale files
echo "Validating ls binary locale embedding..."
if [ "$ls_locales" -le 5 ]; then
echo "✓ SUCCESS: ls binary uses targeted locale embedding ($ls_locales files)"
else
echo "✗ FAILURE: ls binary has too many embedded locale files ($ls_locales). Expected ≤ 5."
echo "This indicates LOCALE EMBEDDING REGRESSION - all locales are being embedded instead of just the target utility's locale."
echo "The optimization is not working correctly!"
exit 1
fi
- name: Validate multicall binary locale embedding
shell: bash
run: |
## Validate that multicall binary embeds all utility locale files
echo "Validating multicall binary locale embedding..."
if [ "$multi_locales" -ge 80 ]; then
echo "✓ SUCCESS: multicall binary has all locales ($multi_locales files)"
else
echo "✗ FAILURE: multicall binary has too few embedded locale files ($multi_locales). Expected ≥ 80."
echo "This indicates the multicall binary is not getting all required locales."
exit 1
fi
- name: Finalize locale embedding tests
shell: bash
run: |
## Clean up and report overall test results
rm -f test.txt target/uucore_target_util.txt
echo "✓ All locale embedding regression tests passed"
echo "Summary:"
echo " - cat binary: $cat_locales locale files (targeted embedding)"
echo " - ls binary: $ls_locales locale files (targeted embedding)"
echo " - multicall binary: $multi_locales locale files (full embedding)"

View file

@ -105,83 +105,4 @@ pub fn main() {
mf.write_all(b"\n}\n").unwrap();
mf.flush().unwrap();
// Always generate embedded English locale files
// This ensures English works even when locale files aren't available (e.g., cargo install)
generate_embedded_english_locales(&out_dir, &crates).unwrap();
}
/// Generate embedded English locale files
///
/// # Errors
///
/// Returns an error if file operations fail or if there are I/O issues
fn generate_embedded_english_locales(
out_dir: &str,
crates: &[String],
) -> Result<(), Box<dyn std::error::Error>> {
use std::fs;
let mut embedded_file = File::create(Path::new(&out_dir).join("embedded_locales.rs"))?;
writeln!(embedded_file, "// Generated at compile time - do not edit")?;
writeln!(
embedded_file,
"// This file contains embedded English locale files for all utilities"
)?;
writeln!(embedded_file)?;
writeln!(embedded_file, "use std::collections::HashMap;")?;
writeln!(embedded_file)?;
// Start the function that returns embedded locales
writeln!(
embedded_file,
"pub fn get_embedded_locales() -> HashMap<&'static str, &'static str> {{"
)?;
writeln!(embedded_file, " let mut locales = HashMap::new();")?;
writeln!(embedded_file)?;
// Embed locale files for each utility
for krate in crates {
let util_name = if let Some(stripped) = krate.strip_prefix("uu_") {
stripped
} else {
krate
};
let locale_path = Path::new("src/uu")
.join(util_name)
.join("locales/en-US.ftl");
if locale_path.exists() {
let content = fs::read_to_string(&locale_path)?;
writeln!(embedded_file, " // Locale for {util_name}")?;
writeln!(
embedded_file,
" locales.insert(\"{util_name}/en-US.ftl\", r###\"{content}\"###);"
)?;
writeln!(embedded_file)?;
// Tell Cargo to rerun if this file changes
println!("cargo:rerun-if-changed={}", locale_path.display());
}
}
// Also embed uucore locale file if it exists
let uucore_locale_path = Path::new("src/uucore/locales/en-US.ftl");
if uucore_locale_path.exists() {
let content = fs::read_to_string(uucore_locale_path)?;
writeln!(embedded_file, " // Common uucore locale")?;
writeln!(
embedded_file,
" locales.insert(\"uucore/en-US.ftl\", r###\"{content}\"###);"
)?;
println!("cargo:rerun-if-changed={}", uucore_locale_path.display());
}
writeln!(embedded_file)?;
writeln!(embedded_file, " locales")?;
writeln!(embedded_file, "}}")?;
embedded_file.flush()?;
Ok(())
}

View file

@ -4,7 +4,7 @@ This guide explains how localization (L10n) is implemented in the **Rust-based c
## 🏗️ Architecture Overview
**English locale files are embedded directly in the binary**, ensuring that English always works regardless of how the software is installed. Other language locale files are loaded from the filesystem at runtime.
**English (US) locale files (`en-US.ftl`) are embedded directly in the binary**, ensuring that English always works regardless of how the software is installed. Other language locale files are loaded from the filesystem at runtime.
### Source Repository Structure

View file

@ -4,38 +4,19 @@
// file that was distributed with this source code.
use std::env;
use std::fs::File;
use std::io::Write;
use std::path::Path;
fn main() {
println!("cargo:rerun-if-changed=build.rs");
pub fn main() -> Result<(), Box<dyn std::error::Error>> {
let out_dir = env::var("OUT_DIR")?;
let out_dir = env::var("OUT_DIR").unwrap();
// Always generate embedded English locale files for fallback
generate_embedded_english_locales(&out_dir).unwrap();
}
/// Generate embedded English locale files
///
/// # Errors
///
/// Returns an error if file operations fail or if there are I/O issues
fn generate_embedded_english_locales(out_dir: &str) -> Result<(), Box<dyn std::error::Error>> {
use std::fs::{self, File};
use std::io::Write;
// Since we're in uucore, we need to go up to the project root
let project_root = Path::new(env!("CARGO_MANIFEST_DIR"))
.parent() // src/
.and_then(|p| p.parent()) // project root
.ok_or("Failed to find project root")?;
let mut embedded_file = File::create(Path::new(out_dir).join("embedded_locales.rs"))?;
let mut embedded_file = File::create(Path::new(&out_dir).join("embedded_locales.rs"))?;
writeln!(embedded_file, "// Generated at compile time - do not edit")?;
writeln!(
embedded_file,
"// This file contains embedded English locale files for all utilities"
"// This file contains embedded English locale files"
)?;
writeln!(embedded_file)?;
writeln!(embedded_file, "use std::collections::HashMap;")?;
@ -49,29 +30,163 @@ fn generate_embedded_english_locales(out_dir: &str) -> Result<(), Box<dyn std::e
writeln!(embedded_file, " let mut locales = HashMap::new();")?;
writeln!(embedded_file)?;
// Scan for all utilities in src/uu/
let uu_dir = project_root.join("src/uu");
for entry in fs::read_dir(&uu_dir)? {
let entry = entry?;
let path = entry.path();
if path.is_dir() {
let util_name = entry.file_name().to_string_lossy().to_string();
let locale_path = path.join("locales/en-US.ftl");
if locale_path.exists() {
let content = fs::read_to_string(&locale_path)?;
writeln!(embedded_file, " // Locale for {util_name}")?;
writeln!(
embedded_file,
" locales.insert(\"{util_name}/en-US.ftl\", r###\"{content}\"###);"
)?;
writeln!(embedded_file)?;
// Try to detect if we're building for a specific utility by checking build configuration
// This attempts to identify individual utility builds vs multicall binary builds
let target_utility = detect_target_utility();
// Tell Cargo to rerun if this file changes
println!("cargo:rerun-if-changed={}", locale_path.display());
match target_utility {
Some(util_name) => {
// Embed only the specific utility's locale (cat.ftl for cat for example)
embed_single_utility_locale(&mut embedded_file, &project_root()?, &util_name)?;
}
None => {
// Embed all utilities locales (multicall binary or fallback)
embed_all_utilities_locales(&mut embedded_file, &project_root()?)?;
}
}
writeln!(embedded_file)?;
writeln!(embedded_file, " locales")?;
writeln!(embedded_file, "}}")?;
embedded_file.flush()?;
Ok(())
}
/// Get the project root directory
fn project_root() -> Result<std::path::PathBuf, Box<dyn std::error::Error>> {
let manifest_dir = env::var("CARGO_MANIFEST_DIR")?;
let uucore_path = std::path::Path::new(&manifest_dir);
// Navigate from src/uucore to project root
let project_root = uucore_path
.parent() // src/
.and_then(|p| p.parent()) // project root
.ok_or("Could not determine project root")?;
Ok(project_root.to_path_buf())
}
/// Attempt to detect which specific utility is being built
fn detect_target_utility() -> Option<String> {
use std::fs;
// First check if an explicit environment variable was set
if let Ok(target_util) = env::var("UUCORE_TARGET_UTIL") {
if !target_util.is_empty() {
return Some(target_util);
}
}
// Check for a build configuration file in the target directory
if let Ok(target_dir) = env::var("CARGO_TARGET_DIR") {
let config_path = std::path::Path::new(&target_dir).join("uucore_target_util.txt");
if let Ok(content) = fs::read_to_string(&config_path) {
let util_name = content.trim();
if !util_name.is_empty() && util_name != "multicall" {
return Some(util_name.to_string());
}
}
}
// Fallback: Check the default target directory
if let Ok(project_root) = project_root() {
let config_path = project_root.join("target/uucore_target_util.txt");
if let Ok(content) = fs::read_to_string(&config_path) {
let util_name = content.trim();
if !util_name.is_empty() && util_name != "multicall" {
return Some(util_name.to_string());
}
}
}
// If no configuration found, assume multicall build
None
}
/// Embed locale for a single specific utility
fn embed_single_utility_locale(
embedded_file: &mut std::fs::File,
project_root: &Path,
util_name: &str,
) -> Result<(), Box<dyn std::error::Error>> {
use std::fs;
// Embed the specific utility's locale
let locale_path = project_root
.join("src/uu")
.join(util_name)
.join("locales/en-US.ftl");
if locale_path.exists() {
let content = fs::read_to_string(&locale_path)?;
writeln!(embedded_file, " // Locale for {util_name}")?;
writeln!(
embedded_file,
" locales.insert(\"{util_name}/en-US.ftl\", r###\"{content}\"###);"
)?;
writeln!(embedded_file)?;
// Tell Cargo to rerun if this file changes
println!("cargo:rerun-if-changed={}", locale_path.display());
}
// Always embed uucore locale file if it exists
let uucore_locale_path = project_root.join("src/uucore/locales/en-US.ftl");
if uucore_locale_path.exists() {
let content = fs::read_to_string(&uucore_locale_path)?;
writeln!(embedded_file, " // Common uucore locale")?;
writeln!(
embedded_file,
" locales.insert(\"uucore/en-US.ftl\", r###\"{content}\"###);"
)?;
println!("cargo:rerun-if-changed={}", uucore_locale_path.display());
}
Ok(())
}
/// Embed locale files for all utilities (multicall binary)
fn embed_all_utilities_locales(
embedded_file: &mut std::fs::File,
project_root: &Path,
) -> Result<(), Box<dyn std::error::Error>> {
use std::fs;
// Discover all uu_* directories
let src_uu_dir = project_root.join("src/uu");
if !src_uu_dir.exists() {
return Ok(());
}
let mut util_dirs = Vec::new();
for entry in fs::read_dir(&src_uu_dir)? {
let entry = entry?;
if entry.file_type()?.is_dir() {
if let Some(dir_name) = entry.file_name().to_str() {
util_dirs.push(dir_name.to_string());
}
}
}
util_dirs.sort();
// Embed locale files for each utility
for util_name in &util_dirs {
let locale_path = src_uu_dir.join(util_name).join("locales/en-US.ftl");
if locale_path.exists() {
let content = fs::read_to_string(&locale_path)?;
writeln!(embedded_file, " // Locale for {util_name}")?;
writeln!(
embedded_file,
" locales.insert(\"{util_name}/en-US.ftl\", r###\"{content}\"###);"
)?;
writeln!(embedded_file)?;
// Tell Cargo to rerun if this file changes
println!("cargo:rerun-if-changed={}", locale_path.display());
}
}
// Also embed uucore locale file if it exists
let uucore_locale_path = project_root.join("src/uucore/locales/en-US.ftl");
if uucore_locale_path.exists() {
@ -84,10 +199,6 @@ fn generate_embedded_english_locales(out_dir: &str) -> Result<(), Box<dyn std::e
println!("cargo:rerun-if-changed={}", uucore_locale_path.display());
}
writeln!(embedded_file)?;
writeln!(embedded_file, " locales")?;
writeln!(embedded_file, "}}")?;
embedded_file.flush()?;
Ok(())
}

View file

@ -179,19 +179,17 @@ macro_rules! bin {
use uucore::locale;
// suppress extraneous error output for SIGPIPE failures/panics
uucore::panic::mute_sigpipe_panic();
locale::setup_localization_with_common(uucore::get_canonical_util_name(stringify!(
$util
)))
.unwrap_or_else(|err| {
match err {
uucore::locale::LocalizationError::ParseResource {
error: err_msg,
snippet,
} => eprintln!("Localization parse error at {snippet}: {err_msg:?}"),
other => eprintln!("Could not init the localization system: {other}"),
}
std::process::exit(99)
});
locale::setup_localization(uucore::get_canonical_util_name(stringify!($util)))
.unwrap_or_else(|err| {
match err {
uucore::locale::LocalizationError::ParseResource {
error: err_msg,
snippet,
} => eprintln!("Localization parse error at {snippet}: {err_msg:?}"),
other => eprintln!("Could not init the localization system: {other}"),
}
std::process::exit(99)
});
// execute utility code
let code = $util::uumain(uucore::args_os());

View file

@ -113,14 +113,18 @@ fn init_localization(
locales_dir: &Path,
util_name: &str,
) -> Result<(), LocalizationError> {
let en_locale = LanguageIdentifier::from_str(DEFAULT_LOCALE)
let default_locale = LanguageIdentifier::from_str(DEFAULT_LOCALE)
.expect("Default locale should always be valid");
// Try to load English from filesystem, fall back to embedded if that fails
let english_bundle = create_bundle(&default_locale, locales_dir).or_else(|_| {
// Try embedded English as fallback
create_bundle_from_embedded(&default_locale, util_name)
})?;
// Try to load English from embedded resources first, then fall back to filesystem.
// This ensures consistent behavior and faster loading since embedded resources
// are immediately available. The filesystem fallback allows for development
// and testing scenarios where locale files might be present in the filesystem.
let english_bundle =
create_english_bundle_from_embedded(&default_locale, util_name).or_else(|_| {
// Try filesystem as fallback (useful for development/testing)
create_bundle(&default_locale, locales_dir)
})?;
let loc = if locale == &default_locale {
// If requesting English, just use English as primary (no fallback needed)
@ -189,8 +193,8 @@ fn create_bundle(
Ok(bundle)
}
/// Create a bundle from embedded locale files
fn create_bundle_from_embedded(
/// Create a bundle from embedded English locale files
fn create_english_bundle_from_embedded(
locale: &LanguageIdentifier,
util_name: &str,
) -> Result<FluentBundle<FluentResource>, LocalizationError> {
@ -209,16 +213,20 @@ fn create_bundle_from_embedded(
})?;
let resource = FluentResource::try_new(ftl_content.to_string()).map_err(
|(_partial_resource, mut errs): (FluentResource, Vec<ParserError>)| {
let first_err = errs.remove(0);
let snippet = if let Some(range) = first_err.slice.clone() {
ftl_content.get(range).unwrap_or("").to_string()
|(_partial_resource, errs): (FluentResource, Vec<ParserError>)| {
if let Some(first_err) = errs.into_iter().next() {
let snippet = first_err
.slice
.clone()
.and_then(|range| ftl_content.get(range))
.unwrap_or("")
.to_string();
LocalizationError::ParseResource {
error: first_err,
snippet,
}
} else {
String::new()
};
LocalizationError::ParseResource {
error: first_err,
snippet,
LocalizationError::LocalesDirNotFound("Parse error without details".to_string())
}
},
)?;
@ -360,14 +368,16 @@ pub fn setup_localization(p: &str) -> Result<(), LocalizationError> {
LanguageIdentifier::from_str(DEFAULT_LOCALE).expect("Default locale should always be valid")
});
// Try to get locales from filesystem first
// Try to find the locales directory. If found, use init_localization which
// will prioritize embedded resources but can also load from filesystem.
// If no locales directory exists, directly use embedded English resources.
match get_locales_dir(p) {
Ok(locales_dir) => init_localization(&locale, &locales_dir, p),
Err(_) => {
// Fallback to embedded English locales
// No locales directory found, use embedded English directly
let default_locale = LanguageIdentifier::from_str(DEFAULT_LOCALE)
.expect("Default locale should always be valid");
let english_bundle = create_bundle_from_embedded(&default_locale, p)?;
let english_bundle = create_english_bundle_from_embedded(&default_locale, p)?;
let localizer = Localizer::new(english_bundle);
LOCALIZER.with(|lock| {
@ -379,10 +389,6 @@ pub fn setup_localization(p: &str) -> Result<(), LocalizationError> {
}
}
pub fn setup_localization_with_common(util_name: &str) -> Result<(), LocalizationError> {
setup_localization(util_name)
}
#[cfg(not(debug_assertions))]
fn resolve_locales_dir_from_exe_dir(exe_dir: &Path, p: &str) -> Option<PathBuf> {
// 1. <bindir>/locales/<prog>