From df2fca588c7ff2c99983266fabc2f3b171a5831f Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Fri, 1 Aug 2025 17:34:51 +0200 Subject: [PATCH] l10n: when built indiv, only embed the relevant locale file --- .github/workflows/l10n.yml | 156 ++++++++++++++++++++++- build.rs | 79 ------------ docs/src/l10n.md | 2 +- src/uucore/build.rs | 205 +++++++++++++++++++++++------- src/uucore/src/lib/lib.rs | 24 ++-- src/uucore/src/lib/mods/locale.rs | 54 ++++---- 6 files changed, 350 insertions(+), 170 deletions(-) diff --git a/.github/workflows/l10n.yml b/.github/workflows/l10n.yml index 878630372..fe89cf5d8 100644 --- a/.github/workflows/l10n.yml +++ b/.github/workflows/l10n.yml @@ -58,10 +58,10 @@ jobs: brew install coreutils ;; esac - - name: Build with l10n + - name: Build with platform features shell: bash run: | - ## Build with l10n + ## Build with platform-specific features to enable l10n functionality cargo build --features ${{ matrix.job.features }} - name: Test l10n functionality shell: bash @@ -96,14 +96,14 @@ jobs: # Check if any .ftl files exist fluent_files=$(find . -name "*.ftl" -type f 2>/dev/null || true) - if [ -z "$fluent_files" ]; then + if [ -n "$fluent_files" ]; then + echo "Found Fluent files:" + echo "$fluent_files" + else echo "::notice::No Fluent (.ftl) files found in the repository" exit 0 fi - echo "Found Fluent files:" - echo "$fluent_files" - # Use Mozilla Fluent Linter for comprehensive validation echo "Running Mozilla Fluent Linter..." @@ -952,3 +952,147 @@ jobs: echo "✓ All locale-specific functionality tests passed" env: RUST_BACKTRACE: "1" + + l10n_locale_embedding_regression_test: + name: L10n/Locale Embedding Regression Test + runs-on: ubuntu-latest + env: + SCCACHE_GHA_ENABLED: "true" + RUSTC_WRAPPER: "sccache" + steps: + - uses: actions/checkout@v4 + with: + persist-credentials: false + - uses: dtolnay/rust-toolchain@stable + - uses: Swatinem/rust-cache@v2 + - name: Run sccache-cache + uses: mozilla-actions/sccache-action@v0.0.9 + - name: Install/setup prerequisites + shell: bash + run: | + ## Install/setup prerequisites + sudo apt-get -y update ; sudo apt-get -y install libselinux1-dev build-essential + - name: Build binaries for locale embedding test + shell: bash + run: | + ## Build individual utilities and multicall binary for locale embedding test + echo "Building binaries with different locale embedding configurations..." + mkdir -p target + + # Build cat utility with targeted locale embedding + echo "Building cat utility with targeted locale embedding..." + echo "cat" > target/uucore_target_util.txt + cargo build -p uu_cat --release + + # Build ls utility with targeted locale embedding + echo "Building ls utility with targeted locale embedding..." + echo "ls" > target/uucore_target_util.txt + cargo build -p uu_ls --release + + # Build multicall binary (should have all locales) + echo "Building multicall binary (should have all locales)..." + echo "multicall" > target/uucore_target_util.txt + cargo build --release + + echo "✓ All binaries built successfully" + env: + RUST_BACKTRACE: "1" + + - name: Analyze embedded locale files + shell: bash + run: | + ## Extract and analyze .ftl files embedded in each binary + echo "=== Embedded Locale File Analysis ===" + + # Analyze cat binary + echo "--- cat binary embedded .ftl files ---" + cat_ftl_files=$(strings target/release/cat | grep -o "[a-z_][a-z_]*/en-US\.ftl" | sort | uniq) + cat_locales=$(echo "$cat_ftl_files" | wc -l) + if [ -n "$cat_ftl_files" ]; then + echo "$cat_ftl_files" + else + echo "(no locale keys found)" + fi + echo "Total: $cat_locales files" + echo + + # Analyze ls binary + echo "--- ls binary embedded .ftl files ---" + ls_ftl_files=$(strings target/release/ls | grep -o "[a-z_][a-z_]*/en-US\.ftl" | sort | uniq) + ls_locales=$(echo "$ls_ftl_files" | wc -l) + if [ -n "$ls_ftl_files" ]; then + echo "$ls_ftl_files" + else + echo "(no locale keys found)" + fi + echo "Total: $ls_locales files" + echo + + # Analyze multicall binary + echo "--- multicall binary embedded .ftl files (first 10) ---" + multi_ftl_files=$(strings target/release/coreutils | grep -o "[a-z_][a-z_]*/en-US\.ftl" | sort | uniq) + multi_locales=$(echo "$multi_ftl_files" | wc -l) + if [ -n "$multi_ftl_files" ]; then + echo "$multi_ftl_files" | head -10 + echo "... (showing first 10 of $multi_locales total files)" + else + echo "(no locale keys found)" + fi + echo + + # Store counts for validation step + echo "cat_locales=$cat_locales" >> $GITHUB_ENV + echo "ls_locales=$ls_locales" >> $GITHUB_ENV + echo "multi_locales=$multi_locales" >> $GITHUB_ENV + + - name: Validate cat binary locale embedding + shell: bash + run: | + ## Validate that cat binary only embeds its own locale files + echo "Validating cat binary locale embedding..." + if [ "$cat_locales" -le 5 ]; then + echo "✓ SUCCESS: cat binary uses targeted locale embedding ($cat_locales files)" + else + echo "✗ FAILURE: cat binary has too many embedded locale files ($cat_locales). Expected ≤ 5." + echo "This indicates LOCALE EMBEDDING REGRESSION - all locales are being embedded instead of just the target utility's locale." + echo "The optimization is not working correctly!" + exit 1 + fi + + - name: Validate ls binary locale embedding + shell: bash + run: | + ## Validate that ls binary only embeds its own locale files + echo "Validating ls binary locale embedding..." + if [ "$ls_locales" -le 5 ]; then + echo "✓ SUCCESS: ls binary uses targeted locale embedding ($ls_locales files)" + else + echo "✗ FAILURE: ls binary has too many embedded locale files ($ls_locales). Expected ≤ 5." + echo "This indicates LOCALE EMBEDDING REGRESSION - all locales are being embedded instead of just the target utility's locale." + echo "The optimization is not working correctly!" + exit 1 + fi + + - name: Validate multicall binary locale embedding + shell: bash + run: | + ## Validate that multicall binary embeds all utility locale files + echo "Validating multicall binary locale embedding..." + if [ "$multi_locales" -ge 80 ]; then + echo "✓ SUCCESS: multicall binary has all locales ($multi_locales files)" + else + echo "✗ FAILURE: multicall binary has too few embedded locale files ($multi_locales). Expected ≥ 80." + echo "This indicates the multicall binary is not getting all required locales." + exit 1 + fi + + - name: Finalize locale embedding tests + shell: bash + run: | + ## Clean up and report overall test results + rm -f test.txt target/uucore_target_util.txt + echo "✓ All locale embedding regression tests passed" + echo "Summary:" + echo " - cat binary: $cat_locales locale files (targeted embedding)" + echo " - ls binary: $ls_locales locale files (targeted embedding)" + echo " - multicall binary: $multi_locales locale files (full embedding)" diff --git a/build.rs b/build.rs index a0b86f273..1a7bba8dc 100644 --- a/build.rs +++ b/build.rs @@ -105,83 +105,4 @@ pub fn main() { mf.write_all(b"\n}\n").unwrap(); mf.flush().unwrap(); - - // Always generate embedded English locale files - // This ensures English works even when locale files aren't available (e.g., cargo install) - generate_embedded_english_locales(&out_dir, &crates).unwrap(); -} - -/// Generate embedded English locale files -/// -/// # Errors -/// -/// Returns an error if file operations fail or if there are I/O issues -fn generate_embedded_english_locales( - out_dir: &str, - crates: &[String], -) -> Result<(), Box> { - use std::fs; - - let mut embedded_file = File::create(Path::new(&out_dir).join("embedded_locales.rs"))?; - - writeln!(embedded_file, "// Generated at compile time - do not edit")?; - writeln!( - embedded_file, - "// This file contains embedded English locale files for all utilities" - )?; - writeln!(embedded_file)?; - writeln!(embedded_file, "use std::collections::HashMap;")?; - writeln!(embedded_file)?; - - // Start the function that returns embedded locales - writeln!( - embedded_file, - "pub fn get_embedded_locales() -> HashMap<&'static str, &'static str> {{" - )?; - writeln!(embedded_file, " let mut locales = HashMap::new();")?; - writeln!(embedded_file)?; - - // Embed locale files for each utility - for krate in crates { - let util_name = if let Some(stripped) = krate.strip_prefix("uu_") { - stripped - } else { - krate - }; - - let locale_path = Path::new("src/uu") - .join(util_name) - .join("locales/en-US.ftl"); - if locale_path.exists() { - let content = fs::read_to_string(&locale_path)?; - writeln!(embedded_file, " // Locale for {util_name}")?; - writeln!( - embedded_file, - " locales.insert(\"{util_name}/en-US.ftl\", r###\"{content}\"###);" - )?; - writeln!(embedded_file)?; - - // Tell Cargo to rerun if this file changes - println!("cargo:rerun-if-changed={}", locale_path.display()); - } - } - - // Also embed uucore locale file if it exists - let uucore_locale_path = Path::new("src/uucore/locales/en-US.ftl"); - if uucore_locale_path.exists() { - let content = fs::read_to_string(uucore_locale_path)?; - writeln!(embedded_file, " // Common uucore locale")?; - writeln!( - embedded_file, - " locales.insert(\"uucore/en-US.ftl\", r###\"{content}\"###);" - )?; - println!("cargo:rerun-if-changed={}", uucore_locale_path.display()); - } - - writeln!(embedded_file)?; - writeln!(embedded_file, " locales")?; - writeln!(embedded_file, "}}")?; - - embedded_file.flush()?; - Ok(()) } diff --git a/docs/src/l10n.md b/docs/src/l10n.md index d23a4e652..5704004ce 100644 --- a/docs/src/l10n.md +++ b/docs/src/l10n.md @@ -4,7 +4,7 @@ This guide explains how localization (L10n) is implemented in the **Rust-based c ## 🏗️ Architecture Overview -**English locale files are embedded directly in the binary**, ensuring that English always works regardless of how the software is installed. Other language locale files are loaded from the filesystem at runtime. +**English (US) locale files (`en-US.ftl`) are embedded directly in the binary**, ensuring that English always works regardless of how the software is installed. Other language locale files are loaded from the filesystem at runtime. ### Source Repository Structure diff --git a/src/uucore/build.rs b/src/uucore/build.rs index 6b9816e29..1fa88d93c 100644 --- a/src/uucore/build.rs +++ b/src/uucore/build.rs @@ -4,38 +4,19 @@ // file that was distributed with this source code. use std::env; +use std::fs::File; +use std::io::Write; use std::path::Path; -fn main() { - println!("cargo:rerun-if-changed=build.rs"); +pub fn main() -> Result<(), Box> { + let out_dir = env::var("OUT_DIR")?; - let out_dir = env::var("OUT_DIR").unwrap(); - - // Always generate embedded English locale files for fallback - generate_embedded_english_locales(&out_dir).unwrap(); -} - -/// Generate embedded English locale files -/// -/// # Errors -/// -/// Returns an error if file operations fail or if there are I/O issues -fn generate_embedded_english_locales(out_dir: &str) -> Result<(), Box> { - use std::fs::{self, File}; - use std::io::Write; - - // Since we're in uucore, we need to go up to the project root - let project_root = Path::new(env!("CARGO_MANIFEST_DIR")) - .parent() // src/ - .and_then(|p| p.parent()) // project root - .ok_or("Failed to find project root")?; - - let mut embedded_file = File::create(Path::new(out_dir).join("embedded_locales.rs"))?; + let mut embedded_file = File::create(Path::new(&out_dir).join("embedded_locales.rs"))?; writeln!(embedded_file, "// Generated at compile time - do not edit")?; writeln!( embedded_file, - "// This file contains embedded English locale files for all utilities" + "// This file contains embedded English locale files" )?; writeln!(embedded_file)?; writeln!(embedded_file, "use std::collections::HashMap;")?; @@ -49,29 +30,163 @@ fn generate_embedded_english_locales(out_dir: &str) -> Result<(), Box { + // Embed only the specific utility's locale (cat.ftl for cat for example) + embed_single_utility_locale(&mut embedded_file, &project_root()?, &util_name)?; + } + None => { + // Embed all utilities locales (multicall binary or fallback) + embed_all_utilities_locales(&mut embedded_file, &project_root()?)?; + } + } + + writeln!(embedded_file)?; + writeln!(embedded_file, " locales")?; + writeln!(embedded_file, "}}")?; + + embedded_file.flush()?; + Ok(()) +} + +/// Get the project root directory +fn project_root() -> Result> { + let manifest_dir = env::var("CARGO_MANIFEST_DIR")?; + let uucore_path = std::path::Path::new(&manifest_dir); + + // Navigate from src/uucore to project root + let project_root = uucore_path + .parent() // src/ + .and_then(|p| p.parent()) // project root + .ok_or("Could not determine project root")?; + + Ok(project_root.to_path_buf()) +} + +/// Attempt to detect which specific utility is being built +fn detect_target_utility() -> Option { + use std::fs; + + // First check if an explicit environment variable was set + if let Ok(target_util) = env::var("UUCORE_TARGET_UTIL") { + if !target_util.is_empty() { + return Some(target_util); + } + } + + // Check for a build configuration file in the target directory + if let Ok(target_dir) = env::var("CARGO_TARGET_DIR") { + let config_path = std::path::Path::new(&target_dir).join("uucore_target_util.txt"); + if let Ok(content) = fs::read_to_string(&config_path) { + let util_name = content.trim(); + if !util_name.is_empty() && util_name != "multicall" { + return Some(util_name.to_string()); } } } + // Fallback: Check the default target directory + if let Ok(project_root) = project_root() { + let config_path = project_root.join("target/uucore_target_util.txt"); + if let Ok(content) = fs::read_to_string(&config_path) { + let util_name = content.trim(); + if !util_name.is_empty() && util_name != "multicall" { + return Some(util_name.to_string()); + } + } + } + + // If no configuration found, assume multicall build + None +} + +/// Embed locale for a single specific utility +fn embed_single_utility_locale( + embedded_file: &mut std::fs::File, + project_root: &Path, + util_name: &str, +) -> Result<(), Box> { + use std::fs; + + // Embed the specific utility's locale + let locale_path = project_root + .join("src/uu") + .join(util_name) + .join("locales/en-US.ftl"); + + if locale_path.exists() { + let content = fs::read_to_string(&locale_path)?; + writeln!(embedded_file, " // Locale for {util_name}")?; + writeln!( + embedded_file, + " locales.insert(\"{util_name}/en-US.ftl\", r###\"{content}\"###);" + )?; + writeln!(embedded_file)?; + + // Tell Cargo to rerun if this file changes + println!("cargo:rerun-if-changed={}", locale_path.display()); + } + + // Always embed uucore locale file if it exists + let uucore_locale_path = project_root.join("src/uucore/locales/en-US.ftl"); + if uucore_locale_path.exists() { + let content = fs::read_to_string(&uucore_locale_path)?; + writeln!(embedded_file, " // Common uucore locale")?; + writeln!( + embedded_file, + " locales.insert(\"uucore/en-US.ftl\", r###\"{content}\"###);" + )?; + println!("cargo:rerun-if-changed={}", uucore_locale_path.display()); + } + + Ok(()) +} + +/// Embed locale files for all utilities (multicall binary) +fn embed_all_utilities_locales( + embedded_file: &mut std::fs::File, + project_root: &Path, +) -> Result<(), Box> { + use std::fs; + + // Discover all uu_* directories + let src_uu_dir = project_root.join("src/uu"); + if !src_uu_dir.exists() { + return Ok(()); + } + + let mut util_dirs = Vec::new(); + for entry in fs::read_dir(&src_uu_dir)? { + let entry = entry?; + if entry.file_type()?.is_dir() { + if let Some(dir_name) = entry.file_name().to_str() { + util_dirs.push(dir_name.to_string()); + } + } + } + util_dirs.sort(); + + // Embed locale files for each utility + for util_name in &util_dirs { + let locale_path = src_uu_dir.join(util_name).join("locales/en-US.ftl"); + if locale_path.exists() { + let content = fs::read_to_string(&locale_path)?; + writeln!(embedded_file, " // Locale for {util_name}")?; + writeln!( + embedded_file, + " locales.insert(\"{util_name}/en-US.ftl\", r###\"{content}\"###);" + )?; + writeln!(embedded_file)?; + + // Tell Cargo to rerun if this file changes + println!("cargo:rerun-if-changed={}", locale_path.display()); + } + } + // Also embed uucore locale file if it exists let uucore_locale_path = project_root.join("src/uucore/locales/en-US.ftl"); if uucore_locale_path.exists() { @@ -84,10 +199,6 @@ fn generate_embedded_english_locales(out_dir: &str) -> Result<(), Box eprintln!("Localization parse error at {snippet}: {err_msg:?}"), - other => eprintln!("Could not init the localization system: {other}"), - } - std::process::exit(99) - }); + locale::setup_localization(uucore::get_canonical_util_name(stringify!($util))) + .unwrap_or_else(|err| { + match err { + uucore::locale::LocalizationError::ParseResource { + error: err_msg, + snippet, + } => eprintln!("Localization parse error at {snippet}: {err_msg:?}"), + other => eprintln!("Could not init the localization system: {other}"), + } + std::process::exit(99) + }); // execute utility code let code = $util::uumain(uucore::args_os()); diff --git a/src/uucore/src/lib/mods/locale.rs b/src/uucore/src/lib/mods/locale.rs index 8412065e3..6bb4c0202 100644 --- a/src/uucore/src/lib/mods/locale.rs +++ b/src/uucore/src/lib/mods/locale.rs @@ -113,14 +113,18 @@ fn init_localization( locales_dir: &Path, util_name: &str, ) -> Result<(), LocalizationError> { - let en_locale = LanguageIdentifier::from_str(DEFAULT_LOCALE) + let default_locale = LanguageIdentifier::from_str(DEFAULT_LOCALE) .expect("Default locale should always be valid"); - // Try to load English from filesystem, fall back to embedded if that fails - let english_bundle = create_bundle(&default_locale, locales_dir).or_else(|_| { - // Try embedded English as fallback - create_bundle_from_embedded(&default_locale, util_name) - })?; + // Try to load English from embedded resources first, then fall back to filesystem. + // This ensures consistent behavior and faster loading since embedded resources + // are immediately available. The filesystem fallback allows for development + // and testing scenarios where locale files might be present in the filesystem. + let english_bundle = + create_english_bundle_from_embedded(&default_locale, util_name).or_else(|_| { + // Try filesystem as fallback (useful for development/testing) + create_bundle(&default_locale, locales_dir) + })?; let loc = if locale == &default_locale { // If requesting English, just use English as primary (no fallback needed) @@ -189,8 +193,8 @@ fn create_bundle( Ok(bundle) } -/// Create a bundle from embedded locale files -fn create_bundle_from_embedded( +/// Create a bundle from embedded English locale files +fn create_english_bundle_from_embedded( locale: &LanguageIdentifier, util_name: &str, ) -> Result, LocalizationError> { @@ -209,16 +213,20 @@ fn create_bundle_from_embedded( })?; let resource = FluentResource::try_new(ftl_content.to_string()).map_err( - |(_partial_resource, mut errs): (FluentResource, Vec)| { - let first_err = errs.remove(0); - let snippet = if let Some(range) = first_err.slice.clone() { - ftl_content.get(range).unwrap_or("").to_string() + |(_partial_resource, errs): (FluentResource, Vec)| { + if let Some(first_err) = errs.into_iter().next() { + let snippet = first_err + .slice + .clone() + .and_then(|range| ftl_content.get(range)) + .unwrap_or("") + .to_string(); + LocalizationError::ParseResource { + error: first_err, + snippet, + } } else { - String::new() - }; - LocalizationError::ParseResource { - error: first_err, - snippet, + LocalizationError::LocalesDirNotFound("Parse error without details".to_string()) } }, )?; @@ -360,14 +368,16 @@ pub fn setup_localization(p: &str) -> Result<(), LocalizationError> { LanguageIdentifier::from_str(DEFAULT_LOCALE).expect("Default locale should always be valid") }); - // Try to get locales from filesystem first + // Try to find the locales directory. If found, use init_localization which + // will prioritize embedded resources but can also load from filesystem. + // If no locales directory exists, directly use embedded English resources. match get_locales_dir(p) { Ok(locales_dir) => init_localization(&locale, &locales_dir, p), Err(_) => { - // Fallback to embedded English locales + // No locales directory found, use embedded English directly let default_locale = LanguageIdentifier::from_str(DEFAULT_LOCALE) .expect("Default locale should always be valid"); - let english_bundle = create_bundle_from_embedded(&default_locale, p)?; + let english_bundle = create_english_bundle_from_embedded(&default_locale, p)?; let localizer = Localizer::new(english_bundle); LOCALIZER.with(|lock| { @@ -379,10 +389,6 @@ pub fn setup_localization(p: &str) -> Result<(), LocalizationError> { } } -pub fn setup_localization_with_common(util_name: &str) -> Result<(), LocalizationError> { - setup_localization(util_name) -} - #[cfg(not(debug_assertions))] fn resolve_locales_dir_from_exe_dir(exe_dir: &Path, p: &str) -> Option { // 1. /locales/