Parser: skip UTF-8 BOM

Skip the byte order mark (BOM) at the start of the source.
A BOM is also allowed in .rs and Cargo.toml files.

Fixes #7291
This commit is contained in:
Olivier Goffart 2025-01-07 11:12:52 +01:00
parent e1e8f04e9f
commit aedd04a3d8
3 changed files with 29 additions and 0 deletions

View file

@ -198,6 +198,17 @@ pub fn lex(mut source: &str) -> Vec<crate::parser::Token> {
let mut result = vec![];
let mut offset = 0;
let mut state = LexState::default();
if source.starts_with("\u{FEFF}") {
// Skip BOM
result.push(crate::parser::Token {
kind: SyntaxKind::Whitespace,
text: source[..3].into(),
offset: 0,
..Default::default()
});
source = &source[3..];
offset += 3;
}
while !source.is_empty() {
if let Some((len, kind)) = crate::parser::lex_next_token(source, &mut state) {
result.push(crate::parser::Token {

View file

@ -0,0 +1,12 @@
// Copyright © SixtyFPS GmbH <info@slint.dev>
// SPDX-License-Identifier: GPL-3.0-only OR LicenseRef-Slint-Royalty-free-2.0 OR LicenseRef-Slint-Software-3.0
// This file contains a UTF-8 BOM at the beginning of the file.
component Foo { }
//^warning{Component is neither used nor exported}
export component XX {
}

View file

@ -69,6 +69,12 @@ fn syntax_tests() -> std::io::Result<()> {
fn process_file(path: &std::path::Path) -> std::io::Result<bool> {
let source = std::fs::read_to_string(path)?;
if path.to_str().unwrap_or("").contains("bom-") && !source.starts_with("\u{FEFF}") {
// make sure that the file still contains the BOM and that it wasn't removed by some tool
return Err(std::io::Error::other(format!(
"{path:?} does not contains BOM while it should"
)));
}
std::panic::catch_unwind(|| process_file_source(path, source, false)).unwrap_or_else(|err| {
println!("Panic while processing {}: {:?}", path.display(), err);
Ok(false)