mirror of
https://github.com/SpaceManiac/SpacemanDMM.git
synced 2025-12-23 05:36:47 +00:00
Attempt UTF-8 decoding before falling back to Latin-1
This commit is contained in:
parent
50e8d95961
commit
3bc83b11d1
3 changed files with 24 additions and 33 deletions
|
|
@ -345,14 +345,14 @@ impl fmt::Display for ConstFn {
|
|||
// The constant evaluator
|
||||
|
||||
pub fn evaluate_str(location: Location, input: &[u8]) -> Result<Constant, DMError> {
|
||||
use super::lexer::{Lexer, from_latin1_borrowed};
|
||||
use super::lexer::{Lexer, from_utf8_or_latin1_borrowed};
|
||||
use super::parser::Parser;
|
||||
|
||||
let mut bytes = input.iter().map(|&x| Ok(x));
|
||||
let ctx = Context::default();
|
||||
let expr = Parser::new(&ctx, Lexer::new(&ctx, Default::default(), &mut bytes)).require_expression()?;
|
||||
if bytes.next().is_some() {
|
||||
return Err(DMError::new(location, format!("leftover: {:?} {}", from_latin1_borrowed(&input), bytes.len())));
|
||||
return Err(DMError::new(location, format!("leftover: {:?} {}", from_utf8_or_latin1_borrowed(&input), bytes.len())));
|
||||
}
|
||||
expr.simple_evaluate(location)
|
||||
}
|
||||
|
|
|
|||
|
|
@ -429,17 +429,8 @@ fn is_ident(ch: u8) -> bool {
|
|||
(ch >= b'a' && ch <= b'z') || (ch >= b'A' && ch <= b'Z') || ch == b'_'
|
||||
}
|
||||
|
||||
/// Convert the input bytes to a `String` assuming Latin-1 encoding.
|
||||
pub fn from_latin1(mut bytes: Vec<u8>) -> String {
|
||||
fn from_latin1(bytes: &[u8]) -> String {
|
||||
let non_ascii = bytes.iter().filter(|&&i| i > 0x7f).count();
|
||||
if non_ascii == 0 {
|
||||
match String::from_utf8(bytes) {
|
||||
Ok(v) => return v,
|
||||
// shouldn't happen, but try to produce a sensible result anyways
|
||||
Err(e) => bytes = e.into_bytes(),
|
||||
}
|
||||
}
|
||||
|
||||
let mut output = String::with_capacity(bytes.len() + non_ascii);
|
||||
for &byte in bytes.iter() {
|
||||
output.push(byte as char);
|
||||
|
|
@ -447,20 +438,20 @@ pub fn from_latin1(mut bytes: Vec<u8>) -> String {
|
|||
output
|
||||
}
|
||||
|
||||
/// Convert the input bytes to a `String` assuming Latin-1 encoding.
|
||||
pub fn from_latin1_borrowed(bytes: &[u8]) -> Cow<str> {
|
||||
let non_ascii = bytes.iter().filter(|&&i| i > 0x7f).count();
|
||||
if non_ascii == 0 {
|
||||
if let Ok(v) = ::std::str::from_utf8(bytes) {
|
||||
return Cow::Borrowed(v);
|
||||
}
|
||||
/// Convert the input bytes to a `String`, attempting UTF-8 decoding first and falling back to Latin-1 if the bytes are not valid UTF-8.
|
||||
pub fn from_utf8_or_latin1(bytes: Vec<u8>) -> String {
|
||||
match String::from_utf8(bytes) {
|
||||
Ok(v) => v,
|
||||
Err(e) => from_latin1(e.as_bytes()),
|
||||
}
|
||||
}
|
||||
|
||||
let mut output = String::with_capacity(bytes.len() + non_ascii);
|
||||
for &byte in bytes.iter() {
|
||||
output.push(byte as char);
|
||||
/// Convert the input bytes to a `Cow<str>`, borrowing them when they are valid UTF-8 and otherwise falling back to an owned Latin-1 decoding.
|
||||
pub fn from_utf8_or_latin1_borrowed(bytes: &[u8]) -> Cow<str> {
|
||||
match ::std::str::from_utf8(bytes) {
|
||||
Ok(v) => Cow::Borrowed(v),
|
||||
Err(_) => Cow::Owned(from_latin1(bytes)),
|
||||
}
|
||||
Cow::Owned(output)
|
||||
}
|
||||
|
||||
// Used to track nested string interpolations and know when they end.
|
||||
|
|
@ -841,7 +832,7 @@ impl<'ctx, I: Iterator<Item=io::Result<u8>>> Lexer<'ctx, I> {
|
|||
}
|
||||
}
|
||||
}
|
||||
from_latin1(ident)
|
||||
from_utf8_or_latin1(ident)
|
||||
}
|
||||
|
||||
fn read_resource(&mut self) -> String {
|
||||
|
|
@ -863,7 +854,7 @@ impl<'ctx, I: Iterator<Item=io::Result<u8>>> Lexer<'ctx, I> {
|
|||
}
|
||||
}
|
||||
}
|
||||
from_latin1(buf)
|
||||
from_utf8_or_latin1(buf)
|
||||
}
|
||||
|
||||
fn read_string(&mut self, end: &'static [u8], interp_closed: bool) -> Token {
|
||||
|
|
@ -925,7 +916,7 @@ impl<'ctx, I: Iterator<Item=io::Result<u8>>> Lexer<'ctx, I> {
|
|||
}
|
||||
}
|
||||
|
||||
let string = from_latin1(buf);
|
||||
let string = from_utf8_or_latin1(buf);
|
||||
match (interp_opened, interp_closed) {
|
||||
(true, true) => Token::InterpStringPart(string),
|
||||
(true, false) => Token::InterpStringBegin(string),
|
||||
|
|
@ -952,7 +943,7 @@ impl<'ctx, I: Iterator<Item=io::Result<u8>>> Lexer<'ctx, I> {
|
|||
break;
|
||||
}
|
||||
}
|
||||
Token::String(from_latin1(buf))
|
||||
Token::String(from_utf8_or_latin1(buf))
|
||||
}
|
||||
|
||||
fn read_raw_string(&mut self) -> Token {
|
||||
|
|
|
|||
|
|
@ -8,7 +8,7 @@ use ndarray::{self, Array3, Axis};
|
|||
use linked_hash_map::LinkedHashMap;
|
||||
|
||||
use dm::{DMError, Location, HasLocation};
|
||||
use dm::lexer::{LocationTracker, from_latin1};
|
||||
use dm::lexer::{LocationTracker, from_utf8_or_latin1};
|
||||
use dm::constants::Constant;
|
||||
|
||||
const MAX_KEY_LENGTH: u8 = 3;
|
||||
|
|
@ -329,14 +329,14 @@ fn parse_map(map: &mut Map, f: File) -> Result<(), DMError> {
|
|||
skip_whitespace = true;
|
||||
} else if ch == b';' {
|
||||
curr_prefab.vars.insert(
|
||||
from_latin1(take(&mut curr_var)),
|
||||
from_utf8_or_latin1(take(&mut curr_var)),
|
||||
dm::constants::evaluate_str(chars.location(), &take(&mut curr_datum))?,
|
||||
);
|
||||
skip_whitespace = true;
|
||||
} else if ch == b'}' {
|
||||
if !curr_var.is_empty() {
|
||||
curr_prefab.vars.insert(
|
||||
from_latin1(take(&mut curr_var)),
|
||||
from_utf8_or_latin1(take(&mut curr_var)),
|
||||
dm::constants::evaluate_str(chars.location(), &take(&mut curr_datum))?,
|
||||
);
|
||||
}
|
||||
|
|
@ -346,16 +346,16 @@ fn parse_map(map: &mut Map, f: File) -> Result<(), DMError> {
|
|||
}
|
||||
}
|
||||
} else if ch == b'{' {
|
||||
curr_prefab.path = from_latin1(take(&mut curr_datum));
|
||||
curr_prefab.path = from_utf8_or_latin1(take(&mut curr_datum));
|
||||
in_varedit_block = true;
|
||||
} else if ch == b',' {
|
||||
if curr_prefab.path.is_empty() && !curr_datum.is_empty() {
|
||||
curr_prefab.path = from_latin1(take(&mut curr_datum));
|
||||
curr_prefab.path = from_utf8_or_latin1(take(&mut curr_datum));
|
||||
}
|
||||
curr_data.push(take(&mut curr_prefab));
|
||||
} else if ch == b')' {
|
||||
if curr_prefab.path.is_empty() && !curr_datum.is_empty() {
|
||||
curr_prefab.path = from_latin1(take(&mut curr_datum));
|
||||
curr_prefab.path = from_utf8_or_latin1(take(&mut curr_datum));
|
||||
}
|
||||
curr_data.push(take(&mut curr_prefab));
|
||||
let key = take(&mut curr_key);
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue