Format invisible characters in strings as Unicode escapes

This commit is contained in:
Aidan 2024-08-04 13:00:11 -04:00
parent b0be698354
commit 7be537dd55
No known key found for this signature in database
2 changed files with 66 additions and 4 deletions

View file

@ -618,6 +618,22 @@ fn starts_with_newline(expr: &Expr) -> bool {
}
}
/// Returns the `\u(...)` escape text for the blank code points that get rewritten,
/// or `None` for every other character.
///
/// Covered code points: U+200A (hair space), U+200B (zero width space),
/// U+200C (zero width non-joiner) and U+FEFF (zero width no-break space / BOM).
fn blank_char_escape(ch: char) -> Option<&'static str> {
    match ch {
        '\u{200a}' => Some("\\u(200a)"),
        '\u{200b}' => Some("\\u(200b)"),
        '\u{200c}' => Some("\\u(200c)"),
        '\u{feff}' => Some("\\u(feff)"),
        _ => None,
    }
}

/// Writes the body of a string into `buf`, one character at a time.
///
/// Blank characters listed in `blank_char_escape` are emitted as explicit
/// `\u(...)` escapes so they are visible in the formatted output. A space goes
/// through `buf.spaces(1)`, a line feed through `buf.newline()`, and every other
/// character is pushed unchanged.
///
/// NOTE(review): spaces and newlines are handed to `Buf`'s dedicated methods
/// rather than pushed raw; confirm that `Buf` does not drop spaces that directly
/// precede a newline, since inside a string literal they are significant.
fn fmt_str_body(body: &str, buf: &mut Buf) {
    for ch in body.chars() {
        if let Some(escape) = blank_char_escape(ch) {
            // Make the otherwise invisible character explicit.
            buf.push_str(escape);
        } else if ch == ' ' {
            buf.spaces(1);
        } else if ch == '\n' {
            buf.newline();
        } else {
            // Everything else is copied through untouched.
            buf.push(ch);
        }
    }
}
fn format_str_segment(seg: &StrSegment, buf: &mut Buf, indent: u16) {
use StrSegment::*;
@ -627,10 +643,10 @@ fn format_str_segment(seg: &StrSegment, buf: &mut Buf, indent: u16) {
// a line break in the input string
match string.strip_suffix('\n') {
Some(string_without_newline) => {
buf.push_str_allow_spaces(string_without_newline);
fmt_str_body(string_without_newline, buf);
buf.newline();
}
None => buf.push_str_allow_spaces(string),
None => fmt_str_body(string, buf),
}
}
Unicode(loc_str) => {
@ -690,7 +706,7 @@ pub fn fmt_str_literal(buf: &mut Buf, literal: StrLiteral, indent: u16) {
buf.push_newline_literal();
for line in string.split('\n') {
buf.indent(indent);
buf.push_str_allow_spaces(line);
fmt_str_body(line, buf);
buf.push_newline_literal();
}
buf.indent(indent);
@ -698,7 +714,7 @@ pub fn fmt_str_literal(buf: &mut Buf, literal: StrLiteral, indent: u16) {
} else {
buf.indent(indent);
buf.push('"');
buf.push_str_allow_spaces(string);
fmt_str_body(string, buf);
buf.push('"');
};
}

View file

@ -6333,6 +6333,52 @@ mod test_fmt {
);
}
// Blank characters that are already written as `\u(...)` escapes in the source
// should come out of the formatter unchanged.
// The input is a raw string, so each `\u(200a)` etc. below is literal escape
// text, not the invisible character itself.
// NOTE(review): `expr_formats_same` is defined outside this view; presumably it
// asserts that formatting the input yields the identical text.
#[test]
fn keep_explicit_blank_chars() {
expr_formats_same(indoc!(
r#"
x = "a\u(200a)b\u(200b)c\u(200c)d\u(feff)e"
x
"#
));
}
// Raw blank characters inside a string literal should be rewritten as explicit
// `\u(...)` escapes.
// The first argument is a normal (non-raw) Rust string, so `\u{200A}`, `\u{200B}`,
// `\u{200C}` and `\u{FEFF}` are Rust escapes that put the actual invisible
// characters into the input. The second argument is a raw string holding the
// lowercase escape text expected in the output.
// NOTE(review): `expr_formats_to` is defined outside this view; presumably it
// asserts that formatting the first argument produces the second.
#[test]
fn make_blank_chars_explicit() {
expr_formats_to(
indoc!(
"
x = \"a\u{200A}b\u{200B}c\u{200C}d\u{FEFF}e\"
x
"
),
indoc!(
r#"
x = "a\u(200a)b\u(200b)c\u(200c)d\u(feff)e"
x
"#
),
);
}
// Same rewrite as above, but for a string that also contains `$(bar)`
// (string interpolation, going by the test name): the raw U+200B in the text
// before `$(bar)` should become `\u(200b)` while the rest of the string,
// including the following space and the `$(bar)` part, stays as written.
// The input is a non-raw Rust string, so `\u{200B}` is the real character;
// the expected output is a raw string holding the escape text.
// NOTE(review): `expr_formats_to` is defined outside this view; presumably it
// asserts that formatting the first argument produces the second.
#[test]
fn make_blank_chars_explicit_when_interpolating() {
expr_formats_to(
indoc!(
"
x = \"foo:\u{200B} $(bar).\"
x
"
),
indoc!(
r#"
x = "foo:\u(200b) $(bar)."
x
"#
),
);
}
// this is a parse error atm
// #[test]
// fn multiline_apply() {