Fix highlighting of byte escape sequences

Currently non-UTF8 escape sequences in byte strings and any escape
sequences in byte literals are ignored.
This commit is contained in:
oxalica 2023-07-17 22:28:57 +08:00
parent 37f84c101b
commit de1f766820
No known key found for this signature in database
GPG key ID: D425CB23CADE82D9
5 changed files with 48 additions and 10 deletions

View file

@ -1,7 +1,7 @@
//! Syntax highlighting for escape sequences
use crate::syntax_highlighting::highlights::Highlights;
use crate::{HlRange, HlTag};
use syntax::ast::{Char, IsString};
use syntax::ast::{Byte, Char, IsString};
use syntax::{AstToken, TextRange, TextSize};
pub(super) fn highlight_escape_string<T: IsString>(
@ -43,3 +43,23 @@ pub(super) fn highlight_escape_char(stack: &mut Highlights, char: &Char, start:
TextRange::new(start + TextSize::from(1), start + TextSize::from(text.len() as u32 + 1));
stack.add(HlRange { range, highlight: HlTag::EscapeSequence.into(), binding_hash: None })
}
pub(super) fn highlight_escape_byte(stack: &mut Highlights, byte: &Byte, start: TextSize) {
if byte.value().is_none() {
return;
}
let text = byte.text();
if !text.starts_with("b'") || !text.ends_with('\'') {
return;
}
let text = &text[2..text.len() - 1];
if !text.starts_with('\\') {
return;
}
let range =
TextRange::new(start + TextSize::from(2), start + TextSize::from(text.len() as u32 + 2));
stack.add(HlRange { range, highlight: HlTag::EscapeSequence.into(), binding_hash: None })
}

View file

@ -105,6 +105,8 @@ pre { color: #DCDCCC; background: #3F3F3F; font-size: 22px; padd
<span class="keyword">let</span> <span class="variable declaration">a</span> <span class="operator">=</span> <span class="char_literal">'</span><span class="escape_sequence">\x65</span><span class="char_literal">'</span><span class="semicolon">;</span>
<span class="keyword">let</span> <span class="variable declaration">a</span> <span class="operator">=</span> <span class="char_literal">'</span><span class="escape_sequence">\x00</span><span class="char_literal">'</span><span class="semicolon">;</span>
<span class="keyword">let</span> <span class="variable declaration">a</span> <span class="operator">=</span> <span class="byte_literal">b'</span><span class="escape_sequence">\xFF</span><span class="byte_literal">'</span><span class="semicolon">;</span>
<span class="macro">println</span><span class="macro_bang">!</span><span class="parenthesis macro">(</span><span class="string_literal macro">"Hello </span><span class="escape_sequence">{{</span><span class="string_literal macro">Hello</span><span class="escape_sequence">}}</span><span class="string_literal macro">"</span><span class="parenthesis macro">)</span><span class="semicolon">;</span>
<span class="comment">// from https://doc.rust-lang.org/std/fmt/index.html</span>
<span class="macro">println</span><span class="macro_bang">!</span><span class="parenthesis macro">(</span><span class="string_literal macro">"Hello"</span><span class="parenthesis macro">)</span><span class="semicolon">;</span> <span class="comment">// =&gt; "Hello"</span>
@ -159,8 +161,8 @@ pre { color: #DCDCCC; background: #3F3F3F; font-size: 22px; padd
<span class="macro">println</span><span class="macro_bang">!</span><span class="parenthesis macro">(</span><span class="string_literal macro">"Hello</span><span class="escape_sequence">\n</span><span class="string_literal macro">World"</span><span class="parenthesis macro">)</span><span class="semicolon">;</span>
<span class="macro">println</span><span class="macro_bang">!</span><span class="parenthesis macro">(</span><span class="string_literal macro">"</span><span class="escape_sequence">\u{48}</span><span class="escape_sequence">\x65</span><span class="escape_sequence">\x6C</span><span class="escape_sequence">\x6C</span><span class="escape_sequence">\x6F</span><span class="string_literal macro"> World"</span><span class="parenthesis macro">)</span><span class="semicolon">;</span>
<span class="keyword">let</span> <span class="punctuation">_</span> <span class="operator">=</span> <span class="string_literal">"</span><span class="escape_sequence">\x28</span><span class="escape_sequence">\x28</span><span class="escape_sequence">\x00</span><span class="escape_sequence">\x63</span><span class="escape_sequence">\n</span><span class="string_literal">"</span><span class="semicolon">;</span>
<span class="keyword">let</span> <span class="punctuation">_</span> <span class="operator">=</span> <span class="string_literal">b"</span><span class="escape_sequence">\x28</span><span class="escape_sequence">\x28</span><span class="escape_sequence">\x00</span><span class="escape_sequence">\x63</span><span class="escape_sequence">\n</span><span class="string_literal">"</span><span class="semicolon">;</span>
<span class="keyword">let</span> <span class="punctuation">_</span> <span class="operator">=</span> <span class="string_literal">"</span><span class="escape_sequence">\x28</span><span class="escape_sequence">\x28</span><span class="escape_sequence">\x00</span><span class="escape_sequence">\x63</span><span class="string_literal">\xFF</span><span class="escape_sequence">\n</span><span class="string_literal">"</span><span class="semicolon">;</span> <span class="comment">// invalid non-UTF8 escape sequences</span>
<span class="keyword">let</span> <span class="punctuation">_</span> <span class="operator">=</span> <span class="string_literal">b"</span><span class="escape_sequence">\x28</span><span class="escape_sequence">\x28</span><span class="escape_sequence">\x00</span><span class="escape_sequence">\x63</span><span class="escape_sequence">\xFF</span><span class="escape_sequence">\n</span><span class="string_literal">"</span><span class="semicolon">;</span> <span class="comment">// valid bytes</span>
<span class="keyword">let</span> <span class="variable declaration reference">backslash</span> <span class="operator">=</span> <span class="string_literal">r"\\"</span><span class="semicolon">;</span>
<span class="macro">println</span><span class="macro_bang">!</span><span class="parenthesis macro">(</span><span class="string_literal macro">"</span><span class="format_specifier">{</span><span class="escape_sequence">\x41</span><span class="format_specifier">}</span><span class="string_literal macro">"</span><span class="comma macro">,</span> <span class="none macro">A</span> <span class="operator macro">=</span> <span class="numeric_literal macro">92</span><span class="parenthesis macro">)</span><span class="semicolon">;</span>

View file

@ -451,6 +451,8 @@ fn main() {
let a = '\x65';
let a = '\x00';
let a = b'\xFF';
println!("Hello {{Hello}}");
// from https://doc.rust-lang.org/std/fmt/index.html
println!("Hello"); // => "Hello"
@ -505,8 +507,8 @@ fn main() {
println!("Hello\nWorld");
println!("\u{48}\x65\x6C\x6C\x6F World");
let _ = "\x28\x28\x00\x63\n";
let _ = b"\x28\x28\x00\x63\n";
let _ = "\x28\x28\x00\x63\xFF\n"; // invalid non-UTF8 escape sequences
let _ = b"\x28\x28\x00\x63\xFF\n"; // valid bytes
let backslash = r"\\";
println!("{\x41}", A = 92);