update to literal-escaper 0.0.4 for better API without unreachable and faster string parsing

2025-10-27 10:17:15 +00:00 · 2025-03-07 11:17:39 +00:00 · 2025-03-07 11:17:39 +00:00 · 13a46eab7d
commit 13a46eab7d
parent e2c3647c6a
7 changed files with 137 additions and 165 deletions
--- a/crates/syntax/src/ast/token_ext.rs
+++ b/crates/syntax/src/ast/token_ext.rs
@ -1,9 +1,11 @@
 //! There are many AstNodes, but only a few tokens, so we hand-write them here.

+use std::ops::Range;
 use std::{borrow::Cow, num::ParseIntError};

 use rustc_literal_escaper::{
-    EscapeError, MixedUnit, Mode, unescape_byte, unescape_char, unescape_mixed, unescape_unicode,
+    EscapeError, MixedUnit, unescape_byte, unescape_byte_str, unescape_c_str, unescape_char,
+    unescape_str,
 };
 use stdx::always;

@ -150,7 +152,7 @@ impl QuoteOffsets {

 pub trait IsString: AstToken {
    const RAW_PREFIX: &'static str;
-    const MODE: Mode;
+    fn unescape(s: &str, callback: impl FnMut(Range<usize>, Result<char, EscapeError>));
    fn is_raw(&self) -> bool {
        self.text().starts_with(Self::RAW_PREFIX)
    }
@ -185,7 +187,7 @@ pub trait IsString: AstToken {
        let text = &self.text()[text_range_no_quotes - start];
        let offset = text_range_no_quotes.start() - start;

-        unescape_unicode(text, Self::MODE, &mut |range, unescaped_char| {
+        Self::unescape(text, &mut |range: Range<usize>, unescaped_char| {
            if let Some((s, e)) = range.start.try_into().ok().zip(range.end.try_into().ok()) {
                cb(TextRange::new(s, e) + offset, unescaped_char);
            }
@ -203,7 +205,9 @@ pub trait IsString: AstToken {

 impl IsString for ast::String {
    const RAW_PREFIX: &'static str = "r";
-    const MODE: Mode = Mode::Str;
+    fn unescape(s: &str, cb: impl FnMut(Range<usize>, Result<char, EscapeError>)) {
+        unescape_str(s, cb)
+    }
 }

 impl ast::String {
@ -218,20 +222,19 @@ impl ast::String {
        let mut buf = String::new();
        let mut prev_end = 0;
        let mut has_error = None;
-        unescape_unicode(text, Self::MODE, &mut |char_range, unescaped_char| match (
-            unescaped_char,
-            buf.capacity() == 0,
-        ) {
-            (Ok(c), false) => buf.push(c),
-            (Ok(_), true) if char_range.len() == 1 && char_range.start == prev_end => {
-                prev_end = char_range.end
+        unescape_str(text, |char_range, unescaped_char| {
+            match (unescaped_char, buf.capacity() == 0) {
+                (Ok(c), false) => buf.push(c),
+                (Ok(_), true) if char_range.len() == 1 && char_range.start == prev_end => {
+                    prev_end = char_range.end
+                }
+                (Ok(c), true) => {
+                    buf.reserve_exact(text.len());
+                    buf.push_str(&text[..prev_end]);
+                    buf.push(c);
+                }
+                (Err(e), _) => has_error = Some(e),
            }
-            (Ok(c), true) => {
-                buf.reserve_exact(text.len());
-                buf.push_str(&text[..prev_end]);
-                buf.push(c);
-            }
-            (Err(e), _) => has_error = Some(e),
        });

        match (has_error, buf.capacity() == 0) {
@ -244,7 +247,9 @@ impl ast::String {

 impl IsString for ast::ByteString {
    const RAW_PREFIX: &'static str = "br";
-    const MODE: Mode = Mode::ByteStr;
+    fn unescape(s: &str, mut callback: impl FnMut(Range<usize>, Result<char, EscapeError>)) {
+        unescape_byte_str(s, |range, res| callback(range, res.map(char::from)))
+    }
 }

 impl ast::ByteString {
@ -259,20 +264,19 @@ impl ast::ByteString {
        let mut buf: Vec<u8> = Vec::new();
        let mut prev_end = 0;
        let mut has_error = None;
-        unescape_unicode(text, Self::MODE, &mut |char_range, unescaped_char| match (
-            unescaped_char,
-            buf.capacity() == 0,
-        ) {
-            (Ok(c), false) => buf.push(c as u8),
-            (Ok(_), true) if char_range.len() == 1 && char_range.start == prev_end => {
-                prev_end = char_range.end
+        unescape_byte_str(text, |char_range, unescaped_byte| {
+            match (unescaped_byte, buf.capacity() == 0) {
+                (Ok(b), false) => buf.push(b),
+                (Ok(_), true) if char_range.len() == 1 && char_range.start == prev_end => {
+                    prev_end = char_range.end
+                }
+                (Ok(b), true) => {
+                    buf.reserve_exact(text.len());
+                    buf.extend_from_slice(&text.as_bytes()[..prev_end]);
+                    buf.push(b);
+                }
+                (Err(e), _) => has_error = Some(e),
            }
-            (Ok(c), true) => {
-                buf.reserve_exact(text.len());
-                buf.extend_from_slice(&text.as_bytes()[..prev_end]);
-                buf.push(c as u8);
-            }
-            (Err(e), _) => has_error = Some(e),
        });

        match (has_error, buf.capacity() == 0) {
@ -285,25 +289,10 @@ impl ast::ByteString {

 impl IsString for ast::CString {
    const RAW_PREFIX: &'static str = "cr";
-    const MODE: Mode = Mode::CStr;
-
-    fn escaped_char_ranges(&self, cb: &mut dyn FnMut(TextRange, Result<char, EscapeError>)) {
-        let text_range_no_quotes = match self.text_range_between_quotes() {
-            Some(it) => it,
-            None => return,
-        };
-
-        let start = self.syntax().text_range().start();
-        let text = &self.text()[text_range_no_quotes - start];
-        let offset = text_range_no_quotes.start() - start;
-
-        unescape_mixed(text, Self::MODE, &mut |range, unescaped_char| {
-            let text_range =
-                TextRange::new(range.start.try_into().unwrap(), range.end.try_into().unwrap());
-            // XXX: This method should only be used for highlighting ranges. The unescaped
-            // char/byte is not used. For simplicity, we return an arbitrary placeholder char.
-            cb(text_range + offset, unescaped_char.map(|_| ' '));
-        });
+    // NOTE: This method should only be used for highlighting ranges. The unescaped
+    // char/byte is not used. For simplicity, we return an arbitrary placeholder char.
+    fn unescape(s: &str, mut callback: impl FnMut(Range<usize>, Result<char, EscapeError>)) {
+        unescape_c_str(s, |range, _res| callback(range, Ok('_')))
    }
 }

@ -323,10 +312,7 @@ impl ast::CString {
            MixedUnit::Char(c) => buf.extend(c.encode_utf8(&mut [0; 4]).as_bytes()),
            MixedUnit::HighByte(b) => buf.push(b),
        };
-        unescape_mixed(text, Self::MODE, &mut |char_range, unescaped| match (
-            unescaped,
-            buf.capacity() == 0,
-        ) {
+        unescape_c_str(text, |char_range, unescaped| match (unescaped, buf.capacity() == 0) {
            (Ok(u), false) => extend_unit(&mut buf, u),
            (Ok(_), true) if char_range.len() == 1 && char_range.start == prev_end => {
                prev_end = char_range.end
--- a/crates/syntax/src/validation.rs
+++ b/crates/syntax/src/validation.rs
@ -6,7 +6,9 @@ mod block;

 use itertools::Itertools;
 use rowan::Direction;
-use rustc_literal_escaper::{self, EscapeError, Mode, unescape_mixed, unescape_unicode};
+use rustc_literal_escaper::{
+    EscapeError, unescape_byte, unescape_byte_str, unescape_c_str, unescape_char, unescape_str,
+};

 use crate::{
    AstNode, SyntaxError,
@ -47,7 +49,7 @@ pub(crate) fn validate(root: &SyntaxNode, errors: &mut Vec<SyntaxError>) {
 }

 fn rustc_unescape_error_to_string(err: EscapeError) -> (&'static str, bool) {
-    use rustc_literal_escaper::EscapeError as EE;
+    use EscapeError as EE;

    #[rustfmt::skip]
    let err_message = match err {
@ -142,7 +144,7 @@ fn validate_literal(literal: ast::Literal, acc: &mut Vec<SyntaxError>) {
        ast::LiteralKind::String(s) => {
            if !s.is_raw() {
                if let Some(without_quotes) = unquote(text, 1, '"') {
-                    unescape_unicode(without_quotes, Mode::Str, &mut |range, char| {
+                    unescape_str(without_quotes, |range, char| {
                        if let Err(err) = char {
                            push_err(1, range.start, err);
                        }
@ -153,7 +155,7 @@ fn validate_literal(literal: ast::Literal, acc: &mut Vec<SyntaxError>) {
        ast::LiteralKind::ByteString(s) => {
            if !s.is_raw() {
                if let Some(without_quotes) = unquote(text, 2, '"') {
-                    unescape_unicode(without_quotes, Mode::ByteStr, &mut |range, char| {
+                    unescape_byte_str(without_quotes, |range, char| {
                        if let Err(err) = char {
                            push_err(1, range.start, err);
                        }
@ -164,7 +166,7 @@ fn validate_literal(literal: ast::Literal, acc: &mut Vec<SyntaxError>) {
        ast::LiteralKind::CString(s) => {
            if !s.is_raw() {
                if let Some(without_quotes) = unquote(text, 2, '"') {
-                    unescape_mixed(without_quotes, Mode::CStr, &mut |range, char| {
+                    unescape_c_str(without_quotes, |range, char| {
                        if let Err(err) = char {
                            push_err(1, range.start, err);
                        }
@ -174,20 +176,16 @@ fn validate_literal(literal: ast::Literal, acc: &mut Vec<SyntaxError>) {
        }
        ast::LiteralKind::Char(_) => {
            if let Some(without_quotes) = unquote(text, 1, '\'') {
-                unescape_unicode(without_quotes, Mode::Char, &mut |range, char| {
-                    if let Err(err) = char {
-                        push_err(1, range.start, err);
-                    }
-                });
+                if let Err(err) = unescape_char(without_quotes) {
+                    push_err(1, 0, err);
+                }
            }
        }
        ast::LiteralKind::Byte(_) => {
            if let Some(without_quotes) = unquote(text, 2, '\'') {
-                unescape_unicode(without_quotes, Mode::Byte, &mut |range, char| {
-                    if let Err(err) = char {
-                        push_err(2, range.start, err);
-                    }
-                });
+                if let Err(err) = unescape_byte(without_quotes) {
+                    push_err(2, 0, err);
+                }
            }
        }
        ast::LiteralKind::IntNumber(_)