Update to literal-escaper 0.0.4 for a better API without unreachable and for faster string parsing

This commit is contained in:
Marijn Schouten 2025-03-07 11:17:39 +00:00 committed by Laurențiu Nicola
parent e2c3647c6a
commit 13a46eab7d
7 changed files with 137 additions and 165 deletions

View file

@ -1,9 +1,11 @@
//! There are many AstNodes, but only a few tokens, so we hand-write them here.
use std::ops::Range;
use std::{borrow::Cow, num::ParseIntError};
use rustc_literal_escaper::{
EscapeError, MixedUnit, Mode, unescape_byte, unescape_char, unescape_mixed, unescape_unicode,
EscapeError, MixedUnit, unescape_byte, unescape_byte_str, unescape_c_str, unescape_char,
unescape_str,
};
use stdx::always;
@ -150,7 +152,7 @@ impl QuoteOffsets {
pub trait IsString: AstToken {
const RAW_PREFIX: &'static str;
const MODE: Mode;
fn unescape(s: &str, callback: impl FnMut(Range<usize>, Result<char, EscapeError>));
fn is_raw(&self) -> bool {
self.text().starts_with(Self::RAW_PREFIX)
}
@ -185,7 +187,7 @@ pub trait IsString: AstToken {
let text = &self.text()[text_range_no_quotes - start];
let offset = text_range_no_quotes.start() - start;
unescape_unicode(text, Self::MODE, &mut |range, unescaped_char| {
Self::unescape(text, &mut |range: Range<usize>, unescaped_char| {
if let Some((s, e)) = range.start.try_into().ok().zip(range.end.try_into().ok()) {
cb(TextRange::new(s, e) + offset, unescaped_char);
}
@ -203,7 +205,9 @@ pub trait IsString: AstToken {
// `IsString` implementation for `ast::String` tokens.
impl IsString for ast::String {
// Prefix that `is_raw` looks for at the start of the token text.
const RAW_PREFIX: &'static str = "r";
const MODE: Mode = Mode::Str;
// Unescapes `s` by handing it, together with the caller's callback, straight
// to `unescape_str`; the callback receives each source range and its result.
fn unescape(s: &str, cb: impl FnMut(Range<usize>, Result<char, EscapeError>)) {
unescape_str(s, cb)
}
}
impl ast::String {
@ -218,20 +222,19 @@ impl ast::String {
let mut buf = String::new();
let mut prev_end = 0;
let mut has_error = None;
unescape_unicode(text, Self::MODE, &mut |char_range, unescaped_char| match (
unescaped_char,
buf.capacity() == 0,
) {
(Ok(c), false) => buf.push(c),
(Ok(_), true) if char_range.len() == 1 && char_range.start == prev_end => {
prev_end = char_range.end
unescape_str(text, |char_range, unescaped_char| {
match (unescaped_char, buf.capacity() == 0) {
(Ok(c), false) => buf.push(c),
(Ok(_), true) if char_range.len() == 1 && char_range.start == prev_end => {
prev_end = char_range.end
}
(Ok(c), true) => {
buf.reserve_exact(text.len());
buf.push_str(&text[..prev_end]);
buf.push(c);
}
(Err(e), _) => has_error = Some(e),
}
(Ok(c), true) => {
buf.reserve_exact(text.len());
buf.push_str(&text[..prev_end]);
buf.push(c);
}
(Err(e), _) => has_error = Some(e),
});
match (has_error, buf.capacity() == 0) {
@ -244,7 +247,9 @@ impl ast::String {
// `IsString` implementation for `ast::ByteString` tokens.
impl IsString for ast::ByteString {
// Prefix that `is_raw` looks for at the start of the token text.
const RAW_PREFIX: &'static str = "br";
const MODE: Mode = Mode::ByteStr;
// Unescapes `s` with `unescape_byte_str` and forwards every (range, result)
// pair to `callback`. Successful results are converted with `char::from` so
// they match the `Result<char, EscapeError>` shape the callback expects;
// errors are passed through unchanged.
fn unescape(s: &str, mut callback: impl FnMut(Range<usize>, Result<char, EscapeError>)) {
unescape_byte_str(s, |range, res| callback(range, res.map(char::from)))
}
}
impl ast::ByteString {
@ -259,20 +264,19 @@ impl ast::ByteString {
let mut buf: Vec<u8> = Vec::new();
let mut prev_end = 0;
let mut has_error = None;
unescape_unicode(text, Self::MODE, &mut |char_range, unescaped_char| match (
unescaped_char,
buf.capacity() == 0,
) {
(Ok(c), false) => buf.push(c as u8),
(Ok(_), true) if char_range.len() == 1 && char_range.start == prev_end => {
prev_end = char_range.end
unescape_byte_str(text, |char_range, unescaped_byte| {
match (unescaped_byte, buf.capacity() == 0) {
(Ok(b), false) => buf.push(b),
(Ok(_), true) if char_range.len() == 1 && char_range.start == prev_end => {
prev_end = char_range.end
}
(Ok(b), true) => {
buf.reserve_exact(text.len());
buf.extend_from_slice(&text.as_bytes()[..prev_end]);
buf.push(b);
}
(Err(e), _) => has_error = Some(e),
}
(Ok(c), true) => {
buf.reserve_exact(text.len());
buf.extend_from_slice(&text.as_bytes()[..prev_end]);
buf.push(c as u8);
}
(Err(e), _) => has_error = Some(e),
});
match (has_error, buf.capacity() == 0) {
@ -285,25 +289,10 @@ impl ast::ByteString {
impl IsString for ast::CString {
const RAW_PREFIX: &'static str = "cr";
const MODE: Mode = Mode::CStr;
fn escaped_char_ranges(&self, cb: &mut dyn FnMut(TextRange, Result<char, EscapeError>)) {
let text_range_no_quotes = match self.text_range_between_quotes() {
Some(it) => it,
None => return,
};
let start = self.syntax().text_range().start();
let text = &self.text()[text_range_no_quotes - start];
let offset = text_range_no_quotes.start() - start;
unescape_mixed(text, Self::MODE, &mut |range, unescaped_char| {
let text_range =
TextRange::new(range.start.try_into().unwrap(), range.end.try_into().unwrap());
// XXX: This method should only be used for highlighting ranges. The unescaped
// char/byte is not used. For simplicity, we return an arbitrary placeholder char.
cb(text_range + offset, unescaped_char.map(|_| ' '));
});
// NOTE: This method should only be used for highlighting ranges. The unescaped
// char/byte is not used. For simplicity, we return an arbitrary placeholder char.
fn unescape(s: &str, mut callback: impl FnMut(Range<usize>, Result<char, EscapeError>)) {
unescape_c_str(s, |range, _res| callback(range, Ok('_')))
}
}
@ -323,10 +312,7 @@ impl ast::CString {
MixedUnit::Char(c) => buf.extend(c.encode_utf8(&mut [0; 4]).as_bytes()),
MixedUnit::HighByte(b) => buf.push(b),
};
unescape_mixed(text, Self::MODE, &mut |char_range, unescaped| match (
unescaped,
buf.capacity() == 0,
) {
unescape_c_str(text, |char_range, unescaped| match (unescaped, buf.capacity() == 0) {
(Ok(u), false) => extend_unit(&mut buf, u),
(Ok(_), true) if char_range.len() == 1 && char_range.start == prev_end => {
prev_end = char_range.end

View file

@ -6,7 +6,9 @@ mod block;
use itertools::Itertools;
use rowan::Direction;
use rustc_literal_escaper::{self, EscapeError, Mode, unescape_mixed, unescape_unicode};
use rustc_literal_escaper::{
EscapeError, unescape_byte, unescape_byte_str, unescape_c_str, unescape_char, unescape_str,
};
use crate::{
AstNode, SyntaxError,
@ -47,7 +49,7 @@ pub(crate) fn validate(root: &SyntaxNode, errors: &mut Vec<SyntaxError>) {
}
fn rustc_unescape_error_to_string(err: EscapeError) -> (&'static str, bool) {
use rustc_literal_escaper::EscapeError as EE;
use EscapeError as EE;
#[rustfmt::skip]
let err_message = match err {
@ -142,7 +144,7 @@ fn validate_literal(literal: ast::Literal, acc: &mut Vec<SyntaxError>) {
ast::LiteralKind::String(s) => {
if !s.is_raw() {
if let Some(without_quotes) = unquote(text, 1, '"') {
unescape_unicode(without_quotes, Mode::Str, &mut |range, char| {
unescape_str(without_quotes, |range, char| {
if let Err(err) = char {
push_err(1, range.start, err);
}
@ -153,7 +155,7 @@ fn validate_literal(literal: ast::Literal, acc: &mut Vec<SyntaxError>) {
ast::LiteralKind::ByteString(s) => {
if !s.is_raw() {
if let Some(without_quotes) = unquote(text, 2, '"') {
unescape_unicode(without_quotes, Mode::ByteStr, &mut |range, char| {
unescape_byte_str(without_quotes, |range, char| {
if let Err(err) = char {
push_err(1, range.start, err);
}
@ -164,7 +166,7 @@ fn validate_literal(literal: ast::Literal, acc: &mut Vec<SyntaxError>) {
ast::LiteralKind::CString(s) => {
if !s.is_raw() {
if let Some(without_quotes) = unquote(text, 2, '"') {
unescape_mixed(without_quotes, Mode::CStr, &mut |range, char| {
unescape_c_str(without_quotes, |range, char| {
if let Err(err) = char {
push_err(1, range.start, err);
}
@ -174,20 +176,16 @@ fn validate_literal(literal: ast::Literal, acc: &mut Vec<SyntaxError>) {
}
ast::LiteralKind::Char(_) => {
if let Some(without_quotes) = unquote(text, 1, '\'') {
unescape_unicode(without_quotes, Mode::Char, &mut |range, char| {
if let Err(err) = char {
push_err(1, range.start, err);
}
});
if let Err(err) = unescape_char(without_quotes) {
push_err(1, 0, err);
}
}
}
ast::LiteralKind::Byte(_) => {
if let Some(without_quotes) = unquote(text, 2, '\'') {
unescape_unicode(without_quotes, Mode::Byte, &mut |range, char| {
if let Err(err) = char {
push_err(2, range.start, err);
}
});
if let Err(err) = unescape_byte(without_quotes) {
push_err(2, 0, err);
}
}
}
ast::LiteralKind::IntNumber(_)