This commit is contained in:
Khashayar Fereidani 2025-12-23 10:35:39 +03:30 committed by GitHub
commit 6329ea0cae
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 76 additions and 57 deletions

5
Cargo.lock generated
View file

@ -2839,9 +2839,9 @@ checksum = "490cc448043f947bae3cbee9c203358d62dbee0db12107a74be5c30ccfd09771"
[[package]]
name = "memchr"
version = "2.7.4"
version = "2.7.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3"
checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273"
[[package]]
name = "memmap2"
@ -5141,6 +5141,7 @@ dependencies = [
"bitflags 2.9.4",
"criterion",
"fallible-iterator",
"memchr",
"miette",
"pprof",
"serde",

View file

@ -23,6 +23,7 @@ strum_macros = {workspace = true }
serde = { workspace = true , optional = true, features = ["derive"] }
thiserror = { workspace = true }
turso_macros = { workspace = true }
memchr = "2.7.6"
[dev-dependencies]
fallible-iterator = { workspace = true }

View file

@ -298,6 +298,38 @@ impl<'a> Lexer<'a> {
debug_assert!(f(_value.unwrap()))
}
#[inline]
// Eats up to but not including the specified byte, returns true if found
fn eat_until(&mut self, byte: u8) -> bool {
    // Scan the unread tail for `byte`; memchr is SIMD-accelerated.
    if let Some(pos) = memchr::memchr(byte, self.remaining()) {
        // Stop the cursor right on the found byte (it is not consumed).
        self.offset += pos;
        true
    } else {
        // Not found: the byte-absent path is the rare one, so mark it cold
        // and park the cursor at end-of-input.
        cold();
        self.offset = self.input.len();
        false
    }
}
#[inline]
// Eats up to and including the specified byte, returns true if found
fn eat_past(&mut self, byte: u8) -> bool {
    // Locate `byte` in the unread tail of the input.
    if let Some(pos) = memchr::memchr(byte, self.remaining()) {
        // Advance one past the match so the found byte is consumed too.
        self.offset += pos + 1;
        true
    } else {
        // Not found: rare path — hint the branch predictor and consume
        // everything that remains.
        cold();
        self.offset = self.input.len();
        false
    }
}
#[inline]
fn eat_while<F>(&mut self, f: F)
where
@ -420,8 +452,7 @@ impl<'a> Lexer<'a> {
match self.peek() {
Some(b'-') => {
self.eat_and_assert(|b| b == b'-');
self.eat_while(|b| b != b'\n');
if self.peek() == Some(b'\n') {
if self.eat_until(b'\n') {
self.eat_and_assert(|b| b == b'\n');
}
@ -449,21 +480,18 @@ impl<'a> Lexer<'a> {
Some(b'*') => {
self.eat_and_assert(|b| b == b'*');
loop {
self.eat_while(|b| b != b'*');
match self.peek() {
Some(b'*') => {
self.eat_and_assert(|b| b == b'*');
match self.peek() {
Some(b'/') => {
self.eat_and_assert(|b| b == b'/');
break; // End of block comment
}
None => break,
_ => {}
if self.eat_past(b'*') {
match self.peek() {
Some(b'/') => {
self.eat_and_assert(|b| b == b'/');
break; // End of block comment
}
None => break,
_ => {}
}
None => break,
_ => unreachable!(), // We should not reach here
} else {
cold();
break;
}
}
@ -570,29 +598,24 @@ impl<'a> Lexer<'a> {
};
loop {
self.eat_while(|b| b != quote);
match self.peek() {
Some(b) if b == quote => {
self.eat_and_assert(|b| b == quote);
match self.peek() {
Some(b) if b == quote => {
self.eat_and_assert(|b| b == quote);
continue;
}
_ => break,
if self.eat_past(quote) {
match self.peek() {
Some(b) if b == quote => {
self.eat_and_assert(|b| b == quote);
continue;
}
_ => break,
}
None => {
let token_text =
String::from_utf8_lossy(&self.input[start..self.offset]).to_string();
return Err(Error::UnterminatedLiteral {
span: (start, self.offset - start).into(),
token_text,
offset: start,
});
}
_ => unreachable!(),
};
} else {
cold();
let token_text =
String::from_utf8_lossy(&self.input[start..self.offset]).to_string();
return Err(Error::UnterminatedLiteral {
span: (start, self.offset - start).into(),
token_text,
offset: start,
});
}
}
Ok(Token::new(&self.input[start..self.offset], tt))
@ -751,25 +774,19 @@ impl<'a> Lexer<'a> {
fn eat_bracket(&mut self) -> Result<Token<'a>> {
let start = self.offset;
self.eat_and_assert(|b| b == b'[');
self.eat_while(|b| b != b']');
match self.peek() {
Some(b']') => {
self.eat_and_assert(|b| b == b']');
Ok(Token::new(
&self.input[start..self.offset],
TokenType::TK_ID,
))
}
None => {
let token_text =
String::from_utf8_lossy(&self.input[start..self.offset]).to_string();
Err(Error::UnterminatedBracket {
span: (start, self.offset - start).into(),
token_text,
offset: start,
})
}
_ => unreachable!(), // We should not reach here
if self.eat_past(b']') {
Ok(Token::new(
&self.input[start..self.offset],
TokenType::TK_ID,
))
} else {
cold();
let token_text = String::from_utf8_lossy(&self.input[start..self.offset]).to_string();
Err(Error::UnterminatedBracket {
span: (start, self.offset - start).into(),
token_text,
offset: start,
})
}
}