Merge pull request #3068 from rtfeldman/faster-comment-parsing

faster comment parsing
This commit is contained in:
Richard Feldman 2022-05-15 14:27:31 -04:00 committed by GitHub
commit f1e875103a
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 149 additions and 74 deletions

View file

@ -277,79 +277,154 @@ fn eat_line_comment<'a>(
) -> SpaceState<'a> {
use SpaceState::*;
let is_doc_comment = if let Some(b'#') = state.bytes().get(0) {
match state.bytes().get(1) {
Some(b' ') => {
state = state.advance(2);
let mut index = 0;
let bytes = state.bytes();
let length = bytes.len();
true
}
Some(b'\n') => {
// consume the second # and the \n
state = state.advance(1);
state = state.advance_newline();
'outer: loop {
let is_doc_comment = if let Some(b'#') = bytes.get(index) {
match bytes.get(index + 1) {
Some(b' ') => {
state = state.advance(2);
index += 2;
comments_and_newlines.push(CommentOrNewline::DocComment(""));
multiline = true;
return eat_spaces(state, multiline, comments_and_newlines);
}
None => {
// consume the second #
state = state.advance(1);
return Good {
state,
multiline,
comments_and_newlines,
};
}
_ => false,
}
} else {
false
};
let initial = state.bytes();
for c in state.bytes() {
match c {
b'\t' => return HasTab(state),
b'\n' => {
let delta = initial.len() - state.bytes().len();
let comment = unsafe { std::str::from_utf8_unchecked(&initial[..delta]) };
if is_doc_comment {
comments_and_newlines.push(CommentOrNewline::DocComment(comment));
} else {
comments_and_newlines.push(CommentOrNewline::LineComment(comment));
true
}
state = state.advance_newline();
multiline = true;
return eat_spaces(state, multiline, comments_and_newlines);
Some(b'\n') => {
// consume the second # and the \n
state = state.advance(1);
state = state.advance_newline();
index += 2;
comments_and_newlines.push(CommentOrNewline::DocComment(""));
multiline = true;
for c in state.bytes() {
match c {
b' ' => {
state = state.advance(1);
}
b'\n' => {
state = state.advance_newline();
index += 1;
multiline = true;
comments_and_newlines.push(CommentOrNewline::Newline);
}
b'\r' => {
state = state.advance_newline();
}
b'\t' => {
return HasTab(state);
}
b'#' => {
state = state.advance(1);
index += 1;
continue 'outer;
}
_ => break,
}
index += 1;
}
return Good {
state,
multiline,
comments_and_newlines,
};
}
None => {
// consume the second #
state = state.advance(1);
return Good {
state,
multiline,
comments_and_newlines,
};
}
_ => false,
}
b'\r' => {
state = state.advance_newline();
}
_ => {
state = state.advance(1);
} else {
false
};
let loop_start = index;
while index < length {
match bytes[index] {
b'\t' => return HasTab(state),
b'\n' => {
let comment =
unsafe { std::str::from_utf8_unchecked(&bytes[loop_start..index]) };
if is_doc_comment {
comments_and_newlines.push(CommentOrNewline::DocComment(comment));
} else {
comments_and_newlines.push(CommentOrNewline::LineComment(comment));
}
state = state.advance_newline();
multiline = true;
index += 1;
while index < length {
match bytes[index] {
b' ' => {
state = state.advance(1);
}
b'\n' => {
state = state.advance_newline();
multiline = true;
comments_and_newlines.push(CommentOrNewline::Newline);
}
b'\r' => {
state = state.advance_newline();
}
b'\t' => {
return HasTab(state);
}
b'#' => {
state = state.advance(1);
index += 1;
continue 'outer;
}
_ => break,
}
index += 1;
}
return Good {
state,
multiline,
comments_and_newlines,
};
}
b'\r' => {
state = state.advance_newline();
}
_ => {
state = state.advance(1);
}
}
index += 1;
}
}
// We made it to the end of the bytes. This means there's a comment without a trailing newline.
let delta = initial.len() - state.bytes().len();
let comment = unsafe { std::str::from_utf8_unchecked(&initial[..delta]) };
// We made it to the end of the bytes. This means there's a comment without a trailing newline.
let comment = unsafe { std::str::from_utf8_unchecked(&bytes[loop_start..index]) };
if is_doc_comment {
comments_and_newlines.push(CommentOrNewline::DocComment(comment));
} else {
comments_and_newlines.push(CommentOrNewline::LineComment(comment));
}
if is_doc_comment {
comments_and_newlines.push(CommentOrNewline::DocComment(comment));
} else {
comments_and_newlines.push(CommentOrNewline::LineComment(comment));
}
Good {
state,
multiline,
comments_and_newlines,
return Good {
state,
multiline,
comments_and_newlines,
};
}
}

View file

@ -43,18 +43,18 @@ impl<'a> State<'a> {
}
/// Returns a state whose `offset` has been increased by `offset` bytes.
///
/// Unlike `advance_newline`, `line_start` is not modified here.
// NOTE(review): this span comes from a rendered diff, so the removed and the
// added version of the method appear back to back. The removed version borrows
// `&self` and clones before bumping `offset`; the added version is a `const fn`
// that takes `self` by value and bumps `offset` in place without cloning.
#[must_use]
pub(crate) fn advance(&self, offset: usize) -> State<'a> {
let mut state = self.clone();
state.offset += offset;
state
#[inline(always)]
pub(crate) const fn advance(mut self, offset: usize) -> State<'a> {
self.offset += offset;
self
}
/// Returns a state moved forward by exactly one byte, with `line_start` set to
/// the position reported by `pos()` after that move.
///
/// The byte being stepped over is presumably a newline; note that the comment
/// parser in this diff also calls this for `\r` — confirm that is intended.
// NOTE(review): this span comes from a rendered diff, so the removed and the
// added version of the method appear back to back. The removed version borrows
// `&self` and works on a clone; the added version is a `const fn` that takes
// `self` by value and mutates it in place.
#[must_use]
pub(crate) fn advance_newline(&self) -> State<'a> {
let mut state = self.clone();
state.offset += 1;
// `line_start` is assigned after the offset bump, so it refers to the byte
// following the one that was consumed.
state.line_start = state.pos();
state
#[inline(always)]
pub(crate) const fn advance_newline(mut self) -> State<'a> {
self.offset += 1;
// Same ordering as the removed version: bump first, then record the line start.
self.line_start = self.pos();
self
}
/// Returns the current position