Merge pull request #3068 from rtfeldman/faster-comment-parsing

faster comment parsing
This commit is contained in:
Richard Feldman 2022-05-15 14:27:31 -04:00 committed by GitHub
commit f1e875103a
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 149 additions and 74 deletions

View file

@ -277,79 +277,154 @@ fn eat_line_comment<'a>(
) -> SpaceState<'a> { ) -> SpaceState<'a> {
use SpaceState::*; use SpaceState::*;
let is_doc_comment = if let Some(b'#') = state.bytes().get(0) { let mut index = 0;
match state.bytes().get(1) { let bytes = state.bytes();
Some(b' ') => { let length = bytes.len();
state = state.advance(2);
true 'outer: loop {
} let is_doc_comment = if let Some(b'#') = bytes.get(index) {
Some(b'\n') => { match bytes.get(index + 1) {
// consume the second # and the \n Some(b' ') => {
state = state.advance(1); state = state.advance(2);
state = state.advance_newline(); index += 2;
comments_and_newlines.push(CommentOrNewline::DocComment("")); true
multiline = true;
return eat_spaces(state, multiline, comments_and_newlines);
}
None => {
// consume the second #
state = state.advance(1);
return Good {
state,
multiline,
comments_and_newlines,
};
}
_ => false,
}
} else {
false
};
let initial = state.bytes();
for c in state.bytes() {
match c {
b'\t' => return HasTab(state),
b'\n' => {
let delta = initial.len() - state.bytes().len();
let comment = unsafe { std::str::from_utf8_unchecked(&initial[..delta]) };
if is_doc_comment {
comments_and_newlines.push(CommentOrNewline::DocComment(comment));
} else {
comments_and_newlines.push(CommentOrNewline::LineComment(comment));
} }
state = state.advance_newline(); Some(b'\n') => {
multiline = true; // consume the second # and the \n
return eat_spaces(state, multiline, comments_and_newlines); state = state.advance(1);
state = state.advance_newline();
index += 2;
comments_and_newlines.push(CommentOrNewline::DocComment(""));
multiline = true;
for c in state.bytes() {
match c {
b' ' => {
state = state.advance(1);
}
b'\n' => {
state = state.advance_newline();
index += 1;
multiline = true;
comments_and_newlines.push(CommentOrNewline::Newline);
}
b'\r' => {
state = state.advance_newline();
}
b'\t' => {
return HasTab(state);
}
b'#' => {
state = state.advance(1);
index += 1;
continue 'outer;
}
_ => break,
}
index += 1;
}
return Good {
state,
multiline,
comments_and_newlines,
};
}
None => {
// consume the second #
state = state.advance(1);
return Good {
state,
multiline,
comments_and_newlines,
};
}
_ => false,
} }
b'\r' => { } else {
state = state.advance_newline(); false
} };
_ => {
state = state.advance(1); let loop_start = index;
while index < length {
match bytes[index] {
b'\t' => return HasTab(state),
b'\n' => {
let comment =
unsafe { std::str::from_utf8_unchecked(&bytes[loop_start..index]) };
if is_doc_comment {
comments_and_newlines.push(CommentOrNewline::DocComment(comment));
} else {
comments_and_newlines.push(CommentOrNewline::LineComment(comment));
}
state = state.advance_newline();
multiline = true;
index += 1;
while index < length {
match bytes[index] {
b' ' => {
state = state.advance(1);
}
b'\n' => {
state = state.advance_newline();
multiline = true;
comments_and_newlines.push(CommentOrNewline::Newline);
}
b'\r' => {
state = state.advance_newline();
}
b'\t' => {
return HasTab(state);
}
b'#' => {
state = state.advance(1);
index += 1;
continue 'outer;
}
_ => break,
}
index += 1;
}
return Good {
state,
multiline,
comments_and_newlines,
};
}
b'\r' => {
state = state.advance_newline();
}
_ => {
state = state.advance(1);
}
} }
index += 1;
} }
}
// We made it to the end of the bytes. This means there's a comment without a trailing newline. // We made it to the end of the bytes. This means there's a comment without a trailing newline.
let delta = initial.len() - state.bytes().len(); let comment = unsafe { std::str::from_utf8_unchecked(&bytes[loop_start..index]) };
let comment = unsafe { std::str::from_utf8_unchecked(&initial[..delta]) };
if is_doc_comment { if is_doc_comment {
comments_and_newlines.push(CommentOrNewline::DocComment(comment)); comments_and_newlines.push(CommentOrNewline::DocComment(comment));
} else { } else {
comments_and_newlines.push(CommentOrNewline::LineComment(comment)); comments_and_newlines.push(CommentOrNewline::LineComment(comment));
} }
Good { return Good {
state, state,
multiline, multiline,
comments_and_newlines, comments_and_newlines,
};
} }
} }

View file

@ -43,18 +43,18 @@ impl<'a> State<'a> {
} }
#[must_use] #[must_use]
pub(crate) fn advance(&self, offset: usize) -> State<'a> { #[inline(always)]
let mut state = self.clone(); pub(crate) const fn advance(mut self, offset: usize) -> State<'a> {
state.offset += offset; self.offset += offset;
state self
} }
#[must_use] #[must_use]
pub(crate) fn advance_newline(&self) -> State<'a> { #[inline(always)]
let mut state = self.clone(); pub(crate) const fn advance_newline(mut self) -> State<'a> {
state.offset += 1; self.offset += 1;
state.line_start = state.pos(); self.line_start = self.pos();
state self
} }
/// Returns the current position /// Returns the current position