test: add failing example for markdown comment parser (#1948)
Some checks are pending
Binaries / harper-cli - macOS-aarch64 (push) Waiting to run
Binaries / harper-cli - Linux-aarch64-GNU (push) Waiting to run
Binaries / harper-cli - Linux-aarch64-musl (push) Waiting to run
Binaries / harper-cli - macOS-x86_64 (push) Waiting to run
Binaries / harper-cli - Linux-x86_64-GNU (push) Waiting to run
Binaries / harper-cli - Linux-x86_64-musl (push) Waiting to run
Binaries / harper-cli - Windows-x86_64 (push) Waiting to run
Binaries / harper-ls - macOS-aarch64 (push) Waiting to run
Binaries / harper-ls - Linux-aarch64-GNU (push) Waiting to run
Binaries / harper-ls - Linux-aarch64-musl (push) Waiting to run
Binaries / harper-ls - macOS-x86_64 (push) Waiting to run
Binaries / harper-ls - Linux-x86_64-GNU (push) Waiting to run
Binaries / harper-ls - Linux-x86_64-musl (push) Waiting to run
Binaries / harper-ls - Windows-x86_64 (push) Waiting to run
Build Web / build-web (push) Waiting to run
Chrome Plugin / chrome-plugin (push) Waiting to run
Just Checks / just check-js (push) Waiting to run
Just Checks / just check-rust (push) Waiting to run
Just Checks / just test-chrome-plugin (push) Waiting to run
Just Checks / just test-firefox-plugin (push) Waiting to run
Just Checks / just test-harperjs (push) Waiting to run
Just Checks / just test-obsidian (push) Waiting to run
Just Checks / just test-rust (push) Waiting to run
Just Checks / just test-vscode (push) Waiting to run
VS Code Plugin / alpine-arm64 (push) Waiting to run
VS Code Plugin / alpine-x64 (push) Waiting to run
VS Code Plugin / darwin-arm64 (push) Waiting to run
VS Code Plugin / darwin-x64 (push) Waiting to run
VS Code Plugin / linux-arm64 (push) Waiting to run
VS Code Plugin / linux-armhf (push) Waiting to run
VS Code Plugin / linux-x64 (push) Waiting to run
VS Code Plugin / win32-arm64 (push) Waiting to run
VS Code Plugin / win32-x64 (push) Waiting to run
WordPress Plugin / wp-plugin (push) Waiting to run

* test: add failing example for markdown comment parser

* fix(core): crash

* fuzz: add other fuzzing hang

* fix(comments): infinite loop

---------

Co-authored-by: Elijah Potter <me@elijahpotter.dev>
This commit is contained in:
rnxpyke 2025-11-19 00:20:17 +01:00 committed by GitHub
parent 589ca33466
commit 3b1b126dd7
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 90 additions and 35 deletions

View file

@ -119,3 +119,28 @@ impl Parser for CommentParser {
self.inner.parse(source)
}
}
#[cfg(test)]
mod tests {
    use super::CommentParser;
    use harper_core::parsers::{MarkdownOptions, StrParser};

    /// Regression test: parsing the input `//{@j` with the Java comment parser must finish.
    ///
    /// The parse runs on a worker thread so that a hang shows up as a test failure after
    /// ten seconds instead of blocking the whole test run.
    #[test]
    fn hang() {
        use std::sync::mpsc::{RecvTimeoutError, channel};
        use std::thread;
        use std::time::Duration;

        let (tx, rx) = channel::<()>();

        let handle = thread::spawn(move || {
            let opts = MarkdownOptions::default();
            let parser = CommentParser::new_from_language_id("java", opts).unwrap();
            let _res = parser.parse_str("//{@j");
            tx.send(()).expect("send failed");
        });

        match rx.recv_timeout(Duration::from_secs(10)) {
            Ok(()) => {}
            // The worker is still running after the deadline: this is the hang being tested for.
            Err(RecvTimeoutError::Timeout) => panic!("timed out"),
            // The sender was dropped without sending, which means the worker panicked rather
            // than hung. Do not report that as a timeout; `join` below surfaces the panic.
            Err(RecvTimeoutError::Disconnected) => {}
        }

        handle.join().expect("failed to join");
    }
}

View file

@ -145,15 +145,21 @@ fn parse_inline_tag(tokens: &[Token]) -> Option<usize> {
return None;
}
if tokens.len() <= 3 {
return None;
}
let mut cursor = 3;
while !matches!(
tokens.get(cursor),
Some(Token {
kind: TokenKind::Punctuation(Punctuation::CloseCurly),
..
})
) {
while cursor < tokens.len()
&& !matches!(
tokens.get(cursor),
Some(Token {
kind: TokenKind::Punctuation(Punctuation::CloseCurly),
..
})
)
{
cursor += 1;
}

View file

@ -161,35 +161,45 @@ impl Parser for Markdown {
let mut tokens = Vec::new();
let mut traversed_bytes = 0;
let mut traversed_chars = 0;
// Build a mapping from the inner parser's byte-based indexing to Harper's char-based
// indexing
let mut byte_to_char = vec![0; source_str.len() + 1];
let mut char_index = 0;
let mut byte_idx = 0;
for ch in source_str.chars() {
let char_len = ch.len_utf8();
for _ in 0..char_len {
byte_to_char[byte_idx] = char_index;
byte_idx += 1;
}
char_index += 1;
}
byte_to_char[source_str.len()] = char_index;
let mut stack = Vec::new();
// NOTE: the range spits out __byte__ indices, not char indices.
// This is why we keep track above.
for (event, range) in md_parser.into_offset_iter() {
if range.start > traversed_bytes {
traversed_chars += source_str[traversed_bytes..range.start].chars().count();
traversed_bytes = range.start;
}
let span_start = byte_to_char[range.start];
let span_end = byte_to_char[range.end];
match event {
pulldown_cmark::Event::SoftBreak => {
tokens.push(Token {
span: Span::new_with_len(traversed_chars, 1),
span: Span::new_with_len(span_start, 1),
kind: TokenKind::Newline(1),
});
}
pulldown_cmark::Event::HardBreak => {
tokens.push(Token {
span: Span::new_with_len(traversed_chars, 1),
span: Span::new_with_len(span_start, 1),
kind: TokenKind::Newline(2),
});
}
pulldown_cmark::Event::Start(pulldown_cmark::Tag::List(v)) => {
tokens.push(Token {
span: Span::new_with_len(traversed_chars, 0),
span: Span::new_with_len(span_start, 0),
kind: TokenKind::Newline(2),
});
stack.push(pulldown_cmark::Tag::List(v));
@ -201,7 +211,7 @@ impl Parser for Markdown {
| pulldown_cmark::Event::End(pulldown_cmark::TagEnd::CodeBlock)
| pulldown_cmark::Event::End(pulldown_cmark::TagEnd::TableCell) => {
tokens.push(Token {
// We cannot use `traversed_chars` here, as it will still point to the
// We cannot use `span_start` here, as it will still point to the
// first character of the `Event` at this point. Instead, we use the
// position of the previous token's last character. This ensures the
// paragraph break is placed at the end of the content, not its beginning.
@ -214,38 +224,39 @@ impl Parser for Markdown {
pulldown_cmark::Event::End(_) => {
stack.pop();
}
pulldown_cmark::Event::InlineMath(code)
| pulldown_cmark::Event::DisplayMath(code)
| pulldown_cmark::Event::Code(code) => {
let chunk_len = code.chars().count();
pulldown_cmark::Event::InlineMath(_)
| pulldown_cmark::Event::DisplayMath(_)
| pulldown_cmark::Event::Code(_) => {
let chunk_len = span_end - span_start;
tokens.push(Token {
span: Span::new_with_len(traversed_chars, chunk_len),
span: Span::new_with_len(span_start, chunk_len),
kind: TokenKind::Unlintable,
});
}
pulldown_cmark::Event::Text(text) => {
let chunk_len = text.chars().count();
pulldown_cmark::Event::Text(_text) => {
let chunk_len = span_end - span_start;
if let Some(tag) = stack.last() {
use pulldown_cmark::Tag;
if matches!(tag, Tag::CodeBlock(..)) {
tokens.push(Token {
span: Span::new_with_len(traversed_chars, text.chars().count()),
span: Span::new_with_len(span_start, chunk_len),
kind: TokenKind::Unlintable,
});
continue;
}
if matches!(tag, Tag::Link { .. }) && self.options.ignore_link_title {
tokens.push(Token {
span: Span::new_with_len(traversed_chars, text.chars().count()),
span: Span::new_with_len(span_start, chunk_len),
kind: TokenKind::Unlintable,
});
continue;
}
if !(matches!(tag, Tag::Paragraph)
|| matches!(tag, Tag::Link { .. }) && !self.options.ignore_link_title
|| (matches!(tag, Tag::Link { .. }) && !self.options.ignore_link_title)
|| matches!(tag, Tag::Heading { .. })
|| matches!(tag, Tag::Item)
|| matches!(tag, Tag::TableCell)
@ -257,21 +268,19 @@ impl Parser for Markdown {
}
}
let mut new_tokens =
english_parser.parse(&source[traversed_chars..traversed_chars + chunk_len]);
let mut new_tokens = english_parser.parse(&source[span_start..span_end]);
new_tokens
.iter_mut()
.for_each(|token| token.span.push_by(traversed_chars));
.for_each(|token| token.span.push_by(span_start));
tokens.append(&mut new_tokens);
}
// TODO: Support via `harper-html`
pulldown_cmark::Event::Html(_content)
| pulldown_cmark::Event::InlineHtml(_content) => {
let size = _content.chars().count();
pulldown_cmark::Event::Html(_) | pulldown_cmark::Event::InlineHtml(_) => {
let size = span_end - span_start;
tokens.push(Token {
span: Span::new_with_len(traversed_chars, size),
span: Span::new_with_len(span_start, size),
kind: TokenKind::Unlintable,
});
}
@ -551,4 +560,19 @@ Paragraph.
let tokens = parser.parse_str(source);
assert_ne!(tokens.last().unwrap().span.end, 0);
}
/// Parsing `[[#|]]:A]` as Markdown must return; the test passes as long as the call
/// neither panics nor loops forever. (Presumably a fuzzer-found input; the test asserts
/// nothing about the produced tokens.)
#[test]
fn hang() {
    let markdown = Markdown::new(MarkdownOptions::default());
    let _tokens = markdown.parse_str("[[#|]]:A]");
}
/// Feeds `//{@j` straight to the Markdown parser and only requires that the call returns.
///
/// Original author's note: the problem with this input seems to be specific to Java
/// (comment parsing), so this test is a guard for the plain Markdown path.
#[test]
fn hang2() {
    let markdown = Markdown::new(MarkdownOptions::default());
    let _tokens = markdown.parse_str("//{@j");
}
}

View file

@ -6,7 +6,7 @@
# I/Ddem+ NSg/VB+ V3 D/P NSg/VB P NPl/V3+ VP/J R NPr/J/P I/J/R/Dq NPl P NPr🅪Sg/VB/J+ . NSg/C/P NSg/R/C NPr/J NPr🅪Sg/VB/J+ . I/Ddem+ VL3 VP/J P NSg/VB D+ Nᴹ/Vg/J+ NPl+ IPl+ NSg/VB C/P NSg/I+ NPl/V3+ .
>
#
> To achieve this , the filename of this file contains `.US , which will tell the snapshot generator to use the American dialect , rather than trying to use an automatically detected dialect .
> To achieve this , the filename of this file contains `.US.` , which will tell the snapshot generator to use the American dialect , rather than trying to use an automatically detected dialect .
# P VB I/Ddem+ . D NSg P I/Ddem NSg/VB+ V3 Unlintable . I/C+ NPr/VXB NPr/VB D NSg/VB+ NSg P N🅪Sg/VB D NPr/J NSg+ . NPr/VB/J/R C/P Nᴹ/Vg/J P N🅪Sg/VB D/P R VP/J NSg+ .
>
#