diff --git a/harper-comments/src/comment_parser.rs b/harper-comments/src/comment_parser.rs index a414c2f6..08493e07 100644 --- a/harper-comments/src/comment_parser.rs +++ b/harper-comments/src/comment_parser.rs @@ -119,3 +119,28 @@ impl Parser for CommentParser { self.inner.parse(source) } } + +#[cfg(test)] +mod tests { + use super::CommentParser; + use harper_core::parsers::{MarkdownOptions, StrParser}; + + #[test] + fn hang() { + use std::sync::mpsc::channel; + use std::thread; + use std::time::Duration; + + let (tx, rx) = channel::<()>(); + + let handle = thread::spawn(move || { + let opts = MarkdownOptions::default(); + let parser = CommentParser::new_from_language_id("java", opts).unwrap(); + let _res = parser.parse_str("//{@j"); + tx.send(()).expect("send failed"); + }); + + rx.recv_timeout(Duration::from_secs(10)).expect("timed out"); + handle.join().expect("failed to join"); + } +} diff --git a/harper-comments/src/comment_parsers/jsdoc.rs b/harper-comments/src/comment_parsers/jsdoc.rs index 2aefb128..4dae289b 100644 --- a/harper-comments/src/comment_parsers/jsdoc.rs +++ b/harper-comments/src/comment_parsers/jsdoc.rs @@ -145,15 +145,21 @@ fn parse_inline_tag(tokens: &[Token]) -> Option { return None; } + if tokens.len() <= 3 { + return None; + } + let mut cursor = 3; - while !matches!( - tokens.get(cursor), - Some(Token { - kind: TokenKind::Punctuation(Punctuation::CloseCurly), - .. - }) - ) { + while cursor < tokens.len() + && !matches!( + tokens.get(cursor), + Some(Token { + kind: TokenKind::Punctuation(Punctuation::CloseCurly), + .. 
+ }) + ) + { cursor += 1; } diff --git a/harper-core/src/parsers/markdown.rs b/harper-core/src/parsers/markdown.rs index fcdeb931..320828f8 100644 --- a/harper-core/src/parsers/markdown.rs +++ b/harper-core/src/parsers/markdown.rs @@ -161,35 +161,45 @@ impl Parser for Markdown { let mut tokens = Vec::new(); - let mut traversed_bytes = 0; - let mut traversed_chars = 0; + // Precompute a byte-offset -> char-index table: the inner parser yields byte ranges, but + // Harper spans count chars. The final extra slot maps `source_str.len()` for range ends. + let mut byte_to_char = vec![0; source_str.len() + 1]; + let mut char_index = 0; + let mut byte_idx = 0; + for ch in source_str.chars() { + let char_len = ch.len_utf8(); + for _ in 0..char_len { + byte_to_char[byte_idx] = char_index; + byte_idx += 1; + } + char_index += 1; + } + byte_to_char[source_str.len()] = char_index; let mut stack = Vec::new(); // NOTE: the range spits out __byte__ indices, not char indices. // This is why we keep track above. for (event, range) in md_parser.into_offset_iter() { - if range.start > traversed_bytes { - traversed_chars += source_str[traversed_bytes..range.start].chars().count(); - traversed_bytes = range.start; - } + let span_start = byte_to_char[range.start]; + let span_end = byte_to_char[range.end]; match event { pulldown_cmark::Event::SoftBreak => { tokens.push(Token { - span: Span::new_with_len(traversed_chars, 1), + span: Span::new_with_len(span_start, 1), kind: TokenKind::Newline(1), }); } pulldown_cmark::Event::HardBreak => { tokens.push(Token { - span: Span::new_with_len(traversed_chars, 1), + span: Span::new_with_len(span_start, 1), kind: TokenKind::Newline(2), }); } pulldown_cmark::Event::Start(pulldown_cmark::Tag::List(v)) => { tokens.push(Token { - span: Span::new_with_len(traversed_chars, 0), + span: Span::new_with_len(span_start, 0), kind: TokenKind::Newline(2), }); stack.push(pulldown_cmark::Tag::List(v)); @@ -201,7 +211,7 @@ impl Parser for Markdown { | pulldown_cmark::Event::End(pulldown_cmark::TagEnd::CodeBlock) | 
pulldown_cmark::Event::End(pulldown_cmark::TagEnd::TableCell) => { tokens.push(Token { - // We cannot use `traversed_chars` here, as it will still point to the + // We cannot use `span_start` here, because it maps to the // first character of the `Event` at this point. Instead, we use the // position of the previous token's last character. This ensures the // paragraph break is placed at the end of the content, not its beginning. @@ -214,38 +224,39 @@ impl Parser for Markdown { pulldown_cmark::Event::End(_) => { stack.pop(); } - pulldown_cmark::Event::InlineMath(code) - | pulldown_cmark::Event::DisplayMath(code) - | pulldown_cmark::Event::Code(code) => { - let chunk_len = code.chars().count(); + pulldown_cmark::Event::InlineMath(_) + | pulldown_cmark::Event::DisplayMath(_) + | pulldown_cmark::Event::Code(_) => { + let chunk_len = span_end - span_start; tokens.push(Token { - span: Span::new_with_len(traversed_chars, chunk_len), + span: Span::new_with_len(span_start, chunk_len), kind: TokenKind::Unlintable, }); } - pulldown_cmark::Event::Text(text) => { - let chunk_len = text.chars().count(); + pulldown_cmark::Event::Text(_text) => { + let chunk_len = span_end - span_start; if let Some(tag) = stack.last() { use pulldown_cmark::Tag; if matches!(tag, Tag::CodeBlock(..)) { tokens.push(Token { - span: Span::new_with_len(traversed_chars, text.chars().count()), + span: Span::new_with_len(span_start, chunk_len), + kind: TokenKind::Unlintable, }); continue; } if matches!(tag, Tag::Link { .. }) && self.options.ignore_link_title { tokens.push(Token { - span: Span::new_with_len(traversed_chars, text.chars().count()), + span: Span::new_with_len(span_start, chunk_len), kind: TokenKind::Unlintable, }); continue; } if !(matches!(tag, Tag::Paragraph) - || matches!(tag, Tag::Link { .. }) && !self.options.ignore_link_title + || (matches!(tag, Tag::Link { .. }) && !self.options.ignore_link_title) || matches!(tag, Tag::Heading { .. 
}) || matches!(tag, Tag::Item) || matches!(tag, Tag::TableCell) @@ -257,21 +268,19 @@ impl Parser for Markdown { } } - let mut new_tokens = - english_parser.parse(&source[traversed_chars..traversed_chars + chunk_len]); + let mut new_tokens = english_parser.parse(&source[span_start..span_end]); new_tokens .iter_mut() - .for_each(|token| token.span.push_by(traversed_chars)); + .for_each(|token| token.span.push_by(span_start)); tokens.append(&mut new_tokens); } // TODO: Support via `harper-html` - pulldown_cmark::Event::Html(_content) - | pulldown_cmark::Event::InlineHtml(_content) => { - let size = _content.chars().count(); + pulldown_cmark::Event::Html(_) | pulldown_cmark::Event::InlineHtml(_) => { + let size = span_end - span_start; tokens.push(Token { - span: Span::new_with_len(traversed_chars, size), + span: Span::new_with_len(span_start, size), kind: TokenKind::Unlintable, }); } @@ -551,4 +560,19 @@ Paragraph. let tokens = parser.parse_str(source); assert_ne!(tokens.last().unwrap().span.end, 0); } + + #[test] + fn hang() { + let opts = MarkdownOptions::default(); + let parser = Markdown::new(opts); + let _res = parser.parse_str("[[#|]]:A]"); + } + + #[test] + fn hang2() { + // Same input as the `java` comment-parser test; the hang seems to be Java-specific. + let opts = MarkdownOptions::default(); + let parser = Markdown::new(opts); + let _res = parser.parse_str("//{@j"); + } } diff --git a/harper-core/tests/text/tagged/Spell.US.md b/harper-core/tests/text/tagged/Spell.US.md index e0a0ef6d..f2d3354c 100644 --- a/harper-core/tests/text/tagged/Spell.US.md +++ b/harper-core/tests/text/tagged/Spell.US.md @@ -6,7 +6,7 @@ # I/Ddem+ NSg/VB+ V3 D/P NSg/VB P NPl/V3+ VP/J R NPr/J/P I/J/R/Dq NPl P NPr🅪Sg/VB/J+ . NSg/C/P NSg/R/C NPr/J NPr🅪Sg/VB/J+ . I/Ddem+ VL3 VP/J P NSg/VB D+ Nᴹ/Vg/J+ NPl+ IPl+ NSg/VB C/P NSg/I+ NPl/V3+ . 
> # -> To achieve this , the filename of this file contains `.US , which will tell the snapshot generator to use the American dialect , rather than trying to use an automatically detected dialect . +> To achieve this , the filename of this file contains `.US.` , which will tell the snapshot generator to use the American dialect , rather than trying to use an automatically detected dialect . # P VB I/Ddem+ . D NSg P I/Ddem NSg/VB+ V3 Unlintable . I/C+ NPr/VXB NPr/VB D NSg/VB+ NSg P N🅪Sg/VB D NPr/J NSg+ . NPr/VB/J/R C/P Nᴹ/Vg/J P N🅪Sg/VB D/P R VP/J NSg+ . > #