diff --git a/harper-core/src/dict_word_metadata.rs b/harper-core/src/dict_word_metadata.rs index 5f68a647..ee933f0f 100644 --- a/harper-core/src/dict_word_metadata.rs +++ b/harper-core/src/dict_word_metadata.rs @@ -43,9 +43,9 @@ pub struct DictWordMetadata { pub common: bool, #[serde(default = "default_none")] pub derived_from: Option, - /// Generated by a chunker + /// Generated by a chunker. Declares whether the word is a member of a nominal phrase. pub np_member: Option, - /// Generated by a POS tagger + /// Generated by a POS tagger. Declares what it inferred the word's part of speech to be. pub pos_tag: Option, } diff --git a/harper-core/src/document.rs b/harper-core/src/document.rs index c658a70b..7f0ebc07 100644 --- a/harper-core/src/document.rs +++ b/harper-core/src/document.rs @@ -918,6 +918,7 @@ impl TokenStringExt for Document { create_fns_on_doc!(verb); create_fns_on_doc!(word); create_fns_on_doc!(word_like); + create_fns_on_doc!(heading_start); fn first_sentence_word(&self) -> Option<&Token> { self.tokens.first_sentence_word() @@ -947,6 +948,10 @@ impl TokenStringExt for Document { self.tokens.iter_paragraphs() } + fn iter_headings(&self) -> impl Iterator + '_ { + self.tokens.iter_headings() + } + fn iter_sentences(&self) -> impl Iterator + '_ { self.tokens.iter_sentences() } diff --git a/harper-core/src/linting/currency_placement.rs b/harper-core/src/linting/currency_placement.rs index 2e1826bc..1566747d 100644 --- a/harper-core/src/linting/currency_placement.rs +++ b/harper-core/src/linting/currency_placement.rs @@ -110,7 +110,7 @@ mod tests { #[test] fn multiple_dollar() { assert_suggestion_result( - "They were either 25$ 24$ or 23$.", + "They were either 25\\$ 24\\$ or 23\\$.", CurrencyPlacement::default(), "They were either $25 $24 or $23.", ); diff --git a/harper-core/src/linting/lint_group.rs b/harper-core/src/linting/lint_group.rs index 99081def..d0b54ae3 100644 --- a/harper-core/src/linting/lint_group.rs +++ 
b/harper-core/src/linting/lint_group.rs @@ -185,6 +185,7 @@ use super::touristic::Touristic; use super::unclosed_quotes::UnclosedQuotes; use super::update_place_names::UpdatePlaceNames; use super::use_genitive::UseGenitive; +use super::use_title_case::UseTitleCase; use super::verb_to_adjective::VerbToAdjective; use super::very_unique::VeryUnique; use super::vice_versa::ViceVersa; @@ -691,6 +692,9 @@ impl LintGroup { out.add("MassPlurals", MassPlurals::new(dictionary.clone())); out.config.set_rule_enabled("MassPlurals", true); + out.add("UseTitleCase", UseTitleCase::new(dictionary.clone())); + out.config.set_rule_enabled("UseTitleCase", true); + out.add_chunk_expr_linter( "DisjointPrefixes", DisjointPrefixes::new(dictionary.clone()), diff --git a/harper-core/src/linting/mod.rs b/harper-core/src/linting/mod.rs index a124bf93..20639a87 100644 --- a/harper-core/src/linting/mod.rs +++ b/harper-core/src/linting/mod.rs @@ -198,6 +198,7 @@ mod touristic; mod unclosed_quotes; mod update_place_names; mod use_genitive; +mod use_title_case; mod verb_to_adjective; mod very_unique; mod vice_versa; @@ -254,7 +255,8 @@ where #[cfg(test)] pub mod tests { - use crate::{Document, Span, Token, parsers::PlainEnglish}; + use crate::parsers::Markdown; + use crate::{Document, Span, Token}; use hashbrown::HashSet; /// Extension trait for converting spans of tokens back to their original text @@ -479,7 +481,7 @@ pub mod tests { loop { let test = Document::new_from_vec( text_chars.clone().into(), - &PlainEnglish, + &Markdown::default(), &FstDictionary::curated(), ); let lints = linter.lint(&test); diff --git a/harper-core/src/linting/use_title_case.rs b/harper-core/src/linting/use_title_case.rs new file mode 100644 index 00000000..064dd8cd --- /dev/null +++ b/harper-core/src/linting/use_title_case.rs @@ -0,0 +1,69 @@ +use crate::{Document, TokenStringExt, spell::Dictionary, title_case::try_make_title_case}; + +use super::{Lint, LintKind, Linter, Suggestion}; + +pub struct UseTitleCase { + 
dict: D, +} + +impl UseTitleCase { + pub fn new(dict: D) -> Self { + Self { dict } + } +} + +impl Linter for UseTitleCase { + fn lint(&mut self, document: &Document) -> Vec { + let mut lints = Vec::new(); + + for heading in document.iter_headings() { + let Some(span) = heading.span() else { + continue; + }; + + if let Some(title_case) = + try_make_title_case(heading, document.get_source(), &self.dict) + { + lints.push(Lint { + span, + lint_kind: LintKind::Capitalization, + suggestions: vec![Suggestion::ReplaceWith(title_case)], + message: "Try to use title case in headings.".to_owned(), + priority: 127, + }); + } + } + + lints + } + + fn description(&self) -> &str { + "Prompts you to use title case in relevant headings." + } +} + +#[cfg(test)] +mod tests { + use crate::linting::tests::assert_suggestion_result; + use crate::spell::FstDictionary; + + use super::UseTitleCase; + + #[test] + fn simple_correction() { + assert_suggestion_result( + "# This is a title", + UseTitleCase::new(FstDictionary::curated()), + "# This Is a Title", + ); + } + + #[test] + fn double_correction() { + assert_suggestion_result( + "# This is a title\n\n## This is a subtitle", + UseTitleCase::new(FstDictionary::curated()), + "# This Is a Title\n\n## This Is a Subtitle", + ); + } +} diff --git a/harper-core/src/parsers/markdown.rs b/harper-core/src/parsers/markdown.rs index 320828f8..0498b99d 100644 --- a/harper-core/src/parsers/markdown.rs +++ b/harper-core/src/parsers/markdown.rs @@ -204,7 +204,16 @@ impl Parser for Markdown { }); stack.push(pulldown_cmark::Tag::List(v)); } - pulldown_cmark::Event::Start(tag) => stack.push(tag), + pulldown_cmark::Event::Start(tag) => { + if matches!(tag, pulldown_cmark::Tag::Heading { .. 
}) { + tokens.push(Token { + span: Span::new_with_len(span_start, 0), + kind: TokenKind::HeadingStart, + }); + } + + stack.push(tag) + } pulldown_cmark::Event::End(pulldown_cmark::TagEnd::Paragraph) | pulldown_cmark::Event::End(pulldown_cmark::TagEnd::Item) | pulldown_cmark::Event::End(pulldown_cmark::TagEnd::Heading(_)) @@ -575,4 +584,28 @@ Paragraph. let parser = Markdown::new(opts); let _res = parser.parse_str("//{@j"); } + + #[test] + fn simple_headings_are_marked() { + let opts = MarkdownOptions::default(); + let parser = Markdown::new(opts); + let tokens = parser.parse_str("# This is a simple heading"); + + assert_eq!(tokens.iter_heading_starts().count(), 1); + assert_eq!(tokens.iter_headings().count(), 1); + } + + #[test] + fn multiple_headings_are_marked() { + let opts = MarkdownOptions::default(); + let parser = Markdown::new(opts); + let tokens = parser.parse_str( + r#"# This is a simple heading + +## This is a second simple heading"#, + ); + + assert_eq!(tokens.iter_heading_starts().count(), 2); + assert_eq!(tokens.iter_headings().count(), 2); + } } diff --git a/harper-core/src/parsers/mod.rs b/harper-core/src/parsers/mod.rs index db87dfc8..e316c09c 100644 --- a/harper-core/src/parsers/mod.rs +++ b/harper-core/src/parsers/mod.rs @@ -4,6 +4,7 @@ mod collapse_identifiers; mod isolate_english; mod markdown; mod mask; +mod oops_all_headings; mod org_mode; mod plain_english; @@ -12,6 +13,7 @@ pub use collapse_identifiers::CollapseIdentifiers; pub use isolate_english::IsolateEnglish; pub use markdown::{Markdown, MarkdownOptions}; pub use mask::Mask; +pub use oops_all_headings::OopsAllHeadings; pub use org_mode::OrgMode; pub use plain_english::PlainEnglish; diff --git a/harper-core/src/parsers/oops_all_headings.rs b/harper-core/src/parsers/oops_all_headings.rs new file mode 100644 index 00000000..e281531a --- /dev/null +++ b/harper-core/src/parsers/oops_all_headings.rs @@ -0,0 +1,49 @@ +use crate::{Span, Token, TokenKind}; + +use super::Parser; + +/// A parser 
that wraps another, forcing the entirety of the document to be composed of headings. +pub struct OopsAllHeadings { + inner: P, +} + +impl OopsAllHeadings

{ + pub fn new(inner: P) -> Self { + Self { inner } + } +} + +impl Parser for OopsAllHeadings

{ + fn parse(&self, source: &[char]) -> Vec { + let inner = self.inner.parse(source); + let mut output = Vec::with_capacity(inner.capacity()); + + output.push(Token { + span: Span::default(), + kind: TokenKind::HeadingStart, + }); + + let mut iter = inner.into_iter().peekable(); + + while let Some(tok) = iter.next() { + let heading_start = if tok.kind.is_paragraph_break() + && iter.peek().is_some_and(|t| !t.kind.is_heading_start()) + { + Some(Token { + span: Span::new_with_len(tok.span.end, 0), + kind: TokenKind::HeadingStart, + }) + } else { + None + }; + + output.push(tok); + + if let Some(extra) = heading_start { + output.push(extra); + } + } + + output + } +} diff --git a/harper-core/src/title_case.rs b/harper-core/src/title_case.rs index 264ca2ed..736f5e00 100644 --- a/harper-core/src/title_case.rs +++ b/harper-core/src/title_case.rs @@ -6,6 +6,7 @@ use crate::TokenKind; use hashbrown::HashSet; use lazy_static::lazy_static; +use crate::Punctuation; use crate::spell::Dictionary; use crate::{CharStringExt, Document, TokenStringExt, parsers::Parser}; @@ -27,17 +28,46 @@ pub fn make_title_case_chars( make_title_case(document.get_tokens(), source.as_slice(), dict) } -pub fn make_title_case(toks: &[Token], source: &[char], dict: &impl Dictionary) -> Vec { +pub fn try_make_title_case( + toks: &[Token], + source: &[char], + dict: &impl Dictionary, +) -> Option> { if toks.is_empty() { - return Vec::new(); + return None; } let start_index = toks.first().unwrap().span.start; + let relevant_text = toks.span().unwrap().get_content(source); - let mut word_likes = toks.iter_word_likes().enumerate().peekable(); - let mut output = toks.span().unwrap().get_content(source).to_vec(); + let mut word_likes = toks.iter_word_like_indices().enumerate().peekable(); + + let mut output = None; + let mut previous_word_index = 0; + + // Checks if the provided char is different from the pending output or the source. If so, it will + // set the output. 
The goal here is to avoid allocating if no edits must be made. + let mut set_output_char = |idx: usize, new_char: char| { + if output + .as_ref() + .is_some_and(|o: &Vec| o[idx] != new_char) + || relevant_text[idx] != new_char + { + if output.is_none() { + output = Some(relevant_text.to_vec()) + } + + let Some(mutable) = &mut output else { + panic!("We just set output to `Some`. This should be impossible."); + }; + + mutable[idx] = new_char; + } + }; + + while let Some((index, word_idx)) = word_likes.next() { + let word = &toks[word_idx]; - while let Some((index, word)) = word_likes.next() { if let Some(Some(metadata)) = word.kind.as_word() && metadata.is_proper_noun() { @@ -46,59 +76,87 @@ pub fn make_title_case(toks: &[Token], source: &[char], dict: &impl Dictionary) if let Some(correct_caps) = dict.get_correct_capitalization_of(orig_text) { // It should match the dictionary verbatim - output[word.span.start - start_index..word.span.end - start_index] - .iter_mut() - .enumerate() - .for_each(|(idx, c)| *c = correct_caps[idx]); + for (i, c) in correct_caps.iter().enumerate() { + set_output_char(word.span.start - start_index + i, *c); + } } }; - let should_capitalize = should_capitalize_token(word, source, dict) + // Capitalize the first word following a colon to match Chicago style. + let is_after_colon = toks[previous_word_index..word_idx] + .iter() + .any(|tok| matches!(tok.kind, TokenKind::Punctuation(Punctuation::Colon))); + + let should_capitalize = is_after_colon + || should_capitalize_token(word, source) || index == 0 || word_likes.peek().is_none(); if should_capitalize { - output[word.span.start - start_index] = - output[word.span.start - start_index].to_ascii_uppercase(); + set_output_char( + word.span.start - start_index, + relevant_text[word.span.start - start_index].to_ascii_uppercase(), + ); } else { // The whole word should be lowercase. 
for i in word.span { - output[i - start_index] = output[i - start_index].to_ascii_lowercase(); + set_output_char( + i - start_index, + relevant_text[i - start_index].to_ascii_lowercase(), + ); } } + + previous_word_index = word_idx + } + + if let Some(output) = &output + && output.as_slice() == relevant_text + { + return None; } output } +pub fn make_title_case(toks: &[Token], source: &[char], dict: &impl Dictionary) -> Vec { + try_make_title_case(toks, source, dict) + .unwrap_or_else(|| toks.span().unwrap_or_default().get_content(source).to_vec()) +} + /// Determines whether a token should be capitalized. /// Is not responsible for capitalization requirements that are dependent on token position. -fn should_capitalize_token(tok: &Token, source: &[char], dict: &impl Dictionary) -> bool { +fn should_capitalize_token(tok: &Token, source: &[char]) -> bool { match &tok.kind { TokenKind::Word(Some(metadata)) => { // Only specific conjunctions are not capitalized. lazy_static! { static ref SPECIAL_CONJUNCTIONS: HashSet> = - ["and", "but", "for", "or", "nor"] + ["and", "but", "for", "or", "nor", "as"] .iter() .map(|v| v.chars().collect()) .collect(); + static ref SPECIAL_ARTICLES: HashSet> = ["a", "an", "the"] + .iter() + .map(|v| v.chars().collect()) + .collect(); } let chars = tok.span.get_content(source); let chars_lower = chars.to_lower(); - let mut metadata = Cow::Borrowed(metadata); - - if let Some(metadata_lower) = dict.get_word_metadata(&chars_lower) { - metadata = Cow::Owned(metadata.clone().or(&metadata_lower)); - } + let metadata = Cow::Borrowed(metadata); let is_short_preposition = metadata.preposition && tok.span.len() <= 4; + if chars_lower.as_ref() == ['a', 'l', 'l'] { + return true; + } + !is_short_preposition - && !metadata.is_determiner() + && !metadata.is_non_possessive_determiner() && !SPECIAL_CONJUNCTIONS.contains(chars_lower.as_ref()) + && !SPECIAL_ARTICLES.contains(chars_lower.as_ref()) } _ => true, } @@ -267,4 +325,184 @@ mod tests { "I Spoke at 
WordCamp U.S. in 2025", ); } + + #[test] + fn fixes_your_correctly() { + assert_eq!( + make_title_case_str( + "it is not your friend", + &PlainEnglish, + &FstDictionary::curated() + ), + "It Is Not Your Friend", + ); + } + + #[test] + fn handles_old_man_and_the_sea() { + assert_eq!( + make_title_case_str( + "the old man and the sea", + &PlainEnglish, + &FstDictionary::curated() + ), + "The Old Man and the Sea", + ); + } + + #[test] + fn handles_great_story_with_subtitle() { + assert_eq!( + make_title_case_str( + "the great story: a tale of two cities", + &PlainEnglish, + &FstDictionary::curated() + ), + "The Great Story: A Tale of Two Cities", + ); + } + + #[test] + fn handles_lantern_and_moths() { + assert_eq!( + make_title_case_str( + "lantern flickered; moths began their worship", + &PlainEnglish, + &FstDictionary::curated() + ), + "Lantern Flickered; Moths Began Their Worship", + ); + } + + #[test] + fn handles_static_with_ghosts() { + assert_eq!( + make_title_case_str( + "static filled the room with ghosts", + &PlainEnglish, + &FstDictionary::curated() + ), + "Static Filled the Room with Ghosts", + ); + } + + #[test] + fn handles_glass_trembled_before_thunder() { + assert_eq!( + make_title_case_str( + "glass trembled before thunder arrived.", + &PlainEnglish, + &FstDictionary::curated() + ), + "Glass Trembled Before Thunder Arrived.", + ); + } + + #[test] + fn handles_hepatitis_b_shots() { + assert_eq!( + make_title_case_str( + "an end to hepatitis b shots for all newborns", + &PlainEnglish, + &FstDictionary::curated() + ), + "An End to Hepatitis B Shots for All Newborns", + ); + } + + #[test] + fn handles_trump_approval_rating() { + assert_eq!( + make_title_case_str( + "trump's approval rating dips as views of his handling of the economy sour", + &PlainEnglish, + &FstDictionary::curated() + ), + "Trump's Approval Rating Dips as Views of His Handling of the Economy Sour", + ); + } + + #[test] + fn handles_last_door() { + assert_eq!( + make_title_case_str("the 
last door", &PlainEnglish, &FstDictionary::curated()), + "The Last Door", + ); + } + + #[test] + fn handles_midnight_river() { + assert_eq!( + make_title_case_str("midnight river", &PlainEnglish, &FstDictionary::curated()), + "Midnight River", + ); + } + + #[test] + fn handles_a_quiet_room() { + assert_eq!( + make_title_case_str("a quiet room", &PlainEnglish, &FstDictionary::curated()), + "A Quiet Room", + ); + } + + #[test] + fn handles_broken_map() { + assert_eq!( + make_title_case_str("broken map", &PlainEnglish, &FstDictionary::curated()), + "Broken Map", + ); + } + + #[test] + fn handles_fire_in_autumn() { + assert_eq!( + make_title_case_str("fire in autumn", &PlainEnglish, &FstDictionary::curated()), + "Fire in Autumn", + ); + } + + #[test] + fn handles_hidden_path() { + assert_eq!( + make_title_case_str("the hidden path", &PlainEnglish, &FstDictionary::curated()), + "The Hidden Path", + ); + } + + #[test] + fn handles_under_blue_skies() { + assert_eq!( + make_title_case_str("under blue skies", &PlainEnglish, &FstDictionary::curated()), + "Under Blue Skies", + ); + } + + #[test] + fn handles_lost_and_found() { + assert_eq!( + make_title_case_str("lost and found", &PlainEnglish, &FstDictionary::curated()), + "Lost and Found", + ); + } + + #[test] + fn handles_silent_watcher() { + assert_eq!( + make_title_case_str( + "the silent watcher", + &PlainEnglish, + &FstDictionary::curated() + ), + "The Silent Watcher", + ); + } + + #[test] + fn handles_winter_road() { + assert_eq!( + make_title_case_str("winter road", &PlainEnglish, &FstDictionary::curated()), + "Winter Road", + ); + } } diff --git a/harper-core/src/token_kind.rs b/harper-core/src/token_kind.rs index a4e4dcee..e4b2989d 100644 --- a/harper-core/src/token_kind.rs +++ b/harper-core/src/token_kind.rs @@ -50,6 +50,7 @@ pub enum TokenKind { Unlintable, ParagraphBreak, Regexish, + HeadingStart, } impl TokenKind { diff --git a/harper-core/src/token_string_ext.rs b/harper-core/src/token_string_ext.rs index 
8b384819..3ff3aed0 100644 --- a/harper-core/src/token_string_ext.rs +++ b/harper-core/src/token_string_ext.rs @@ -88,6 +88,7 @@ pub trait TokenStringExt: private::Sealed { create_decl_for!(verb); create_decl_for!(word); create_decl_for!(word_like); + create_decl_for!(heading_start); fn iter_linking_verb_indices(&self) -> impl Iterator + '_; fn iter_linking_verbs(&self) -> impl Iterator + '_; @@ -106,6 +107,12 @@ pub trait TokenStringExt: private::Sealed { /// paragraphs in a document. fn iter_paragraphs(&self) -> impl Iterator + '_; + /// Get an iterator over token slices that represent headings. + /// + /// A heading begins with a [`TokenKind::HeadingStart`] token and ends with + /// the next [`TokenKind::ParagraphBreak`]. + fn iter_headings(&self) -> impl Iterator + '_; + /// Get an iterator over token slices that represent the individual /// sentences in a document. fn iter_sentences(&self) -> impl Iterator + '_; @@ -139,6 +146,7 @@ impl TokenStringExt for [Token] { create_fns_for!(verb); create_fns_for!(word_like); create_fns_for!(word); + create_fns_for!(heading_start); fn first_non_whitespace(&self) -> Option<&Token> { self.iter().find(|t| !t.kind.is_whitespace()) @@ -230,6 +238,17 @@ impl TokenStringExt for [Token] { first_pg.into_iter().chain(rest).chain(last_pg) } + fn iter_headings(&self) -> impl Iterator + '_ { + self.iter_heading_start_indices().map(|start| { + let end = self[start..] 
+ .iter() + .position(|t| t.kind.is_paragraph_break()) + .unwrap_or(self[start..].len() - 1); + + &self[start..=start + end] + }) + } + fn iter_sentences(&self) -> impl Iterator + '_ { let first_sentence = self .iter_sentence_terminator_indices() diff --git a/harper-core/tests/pos_tags.rs b/harper-core/tests/pos_tags.rs index 05fccecc..94a61736 100644 --- a/harper-core/tests/pos_tags.rs +++ b/harper-core/tests/pos_tags.rs @@ -302,6 +302,7 @@ fn format_tag(kind: &TokenKind) -> Cow<'static, str> { TokenKind::Unlintable => Cow::Borrowed("Unlintable"), TokenKind::Regexish => Cow::Borrowed("Regexish"), TokenKind::ParagraphBreak => Cow::Borrowed("ParagraphBreak"), + TokenKind::HeadingStart => Cow::Borrowed("HeadingStart"), } } diff --git a/harper-core/tests/run_tests.rs b/harper-core/tests/run_tests.rs index a67e146a..ed661b51 100644 --- a/harper-core/tests/run_tests.rs +++ b/harper-core/tests/run_tests.rs @@ -92,6 +92,8 @@ create_test!(issue_1988.md, 0, Dialect::American); create_test!(issue_2054_clean.md, 0, Dialect::British); create_test!(issue_1873.md, 0, Dialect::British); create_test!(issue_2246.md, 0, Dialect::American); +create_test!(title_case_errors.md, 2, Dialect::American); +create_test!(title_case_clean.md, 0, Dialect::American); create_test!(issue_2233.md, 0, Dialect::American); create_test!(issue_2240.md, 0, Dialect::American); // It just matters that it is > 1 diff --git a/harper-core/tests/test_sources/title_case_clean.md b/harper-core/tests/test_sources/title_case_clean.md new file mode 100644 index 00000000..6d6dd7de --- /dev/null +++ b/harper-core/tests/test_sources/title_case_clean.md @@ -0,0 +1,7 @@ +# Here, We Try to Test Our Title-Casing Feature + +It should only pay attention to headings. + +## Maybe It Works? + +There will be a similar file with the corrected headings. 
diff --git a/harper-core/tests/test_sources/title_case_errors.md b/harper-core/tests/test_sources/title_case_errors.md new file mode 100644 index 00000000..3d19b09b --- /dev/null +++ b/harper-core/tests/test_sources/title_case_errors.md @@ -0,0 +1,7 @@ +# Here, we try to test our title-casing feature + +It should only pay attention to headings. + +## Maybe it works? + +There will be a similar file with the corrected headings. diff --git a/harper-core/tests/test_sources/whack_bullets.md b/harper-core/tests/test_sources/whack_bullets.md index e7f886e2..d9359066 100644 --- a/harper-core/tests/test_sources/whack_bullets.md +++ b/harper-core/tests/test_sources/whack_bullets.md @@ -1,4 +1,4 @@ -# This is a big heading, with a lot of words +# This Is a Big Heading, with a Lot of Words - New here's a list, this part doesn't have as many words - But this part does, it has so many words, more words than you could ever dream of diff --git a/harper-core/tests/text/linters/Alice's Adventures in Wonderland.snap.yml b/harper-core/tests/text/linters/Alice's Adventures in Wonderland.snap.yml index b7c0e061..f00c7910 100644 --- a/harper-core/tests/text/linters/Alice's Adventures in Wonderland.snap.yml +++ b/harper-core/tests/text/linters/Alice's Adventures in Wonderland.snap.yml @@ -985,6 +985,15 @@ Suggest: +Lint: Capitalization (127 priority) +Message: | + 558 | ## CHAPTER IV: The Rabbit Sends in a Little Bill + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Try to use title case in headings. +Suggest: + - Replace with: “## CHAPTER IV: The Rabbit Sends in a little Bill” + + + Lint: Readability (127 priority) Message: | 564 | wonder?” Alice guessed in a moment that it was looking for the fan and the pair @@ -1417,6 +1426,15 @@ Message: | +Lint: Capitalization (127 priority) +Message: | + 796 | ## CHAPTER V: Advice from a Caterpillar + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Try to use title case in headings. 
+Suggest: + - Replace with: “## CHAPTER v: Advice from a Caterpillar” + + + Lint: Readability (127 priority) Message: | 822 | “Well, perhaps you haven’t found it so yet,” said Alice; “but when you have to @@ -1639,6 +1657,15 @@ Suggest: +Lint: Capitalization (127 priority) +Message: | + 1059 | ## CHAPTER VI: Pig and Pepper + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Try to use title case in headings. +Suggest: + - Replace with: “## CHAPTER Vi: Pig and Pepper” + + + Lint: Readability (127 priority) Message: | 1061 | For a minute or two she stood looking at the house, and wondering what to do diff --git a/harper-core/tests/text/linters/Computer science.snap.yml b/harper-core/tests/text/linters/Computer science.snap.yml index 6cf72c81..95d38e27 100644 --- a/harper-core/tests/text/linters/Computer science.snap.yml +++ b/harper-core/tests/text/linters/Computer science.snap.yml @@ -1,3 +1,12 @@ +Lint: Capitalization (127 priority) +Message: | + 6 | # Computer science + | ^~~~~~~~~~~~~~~~~~ Try to use title case in headings. +Suggest: + - Replace with: “# Computer Science” + + + Lint: Style (31 priority) Message: | 27 | problem-solving, decision-making, environmental adaptation, planning and @@ -197,6 +206,15 @@ Suggest: +Lint: Capitalization (127 priority) +Message: | + 102 | ## Etymology and scope + | ^~~~~~~~~~~~~~~~~~~~~~ Try to use title case in headings. +Suggest: + - Replace with: “## Etymology and Scope” + + + Lint: Readability (127 priority) Message: | 104 | Although first proposed in 1956, the term "computer science" appears in a 1959 @@ -594,6 +612,15 @@ Suggest: +Lint: Capitalization (127 priority) +Message: | + 178 | ### Epistemology of computer science + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Try to use title case in headings. +Suggest: + - Replace with: “### Epistemology of Computer Science” + + + Lint: Spelling (63 priority) Message: | 181 | computer science is a discipline of science, mathematics, or engineering. 
Allen @@ -682,6 +709,15 @@ Suggest: +Lint: Capitalization (127 priority) +Message: | + 211 | ### Paradigms of computer science + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Try to use title case in headings. +Suggest: + - Replace with: “### Paradigms of Computer Science” + + + Lint: Spelling (63 priority) Message: | 214 | separate paradigms in computer science. Peter Wegner argued that those paradigms @@ -824,6 +860,24 @@ Suggest: +Lint: Capitalization (127 priority) +Message: | + 243 | ### Theoretical computer science + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Try to use title case in headings. +Suggest: + - Replace with: “### Theoretical Computer Science” + + + +Lint: Capitalization (127 priority) +Message: | + 250 | #### Theory of computation + | ^~~~~~~~~~~~~~~~~~~~~~~~~~ Try to use title case in headings. +Suggest: + - Replace with: “#### Theory of Computation” + + + Lint: Spelling (63 priority) Message: | 252 | According to Peter Denning, the fundamental question underlying computer science @@ -855,6 +909,33 @@ Suggest: +Lint: Capitalization (127 priority) +Message: | + 265 | #### Information and coding theory + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Try to use title case in headings. +Suggest: + - Replace with: “#### Information and Coding Theory” + + + +Lint: Capitalization (127 priority) +Message: | + 277 | #### Data structures and algorithms + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Try to use title case in headings. +Suggest: + - Replace with: “#### Data Structures and Algorithms” + + + +Lint: Capitalization (127 priority) +Message: | + 282 | #### Programming language theory and formal methods + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Try to use title case in headings. +Suggest: + - Replace with: “#### Programming Language Theory and Formal Methods” + + + Lint: Agreement (31 priority) Message: | 286 | programming languages and their individual features. 
It falls within the @@ -891,6 +972,33 @@ Message: | +Lint: Capitalization (127 priority) +Message: | + 308 | ### Applied computer science + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~ Try to use title case in headings. +Suggest: + - Replace with: “### Applied Computer Science” + + + +Lint: Capitalization (127 priority) +Message: | + 310 | #### Computer graphics and visualization + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Try to use title case in headings. +Suggest: + - Replace with: “#### Computer Graphics and Visualization” + + + +Lint: Capitalization (127 priority) +Message: | + 318 | #### Image and sound processing + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Try to use title case in headings. +Suggest: + - Replace with: “#### Image and Sound Processing” + + + Lint: Style (31 priority) Message: | 320 | Information can take the form of images, sound, video or other multimedia. Bits @@ -920,6 +1028,15 @@ Suggest: +Lint: Capitalization (127 priority) +Message: | + 330 | #### Computational science, finance and engineering + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Try to use title case in headings. +Suggest: + - Replace with: “#### Computational Science, Finance and Engineering” + + + Lint: Style (31 priority) Message: | 330 | #### Computational science, finance and engineering @@ -929,6 +1046,15 @@ Suggest: +Lint: Capitalization (127 priority) +Message: | + 344 | #### Human–computer interaction + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Try to use title case in headings. +Suggest: + - Replace with: “#### Human–Computer Interaction” + + + Lint: Spelling (63 priority) Message: | 346 | Human–computer interaction (HCI) is the field of study and research concerned @@ -952,6 +1078,15 @@ Suggest: +Lint: Capitalization (127 priority) +Message: | + 352 | #### Software engineering + | ^~~~~~~~~~~~~~~~~~~~~~~~~ Try to use title case in headings. +Suggest: + - Replace with: “#### Software Engineering” + + + Lint: Punctuation (31 priority) Message: | 360 | maintenance. 
For example software testing, systems engineering, technical debt @@ -961,6 +1096,33 @@ Suggest: +Lint: Capitalization (127 priority) +Message: | + 363 | #### Artificial intelligence + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~ Try to use title case in headings. +Suggest: + - Replace with: “#### Artificial Intelligence” + + + +Lint: Capitalization (127 priority) +Message: | + 383 | ### Computer systems + | ^~~~~~~~~~~~~~~~~~~~ Try to use title case in headings. +Suggest: + - Replace with: “### Computer Systems” + + + +Lint: Capitalization (127 priority) +Message: | + 385 | #### Computer architecture and microarchitecture + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Try to use title case in headings. +Suggest: + - Replace with: “#### Computer Architecture and Microarchitecture” + + + Lint: Spelling (63 priority) Message: | 393 | term "architecture" in computer literature can be traced to the work of Lyle R. @@ -983,6 +1145,15 @@ Suggest: +Lint: Capitalization (127 priority) +Message: | + 397 | #### Concurrent, parallel and distributed computing + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Try to use title case in headings. +Suggest: + - Replace with: “#### Concurrent, Parallel and Distributed Computing” + + + Lint: Spelling (63 priority) Message: | 401 | mathematical models have been developed for general concurrent computation @@ -995,6 +1166,24 @@ Suggest: +Lint: Capitalization (127 priority) +Message: | + 408 | #### Computer networks + | ^~~~~~~~~~~~~~~~~~~~~~ Try to use title case in headings. +Suggest: + - Replace with: “#### Computer Networks” + + + +Lint: Capitalization (127 priority) +Message: | + 413 | #### Computer security and cryptography + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Try to use title case in headings. 
+Suggest: + - Replace with: “#### Computer Security and Cryptography” + + + Lint: WordChoice (127 priority) Message: | 421 | Modern cryptography is the scientific study of problems relating to distributed @@ -1005,6 +1194,15 @@ Suggest: +Lint: Capitalization (127 priority) +Message: | + 427 | #### Databases and data mining + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Try to use title case in headings. +Suggest: + - Replace with: “#### Databases and Data Mining” + + + Lint: WordChoice (63 priority) Message: | 432 | languages. Data mining is a process of discovering patterns in large data sets. @@ -1149,6 +1347,15 @@ Suggest: +Lint: Capitalization (127 priority) +Message: | + 471 | ## Programming paradigms + | ^~~~~~~~~~~~~~~~~~~~~~~~ Try to use title case in headings. +Suggest: + - Replace with: “## Programming Paradigms” + + + Lint: Punctuation (31 priority) Message: | 490 | the data fields of the object with which they are associated. Thus diff --git a/harper-core/tests/text/linters/Difficult sentences.snap.yml b/harper-core/tests/text/linters/Difficult sentences.snap.yml index 4536cf55..2994f33f 100644 --- a/harper-core/tests/text/linters/Difficult sentences.snap.yml +++ b/harper-core/tests/text/linters/Difficult sentences.snap.yml @@ -1,3 +1,12 @@ +Lint: Capitalization (127 priority) +Message: | + 1 | # Difficult sentences + | ^~~~~~~~~~~~~~~~~~~~~ Try to use title case in headings. 
+Suggest: + - Replace with: “# Difficult Sentences” + + + Lint: Capitalization (31 priority) Message: | 20 | at the bottom of the page; sitting at the table; at church; at sea diff --git a/harper-core/tests/text/linters/Part-of-speech tagging.snap.yml b/harper-core/tests/text/linters/Part-of-speech tagging.snap.yml index 056bb9a6..c98055f1 100644 --- a/harper-core/tests/text/linters/Part-of-speech tagging.snap.yml +++ b/harper-core/tests/text/linters/Part-of-speech tagging.snap.yml @@ -1,3 +1,12 @@ +Lint: Capitalization (127 priority) +Message: | + 6 | # Part-of-speech tagging + | ^~~~~~~~~~~~~~~~~~~~~~~~ Try to use title case in headings. +Suggest: + - Replace with: “# Part-of-Speech Tagging” + + + Lint: Readability (127 priority) Message: | 8 | In corpus linguistics, part-of-speech tagging (POS tagging or PoS tagging or @@ -71,6 +80,15 @@ Message: | +Lint: Capitalization (127 priority) +Message: | + 39 | ### Tag sets + | ^~~~~~~~~~~~ Try to use title case in headings. +Suggest: + - Replace with: “### Tag Sets” + + + Lint: Spelling (127 priority) Message: | 43 | However, there are clearly many more categories and sub-categories. For nouns, @@ -286,6 +304,15 @@ Message: | +Lint: Capitalization (127 priority) +Message: | + 117 | ### Use of hidden Markov models + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Try to use title case in headings. +Suggest: + - Replace with: “### Use of Hidden Markov Models” + + + Lint: Spelling (63 priority) Message: | 119 | In the mid-1980s, researchers in Europe began to use hidden Markov models (HMMs) @@ -390,6 +417,15 @@ Suggest: +Lint: Capitalization (127 priority) +Message: | + 157 | ### Dynamic programming methods + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Try to use title case in headings. +Suggest: + - Replace with: “### Dynamic Programming Methods” + + + Lint: Spelling (63 priority) Message: | 159 | In 1987, Steven DeRose and Kenneth W. 
Church independently developed dynamic @@ -499,6 +535,15 @@ Message: | +Lint: Capitalization (127 priority) +Message: | + 182 | #### Unsupervised taggers + | ^~~~~~~~~~~~~~~~~~~~~~~~~ Try to use title case in headings. +Suggest: + - Replace with: “#### Unsupervised Taggers” + + + Lint: Spelling (127 priority) Message: | 184 | The methods already discussed involve working from a pre-existing corpus to @@ -509,6 +554,15 @@ Suggest: +Lint: Capitalization (127 priority) +Message: | + 198 | #### Other taggers and methods + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Try to use title case in headings. +Suggest: + - Replace with: “#### Other Taggers and Methods” + + + Lint: Spelling (63 priority) Message: | 200 | Some current major algorithms for part-of-speech tagging include the Viterbi diff --git a/harper-core/tests/text/linters/The Constitution of the United States.snap.yml b/harper-core/tests/text/linters/The Constitution of the United States.snap.yml index 907c097a..7724c764 100644 --- a/harper-core/tests/text/linters/The Constitution of the United States.snap.yml +++ b/harper-core/tests/text/linters/The Constitution of the United States.snap.yml @@ -1,3 +1,12 @@ +Lint: Capitalization (127 priority) +Message: | + 3 | # The Constitution Of The United States Of America + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Try to use title case in headings. +Suggest: + - Replace with: “# The Constitution of the United States of America” + + + Lint: Readability (127 priority) Message: | 5 | **We the People** of the United States, in Order to form a more perfect Union, @@ -1776,6 +1785,15 @@ Suggest: +Lint: Capitalization (127 priority) +Message: | + 677 | ## Article. VI. + | ^~~~~~~~~~~~~~~ Try to use title case in headings. +Suggest: + - Replace with: “## Article. 
Vi.” + + + Lint: Readability (127 priority) Message: | 683 | This Constitution, and the Laws of the United States which shall be made in diff --git a/harper-core/tests/text/linters/The Great Gatsby.snap.yml b/harper-core/tests/text/linters/The Great Gatsby.snap.yml index cc05052e..a4ae9cd1 100644 --- a/harper-core/tests/text/linters/The Great Gatsby.snap.yml +++ b/harper-core/tests/text/linters/The Great Gatsby.snap.yml @@ -4438,6 +4438,15 @@ Suggest: +Lint: Capitalization (127 priority) +Message: | + 3003 | ## CHAPTER VI + | ^~~~~~~~~~~~~ Try to use title case in headings. +Suggest: + - Replace with: “## CHAPTER Vi” + + + Lint: Readability (127 priority) Message: | 3020 | short of being news. Contemporary legends such as the “underground pipe-line to diff --git a/harper-core/tests/text/tagged/Alice's Adventures in Wonderland.md b/harper-core/tests/text/tagged/Alice's Adventures in Wonderland.md index 07897b34..e401c229 100644 --- a/harper-core/tests/text/tagged/Alice's Adventures in Wonderland.md +++ b/harper-core/tests/text/tagged/Alice's Adventures in Wonderland.md @@ -1,5 +1,5 @@ -> Alice’s Adventures in Wonderland -# NSg$ NPl/V3 NPr/J/R/P NSg+ +> Alice’s Adventures in Wonderland +# HeadingStart NSg$ NPl/V3 NPr/J/R/P NSg+ > # > by Lewis Carroll @@ -10,8 +10,8 @@ # D NSg+ NSg NSg+ # > # -> CHAPTER I : Down the Rabbit - Hole -# NSg/VB+ ISg/#r+ . N🅪Sg/VB/J/P D NSg/VB+ . NSg/VB+ +> CHAPTER I : Down the Rabbit - Hole +# HeadingStart NSg/VB+ ISg/#r+ . N🅪Sg/VB/J/P D NSg/VB+ . NSg/VB+ > # > Alice was beginning to get very tired of sitting by her sister on the bank , and @@ -370,8 +370,8 @@ # NSg/I/J/R/C ISg+ NPr/VBP/J P N🅪Sg/VB . VB/C J/R J/R VP/J NSg/VB/J/P D+ N🅪Sg/VB+ . > # -> CHAPTER II : The Pool of Tears -# NSg/VB+ #r . D NSg/VB P NPl/V3+ +> CHAPTER II : The Pool of Tears +# HeadingStart NSg/VB+ #r . D NSg/VB P NPl/V3+ > # > “ Curiouser and curiouser ! ” cried Alice ( she was so much surprised , that for the @@ -728,8 +728,8 @@ # NSg/VB/J+ VPt P D+ NSg/VB+ . 
> # -> CHAPTER III : A Caucus - Race and a Long Tale -# NSg/VB+ #r . D/P NSg/VB+ . N🅪Sg/VB VB/C D/P+ NPr/VB/J+ NSg/VB+ +> CHAPTER III : A Caucus - Race and a Long Tale +# HeadingStart NSg/VB+ #r . D/P NSg/VB+ . N🅪Sg/VB VB/C D/P NPr/VB/J NSg/VB+ > # > They were indeed a queer - looking party that assembled on the bank — the birds with @@ -1114,8 +1114,8 @@ # NSg/VB+ . VB/C VPt Nᴹ/Vg/J NSg/VB/J P NSg/VB ISg/D$+ NSg/VB+ . > # -> CHAPTER IV : The Rabbit Sends in a Little Bill -# NSg/VB+ NSg/J/#r+ . D+ NSg/VB+ NPl/V3 NPr/J/R/P D/P+ NPr/I/J/Dq+ NPr/VB+ +> CHAPTER IV : The Rabbit Sends in a Little Bill +# HeadingStart NSg/VB+ NSg/J/#r+ . D NSg/VB+ NPl/V3 NPr/J/R/P D/P NPr/I/J/Dq NPr/VB+ > # > It was the White Rabbit , trotting slowly back again , and looking anxiously about @@ -1590,8 +1590,8 @@ # D JS NSg/VB P ISg/D$+ NPr/C P NSg/I/VB+ NSg/J/C . > # -> CHAPTER V : Advice from a Caterpillar -# NSg/VB+ NSg/P/#r . Nᴹ+ P D/P NSg/VB +> CHAPTER V : Advice from a Caterpillar +# HeadingStart NSg/VB+ NSg/P/#r . Nᴹ+ P D/P NSg/VB > # > The Caterpillar and Alice looked at each other for some time in silence : at last @@ -2116,8 +2116,8 @@ # VB VP ISg+ N🅪Sg/VB/J/P P NSg NPl/V3+ NSg/VB/J/R . > # -> CHAPTER VI : Pig and Pepper -# NSg/VB+ NPr/#r . NSg/VB VB/C N🅪Sg/VB+ +> CHAPTER VI : Pig and Pepper +# HeadingStart NSg/VB+ NPr/#r . NSg/VB VB/C N🅪Sg/VB+ > # > For a minute or two she stood looking at the house , and wondering what to do @@ -2706,8 +2706,8 @@ # P NSg/I/J/C/Dq . ISg/#r+ R NSg/VB K VPp/J/P P NSg/VB D NSg/VB R . . > # -> CHAPTER VII : A Mad Tea - Party -# NSg/VB+ NSg/#r . D/P NSg/VB/J N🅪Sg/VB+ . NSg/VB/J+ +> CHAPTER VII : A Mad Tea - Party +# HeadingStart NSg/VB+ NSg/#r . D/P NSg/VB/J N🅪Sg/VB+ . NSg/VB/J+ > # > There was a table set out under a tree in front of the house , and the March Hare @@ -3334,8 +3334,8 @@ # NPr/VB/J NSg/VB+ . NPl/V3+ VB/C D NSg/VB/J NPl/V3 . > # -> CHAPTER VIII : The Queen’s Croquet - Ground -# NSg/VB+ #r . D NSg$ NSg/VB . 
N🅪Sg/VB/J+ +> CHAPTER VIII : The Queen’s Croquet - Ground +# HeadingStart NSg/VB+ #r . D NSg$ NSg/VB . N🅪Sg/VB/J+ > # > A large rose - tree stood near the entrance of the garden : the roses growing on it @@ -3896,8 +3896,8 @@ # NSg/VPt NSg/VB/J P D NSg/VB/J+ . > # -> CHAPTER IX : The Mock Turtle’s Story -# NSg/VB+ #r . D NSg/VB/J NSg$ NSg/VB+ +> CHAPTER IX : The Mock Turtle’s Story +# HeadingStart NSg/VB+ #r . D NSg/VB/J NSg$ NSg/VB+ > # > “ You can’t think how glad I am to see you again , you dear old thing ! ” said the @@ -4486,8 +4486,8 @@ # . NPr/VB ISg/D$+ NSg/I/J+ J/P D NPl/V3+ NSg/J/R/C . . > # -> CHAPTER X : The Lobster Quadrille -# NSg/VB+ NPr/J/#r+ . D+ NSg/VB/J+ NSg/VB/J +> CHAPTER X : The Lobster Quadrille +# HeadingStart NSg/VB+ NPr/J/#r+ . D+ NSg/VB/J+ NSg/VB/J > # > The Mock Turtle sighed deeply , and drew the back of one flapper across his eyes . @@ -5008,8 +5008,8 @@ # . ? . Nᴹ P D NPr/I+ . NPr/I+ . N🅪Sg/Vg/J+ . NSg/J . NSg/J N🅪Sg/VB+ . . > # -> CHAPTER XI : Who Stole the Tarts ? -# NSg/VB+ NSg/#r . NPr/I+ NSg/VPt D NPl/V3 . +> CHAPTER XI : Who Stole the Tarts ? +# HeadingStart NSg/VB+ NSg/#r . NPr/I+ NSg/VPt D NPl/V3 . > # > The King and Queen of Hearts were seated on their throne when they arrived , with @@ -5488,8 +5488,8 @@ # NSg/P D NSg/VB/J P ISg/D$+ NSg/VB/J NPr/I/J/Dq+ NSg/VB+ . D+ NSg/VB+ . NPr+ . . > # -> CHAPTER XII : Alice’s Evidence -# NSg/VB+ #r . NSg$ Nᴹ/VB+ +> CHAPTER XII : Alice’s Evidence +# HeadingStart NSg/VB+ #r . NSg$ Nᴹ/VB+ > # > “ Here ! 
” cried Alice , quite forgetting in the flurry of the moment how large she diff --git a/harper-core/tests/text/tagged/Computer science.md b/harper-core/tests/text/tagged/Computer science.md index e9722167..958b5995 100644 --- a/harper-core/tests/text/tagged/Computer science.md +++ b/harper-core/tests/text/tagged/Computer science.md @@ -6,8 +6,8 @@ # Unlintable Unlintable > --> # Unlintable Unlintable -> Computer science -# Unlintable NSg/VB+ N🅪Sg/VB+ +> Computer science +# Unlintable HeadingStart NSg/VB+ N🅪Sg/VB+ > # > Computer science is the study of computation , information , and automation . @@ -66,8 +66,8 @@ # N🅪Sg NPr/J/R/P NSg/VB+ N🅪Sg/VB+ . > # -> History -# N🅪Sg+ +> History +# HeadingStart N🅪Sg+ > # > The earliest foundations of what would become computer science predate the @@ -198,8 +198,8 @@ # VB/J NPl P NSg/VB+ NPr/J/R/P D$+ NSg/VB/J+ NPl/V3+ . > # -> Etymology and scope -# N🅪Sg VB/C NSg/VB+ +> Etymology and scope +# HeadingStart N🅪Sg VB/C NSg/VB+ > # > Although first proposed in 1956 , the term " computer science " appears in a 1959 @@ -346,12 +346,12 @@ # NSg/C NSg/R/C NSg/P NSg/I/J/C/Dq Nᴹ/VB+ . > # -> Philosophy -# N🅪Sg/VB+ +> Philosophy +# HeadingStart N🅪Sg/VB+ > # -> Epistemology of computer science -# Nᴹ P NSg/VB+ N🅪Sg/VB+ +> Epistemology of computer science +# HeadingStart Nᴹ P NSg/VB+ N🅪Sg/VB+ > # > Despite the word science in its name , there is debate over whether or not @@ -416,8 +416,8 @@ # Nᴹ/Vg/J+ NPl/V3+ NSg/R J J NPl+ . > # -> Paradigms of computer science -# NPl P NSg/VB+ N🅪Sg/VB+ +> Paradigms of computer science +# HeadingStart NPl P NSg/VB+ N🅪Sg/VB+ > # > A number of computer scientists have argued for the distinction of three @@ -448,8 +448,8 @@ # NSg/VB/J . VB Nᴹ/Vg/J+ NPl+ . > # -> Fields -# NPrPl/V3+ +> Fields +# HeadingStart NPrPl/V3+ > # > As a discipline , computer science spans a range of topics from theoretical @@ -480,8 +480,8 @@ # NSg NSg/R N🅪Sg/Vg/J/C J NPl P NSg/VB+ N🅪Sg/VB+ . 
> # -> Theoretical computer science -# J+ NSg/VB+ N🅪Sg/VB+ +> Theoretical computer science +# HeadingStart J+ NSg/VB+ N🅪Sg/VB+ > # > Theoretical computer science is mathematical and abstract in spirit , but it @@ -494,8 +494,8 @@ # N🅪Sg/Vg/J+ . VB NPr/I/J/R/Dq NSg/J NPl . > # -> Theory of computation -# N🅪Sg P NSg +> Theory of computation +# HeadingStart N🅪Sg P NSg > # > According to Peter Denning , the fundamental question underlying computer science @@ -524,8 +524,8 @@ # NSg/J NPr/J/R/P D N🅪Sg P NSg . > # -> Information and coding theory -# Nᴹ VB/C Nᴹ/Vg/J+ N🅪Sg+ +> Information and coding theory +# HeadingStart Nᴹ VB/C Nᴹ/Vg/J+ N🅪Sg+ > # > Information theory , closely related to probability and statistics , is related to @@ -548,8 +548,8 @@ # NSg/J VB/C NSg/J+ N🅪Pl+ N🅪Sg+ NPl/V3+ . > # -> Data structures and algorithms -# N🅪Pl+ NPl/V3 VB/C NPl+ +> Data structures and algorithms +# HeadingStart N🅪Pl+ NPl/V3 VB/C NPl+ > # > Data structures and algorithms are the studies of commonly used computational @@ -558,8 +558,8 @@ # NPl/V3 VB/C D$+ J+ N🅪Sg+ . > # -> Programming language theory and formal methods -# Nᴹ/Vg/J+ N🅪Sg/VB+ N🅪Sg VB/C NSg/J+ NPl/V3+ +> Programming language theory and formal methods +# HeadingStart Nᴹ/Vg/J+ N🅪Sg/VB+ N🅪Sg VB/C NSg/J NPl/V3+ > # > Programming language theory is a branch of computer science that deals with the @@ -610,12 +610,12 @@ # NPl NPr/J/R/P Nᴹ VB/C Nᴹ+ NSg VB/C N🅪Sg+ . > # -> Applied computer science -# VP/J NSg/VB+ N🅪Sg/VB+ +> Applied computer science +# HeadingStart VP/J NSg/VB+ N🅪Sg/VB+ > # -> Computer graphics and visualization -# NSg/VB+ NPl VB/C NSg+ +> Computer graphics and visualization +# HeadingStart NSg/VB+ NPl VB/C NSg+ > # > Computer graphics is the study of digital visual contents and involves the @@ -630,8 +630,8 @@ # VB/C N🅪Sg/VB+ NPl/V3+ . 
> # -> Image and sound processing -# N🅪Sg/VB VB/C N🅪Sg/VB/J+ Nᴹ/Vg/J+ +> Image and sound processing +# HeadingStart N🅪Sg/VB VB/C N🅪Sg/VB/J+ Nᴹ/Vg/J+ > # > Information can take the form of images , sound , video or other multimedia . Bits @@ -654,8 +654,8 @@ # NSg/I/J P D VP/J NPl NPr/J/R/P J+ NSg/VB+ N🅪Sg/VB+ . > # -> Computational science , finance and engineering -# J N🅪Sg/VB+ . N🅪Sg/VB VB/C Nᴹ/Vg/J+ +> Computational science , finance and engineering +# HeadingStart J N🅪Sg/VB+ . N🅪Sg/VB VB/C Nᴹ/Vg/J+ > # > Scientific computing ( or computational science ) is the field of study concerned @@ -682,8 +682,8 @@ # NPl/V3 . > # -> Human – computer interaction -# NSg/VB/J . NSg/VB+ N🅪Sg+ +> Human – computer interaction +# HeadingStart NSg/VB/J . NSg/VB+ N🅪Sg+ > # > Human – computer interaction ( HCI ) is the field of study and research concerned @@ -698,8 +698,8 @@ # NSg P NPl/V3+ . > # -> Software engineering -# Nᴹ+ Nᴹ/Vg/J+ +> Software engineering +# HeadingStart Nᴹ+ Nᴹ/Vg/J+ > # > Software engineering is the study of designing , implementing , and modifying the @@ -720,8 +720,8 @@ # VB/C Nᴹ+ N🅪Sg+ NPl/V3+ . > # -> Artificial intelligence -# J+ N🅪Sg+ +> Artificial intelligence +# HeadingStart J+ N🅪Sg+ > # > Artificial intelligence ( AI ) aims to or is required to synthesize @@ -760,12 +760,12 @@ # N🅪Pl+ . > # -> Computer systems -# NSg/VB+ NPl+ +> Computer systems +# HeadingStart NSg/VB+ NPl+ > # -> Computer architecture and microarchitecture -# NSg/VB+ N🅪Sg+ VB/C NSg +> Computer architecture and microarchitecture +# HeadingStart NSg/VB+ N🅪Sg+ VB/C NSg > # > Computer architecture , or digital computer organization , is the conceptual @@ -788,8 +788,8 @@ # NSg+ NPr/J/R/P NSg$ NSg/VB/J+ Nᴹ/VB+ NSg/VB/J+ NPr/J/R/P # . > # -> Concurrent , parallel and distributed computing -# NSg/J . NSg/VB/J VB/C VP/J Nᴹ/Vg/J+ +> Concurrent , parallel and distributed computing +# HeadingStart NSg/J . 
NSg/VB/J VB/C VP/J Nᴹ/Vg/J+ > # > Concurrency is a property of systems in which several computations are executing @@ -810,8 +810,8 @@ # VP/J P VB NSg/VB/J+ NPl/V3+ . > # -> Computer networks -# NSg/VB+ NPl/V3+ +> Computer networks +# HeadingStart NSg/VB+ NPl/V3+ > # > This branch of computer science aims to manage networks between computers @@ -820,8 +820,8 @@ # J . > # -> Computer security and cryptography -# NSg/VB+ Nᴹ+ VB/C Nᴹ +> Computer security and cryptography +# HeadingStart NSg/VB+ Nᴹ+ VB/C Nᴹ > # > Computer security is a branch of computer technology with the objective of @@ -848,8 +848,8 @@ # VP/J NPl/V3 . > # -> Databases and data mining -# NPl/V3 VB/C N🅪Pl+ Nᴹ/Vg/J+ +> Databases and data mining +# HeadingStart NPl/V3 VB/C N🅪Pl+ Nᴹ/Vg/J+ > # > A database is intended to organize , store , and retrieve large amounts of data @@ -862,8 +862,8 @@ # NPl/V3+ . N🅪Pl+ Nᴹ/Vg/J+ VL3 D/P NSg/VB P Nᴹ/Vg/J NPl/V3+ NPr/J/R/P NSg/J N🅪Pl+ NPl/V3 . > # -> Discoveries -# NPl+ +> Discoveries +# HeadingStart NPl+ > # > The philosopher of computing Bill Rapaport noted three Great Insights of @@ -976,8 +976,8 @@ # > # -> Programming paradigms -# Nᴹ/Vg/J+ NPl+ +> Programming paradigms +# HeadingStart Nᴹ/Vg/J+ NPl+ > # > Programming languages can be used to accomplish different tasks in different @@ -1044,8 +1044,8 @@ # D/P N🅪Sg/VB P NSg/VB+ C/P P NSg/J+ NPl+ . > # -> Research -# Nᴹ/VB+ +> Research +# HeadingStart Nᴹ/VB+ > # > Conferences are important events for computer science research . 
During these diff --git a/harper-core/tests/text/tagged/Difficult sentences.md b/harper-core/tests/text/tagged/Difficult sentences.md index 17ebf9e8..95736b44 100644 --- a/harper-core/tests/text/tagged/Difficult sentences.md +++ b/harper-core/tests/text/tagged/Difficult sentences.md @@ -1,5 +1,5 @@ -> Difficult sentences -# VB/J+ NPl/V3+ +> Difficult sentences +# HeadingStart VB/J+ NPl/V3+ > # > A collection of difficult sentences to test Harper's ability to correctly tag unusual / uncommon but correct sentences . @@ -14,8 +14,8 @@ # NSg/I/J/R/Dq NSg/VB+ NPl/V3+ VB VPp/J P Url NSg/VB+ . NSg/VB/#r+ NSg/J/P . NPr/VB/J+ # . > # -> A -# D/P +> A +# HeadingStart D/P > # > With one attack , he was torn a pieces . @@ -24,12 +24,12 @@ # ISg/#r+ NSg/VB D$+ NPl+ R D/P+ NPr🅪Sg+ . > # -> At -# NSg/P +> At +# HeadingStart NSg/P > # -> Preposition -# NSg/VB +> Preposition +# HeadingStart NSg/VB > # > Caesar was at Rome ; a climate treaty was signed at Kyoto in 1997 . @@ -68,28 +68,28 @@ # ISg+ VL3 NSg/P NPl VB/C NPl P ISg+ . > # -> Noun -# NSg/VB+ +> Noun +# HeadingStart NSg/VB+ > # > The at sign . # D NSg/P NSg/VB+ . > # -> Verb -# NSg/VB+ +> Verb +# HeadingStart NSg/VB+ > # > ( In online chats : ) Don't @ me ! Don't at me ! # . NPr/J/R/P VB/J+ NPl/V3+ . . VB . NPr/ISg+ . VB NSg/P NPr/ISg+ . > # -> By -# NSg/J/P +> By +# HeadingStart NSg/J/P > # -> Preposition -# NSg/VB +> Preposition +# HeadingStart NSg/VB > # > The mailbox is by the bus stop . @@ -176,8 +176,8 @@ # NSg/J/P ? . + NSg/J . . P . P . > # -> Adverb -# NSg/VB+ +> Adverb +# HeadingStart NSg/VB+ > # > I watched the parade as it passed by . @@ -192,8 +192,8 @@ # D+ NPl+ VB/J NSg/I/J/R/Dq N🅪Sg/VB/J+ P NSg/VB+ Nᴹ/Vg/J NPl/V3+ NSg/J/P R/C/P N🅪Sg/VB VB/C N🅪Sg/VB+ . > # -> Adjective -# NSg/VB/J+ +> Adjective +# HeadingStart NSg/VB/J+ > # > a by path ; a by room ( Out of the way , off to one side . ) @@ -202,20 +202,20 @@ # NSg/J/P NSg/VB . D/P NSg/J/P NSg/VB . NSg/J+ . NSg/J . . 
> # -> For -# R/C/P +> For +# HeadingStart R/C/P > # -> Conjunction -# NSg/VB+ +> Conjunction +# HeadingStart NSg/VB+ > # > I had to stay with my wicked stepmother , for I had nowhere else to go . # ISg/#r+ VB P NSg/VB/J P D$+ VP/J NSg . R/C/P ISg/#r+ VB NSg/J NSg/J/C P NSg/VB/J . > # -> Preposition -# NSg/VB +> Preposition +# HeadingStart NSg/VB > # > The astronauts headed for the moon . @@ -320,8 +320,8 @@ # P NSg/VB R/C/P NSg$+ NSg+ . > # -> From -# P +> From +# HeadingStart P > # > Paul is from New Zealand . @@ -364,12 +364,12 @@ # NPr/ISg+ NPl/V3 NPr/VB/J P NSg/VB/J/R . > # -> In -# NPr/J/R/P +> In +# HeadingStart NPr/J/R/P > # -> Preposition -# NSg/VB +> Preposition +# HeadingStart NSg/VB > # > Who lives in a pineapple under the sea ? @@ -450,16 +450,16 @@ # NSg/J+ NPl/V3+ VXB NSg/VXB NSg/J NPr/J/R/P N🅪Sg/I/VB+ . NSg/C/P NSg/R/C VP/J . > # -> Verb -# NSg/VB+ +> Verb +# HeadingStart NSg/VB+ > # > He that ears my land spares my team and gives me leave to in the crop . # NPr/ISg+ NSg/I/C/Ddem+ NPl/V3+ D$+ NPr🅪Sg/VB+ NPl/V3 D$+ NSg/VB+ VB/C NPl/V3 NPr/ISg+ NSg/VB P NPr/J/R/P D NSg/VB+ . > # -> Adverb -# NSg/VB+ +> Adverb +# HeadingStart NSg/VB+ > # > Suddenly a strange man walked in . @@ -478,16 +478,16 @@ # D NSg/VB NSg/VB/J VB VBPp Vg/J # NPl/V3+ NPr/J/R/P . > # -> Noun -# NSg/VB+ +> Noun +# HeadingStart NSg/VB+ > # > His parents got him an in with the company . # ISg/D$+ NPl/V3+ VP ISg+ D/P NPr/J/R/P P D+ N🅪Sg/VB+ . > # -> Adjective -# NSg/VB/J+ +> Adjective +# HeadingStart NSg/VB/J+ > # > Is Mr . Smith in ? @@ -522,8 +522,8 @@ # ISg/#r+ VPt # NPl NPr/J/R/P NSg/I/C D+ N🅪Sg/VB/J+ VP/J . > # -> Unit -# NSg+ +> Unit +# HeadingStart NSg+ > # > The glass is 8 inches . @@ -532,8 +532,8 @@ # D+ NPr🅪Sg/VB+ VL3 # NPr/J/R/P . > # -> Of -# P +> Of +# HeadingStart P > # > Take the chicken out of the freezer . @@ -582,12 +582,12 @@ # P D/P NSg/VB/J P NSg+ NPl+ . D+ NSg/VB/J+ R VPt NSg/VB/J/P . 
> # -> On -# J/P +> On +# HeadingStart J/P > # -> Adjective -# NSg/VB/J+ +> Adjective +# HeadingStart NSg/VB/J+ > # > All the lights are on , so they must be home . @@ -624,8 +624,8 @@ # NPr/ISg+ R V3 P NSg/VXB J/P . + NSg/I/J/R/C Nᴹ/Vg/J . > # -> Adverb -# NSg/VB+ +> Adverb +# HeadingStart NSg/VB+ > # > turn the television on @@ -648,8 +648,8 @@ # NSg+ NPl+ J/P . NSg/I/J+ VB VP/J NPr/J/R/P D+ NSg+ . > # -> Preposition -# NSg/VB +> Preposition +# HeadingStart NSg/VB > # > A vase of flowers stood on the table . @@ -768,20 +768,20 @@ # VB VB NPr/VB J/P ISg/D$+ VB/C NSg/VB ISg/D$+ NPr/J/R/P N🅪Sg/VB+ . > # -> Verb -# NSg/VB+ +> Verb +# HeadingStart NSg/VB+ > # > Can you on the light ? ( switch on ) # NPr/VXB ISgPl+ J/P D+ N🅪Sg/VB/J+ . . NSg/VB/J+ J/P . > # -> To -# P +> To +# HeadingStart P > # -> Particle -# NSg+ +> Particle +# HeadingStart NSg+ > # > I want to leave . @@ -804,8 +804,8 @@ # ISg/#r+ NSg/VPt P D+ NPl/V3+ P NSg/VB I/J/R/Dq+ N🅪Sg/VB+ . > # -> Preposition -# NSg/VB +> Preposition +# HeadingStart NSg/VB > # > She looked to the heavens . @@ -852,20 +852,20 @@ # NSg$ D+ N🅪Sg/VB/J+ . . + NSg/VB/J+ P NSg NPr/J/R/P D+ N🅪Sg+ . NPr/C # . # NSg/VB+ . . > # -> Adverb -# NSg/VB+ +> Adverb +# HeadingStart NSg/VB+ > # > Please push the door to . ( close ) # VB NSg/VB D+ NSg/VB+ P . . NSg/VB/J . > # -> With -# P +> With +# HeadingStart P > # -> Preposition -# NSg/VB +> Preposition +# HeadingStart NSg/VB > # > He picked a fight with the class bully . @@ -930,8 +930,8 @@ # NSg/I/C/Ddem+ VPt D/P NPr/VB P VB . VB ISgPl+ NSg/VB/J P NPr/ISg+ . > # -> Adverb -# NSg/VB+ +> Adverb +# HeadingStart NSg/VB+ > # > Do you want to come with ? 
diff --git a/harper-core/tests/text/tagged/Part-of-speech tagging.md b/harper-core/tests/text/tagged/Part-of-speech tagging.md index 10b52959..d829439c 100644 --- a/harper-core/tests/text/tagged/Part-of-speech tagging.md +++ b/harper-core/tests/text/tagged/Part-of-speech tagging.md @@ -6,8 +6,8 @@ # Unlintable Unlintable > --> # Unlintable Unlintable -> Part - of - speech tagging -# Unlintable NSg/VB/J+ . P . N🅪Sg/VB+ NSg/Vg +> Part - of - speech tagging +# Unlintable HeadingStart NSg/VB/J+ . P . N🅪Sg/VB+ NSg/Vg > # > In corpus linguistics , part - of - speech tagging ( POS tagging or PoS tagging or @@ -36,8 +36,8 @@ # NSg/VB/J VB/C NSg/I/J/R/Dq R VP/J NPr🅪Sg/VB/J+ NSg+ . NPl . NPl/V3 NSg/VB+ . VP/J NPl+ . > # -> Principle -# N🅪Sg/VB+ +> Principle +# HeadingStart N🅪Sg/VB+ > # > Part - of - speech tagging is harder than just having a list of words and their @@ -72,8 +72,8 @@ # J NSg/VB+ . R . . . > # -> Tag sets -# NSg/VB+ NPl/V3 +> Tag sets +# HeadingStart NSg/VB+ NPl/V3 > # > Schools commonly teach that there are 9 parts of speech in English : noun , verb , @@ -156,12 +156,12 @@ # J/P D N🅪Sg/VB+ NSg/P NSg/VB+ . NSg/J NSg/Vg VL3 NSg/JC J/P NSg/JC NSg/VB+ . NPl/V3 . > # -> History -# N🅪Sg+ +> History +# HeadingStart N🅪Sg+ > # -> The Brown Corpus -# D+ NPr🅪Sg/VB/J+ NSg+ +> The Brown Corpus +# HeadingStart D+ NPr🅪Sg/VB/J NSg+ > # > Research on part - of - speech tagging has been closely tied to corpus linguistics . @@ -228,8 +228,8 @@ # NPl+ NSg/VB NSg/VXB VP/J R/C/P Dq+ NSg/VB+ . > # -> Use of hidden Markov models -# N🅪Sg/VB P VB/J NPr NPl/V3+ +> Use of hidden Markov models +# HeadingStart N🅪Sg/VB P VB/J NPr NPl/V3+ > # > In the mid - 1980s , researchers in Europe began to use hidden Markov models ( HMMs ) @@ -308,8 +308,8 @@ # NSg . > # -> Dynamic programming methods -# NSg/J+ Nᴹ/Vg/J+ NPl/V3+ +> Dynamic programming methods +# HeadingStart NSg/J+ Nᴹ/Vg/J+ NPl/V3+ > # > In 1987 , Steven DeRose and Kenneth W. 
Church independently developed dynamic @@ -358,8 +358,8 @@ # NSg/VB/J+ . P . N🅪Sg/VB+ NSg+ . > # -> Unsupervised taggers -# VB/J NPl +> Unsupervised taggers +# HeadingStart VB/J NPl > # > The methods already discussed involve working from a pre - existing corpus to @@ -390,8 +390,8 @@ # J NPl/V3+ . > # -> Other taggers and methods -# NSg/VB/J NPl VB/C NPl/V3+ +> Other taggers and methods +# HeadingStart NSg/VB/J NPl VB/C NPl/V3+ > # > Some current major algorithms for part - of - speech tagging include the Viterbi diff --git a/harper-core/tests/text/tagged/Spell.US.md b/harper-core/tests/text/tagged/Spell.US.md index ca2b4610..0095f299 100644 --- a/harper-core/tests/text/tagged/Spell.US.md +++ b/harper-core/tests/text/tagged/Spell.US.md @@ -1,5 +1,5 @@ -> Spell -# NSg/VB +> Spell +# HeadingStart NSg/VB > # > This document contains a list of words spelled correctly in some dialects of English , but not American English . This is designed to test the spelling suggestions we give for such mistakes . @@ -10,8 +10,8 @@ # P VB I/Ddem+ . D NSg P I/Ddem NSg/VB+ V3 Unlintable . I/C+ NPr/VXB NPr/VB D NSg/VB+ NSg P N🅪Sg/VB D NPr/J NSg+ . NPr/VB/J/R C/P Nᴹ/Vg/J P N🅪Sg/VB D/P R VP/J NSg+ . > # -> Words -# NPl/V3+ +> Words +# HeadingStart NPl/V3+ > # > diff --git a/harper-core/tests/text/tagged/Spell.md b/harper-core/tests/text/tagged/Spell.md index e5333a0b..d66d8dd4 100644 --- a/harper-core/tests/text/tagged/Spell.md +++ b/harper-core/tests/text/tagged/Spell.md @@ -1,13 +1,13 @@ -> Spell -# NSg/VB +> Spell +# HeadingStart NSg/VB > # > This document contains example sentences with misspelled words that we want to test the spell checker on . # I/Ddem+ NSg/VB+ V3 NSg/VB+ NPl/V3+ P VP/J NPl/V3+ NSg/I/C/Ddem+ IPl+ NSg/VB P NSg/VB D NSg/VB NSg/VB J/P . > # -> Example Sentences -# NSg/VB+ NPl/V3+ +> Example Sentences +# HeadingStart NSg/VB+ NPl/V3+ > # > My favourite color is blu . 
diff --git a/harper-core/tests/text/tagged/Swear.md b/harper-core/tests/text/tagged/Swear.md index 2337c20a..1ad11835 100644 --- a/harper-core/tests/text/tagged/Swear.md +++ b/harper-core/tests/text/tagged/Swear.md @@ -1,13 +1,13 @@ -> Swears -# NPl/V3 +> Swears +# HeadingStart NPl/V3 > # > This documents tests that different forms / variations of swears are tagged as such . # I/Ddem+ NPl/V3+ NPl/V3+ NSg/I/C/Ddem NSg/J+ NPl/V3+ . NPl P NPl/V3 VB VP/J NSg/R NSg/I . > # -> Examples -# NPl/V3+ +> Examples +# HeadingStart NPl/V3+ > # > One turd , two turds . diff --git a/harper-core/tests/text/tagged/The Constitution of the United States.md b/harper-core/tests/text/tagged/The Constitution of the United States.md index 6d612745..41b0af61 100644 --- a/harper-core/tests/text/tagged/The Constitution of the United States.md +++ b/harper-core/tests/text/tagged/The Constitution of the United States.md @@ -1,7 +1,7 @@ > # Unlintable -> The Constitution Of The United States Of America -# Unlintable D NPr+ P D VP/J NPrPl/V3+ P NPr+ +> The Constitution Of The United States Of America +# Unlintable HeadingStart D NPr+ P D VP/J NPrPl/V3 P NPr+ > # > We the People of the United States , in Order to form a more perfect Union , @@ -16,12 +16,12 @@ # NPrPl/V3 P NPr+ . > # -> Article . I. -# NSg/VB+ . ? +> Article . I. +# HeadingStart NSg/VB+ . ? > # -> Section . 1 . -# NSg/VB+ . # . +> Section . 1 . +# HeadingStart NSg/VB+ . # . > # > All legislative Powers herein granted shall be vested in a @@ -66,8 +66,8 @@ # NPl+ VXB NSg/J/R/C NSg/VB . > # -> Section . 2 . -# NSg/VB+ . # . +> Section . 2 . +# HeadingStart NSg/VB+ . # . > # > The House of Representatives shall be composed of Members @@ -144,8 +144,8 @@ # VXB NSg/VXB D NSg/VB/J N🅪Sg/VB/J+ P N🅪Sg . > # -> Section . 3 . -# NSg/VB+ . # . +> Section . 3 . +# HeadingStart NSg/VB+ . # . > # > The Senate of the United States shall be composed of two @@ -226,8 +226,8 @@ # N🅪Sg/VB . > # -> Section . 4 . -# NSg/VB+ . # . +> Section . 4 . 
+# HeadingStart NSg/VB+ . # . > # > The Times , Places and Manner of holding Elections for Senators @@ -248,8 +248,8 @@ # NSg/J NPr🅪Sg+ . > # -> Section . 5 . -# NSg/VB+ . # . +> Section . 5 . +# HeadingStart NSg/VB+ . # . > # > Each House shall be the Judge of the Elections , Returns and @@ -288,8 +288,8 @@ # NPr/J/R/P I/C+ D NSg NPl/V3+ VXB NSg/VXB NSg/Vg/J . > # -> Section . 6 . -# NSg/VB+ . # . +> Section . 6 . +# HeadingStart NSg/VB+ . # . > # > The Senators and Representatives shall receive a Compensation @@ -326,8 +326,8 @@ # NSg/VXB VP/J . > # -> Section . 7 . -# NSg/VB+ . # . +> Section . 7 . +# HeadingStart NSg/VB+ . # . > # > All Bills for raising Revenue shall originate in the House of @@ -384,8 +384,8 @@ # P D NPl/V3 VB/C NPl+ VP/J NPr/J/R/P D NPr🅪Sg/VB P D/P NPr/VB+ . > # -> Section . 8 . -# NSg/VB+ . # . +> Section . 8 . +# HeadingStart NSg/VB+ . # . > # > The Congress shall have Power To lay and collect Taxes , Duties , @@ -582,8 +582,8 @@ # > # -> Section . 9 . -# NSg/VB+ . # . +> Section . 9 . +# HeadingStart NSg/VB+ . # . > # > The Migration or Importation of such Persons as any of the @@ -662,8 +662,8 @@ # N🅪Sg/VB NPr/C NSg/VB/J N🅪Sg/VB+ . > # -> Section . 10 . -# NSg/VB+ . # . +> Section . 10 . +# HeadingStart NSg/VB+ . # . > # > No State shall enter into any Treaty , Alliance , or @@ -702,12 +702,12 @@ # VP/J . NPr/C NPr/J/R/P NSg/I J N🅪Sg/VB/JC+ NSg/R NPr/VXB NSg/R/C VB P NSg/VB/J+ . > # -> Article . II . -# NSg/VB+ . #r . +> Article . II . +# HeadingStart NSg/VB+ . #r . > # -> Section . 1 . -# NSg/VB+ . # . +> Section . 1 . +# HeadingStart NSg/VB+ . # . > # > The executive Power shall be vested in a President of the @@ -732,8 +732,8 @@ # NPrPl/V3+ . VXB NSg/VXB VP/J D/P NSg . > # -> SubSection . 1 . -# NSg/VB+ . # . +> SubSection . 1 . +# HeadingStart NSg/VB+ . # . > # > The Electors shall meet in their respective states , and vote @@ -828,8 +828,8 @@ # VP/J NPrPl/V3+ . > # -> SubSection . 2 -# NSg/VB+ . # +> SubSection . 
2 +# HeadingStart NSg/VB+ . # > # > No Person except a natural born Citizen , or a Citizen of the @@ -864,8 +864,8 @@ # Nᴹ/Vg/J NSg/R NSg/VB+ VB/P D NSg/VB/J P NSg/I+ NSg/VB/J+ . > # -> SubSection 3 . -# NSg/VB+ # . +> SubSection 3 . +# HeadingStart NSg/VB+ # . > # > In case of the removal of the President from office or of his @@ -942,8 +942,8 @@ # VB/C NPl P ISg/D$+ NSg/VB+ . > # -> SubSection 4 . -# NSg/VB+ # . +> SubSection 4 . +# HeadingStart NSg/VB+ # . > # > The President shall , at stated Times , receive for his @@ -970,8 +970,8 @@ # NPrPl/V3+ . . > # -> SubSection 5 . -# NSg/VB+ # . +> SubSection 5 . +# HeadingStart NSg/VB+ # . > # > The District constituting the seat of Government of the @@ -998,8 +998,8 @@ # NPr+ . > # -> Section . 2 . -# NSg/VB+ . # . +> Section . 2 . +# HeadingStart NSg/VB+ . # . > # > The President shall be Commander in Chief of the Army and Navy @@ -1054,8 +1054,8 @@ # N🅪Sg/VB+ . > # -> Section . 3 . -# NSg/VB+ . # . +> Section . 3 . +# HeadingStart NSg/VB+ . # . > # > He shall from time to time give to the Congress Information of @@ -1076,8 +1076,8 @@ # N🅪Sg/VB NSg/I/J/C/Dq D NPl/V3 P D VP/J NPrPl/V3+ . > # -> Section . 4 . -# NSg/VB+ . # . +> Section . 4 . +# HeadingStart NSg/VB+ . # . > # > The President , Vice President and all civil Officers of the @@ -1088,12 +1088,12 @@ # P . NSg . Nᴹ . NPr/C NSg/VB/J NSg/VB/J/R NPl/V3+ VB/C NPl . > # -> Article . III . -# NSg/VB+ . #r . +> Article . III . +# HeadingStart NSg/VB+ . #r . > # -> Section . 1 . -# NSg/VB+ . # . +> Section . 1 . +# HeadingStart NSg/VB+ . # . > # > The judicial Power of the United States , shall be vested in @@ -1110,8 +1110,8 @@ # VB/P D$+ NSg NPr/J/R/P NSg/VB+ . > # -> Section . 2 . -# NSg/VB+ . # . +> Section . 2 . +# HeadingStart NSg/VB+ . # . > # > The judicial Power shall extend to all Cases , in Law and @@ -1154,8 +1154,8 @@ # N🅪Sg/VB NPr/C NPl/V3+ NSg/R D NPr/VB+ NPr/VXB NSg/J/P N🅪Sg/VB+ NSg/VXB VP/J . > # -> Section . 3 . -# NSg/VB+ . # . +> Section . 3 . 
+# HeadingStart NSg/VB+ . # . > # > Treason against the United States , shall consist only in @@ -1176,8 +1176,8 @@ # VB/P D N🅪Sg/VB P D NSg/VB+ VP/J . > # -> Section . 4 . -# NSg/VB+ . # . +> Section . 4 . +# HeadingStart NSg/VB+ . # . > # > The right of the people to be secure in their persons , houses , @@ -1242,12 +1242,12 @@ # VB/C NSg/J NPl+ VP/J . > # -> Article . IV . -# NSg/VB+ . NSg/J/#r+ . +> Article . IV . +# HeadingStart NSg/VB+ . NSg/J/#r+ . > # -> Section . 1 . -# NSg/VB+ . # . +> Section . 1 . +# HeadingStart NSg/VB+ . # . > # > Full Faith and Credit shall be given in each State to the @@ -1260,8 +1260,8 @@ # VB/C NPl+ VXB NSg/VXB VP/J . VB/C D+ NSg/VB+ R . > # -> Section . 2 . -# NSg/VB+ . # . +> Section . 2 . +# HeadingStart NSg/VB+ . # . > # > All persons born or naturalized in the United States , and @@ -1314,8 +1314,8 @@ # NSg/VB NPr/C NPr🅪Sg/VB/Comm+ NPr/VXB NSg/VXB NSg/J . > # -> Section . 3 . -# NSg/VB+ . # . +> Section . 3 . +# HeadingStart NSg/VB+ . # . > # > New States may be admitted by the Congress into this Union ; but @@ -1340,8 +1340,8 @@ # I/R/Dq NPl/V3 P D VP/J NPrPl/V3+ . NPr/C P I/R/Dq NSg/J N🅪Sg/VB+ . > # -> Section . 4 . -# NSg/VB+ . # . +> Section . 4 . +# HeadingStart NSg/VB+ . # . > # > The United States shall guarantee to every State in this Union @@ -1354,8 +1354,8 @@ # NSg+ NSg/VB NSg/VXB VP/J . C/P NSg/J Nᴹ/VB+ . > # -> Section . 5 . -# NSg/VB+ . # . +> Section . 5 . +# HeadingStart NSg/VB+ . # . > # > The validity of the public debt of the United States , @@ -1374,8 +1374,8 @@ # NSg/I NPl+ . NPl VB/C NPl/V3+ VXB NSg/VXB VB NSg/J VB/C NSg/VB/J+ . > # -> Article . V. -# NSg/VB+ . ? +> Article . V. +# HeadingStart NSg/VB+ . ? > # > The Congress , whenever two thirds of both Houses shall deem it necessary , shall @@ -1402,8 +1402,8 @@ # C/P ISg/D$+ N🅪Sg/VB . VXB NSg/VXB VP/J P ISg/D$+ NSg/VB/J NSg+ NPr/J/R/P D NPr+ . > # -> Article . VI . -# NSg/VB+ . NPr/#r . +> Article . VI . +# HeadingStart NSg/VB+ . NPr/#r . 
> # > All Debts contracted and Engagements entered into , before the Adoption of this @@ -1446,8 +1446,8 @@ # NPr/VB/J P D NPl/VB+ P NSg/VB VB/C NSg/VB/J+ NPl/V3+ . VXB NSg/R/C NSg/VXB VP/J . > # -> Section . 1 . -# NSg/VB+ . # . +> Section . 1 . +# HeadingStart NSg/VB+ . # . > # > The enumeration in the Constitution , of certain rights , shall @@ -1464,8 +1464,8 @@ # D NPl/VB+ . > # -> Article . VII . -# NSg/VB+ . NSg/#r . +> Article . VII . +# HeadingStart NSg/VB+ . NSg/#r . > # > The Ratification of the Conventions of nine States , shall be sufficient for the @@ -1498,12 +1498,12 @@ # NSg/J NPr/J/R/P NSg/VB C IPl+ NSg/VXB R VP/J D$+ NPl/V3+ . > # -> Article . VIII . -# NSg/VB+ . #r . +> Article . VIII . +# HeadingStart NSg/VB+ . #r . > # -> Section 1 . -# NSg/VB+ # . +> Section 1 . +# HeadingStart NSg/VB+ # . > # > The transportation or importation into any State , Territory , or diff --git a/harper-core/tests/text/tagged/The Great Gatsby.md b/harper-core/tests/text/tagged/The Great Gatsby.md index c9d4d1b3..e630e51e 100644 --- a/harper-core/tests/text/tagged/The Great Gatsby.md +++ b/harper-core/tests/text/tagged/The Great Gatsby.md @@ -1,13 +1,13 @@ -> The Great Gatsby -# D NSg/J NPr +> The Great Gatsby +# HeadingStart D NSg/J NPr > # > BY F. SCOTT FITZGERALD # NSg/J/P ? NPr+ NPr > # -> CHAPTER I -# NSg/VB+ ISg/#r+ +> CHAPTER I +# HeadingStart NSg/VB+ ISg/#r+ > # > In my younger and more vulnerable years my father gave me some advice that I’ve @@ -1296,8 +1296,8 @@ # NPr/J/R/P D VB/J Nᴹ+ . > # -> CHAPTER II -# NSg/VB+ #r +> CHAPTER II +# HeadingStart NSg/VB+ #r > # > About half way between West Egg and New York the motor road hastily joins the @@ -2310,8 +2310,8 @@ # R NSg/VB+ . > # -> CHAPTER III -# NSg/VB+ #r +> CHAPTER III +# HeadingStart NSg/VB+ #r > # > There was music from my neighbor’s house through the summer nights . In his blue @@ -3656,8 +3656,8 @@ # NSg/I/VB+ . 
ISg/#r+ NPr/VB/J NSg/I/J P D+ NSg/I/Dq+ VB/JS+ NPl/VB+ NSg/I/C/Ddem+ ISg/#r+ NSg/VXB J/R VPp/J . > # -> CHAPTER IV -# NSg/VB+ NSg/J/#r+ +> CHAPTER IV +# HeadingStart NSg/VB+ NSg/J/#r+ > # > On Sunday morning while church bells rang in the villages alongshore , the world @@ -4944,8 +4944,8 @@ # NSg/VB+ . > # -> CHAPTER V -# NSg/VB+ NSg/P/#r+ +> CHAPTER V +# HeadingStart NSg/VB+ NSg/P/#r+ > # > When I came home to West Egg that night I was afraid for a moment that my house @@ -6002,8 +6002,8 @@ # NSg/VB/J+ NPl/V3+ P D+ N🅪Sg/VB+ . Nᴹ/Vg/J NSg/IPl+ R+ J . > # -> CHAPTER VI -# NSg/VB+ NPr/#r +> CHAPTER VI +# HeadingStart NSg/VB+ NPr/#r > # > About this time an ambitious young reporter from New York arrived one morning at @@ -6982,8 +6982,8 @@ # VP/J VPt ? NSg/J . > # -> CHAPTER VII -# NSg/VB+ NSg/#r +> CHAPTER VII +# HeadingStart NSg/VB+ NSg/#r > # > It was when curiosity about Gatsby was at its highest that the lights in his @@ -9466,8 +9466,8 @@ # VB/J VB/C NPr/VB/J ISg+ Nᴹ/Vg/J R NPr/J/R/P D+ N🅪Sg/VB+ . Nᴹ/Vg/J NSg/J/P NSg/I/J+ . > # -> CHAPTER VIII -# NSg/VB+ #r +> CHAPTER VIII +# HeadingStart NSg/VB+ #r > # > I couldn’t sleep all night ; a fog - horn was groaning incessantly on the Sound , @@ -10448,8 +10448,8 @@ # NSg$ NSg/VB+ D/P NPr/I/J/Dq NSg/J+ NSg/VB/J/P NPr/J/R/P D NPr🅪Sg/VB+ . VB/C D NPr/VB+ VPt NSg/VB/J . > # -> CHAPTER IX -# NSg/VB+ #r +> CHAPTER IX +# HeadingStart NSg/VB+ #r > # > After two years I remember the rest of that day , and that night and the next diff --git a/harper-core/tests/text/tagged/this and that.md b/harper-core/tests/text/tagged/this and that.md index 4fc84069..b3aa54df 100644 --- a/harper-core/tests/text/tagged/this and that.md +++ b/harper-core/tests/text/tagged/this and that.md @@ -10,8 +10,8 @@ # . I/Ddem . NPr/J/R/P NSg/J NPl/V3 P D/P NPr/VB P NSg/VB+ NPl/V3+ . > # -> Examples -# NPl/V3+ +> Examples +# HeadingStart NPl/V3+ > # > This triangle is nice . 
diff --git a/harper-wasm/src/lib.rs b/harper-wasm/src/lib.rs index a630695c..8c6949e3 100644 --- a/harper-wasm/src/lib.rs +++ b/harper-wasm/src/lib.rs @@ -7,7 +7,7 @@ use std::sync::Arc; use harper_core::DialectFlags; use harper_core::language_detection::is_doc_likely_english; use harper_core::linting::{LintGroup, Linter as _}; -use harper_core::parsers::{IsolateEnglish, Markdown, Parser, PlainEnglish}; +use harper_core::parsers::{IsolateEnglish, Markdown, OopsAllHeadings, Parser, PlainEnglish}; use harper_core::remove_overlaps_map; use harper_core::{ CharString, DictWordMetadata, Document, IgnoredLints, LintContext, Lrc, remove_overlaps, @@ -253,11 +253,20 @@ impl Linter { ctx.default_hash() } - pub fn organized_lints(&mut self, text: String, language: Language) -> Vec { + pub fn organized_lints( + &mut self, + text: String, + language: Language, + all_headings: bool, + ) -> Vec { let source: Vec<_> = text.chars().collect(); let source = Lrc::new(source); - let parser = language.create_parser(); + let mut parser = language.create_parser(); + + if all_headings { + parser = Box::new(OopsAllHeadings::new(parser)); + } let document = Document::new_from_vec(source.clone(), &parser, &self.dictionary); @@ -292,11 +301,15 @@ impl Linter { } /// Perform the configured linting on the provided text. 
- pub fn lint(&mut self, text: String, language: Language) -> Vec { + pub fn lint(&mut self, text: String, language: Language, all_headings: bool) -> Vec { let source: Vec<_> = text.chars().collect(); let source = Lrc::new(source); - let parser = language.create_parser(); + let mut parser = language.create_parser(); + + if all_headings { + parser = Box::new(OopsAllHeadings::new(parser)); + } let document = Document::new_from_vec(source.clone(), &parser, &self.dictionary); @@ -641,7 +654,7 @@ mod tests { linter.import_words(vec![text.clone()]); dbg!(linter.dictionary.get_word_metadata_str(&text)); - let lints = linter.lint(text, Language::Plain); + let lints = linter.lint(text, Language::Plain, false); assert!(lints.is_empty()); } } diff --git a/packages/chrome-plugin/src/ProtocolClient.ts b/packages/chrome-plugin/src/ProtocolClient.ts index ae54b464..d038d4b0 100644 --- a/packages/chrome-plugin/src/ProtocolClient.ts +++ b/packages/chrome-plugin/src/ProtocolClient.ts @@ -1,4 +1,4 @@ -import type { Dialect, LintConfig } from 'harper.js'; +import type { Dialect, LintConfig, LintOptions } from 'harper.js'; import type { UnpackedLintGroups } from 'lint-framework'; import { LRUCache } from 'lru-cache'; import type { ActivationKey } from './protocol'; @@ -9,16 +9,20 @@ export default class ProtocolClient { ttl: 5_000, }); - private static cacheKey(text: string, domain: string): string { - return `${domain}:${text}`; + private static cacheKey(text: string, domain: string, options?: LintOptions): string { + return `${domain}:${text}:${options?.forceAllHeadings ?? ''}:${options?.language ?? 
''}`; } - public static async lint(text: string, domain: string): Promise { - const key = this.cacheKey(text, domain); + public static async lint( + text: string, + domain: string, + options?: LintOptions, + ): Promise { + const key = this.cacheKey(text, domain, options); let p = this.lintCache.get(key); if (!p) { p = chrome.runtime - .sendMessage({ kind: 'lint', text, domain }) + .sendMessage({ kind: 'lint', text, domain, options }) .then((r) => r.lints as UnpackedLintGroups); this.lintCache.set(key, p); } diff --git a/packages/chrome-plugin/src/background/index.ts b/packages/chrome-plugin/src/background/index.ts index bf31801a..25f58713 100644 --- a/packages/chrome-plugin/src/background/index.ts +++ b/packages/chrome-plugin/src/background/index.ts @@ -162,7 +162,7 @@ async function handleLint(req: LintRequest): Promise { return { kind: 'lints', lints: {} }; } - const grouped = await linter.organizedLints(req.text); + const grouped = await linter.organizedLints(req.text, req.options); const unpackedEntries = await Promise.all( Object.entries(grouped).map(async ([source, lints]) => { const unpacked = await Promise.all(lints.map((lint) => unpackLint(req.text, lint, linter))); diff --git a/packages/chrome-plugin/src/contentScript/index.ts b/packages/chrome-plugin/src/contentScript/index.ts index 91f7e568..41b1807b 100644 --- a/packages/chrome-plugin/src/contentScript/index.ts +++ b/packages/chrome-plugin/src/contentScript/index.ts @@ -13,22 +13,25 @@ if (isWordPress()) { ProtocolClient.setDomainEnabled(window.location.hostname, true, false); } -const fw = new LintFramework((text, domain) => ProtocolClient.lint(text, domain), { - ignoreLint: (hash) => ProtocolClient.ignoreHash(hash), - getActivationKey: () => ProtocolClient.getActivationKey(), - openOptions: () => ProtocolClient.openOptions(), - addToUserDictionary: (words) => ProtocolClient.addToUserDictionary(words), - reportError: (lint: UnpackedLint, ruleId: string) => - ProtocolClient.openReportError( - 
padWithContext(lint.source, lint.span.start, lint.span.end, 15), - ruleId, - '', - ), - setRuleEnabled: async (ruleId, enabled) => { - await ProtocolClient.setRuleEnabled(ruleId, enabled); - fw.update(); +const fw = new LintFramework( + (text, domain, options) => ProtocolClient.lint(text, domain, options), + { + ignoreLint: (hash) => ProtocolClient.ignoreHash(hash), + getActivationKey: () => ProtocolClient.getActivationKey(), + openOptions: () => ProtocolClient.openOptions(), + addToUserDictionary: (words) => ProtocolClient.addToUserDictionary(words), + reportError: (lint: UnpackedLint, ruleId: string) => + ProtocolClient.openReportError( + padWithContext(lint.source, lint.span.start, lint.span.end, 15), + ruleId, + '', + ), + setRuleEnabled: async (ruleId, enabled) => { + await ProtocolClient.setRuleEnabled(ruleId, enabled); + fw.update(); + }, }, -}); +); function padWithContext(source: string, start: number, end: number, contextLength: number): string { const normalizedStart = Math.max(0, Math.min(start, source.length)); @@ -40,7 +43,7 @@ function padWithContext(source: string, start: number, end: number, contextLengt } const keepAliveCallback = () => { - ProtocolClient.lint('', 'example.com'); + ProtocolClient.lint('', 'example.com', {}); setTimeout(keepAliveCallback, 400); }; diff --git a/packages/chrome-plugin/src/protocol.ts b/packages/chrome-plugin/src/protocol.ts index 741b5fb3..e0be9a77 100644 --- a/packages/chrome-plugin/src/protocol.ts +++ b/packages/chrome-plugin/src/protocol.ts @@ -1,4 +1,4 @@ -import type { Dialect, LintConfig } from 'harper.js'; +import type { Dialect, LintConfig, LintOptions } from 'harper.js'; import type { UnpackedLintGroups } from 'lint-framework'; export type Request = @@ -40,6 +40,7 @@ export type LintRequest = { kind: 'lint'; domain: string; text: string; + options: LintOptions; }; export type LintResponse = { diff --git a/packages/harper.js/src/Linter.test.ts b/packages/harper.js/src/Linter.test.ts index 977ab6ac..7835b369 
100644 --- a/packages/harper.js/src/Linter.test.ts +++ b/packages/harper.js/src/Linter.test.ts @@ -347,6 +347,34 @@ for (const [linterName, Linter] of Object.entries(linters)) { expect(text.slice(span.start, span.end)).toBe('sdssda'); }); + + test(`${linterName} lints headings when forced to mark them as such`, async () => { + const text = 'This sentences should be forced to title case.'; + + const linter = new LocalLinter({ binary }); + const lints = await linter.lint(text, { forceAllHeadings: true }); + + expect(lints.length).toBe(1); + + const lint = lints[0]; + expect(lint.lint_kind()).toBe('Capitalization'); + expect(lint.get_problem_text()).toBe(text); + }); + + test(`${linterName} lints headings when forced to mark them as such with organized mode`, async () => { + const text = 'This sentences should be forced to title case.'; + + const linter = new LocalLinter({ binary }); + const lints = await linter.organizedLints(text, { forceAllHeadings: true }); + + const titleCaseLints = lints.UseTitleCase; + expect(titleCaseLints).not.toBeUndefined(); + expect(titleCaseLints.length).toBe(1); + + const lint = titleCaseLints[0]; + expect(lint.lint_kind()).toBe('Capitalization'); + expect(lint.get_problem_text()).toBe(text); + }); } test('Linters have the same config format', async () => { diff --git a/packages/harper.js/src/LocalLinter.ts b/packages/harper.js/src/LocalLinter.ts index c2356732..c2c274c8 100644 --- a/packages/harper.js/src/LocalLinter.ts +++ b/packages/harper.js/src/LocalLinter.ts @@ -35,7 +35,7 @@ export default class LocalLinter implements Linter { async lint(text: string, options?: LintOptions): Promise { const inner = await this.inner; const language = options?.language === 'plaintext' ? Language.Plain : Language.Markdown; - const lints = inner.lint(text, language); + const lints = inner.lint(text, language, options?.forceAllHeadings ?? 
false); return lints; } @@ -43,7 +43,7 @@ export default class LocalLinter implements Linter { async organizedLints(text: string, options?: LintOptions): Promise> { const inner = await this.inner; const language = options?.language === 'plaintext' ? Language.Plain : Language.Markdown; - const lintGroups = inner.organized_lints(text, language); + const lintGroups = inner.organized_lints(text, language, options?.forceAllHeadings ?? false); const output: Record = {}; diff --git a/packages/harper.js/src/main.ts b/packages/harper.js/src/main.ts index 276fd330..1878b36f 100644 --- a/packages/harper.js/src/main.ts +++ b/packages/harper.js/src/main.ts @@ -17,4 +17,7 @@ export type LintConfig = Record; export interface LintOptions { /** The markup language that is being passed. Defaults to `markdown`. */ language?: 'plaintext' | 'markdown'; + + /** Force the entirety of the document to be composed of headings. An undefined value is assumed to be false.*/ + forceAllHeadings?: boolean; } diff --git a/packages/lint-framework/src/lint/LintFramework.ts b/packages/lint-framework/src/lint/LintFramework.ts index 995f05a4..315a3526 100644 --- a/packages/lint-framework/src/lint/LintFramework.ts +++ b/packages/lint-framework/src/lint/LintFramework.ts @@ -1,6 +1,7 @@ +import type { LintOptions } from 'harper.js'; import type { IgnorableLintBox } from './Box'; import computeLintBoxes from './computeLintBoxes'; -import { isVisible } from './domUtils'; +import { isHeading, isVisible } from './domUtils'; import Highlights from './Highlights'; import PopupHandler from './PopupHandler'; import type { UnpackedLint, UnpackedLintGroups } from './unpackLint'; @@ -27,7 +28,11 @@ export default class LintFramework { private updateEventCallback: () => void; /** Function used to fetch lints for a given text/domain. 
*/ - private lintProvider: (text: string, domain: string) => Promise; + private lintProvider: ( + text: string, + domain: string, + options?: LintOptions, + ) => Promise; /** Actions wired by host environment (extension/app). */ private actions: { ignoreLint?: (hash: string) => Promise; @@ -39,7 +44,11 @@ export default class LintFramework { }; constructor( - lintProvider: (text: string, domain: string) => Promise, + lintProvider: ( + text: string, + domain: string, + options?: LintOptions, + ) => Promise, actions: { ignoreLint?: (hash: string) => Promise; getActivationKey?: () => Promise; @@ -120,7 +129,9 @@ export default class LintFramework { return { target: null as HTMLElement | null, lints: {} }; } - const lintsBySource = await this.lintProvider(text, window.location.hostname); + const lintsBySource = await this.lintProvider(text, window.location.hostname, { + forceAllHeadings: isHeading(target), + }); return { target: target as HTMLElement, lints: lintsBySource }; }), ); diff --git a/packages/lint-framework/src/lint/domUtils.ts b/packages/lint-framework/src/lint/domUtils.ts index bbe61e9b..3214e639 100644 --- a/packages/lint-framework/src/lint/domUtils.ts +++ b/packages/lint-framework/src/lint/domUtils.ts @@ -112,6 +112,17 @@ export function getRangeForTextSpan(target: Element, span: Span): Range | null { const sharedRange: Range | null = typeof document !== 'undefined' ? document.createRange() : null; +/** Check if a node represents a heading (native heading tags or role="heading"). */ +export function isHeading(node: Node): boolean { + if (!(node instanceof Element)) return false; + + const tag = node.tagName.toLowerCase(); + if (/^h[1-6]$/.test(tag)) return true; + + const role = node.getAttribute('role'); + return role?.toLowerCase() === 'heading'; +} + /** Check if an element is visible to the user. * * It is coarse and meant for performance improvements, not precision.*/