mirror of
https://github.com/Automattic/harper.git
synced 2025-08-04 18:48:02 +00:00
Added long_sentences
and changed linter API
This commit is contained in:
parent
8f9bcbfecd
commit
c9227e2faa
17 changed files with 162 additions and 30 deletions
7
Cargo.lock
generated
7
Cargo.lock
generated
|
@ -619,6 +619,7 @@ dependencies = [
|
|||
"is-macro",
|
||||
"itertools 0.11.0",
|
||||
"once_cell",
|
||||
"paste",
|
||||
"pulldown-cmark",
|
||||
"serde",
|
||||
"smallvec",
|
||||
|
@ -987,6 +988,12 @@ dependencies = [
|
|||
"windows-targets 0.48.5",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "paste"
|
||||
version = "1.0.14"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "de3145af08024dea9fa9914f381a17b8fc6034dfb00f3a84013f7ff43f29ed4c"
|
||||
|
||||
[[package]]
|
||||
name = "percent-encoding"
|
||||
version = "2.3.1"
|
||||
|
|
|
@ -10727,6 +10727,7 @@ Zest/M
|
|||
Zeus/M
|
||||
Maia/M
|
||||
Semele/M
|
||||
Chiron/M
|
||||
Katniss/M
|
||||
Everdeen/M
|
||||
Leto/M
|
||||
|
|
|
@ -8,6 +8,7 @@ ahash = "0.8.7"
|
|||
is-macro = "0.3.0"
|
||||
itertools = "0.11.0"
|
||||
once_cell = "1.19.0"
|
||||
paste = "1.0.14"
|
||||
pulldown-cmark = "0.9.3"
|
||||
serde = { version = "1.0.190", features = ["derive"] }
|
||||
smallvec = "1.12.0"
|
||||
|
|
|
@ -4,10 +4,11 @@ use itertools::Itertools;
|
|||
|
||||
use crate::{
|
||||
lex_to_end,
|
||||
linting::Suggestion,
|
||||
linting::{LintSet, Suggestion},
|
||||
parsing::lex_to_end_md,
|
||||
run_lint_set,
|
||||
span::Span,
|
||||
FatToken,
|
||||
Dictionary, FatToken, Lint,
|
||||
Punctuation::{self},
|
||||
Token, TokenKind,
|
||||
};
|
||||
|
@ -48,6 +49,10 @@ impl Document {
|
|||
self.match_quotes();
|
||||
}
|
||||
|
||||
/// Runs the linters contained in `lint_set` against this document.
///
/// This is a convenience method that forwards to the free function
/// `run_lint_set`, passing `self` as the document.
///
/// Returns whatever lints that function produces for the given
/// `dictionary`.
pub fn run_lint_set(&self, lint_set: &LintSet, dictionary: &Dictionary) -> Vec<Lint> {
    run_lint_set(lint_set, self, dictionary)
}
|
||||
|
||||
pub fn iter_quote_indices(&self) -> impl Iterator<Item = usize> + '_ {
|
||||
self.tokens.iter().enumerate().filter_map(|(idx, token)| {
|
||||
if let TokenKind::Punctuation(Punctuation::Quote(_)) = &token.kind {
|
||||
|
|
|
@ -7,7 +7,8 @@ mod span;
|
|||
mod spell;
|
||||
|
||||
pub use document::Document;
|
||||
pub use linting::all_linters;
|
||||
pub use linting::run_lint_set;
|
||||
pub use linting::LintSet;
|
||||
pub use linting::{Lint, LintKind, Suggestion};
|
||||
pub use parsing::{lex_to_end, lex_to_end_str};
|
||||
pub use parsing::{FatToken, Punctuation, Token, TokenKind};
|
||||
|
|
|
@ -5,7 +5,7 @@ use serde::{Deserialize, Serialize};
|
|||
|
||||
use crate::{document::Document, span::Span, Dictionary};
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
|
||||
pub struct Lint {
|
||||
pub span: Span,
|
||||
pub lint_kind: LintKind,
|
||||
|
@ -13,13 +13,16 @@ pub struct Lint {
|
|||
pub message: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, Serialize, Deserialize, Is)]
|
||||
#[derive(Debug, Clone, Copy, Serialize, Deserialize, Is, Default)]
|
||||
pub enum LintKind {
|
||||
Spelling,
|
||||
Capitalization,
|
||||
UnmatchedQuote,
|
||||
WrongQuotes,
|
||||
Repetition,
|
||||
Readability,
|
||||
#[default]
|
||||
Miscellaneous,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, Is)]
|
||||
|
|
80
harper-core/src/linting/lint_set.rs
Normal file
80
harper-core/src/linting/lint_set.rs
Normal file
|
@ -0,0 +1,80 @@
|
|||
use super::{
|
||||
lint::Linter, long_sentences, repeated_words, sentence_capitalization, spell_check,
|
||||
unclosed_quotes, wrong_quotes,
|
||||
};
|
||||
use paste::paste;
|
||||
|
||||
use super::{
|
||||
long_sentences::long_sentences, repeated_words::repeated_words,
|
||||
sentence_capitalization::sentence_capitalization, spell_check::spell_check,
|
||||
unclosed_quotes::unclosed_quotes, wrong_quotes::wrong_quotes,
|
||||
};
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct LintSet {
|
||||
pub(super) linters: Vec<Linter>,
|
||||
}
|
||||
|
||||
impl LintSet {
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
linters: Vec::new(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for LintSet {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
.with_spell_check()
|
||||
.with_repeated_words()
|
||||
.with_long_sentences()
|
||||
.with_unclosed_quotes()
|
||||
.with_sentence_capitalization()
|
||||
}
|
||||
}
|
||||
|
||||
/// Generates the builder API of [`LintSet`] from a list of linter functions.
///
/// For the given comma-separated identifiers, this expands to an
/// `impl LintSet` block containing:
///
/// * `add_all`, which appends every listed linter (in the listed order),
/// * `add_<name>` for each linter, which mutates the set in place and
///   returns `&mut Self` so calls can be chained,
/// * `with_<name>` for each linter, which takes the set by value and
///   returns it with the linter appended.
///
/// Each identifier must name a linter function that is in scope at the
/// invocation site.
macro_rules! create_builder {
    ($($linter:ident),*) => {
        impl LintSet {
            /// Modifies self, adding every known linter to the set.
            pub fn add_all(&mut self) -> &mut Self {
                $(
                    self.linters.push($linter);
                )*

                self
            }

            // `paste!` is needed to glue the `add_` / `with_` prefixes onto
            // each linter name and to splice the name into the doc strings.
            paste! {
                $(
                    #[doc = "Modifies self, adding the `" $linter "` linter to the set."]
                    pub fn [<add_$linter>](&mut self) -> &mut Self {
                        self.linters.push($linter);
                        self
                    }

                    #[doc = "Consumes self, adding the `" $linter "` linter to the set."]
                    pub fn [<with_$linter>](mut self) -> Self {
                        self.linters.push($linter);
                        self
                    }
                )*
            }
        }
    };
}
|
||||
|
||||
// Generate `add_*` / `with_*` builders for every linter.
// The order here is the order in which `add_all` inserts them.
create_builder!(
    spell_check,
    sentence_capitalization,
    unclosed_quotes,
    wrong_quotes,
    repeated_words,
    long_sentences
);
|
21
harper-core/src/linting/long_sentences.rs
Normal file
21
harper-core/src/linting/long_sentences.rs
Normal file
|
@ -0,0 +1,21 @@
|
|||
use crate::{parsing::TokenStringExt, Dictionary, Document, Lint, LintKind, Span};
|
||||
|
||||
/// Detect and warn that the sentence is too long.
|
||||
pub fn long_sentences(document: &Document, _dictionary: &Dictionary) -> Vec<Lint> {
|
||||
let mut output = Vec::new();
|
||||
|
||||
for sentence in document.sentences() {
|
||||
let word_count = sentence.iter_words().count();
|
||||
|
||||
if word_count > 40 {
|
||||
output.push(Lint {
|
||||
span: Span::new(sentence[0].span.start, sentence.last().unwrap().span.end),
|
||||
lint_kind: LintKind::Readability,
|
||||
message: format!("This sentence is {} words long.", word_count),
|
||||
..Default::default()
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
output
|
||||
}
|
|
@ -1,4 +1,6 @@
|
|||
mod lint;
|
||||
mod lint_set;
|
||||
mod long_sentences;
|
||||
mod repeated_words;
|
||||
mod sentence_capitalization;
|
||||
mod spell_check;
|
||||
|
@ -6,23 +8,14 @@ mod unclosed_quotes;
|
|||
mod wrong_quotes;
|
||||
|
||||
pub use lint::{Lint, LintKind, Suggestion};
|
||||
pub use lint_set::LintSet;
|
||||
|
||||
use crate::{Dictionary, Document};
|
||||
|
||||
use self::lint::Linter;
|
||||
|
||||
pub fn all_linters(document: &Document, dictionary: &Dictionary) -> Vec<Lint> {
|
||||
pub fn run_lint_set(lint_set: &LintSet, document: &Document, dictionary: &Dictionary) -> Vec<Lint> {
|
||||
let mut lints = Vec::new();
|
||||
|
||||
let linters: [Linter; 5] = [
|
||||
spell_check::spell_check,
|
||||
sentence_capitalization::sentence_capitalization_lint,
|
||||
unclosed_quotes::unclosed_quotes,
|
||||
wrong_quotes::wrong_quotes,
|
||||
repeated_words::repeated_words_lint,
|
||||
];
|
||||
|
||||
for linter in linters {
|
||||
for linter in &lint_set.linters {
|
||||
lints.append(&mut linter(document, dictionary));
|
||||
}
|
||||
|
||||
|
|
|
@ -6,7 +6,7 @@ use crate::{
|
|||
};
|
||||
|
||||
/// A linter that checks for accidentally repeated words (e.g. "the the").
|
||||
pub fn repeated_words_lint(document: &Document, _dictionary: &Dictionary) -> Vec<Lint> {
|
||||
pub fn repeated_words(document: &Document, _dictionary: &Dictionary) -> Vec<Lint> {
|
||||
let mut lints = Vec::new();
|
||||
let set = create_match_set();
|
||||
|
||||
|
@ -49,6 +49,7 @@ pub fn repeated_words_lint(document: &Document, _dictionary: &Dictionary) -> Vec
|
|||
lints
|
||||
}
|
||||
|
||||
/// The set of words that can be considered for repetition checking.
|
||||
fn create_match_set() -> HashSet<Vec<char>> {
|
||||
let mut output = HashSet::default();
|
||||
|
||||
|
@ -56,20 +57,41 @@ fn create_match_set() -> HashSet<Vec<char>> {
|
|||
output.insert(vec!['T', 'h', 'e']);
|
||||
output.insert(vec!['a']);
|
||||
output.insert(vec!['A']);
|
||||
output.insert(vec!['a', 'n']);
|
||||
output.insert(vec!['A', 'n']);
|
||||
output.insert(vec!['i', 's']);
|
||||
output.insert(vec!['I', 's']);
|
||||
output.insert(vec!['w', 'i', 'l', 'l']);
|
||||
output.insert(vec!['W', 'i', 'l', 'l']);
|
||||
output.insert(vec!['l', 'i', 'k', 'e']);
|
||||
output.insert(vec!['L', 'i', 'k', 'e']);
|
||||
output.insert(vec!['t', 'h', 'a', 't']);
|
||||
output.insert(vec!['T', 'h', 'a', 't']);
|
||||
output.insert(vec!['w', 'h', 'a', 't']);
|
||||
output.insert(vec!['W', 'h', 'a', 't']);
|
||||
output.insert(vec!['w', 'h', 'i', 'c', 'h']);
|
||||
output.insert(vec!['W', 'h', 'i', 'c', 'h']);
|
||||
output.insert(vec!['b', 'e']);
|
||||
output.insert(vec!['B', 'e']);
|
||||
output.insert(vec!['a', 'n', 'd']);
|
||||
output.insert(vec!['A', 'n', 'd']);
|
||||
output.insert(vec!['I']);
|
||||
output.insert(vec!['a', 't']);
|
||||
output.insert(vec!['A', 't']);
|
||||
|
||||
output
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::repeated_words_lint;
|
||||
use super::repeated_words;
|
||||
use crate::{Dictionary, Document};
|
||||
|
||||
#[test]
|
||||
fn catches_basic() {
|
||||
let dictionary = Dictionary::new();
|
||||
let test = Document::new("I wanted the the banana.", false);
|
||||
let lints = repeated_words_lint(&test, dictionary);
|
||||
let lints = repeated_words(&test, dictionary);
|
||||
assert!(lints.len() == 1);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -5,7 +5,7 @@ use crate::{document::Document, parsing::TokenStringExt, Dictionary, Lint, LintK
|
|||
use super::lint::Suggestion;
|
||||
|
||||
/// A linter that checks to make sure the first word of each sentence is capitalized.
|
||||
pub fn sentence_capitalization_lint(document: &Document, _dictionary: &Dictionary) -> Vec<Lint> {
|
||||
pub fn sentence_capitalization(document: &Document, _dictionary: &Dictionary) -> Vec<Lint> {
|
||||
let mut lints = Vec::new();
|
||||
|
||||
for sentence in document.sentences() {
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
use serde::{Deserialize, Serialize};
|
||||
|
||||
/// A window in a [char].
|
||||
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
|
||||
#[derive(Debug, Clone, Copy, Serialize, Deserialize, Default)]
|
||||
pub struct Span {
|
||||
pub start: usize,
|
||||
pub end: usize,
|
||||
|
|
|
@ -2,7 +2,6 @@ use std::hash::Hasher;
|
|||
|
||||
use ahash::{AHashSet, AHasher};
|
||||
use once_cell::sync::Lazy;
|
||||
use smallvec::SmallVec;
|
||||
|
||||
use super::hunspell::{parse_default_attribute_list, parse_default_word_list};
|
||||
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
use cached::proc_macro::cached;
|
||||
use harper_core::{all_linters, Dictionary, Document, Lint, Span, Suggestion};
|
||||
use harper_core::{Dictionary, Document, Lint, LintSet, Span, Suggestion};
|
||||
use std::collections::HashMap;
|
||||
use std::fs::read;
|
||||
use tower_lsp::jsonrpc::{ErrorCode, Result};
|
||||
|
@ -80,7 +80,7 @@ fn open_url(url: &Url) -> Result<String> {
|
|||
fn lint_string(text: String) -> Vec<Lint> {
|
||||
let document = Document::new(&text, true);
|
||||
let dictionary = Dictionary::new();
|
||||
all_linters(&document, dictionary)
|
||||
document.run_lint_set(&LintSet::default(), dictionary)
|
||||
}
|
||||
|
||||
fn lint_to_diagnostic(lint: Lint, source: &[char]) -> Diagnostic {
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
#![allow(dead_code)]
|
||||
|
||||
use harper_core::{all_linters, Dictionary, Document, FatToken, Lint, Span, Suggestion};
|
||||
use harper_core::{Dictionary, Document, FatToken, Lint, LintSet, Span, Suggestion};
|
||||
use std::net::SocketAddr;
|
||||
use tokio::time::Instant;
|
||||
use tracing::{info, Level};
|
||||
|
@ -92,8 +92,7 @@ async fn lint(Json(payload): Json<LintRequest>) -> (StatusCode, Json<LintRespons
|
|||
|
||||
let dictionary = Dictionary::new();
|
||||
let document = Document::new(&text, true);
|
||||
|
||||
let lints = all_linters(&document, dictionary);
|
||||
let lints = document.run_lint_set(&LintSet::default(), dictionary);
|
||||
|
||||
(StatusCode::ACCEPTED, Json(LintResponse { lints }))
|
||||
}
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
use harper_core::{all_linters, Dictionary, Document};
|
||||
use harper_core::{Dictionary, Document, LintSet};
|
||||
use serde::Serialize;
|
||||
use wasm_bindgen::{prelude::wasm_bindgen, JsValue};
|
||||
|
||||
|
@ -22,7 +22,7 @@ pub fn lint(text: String) -> Vec<JsValue> {
|
|||
let dictionary = Dictionary::new();
|
||||
let document = Document::new(&text, true);
|
||||
|
||||
let lints = all_linters(&document, dictionary);
|
||||
let lints = document.run_lint_set(&LintSet::default(), dictionary);
|
||||
|
||||
lints
|
||||
.into_iter()
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue