Added long_sentences and changed linter API

Elijah Potter 2024-01-20 19:43:59 -07:00
parent 8f9bcbfecd
commit c9227e2faa
17 changed files with 162 additions and 30 deletions
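
In short: the individual linting functions are now collected in a LintSet, and Document gains a run_lint_set method that replaces the old all_linters entry point. A minimal sketch of the new call pattern, mirroring the harper-ls call site updated below:

    use harper_core::{Dictionary, Document, Lint, LintSet};

    fn lint_string(text: String) -> Vec<Lint> {
        let document = Document::new(&text, true);
        let dictionary = Dictionary::new();
        // LintSet::default() bundles spell_check, repeated_words, long_sentences,
        // unclosed_quotes, and sentence_capitalization.
        document.run_lint_set(&LintSet::default(), dictionary)
    }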

Cargo.lock (generated)

@@ -619,6 +619,7 @@ dependencies = [
"is-macro",
"itertools 0.11.0",
"once_cell",
"paste",
"pulldown-cmark",
"serde",
"smallvec",
@@ -987,6 +988,12 @@ dependencies = [
"windows-targets 0.48.5",
]
[[package]]
name = "paste"
version = "1.0.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "de3145af08024dea9fa9914f381a17b8fc6034dfb00f3a84013f7ff43f29ed4c"
[[package]]
name = "percent-encoding"
version = "2.3.1"


@@ -10727,6 +10727,7 @@ Zest/M
Zeus/M
Maia/M
Semele/M
Chiron/M
Katniss/M
Everdeen/M
Leto/M


@@ -8,6 +8,7 @@ ahash = "0.8.7"
is-macro = "0.3.0"
itertools = "0.11.0"
once_cell = "1.19.0"
paste = "1.0.14"
pulldown-cmark = "0.9.3"
serde = { version = "1.0.190", features = ["derive"] }
smallvec = "1.12.0"


@@ -4,10 +4,11 @@ use itertools::Itertools;
use crate::{
lex_to_end,
linting::Suggestion,
linting::{LintSet, Suggestion},
parsing::lex_to_end_md,
run_lint_set,
span::Span,
FatToken,
Dictionary, FatToken, Lint,
Punctuation::{self},
Token, TokenKind,
};
@@ -48,6 +49,10 @@ impl Document {
self.match_quotes();
}
pub fn run_lint_set(&self, lint_set: &LintSet, dictionary: &Dictionary) -> Vec<Lint> {
run_lint_set(lint_set, self, dictionary)
}
pub fn iter_quote_indices(&self) -> impl Iterator<Item = usize> + '_ {
self.tokens.iter().enumerate().filter_map(|(idx, token)| {
if let TokenKind::Punctuation(Punctuation::Quote(_)) = &token.kind {


@@ -7,7 +7,8 @@ mod span;
mod spell;
pub use document::Document;
pub use linting::all_linters;
pub use linting::run_lint_set;
pub use linting::LintSet;
pub use linting::{Lint, LintKind, Suggestion};
pub use parsing::{lex_to_end, lex_to_end_str};
pub use parsing::{FatToken, Punctuation, Token, TokenKind};


@@ -5,7 +5,7 @@ use serde::{Deserialize, Serialize};
use crate::{document::Document, span::Span, Dictionary};
#[derive(Debug, Clone, Serialize, Deserialize)]
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct Lint {
pub span: Span,
pub lint_kind: LintKind,
@@ -13,13 +13,16 @@ pub struct Lint {
pub message: String,
}
#[derive(Debug, Clone, Copy, Serialize, Deserialize, Is)]
#[derive(Debug, Clone, Copy, Serialize, Deserialize, Is, Default)]
pub enum LintKind {
Spelling,
Capitalization,
UnmatchedQuote,
WrongQuotes,
Repetition,
Readability,
#[default]
Miscellaneous,
}
#[derive(Debug, Clone, Serialize, Deserialize, Is)]


@@ -0,0 +1,80 @@
use super::{
lint::Linter, long_sentences, repeated_words, sentence_capitalization, spell_check,
unclosed_quotes, wrong_quotes,
};
use paste::paste;
use super::{
long_sentences::long_sentences, repeated_words::repeated_words,
sentence_capitalization::sentence_capitalization, spell_check::spell_check,
unclosed_quotes::unclosed_quotes, wrong_quotes::wrong_quotes,
};
#[derive(Debug, Clone)]
pub struct LintSet {
pub(super) linters: Vec<Linter>,
}
impl LintSet {
pub fn new() -> Self {
Self {
linters: Vec::new(),
}
}
}
impl Default for LintSet {
fn default() -> Self {
Self::new()
.with_spell_check()
.with_repeated_words()
.with_long_sentences()
.with_unclosed_quotes()
.with_sentence_capitalization()
}
}
macro_rules! create_builder {
($($linter:ident),*) => {
impl LintSet {
pub fn add_all(&mut self) -> &mut Self {
self.linters.extend_from_slice(&[
$(
$linter
),*
]);
self
}
paste! {
$(
#[doc = "Modifies self, adding the `" $linter "` linter to the set."]
pub fn [<add_$linter>](&mut self) -> &mut Self{
self.linters.push($linter);
self
}
)*
}
paste! {
$(
#[doc = "Consumes self, adding the `" $linter "` linter to the set."]
pub fn [<with_$linter>](mut self) -> Self{
self.linters.push($linter);
self
}
)*
}
}
};
}
create_builder!(
spell_check,
sentence_capitalization,
unclosed_quotes,
wrong_quotes,
repeated_words,
long_sentences
);
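
For reference, the create_builder! macro above generates an add_<linter> and a with_<linter> method for each listed linter, so a custom set can be assembled without going through Default. A sketch of that builder usage (hypothetical example text; the set is run through the Document::run_lint_set method added in document.rs above):

    use harper_core::{Dictionary, Document, LintSet};

    fn main() {
        let dictionary = Dictionary::new();
        let document = Document::new("An example sentence.", true);

        // Only spell checking and the new long-sentence check.
        let set = LintSet::new().with_spell_check().with_long_sentences();

        let lints = document.run_lint_set(&set, dictionary);
        println!("found {} lints", lints.len());
    }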


@@ -0,0 +1,21 @@
use crate::{parsing::TokenStringExt, Dictionary, Document, Lint, LintKind, Span};
/// Detect and warn that the sentence is too long.
pub fn long_sentences(document: &Document, _dictionary: &Dictionary) -> Vec<Lint> {
let mut output = Vec::new();
for sentence in document.sentences() {
let word_count = sentence.iter_words().count();
if word_count > 40 {
output.push(Lint {
span: Span::new(sentence[0].span.start, sentence.last().unwrap().span.end),
lint_kind: LintKind::Readability,
message: format!("This sentence is {} words long.", word_count),
..Default::default()
})
}
}
output
}
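
A quick sanity check in the same style as the repeated_words test further down; this assumes the unpunctuated 45-word string is lexed as a single sentence, which puts it over the 40-word threshold:

    #[cfg(test)]
    mod tests {
        use super::long_sentences;
        use crate::{Dictionary, Document};

        #[test]
        fn flags_long_sentence() {
            let dictionary = Dictionary::new();
            let text = "word ".repeat(45);
            let test = Document::new(&text, false);
            let lints = long_sentences(&test, dictionary);
            assert!(lints.len() == 1);
        }
    }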


@@ -1,4 +1,6 @@
mod lint;
mod lint_set;
mod long_sentences;
mod repeated_words;
mod sentence_capitalization;
mod spell_check;
@@ -6,23 +8,14 @@ mod unclosed_quotes;
mod wrong_quotes;
pub use lint::{Lint, LintKind, Suggestion};
pub use lint_set::LintSet;
use crate::{Dictionary, Document};
use self::lint::Linter;
pub fn all_linters(document: &Document, dictionary: &Dictionary) -> Vec<Lint> {
pub fn run_lint_set(lint_set: &LintSet, document: &Document, dictionary: &Dictionary) -> Vec<Lint> {
let mut lints = Vec::new();
let linters: [Linter; 5] = [
spell_check::spell_check,
sentence_capitalization::sentence_capitalization_lint,
unclosed_quotes::unclosed_quotes,
wrong_quotes::wrong_quotes,
repeated_words::repeated_words_lint,
];
for linter in linters {
for linter in &lint_set.linters {
lints.append(&mut linter(document, dictionary));
}


@@ -6,7 +6,7 @@ use crate::{
};
/// A linter that checks for repeated words, e.g. "the the".
pub fn repeated_words_lint(document: &Document, _dictionary: &Dictionary) -> Vec<Lint> {
pub fn repeated_words(document: &Document, _dictionary: &Dictionary) -> Vec<Lint> {
let mut lints = Vec::new();
let set = create_match_set();
@@ -49,6 +49,7 @@ pub fn repeated_words_lint(document: &Document, _dictionary: &Dictionary) -> Vec
lints
}
/// The set of words that can be considered for repetition checking.
fn create_match_set() -> HashSet<Vec<char>> {
let mut output = HashSet::default();
@@ -56,20 +57,41 @@ fn create_match_set() -> HashSet<Vec<char>> {
output.insert(vec!['T', 'h', 'e']);
output.insert(vec!['a']);
output.insert(vec!['A']);
output.insert(vec!['a', 'n']);
output.insert(vec!['A', 'n']);
output.insert(vec!['i', 's']);
output.insert(vec!['I', 's']);
output.insert(vec!['w', 'i', 'l', 'l']);
output.insert(vec!['W', 'i', 'l', 'l']);
output.insert(vec!['l', 'i', 'k', 'e']);
output.insert(vec!['L', 'i', 'k', 'e']);
output.insert(vec!['t', 'h', 'a', 't']);
output.insert(vec!['T', 'h', 'a', 't']);
output.insert(vec!['w', 'h', 'a', 't']);
output.insert(vec!['W', 'h', 'a', 't']);
output.insert(vec!['w', 'h', 'i', 'c', 'h']);
output.insert(vec!['W', 'h', 'i', 'c', 'h']);
output.insert(vec!['b', 'e']);
output.insert(vec!['B', 'e']);
output.insert(vec!['a', 'n', 'd']);
output.insert(vec!['A', 'n', 'd']);
output.insert(vec!['I']);
output.insert(vec!['a', 't']);
output.insert(vec!['A', 't']);
output
}
#[cfg(test)]
mod tests {
use super::repeated_words_lint;
use super::repeated_words;
use crate::{Dictionary, Document};
#[test]
fn catches_basic() {
let dictionary = Dictionary::new();
let test = Document::new("I wanted the the banana.", false);
let lints = repeated_words_lint(&test, dictionary);
let lints = repeated_words(&test, dictionary);
assert!(lints.len() == 1);
}
}


@@ -5,7 +5,7 @@ use crate::{document::Document, parsing::TokenStringExt, Dictionary, Lint, LintK
use super::lint::Suggestion;
/// A linter that checks to make sure the first word of each sentence is capitalized.
pub fn sentence_capitalization_lint(document: &Document, _dictionary: &Dictionary) -> Vec<Lint> {
pub fn sentence_capitalization(document: &Document, _dictionary: &Dictionary) -> Vec<Lint> {
let mut lints = Vec::new();
for sentence in document.sentences() {


@@ -1,7 +1,7 @@
use serde::{Deserialize, Serialize};
/// A window in a [char].
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
#[derive(Debug, Clone, Copy, Serialize, Deserialize, Default)]
pub struct Span {
pub start: usize,
pub end: usize,


@@ -2,7 +2,6 @@ use std::hash::Hasher;
use ahash::{AHashSet, AHasher};
use once_cell::sync::Lazy;
use smallvec::SmallVec;
use super::hunspell::{parse_default_attribute_list, parse_default_word_list};


@@ -1,5 +1,5 @@
use cached::proc_macro::cached;
use harper_core::{all_linters, Dictionary, Document, Lint, Span, Suggestion};
use harper_core::{Dictionary, Document, Lint, LintSet, Span, Suggestion};
use std::collections::HashMap;
use std::fs::read;
use tower_lsp::jsonrpc::{ErrorCode, Result};
@@ -80,7 +80,7 @@ fn open_url(url: &Url) -> Result<String> {
fn lint_string(text: String) -> Vec<Lint> {
let document = Document::new(&text, true);
let dictionary = Dictionary::new();
all_linters(&document, dictionary)
document.run_lint_set(&LintSet::default(), dictionary)
}
fn lint_to_diagnostic(lint: Lint, source: &[char]) -> Diagnostic {


@@ -1,6 +1,6 @@
#![allow(dead_code)]
use harper_core::{all_linters, Dictionary, Document, FatToken, Lint, Span, Suggestion};
use harper_core::{Dictionary, Document, FatToken, Lint, LintSet, Span, Suggestion};
use std::net::SocketAddr;
use tokio::time::Instant;
use tracing::{info, Level};
@@ -92,8 +92,7 @@ async fn lint(Json(payload): Json<LintRequest>) -> (StatusCode, Json<LintRespons
let dictionary = Dictionary::new();
let document = Document::new(&text, true);
let lints = all_linters(&document, dictionary);
let lints = document.run_lint_set(&LintSet::default(), dictionary);
(StatusCode::ACCEPTED, Json(LintResponse { lints }))
}


@@ -1,4 +1,4 @@
use harper_core::{all_linters, Dictionary, Document};
use harper_core::{Dictionary, Document, LintSet};
use serde::Serialize;
use wasm_bindgen::{prelude::wasm_bindgen, JsValue};
@@ -22,7 +22,7 @@ pub fn lint(text: String) -> Vec<JsValue> {
let dictionary = Dictionary::new();
let document = Document::new(&text, true);
let lints = all_linters(&document, dictionary);
let lints = document.run_lint_set(&LintSet::default(), dictionary);
lints
.into_iter()