mirror of
https://github.com/Automattic/harper.git
synced 2025-07-07 21:15:01 +00:00
feat(core): added config for dialect and consume it from SpellCheck
This commit is contained in:
parent
6f54df6f69
commit
e00b554077
32 changed files with 292 additions and 87 deletions
21
Cargo.lock
generated
21
Cargo.lock
generated
|
@ -754,6 +754,8 @@ dependencies = [
|
|||
"serde",
|
||||
"serde_json",
|
||||
"smallvec",
|
||||
"strum",
|
||||
"strum_macros",
|
||||
"thiserror 2.0.12",
|
||||
"unicode-blocks",
|
||||
"unicode-script",
|
||||
|
@ -1714,6 +1716,25 @@ version = "0.11.1"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f"
|
||||
|
||||
[[package]]
|
||||
name = "strum"
|
||||
version = "0.27.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f64def088c51c9510a8579e3c5d67c65349dcf755e5479ad3d010aa6454e2c32"
|
||||
|
||||
[[package]]
|
||||
name = "strum_macros"
|
||||
version = "0.27.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c77a8c5abcaf0f9ce05d62342b7d298c346515365c36b673df4ebe3ced01fde8"
|
||||
dependencies = [
|
||||
"heck",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"rustversion",
|
||||
"syn 2.0.96",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "syn"
|
||||
version = "1.0.109"
|
||||
|
|
|
@ -11,7 +11,8 @@ use harper_comments::CommentParser;
|
|||
use harper_core::linting::{LintGroup, Linter};
|
||||
use harper_core::parsers::{Markdown, MarkdownOptions};
|
||||
use harper_core::{
|
||||
remove_overlaps, CharStringExt, Dictionary, Document, FstDictionary, TokenKind, TokenStringExt,
|
||||
remove_overlaps, CharStringExt, Dialect, Dictionary, Document, FstDictionary, TokenKind,
|
||||
TokenStringExt,
|
||||
};
|
||||
use harper_literate_haskell::LiterateHaskellParser;
|
||||
use hashbrown::HashMap;
|
||||
|
@ -33,6 +34,8 @@ enum Args {
|
|||
/// If omitted, `harper-cli` will run every rule.
|
||||
#[arg(short, long)]
|
||||
only_lint_with: Option<Vec<String>>,
|
||||
#[arg(short, long)]
|
||||
dialect: Dialect,
|
||||
},
|
||||
/// Parse a provided document and print the detected symbols.
|
||||
Parse {
|
||||
|
@ -70,10 +73,11 @@ fn main() -> anyhow::Result<()> {
|
|||
file,
|
||||
count,
|
||||
only_lint_with,
|
||||
dialect,
|
||||
} => {
|
||||
let (doc, source) = load_file(&file, markdown_options)?;
|
||||
|
||||
let mut linter = LintGroup::new_curated(dictionary);
|
||||
let mut linter = LintGroup::new_curated(dictionary, dialect);
|
||||
|
||||
if let Some(rules) = only_lint_with {
|
||||
linter.set_all_rules_to(Some(false));
|
||||
|
@ -201,7 +205,7 @@ fn main() -> anyhow::Result<()> {
|
|||
description: String,
|
||||
}
|
||||
|
||||
let linter = LintGroup::new_curated(dictionary);
|
||||
let linter = LintGroup::new_curated(dictionary, Dialect::American);
|
||||
|
||||
let default_config: HashMap<String, bool> =
|
||||
serde_json::from_str(&serde_json::to_string(&linter.config).unwrap()).unwrap();
|
||||
|
|
|
@ -3,7 +3,7 @@ use std::path::Path;
|
|||
use harper_comments::CommentParser;
|
||||
use harper_core::linting::{LintGroup, Linter};
|
||||
use harper_core::parsers::MarkdownOptions;
|
||||
use harper_core::{Document, FstDictionary};
|
||||
use harper_core::{Dialect, Document, FstDictionary};
|
||||
|
||||
/// Creates a unit test checking that the linting of a source file in
|
||||
/// `language_support_sources` produces the expected number of lints.
|
||||
|
@ -25,7 +25,7 @@ macro_rules! create_test {
|
|||
let dict = FstDictionary::curated();
|
||||
let document = Document::new(&source, &parser, &dict);
|
||||
|
||||
let mut linter = LintGroup::new_curated(dict);
|
||||
let mut linter = LintGroup::new_curated(dict, Dialect::American);
|
||||
let lints = linter.lint(&document);
|
||||
|
||||
dbg!(&lints);
|
||||
|
|
|
@ -28,6 +28,8 @@ levenshtein_automata = { version = "0.2.1", features = ["fst_automaton"] }
|
|||
cached = "0.55.1"
|
||||
lru = "0.13.0"
|
||||
foldhash = "0.1.4"
|
||||
strum_macros = "0.27.1"
|
||||
strum = "0.27.1"
|
||||
|
||||
[dev-dependencies]
|
||||
criterion = { version = "0.5.1", default-features = false }
|
||||
|
|
|
@ -753,6 +753,26 @@
|
|||
"is_auxiliary": true
|
||||
}
|
||||
}
|
||||
},
|
||||
"<": {
|
||||
"#": "American property",
|
||||
"suffix": true,
|
||||
"cross_product": true,
|
||||
"replacements": [],
|
||||
"adds_metadata": {},
|
||||
"gifts_metadata": {
|
||||
"dialect": "American"
|
||||
}
|
||||
},
|
||||
"!": {
|
||||
"#": "GB property",
|
||||
"suffix": true,
|
||||
"cross_product": true,
|
||||
"replacements": [],
|
||||
"adds_metadata": {},
|
||||
"gifts_metadata": {
|
||||
"dialect": "British"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -17545,8 +17545,9 @@ colonnade/~1MDS
|
|||
colonoscopy/1SM
|
||||
colony/~14SM
|
||||
colophon/1SM
|
||||
color's
|
||||
color/~154AEGDS
|
||||
color's/<
|
||||
color/~154AEGDS<
|
||||
colour/~154AEGDS!
|
||||
colorant/1SM
|
||||
coloration/~1EM
|
||||
coloratura/15MS
|
||||
|
|
|
@ -66,7 +66,7 @@ mod tests {
|
|||
|
||||
use super::IgnoredLints;
|
||||
use crate::{
|
||||
Document, FstDictionary,
|
||||
Dialect, Document, FstDictionary,
|
||||
linting::{LintGroup, Linter},
|
||||
};
|
||||
|
||||
|
@ -74,7 +74,8 @@ mod tests {
|
|||
fn can_ignore_all(text: String) -> bool {
|
||||
let document = Document::new_markdown_default_curated(&text);
|
||||
|
||||
let mut lints = LintGroup::new_curated(FstDictionary::curated()).lint(&document);
|
||||
let mut lints =
|
||||
LintGroup::new_curated(FstDictionary::curated(), Dialect::American).lint(&document);
|
||||
|
||||
let mut ignored = IgnoredLints::new();
|
||||
|
||||
|
@ -90,7 +91,8 @@ mod tests {
|
|||
fn can_ignore_first(text: String) -> TestResult {
|
||||
let document = Document::new_markdown_default_curated(&text);
|
||||
|
||||
let mut lints = LintGroup::new_curated(FstDictionary::curated()).lint(&document);
|
||||
let mut lints =
|
||||
LintGroup::new_curated(FstDictionary::curated(), Dialect::American).lint(&document);
|
||||
|
||||
let Some(first) = lints.first().cloned() else {
|
||||
return TestResult::discard();
|
||||
|
@ -108,7 +110,8 @@ mod tests {
|
|||
fn assert_ignore_lint_reduction(source: &str, nth_lint: usize) {
|
||||
let document = Document::new_markdown_default_curated(source);
|
||||
|
||||
let mut lints = LintGroup::new_curated(FstDictionary::curated()).lint(&document);
|
||||
let mut lints =
|
||||
LintGroup::new_curated(FstDictionary::curated(), Dialect::American).lint(&document);
|
||||
|
||||
let nth = lints.get(nth_lint).cloned().unwrap_or_else(|| {
|
||||
panic!("If ignoring the lint at {nth_lint}, make sure there are enough problems.")
|
||||
|
|
|
@ -46,7 +46,7 @@ pub use token_kind::TokenKind;
|
|||
pub use token_string_ext::TokenStringExt;
|
||||
pub use vec_ext::VecExt;
|
||||
pub use word_metadata::{
|
||||
AdverbData, ConjunctionData, NounData, PronounData, Tense, VerbData, WordMetadata,
|
||||
AdverbData, ConjunctionData, Dialect, NounData, PronounData, Tense, VerbData, WordMetadata,
|
||||
};
|
||||
|
||||
/// A utility function that removes overlapping lints in a vector,
|
||||
|
@ -77,7 +77,7 @@ pub fn remove_overlaps(lints: &mut Vec<Lint>) {
|
|||
#[cfg(test)]
|
||||
mod tests {
|
||||
use crate::{
|
||||
Document, FstDictionary,
|
||||
Dialect, Document, FstDictionary,
|
||||
linting::{LintGroup, Linter},
|
||||
remove_overlaps,
|
||||
};
|
||||
|
@ -86,7 +86,7 @@ mod tests {
|
|||
fn keeps_space_lint() {
|
||||
let doc = Document::new_plain_english_curated("Ths tet");
|
||||
|
||||
let mut linter = LintGroup::new_curated(FstDictionary::curated());
|
||||
let mut linter = LintGroup::new_curated(FstDictionary::curated(), Dialect::American);
|
||||
|
||||
let mut lints = linter.lint(&doc);
|
||||
|
||||
|
|
|
@ -65,7 +65,7 @@ use super::wrong_quotes::WrongQuotes;
|
|||
use super::{CurrencyPlacement, Linter, NoOxfordComma, OxfordComma};
|
||||
use super::{Lint, PatternLinter};
|
||||
use crate::linting::{closed_compounds, phrase_corrections};
|
||||
use crate::{CharString, Document, TokenStringExt};
|
||||
use crate::{CharString, Dialect, Document, TokenStringExt};
|
||||
use crate::{Dictionary, MutableDictionary};
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Default, Clone)]
|
||||
|
@ -76,8 +76,8 @@ pub struct LintGroupConfig {
|
|||
|
||||
#[cached]
|
||||
fn curated_config() -> LintGroupConfig {
|
||||
// Dictionary does not matter, we're just after the config.
|
||||
let group = LintGroup::new_curated(MutableDictionary::new().into());
|
||||
// The Dictionary and Dialect do not matter, we're just after the config.
|
||||
let group = LintGroup::new_curated(MutableDictionary::new().into(), Dialect::American);
|
||||
group.config
|
||||
}
|
||||
|
||||
|
@ -264,7 +264,7 @@ impl LintGroup {
|
|||
self
|
||||
}
|
||||
|
||||
pub fn new_curated(dictionary: Arc<impl Dictionary + 'static>) -> Self {
|
||||
pub fn new_curated(dictionary: Arc<impl Dictionary + 'static>, dialect: Dialect) -> Self {
|
||||
let mut out = Self::empty();
|
||||
|
||||
macro_rules! insert_struct_rule {
|
||||
|
@ -342,15 +342,18 @@ impl LintGroup {
|
|||
insert_pattern_rule!(ExpandTimeShorthands, true);
|
||||
insert_pattern_rule!(ModalOf, true);
|
||||
|
||||
out.add("SpellCheck", Box::new(SpellCheck::new(dictionary)));
|
||||
out.add("SpellCheck", Box::new(SpellCheck::new(dictionary, dialect)));
|
||||
out.config.set_rule_enabled("SpellCheck", true);
|
||||
|
||||
out
|
||||
}
|
||||
|
||||
/// Create a new curated group with all config values cleared out.
|
||||
pub fn new_curated_empty_config(dictionary: Arc<impl Dictionary + 'static>) -> Self {
|
||||
let mut group = Self::new_curated(dictionary);
|
||||
pub fn new_curated_empty_config(
|
||||
dictionary: Arc<impl Dictionary + 'static>,
|
||||
dialect: Dialect,
|
||||
) -> Self {
|
||||
let mut group = Self::new_curated(dictionary, dialect);
|
||||
group.config.clear();
|
||||
group
|
||||
}
|
||||
|
@ -412,19 +415,20 @@ impl Linter for LintGroup {
|
|||
mod tests {
|
||||
use std::sync::Arc;
|
||||
|
||||
use crate::{Document, FstDictionary, MutableDictionary, linting::Linter};
|
||||
use crate::{Dialect, Document, FstDictionary, MutableDictionary, linting::Linter};
|
||||
|
||||
use super::LintGroup;
|
||||
|
||||
#[test]
|
||||
fn can_get_all_descriptions() {
|
||||
let group = LintGroup::new_curated(Arc::new(MutableDictionary::default()));
|
||||
let group =
|
||||
LintGroup::new_curated(Arc::new(MutableDictionary::default()), Dialect::American);
|
||||
group.all_descriptions();
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn lint_descriptions_are_clean() {
|
||||
let mut group = LintGroup::new_curated(FstDictionary::curated());
|
||||
let mut group = LintGroup::new_curated(FstDictionary::curated(), Dialect::American);
|
||||
let pairs: Vec<_> = group
|
||||
.all_descriptions()
|
||||
.into_iter()
|
||||
|
|
|
@ -151,7 +151,12 @@ mod tests {
|
|||
let test = Document::new_markdown_default_curated(text);
|
||||
let lints = linter.lint(&test);
|
||||
dbg!(&lints);
|
||||
assert_eq!(lints.len(), count);
|
||||
if lints.len() != count {
|
||||
panic!(
|
||||
"Expected \"{text}\" to create {count} lints, but it created {}.",
|
||||
lints.len()
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
/// Assert the total number of suggestions produced by a [`Linter`], spread across all produced
|
||||
|
@ -173,18 +178,26 @@ mod tests {
|
|||
let test = Document::new_markdown_default_curated(text);
|
||||
let lints = linter.lint(&test);
|
||||
|
||||
let mut text: Vec<char> = text.chars().collect();
|
||||
let mut text_chars: Vec<char> = text.chars().collect();
|
||||
|
||||
if lints.is_empty() && expected_result != text {
|
||||
panic!("Expected lints, but none were created.");
|
||||
}
|
||||
|
||||
for lint in lints {
|
||||
dbg!(&lint);
|
||||
if let Some(sug) = lint.suggestions.first() {
|
||||
sug.apply(lint.span, &mut text);
|
||||
sug.apply(lint.span, &mut text_chars);
|
||||
}
|
||||
}
|
||||
|
||||
let transformed_str: String = text.iter().collect();
|
||||
let transformed_str: String = text_chars.iter().collect();
|
||||
|
||||
assert_eq!(transformed_str.as_str(), expected_result);
|
||||
if transformed_str.as_str() != expected_result {
|
||||
panic!(
|
||||
"Expected \"{transformed_str}\" to be \"{expected_result}\" after applying the computed suggestions."
|
||||
);
|
||||
}
|
||||
|
||||
// Applying the suggestions should fix all the lints.
|
||||
assert_lint_count(&transformed_str, linter, 0);
|
||||
|
|
|
@ -7,7 +7,7 @@ use super::Suggestion;
|
|||
use super::{Lint, LintKind, Linter};
|
||||
use crate::document::Document;
|
||||
use crate::spell::suggest_correct_spelling;
|
||||
use crate::{CharString, CharStringExt, Dictionary, TokenStringExt};
|
||||
use crate::{CharString, CharStringExt, Dialect, Dictionary, TokenStringExt};
|
||||
|
||||
pub struct SpellCheck<T>
|
||||
where
|
||||
|
@ -15,13 +15,15 @@ where
|
|||
{
|
||||
dictionary: T,
|
||||
word_cache: LruCache<CharString, Vec<CharString>>,
|
||||
dialect: Dialect,
|
||||
}
|
||||
|
||||
impl<T: Dictionary> SpellCheck<T> {
|
||||
pub fn new(dictionary: T) -> Self {
|
||||
pub fn new(dictionary: T, dialect: Dialect) -> Self {
|
||||
Self {
|
||||
dictionary,
|
||||
word_cache: LruCache::new(NonZero::new(10000).unwrap()),
|
||||
dialect,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -45,6 +47,15 @@ impl<T: Dictionary> SpellCheck<T> {
|
|||
dist += 1;
|
||||
}
|
||||
|
||||
// Remove entries outside the configured dialect
|
||||
suggestions.retain(|v| {
|
||||
self.dictionary
|
||||
.get_word_metadata(v)
|
||||
.unwrap()
|
||||
.dialect
|
||||
.is_none_or(|d| d == self.dialect)
|
||||
});
|
||||
|
||||
self.word_cache.put(word.into(), suggestions.clone());
|
||||
|
||||
suggestions
|
||||
|
@ -57,11 +68,15 @@ impl<T: Dictionary> Linter for SpellCheck<T> {
|
|||
|
||||
for word in document.iter_words() {
|
||||
let word_chars = document.get_span_content(&word.span);
|
||||
if self.dictionary.contains_exact_word(word_chars)
|
||||
|| self.dictionary.contains_exact_word(&word_chars.to_lower())
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
if let Some(metadata) = word.kind.as_word().unwrap() {
|
||||
if metadata.dialect.is_none_or(|d| d == self.dialect)
|
||||
&& (self.dictionary.contains_exact_word(word_chars)
|
||||
|| self.dictionary.contains_exact_word(&word_chars.to_lower()))
|
||||
{
|
||||
continue;
|
||||
}
|
||||
};
|
||||
|
||||
let mut possibilities = self.cached_suggest_correct_spelling(word_chars);
|
||||
|
||||
|
@ -115,7 +130,7 @@ impl<T: Dictionary> Linter for SpellCheck<T> {
|
|||
#[cfg(test)]
|
||||
mod tests {
|
||||
use crate::{
|
||||
FstDictionary,
|
||||
Dialect, FstDictionary,
|
||||
linting::tests::{assert_lint_count, assert_suggestion_result},
|
||||
};
|
||||
|
||||
|
@ -125,7 +140,7 @@ mod tests {
|
|||
fn markdown_capitalized() {
|
||||
assert_suggestion_result(
|
||||
"The word markdown should be capitalized.",
|
||||
SpellCheck::new(FstDictionary::curated()),
|
||||
SpellCheck::new(FstDictionary::curated(), Dialect::American),
|
||||
"The word Markdown should be capitalized.",
|
||||
);
|
||||
}
|
||||
|
@ -134,8 +149,17 @@ mod tests {
|
|||
fn harper_automattic_capitalized() {
|
||||
assert_lint_count(
|
||||
"So should harper and automattic.",
|
||||
SpellCheck::new(FstDictionary::curated()),
|
||||
SpellCheck::new(FstDictionary::curated(), Dialect::American),
|
||||
2,
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn american_color_in_british_dialect() {
|
||||
assert_lint_count(
|
||||
"Do you like the color?",
|
||||
SpellCheck::new(FstDictionary::curated(), Dialect::British),
|
||||
1,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
use is_macro::Is;
|
||||
use paste::paste;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use strum_macros::EnumString;
|
||||
|
||||
use crate::WordId;
|
||||
|
||||
|
@ -350,7 +351,9 @@ impl ConjunctionData {
|
|||
}
|
||||
|
||||
/// A regional dialect.
|
||||
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Hash)]
|
||||
#[derive(
|
||||
Debug, Clone, Copy, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Hash, EnumString,
|
||||
)]
|
||||
pub enum Dialect {
|
||||
American,
|
||||
Canadian,
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
use harper_core::linting::{LintGroup, Linter};
|
||||
use harper_core::{Document, FstDictionary};
|
||||
use harper_core::{Dialect, Document, FstDictionary};
|
||||
|
||||
/// Creates a unit test checking that the linting of a Markdown document (in
|
||||
/// `tests_sources`) produces the expected number of lints.
|
||||
|
@ -18,7 +18,7 @@ macro_rules! create_test {
|
|||
let dict = FstDictionary::curated();
|
||||
let document = Document::new_markdown_default(&source, &dict);
|
||||
|
||||
let mut linter = LintGroup::new_curated(dict);
|
||||
let mut linter = LintGroup::new_curated(dict, Dialect::American);
|
||||
let lints = linter.lint(&document);
|
||||
|
||||
dbg!(&lints);
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
use harper_core::linting::{LintGroup, Linter};
|
||||
use harper_core::{Document, FstDictionary};
|
||||
use harper_core::{Dialect, Document, FstDictionary};
|
||||
|
||||
/// Creates a unit test checking that the linting of a Markdown document (in
|
||||
/// `tests_sources`) produces the expected number of lints.
|
||||
|
@ -18,7 +18,7 @@ macro_rules! create_test {
|
|||
let dict = FstDictionary::curated();
|
||||
let document = Document::new_markdown_default(&source, &dict);
|
||||
|
||||
let mut linter = LintGroup::new_curated(dict);
|
||||
let mut linter = LintGroup::new_curated(dict, Dialect::American);
|
||||
let lints = linter.lint(&document);
|
||||
|
||||
dbg!(&lints);
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
use harper_core::linting::{LintGroup, Linter};
|
||||
use harper_core::parsers::MarkdownOptions;
|
||||
use harper_core::{Document, FstDictionary};
|
||||
use harper_core::{Dialect, Document, FstDictionary};
|
||||
use harper_literate_haskell::LiterateHaskellParser;
|
||||
|
||||
/// Creates a unit test checking that the linting of a Markdown document (in
|
||||
|
@ -20,7 +20,7 @@ macro_rules! create_test {
|
|||
let dict = FstDictionary::curated();
|
||||
let document = Document::new_curated(&source, &LiterateHaskellParser::new_markdown(MarkdownOptions::default()));
|
||||
|
||||
let mut linter = LintGroup::new_curated(dict);
|
||||
let mut linter = LintGroup::new_curated(dict, Dialect::American);
|
||||
let lints = linter.lint(&document);
|
||||
|
||||
dbg!(&lints);
|
||||
|
|
|
@ -7,7 +7,7 @@ use harper_comments::CommentParser;
|
|||
use harper_core::linting::{LintGroup, LintGroupConfig};
|
||||
use harper_core::parsers::{CollapseIdentifiers, IsolateEnglish, Markdown, Parser, PlainEnglish};
|
||||
use harper_core::{
|
||||
Dictionary, Document, FstDictionary, MergedDictionary, MutableDictionary, WordMetadata,
|
||||
Dialect, Dictionary, Document, FstDictionary, MergedDictionary, MutableDictionary, WordMetadata,
|
||||
};
|
||||
use harper_html::HtmlParser;
|
||||
use harper_literate_haskell::LiterateHaskellParser;
|
||||
|
@ -140,12 +140,13 @@ impl Backend {
|
|||
self.pull_config().await;
|
||||
|
||||
// Copy necessary configuration to avoid holding lock.
|
||||
let (lint_config, markdown_options, isolate_english) = {
|
||||
let (lint_config, markdown_options, isolate_english, dialect) = {
|
||||
let config = self.config.read().await;
|
||||
(
|
||||
config.lint_config.clone(),
|
||||
config.markdown_options,
|
||||
config.isolate_english,
|
||||
config.dialect,
|
||||
)
|
||||
};
|
||||
|
||||
|
@ -161,7 +162,8 @@ impl Backend {
|
|||
info!("Constructing new LintGroup for new document.");
|
||||
|
||||
DocumentState {
|
||||
linter: LintGroup::new_curated(dict.clone()).with_lint_config(lint_config.clone()),
|
||||
linter: LintGroup::new_curated(dict.clone(), dialect)
|
||||
.with_lint_config(lint_config.clone()),
|
||||
language_id: language_id.map(|v| v.to_string()),
|
||||
dict: dict.clone(),
|
||||
url: url.clone(),
|
||||
|
@ -173,7 +175,7 @@ impl Backend {
|
|||
doc_state.dict = dict.clone();
|
||||
info!("Constructing new linter because of modified dictionary.");
|
||||
doc_state.linter =
|
||||
LintGroup::new_curated(dict.clone()).with_lint_config(lint_config.clone());
|
||||
LintGroup::new_curated(dict.clone(), dialect).with_lint_config(lint_config.clone());
|
||||
}
|
||||
|
||||
let Some(language_id) = &doc_state.language_id else {
|
||||
|
@ -188,6 +190,7 @@ impl Backend {
|
|||
url: &'a Url,
|
||||
doc_state: &'a mut DocumentState,
|
||||
lint_config: &LintGroupConfig,
|
||||
dialect: Dialect,
|
||||
) -> Result<Box<dyn Parser>> {
|
||||
if doc_state.ident_dict != new_dict {
|
||||
info!("Constructing new linter because of modified ident dictionary.");
|
||||
|
@ -197,8 +200,8 @@ impl Backend {
|
|||
merged.add_dictionary(new_dict);
|
||||
let merged = Arc::new(merged);
|
||||
|
||||
doc_state.linter =
|
||||
LintGroup::new_curated(merged.clone()).with_lint_config(lint_config.clone());
|
||||
doc_state.linter = LintGroup::new_curated(merged.clone(), dialect)
|
||||
.with_lint_config(lint_config.clone());
|
||||
doc_state.dict = merged.clone();
|
||||
}
|
||||
|
||||
|
@ -223,6 +226,7 @@ impl Backend {
|
|||
url,
|
||||
doc_state,
|
||||
&lint_config,
|
||||
dialect,
|
||||
)
|
||||
.await?,
|
||||
)
|
||||
|
@ -244,6 +248,7 @@ impl Backend {
|
|||
url,
|
||||
doc_state,
|
||||
&lint_config,
|
||||
dialect,
|
||||
)
|
||||
.await?,
|
||||
)
|
||||
|
@ -597,7 +602,7 @@ impl LanguageServer for Backend {
|
|||
|
||||
for doc in doc_lock.values_mut() {
|
||||
info!("Constructing new LintGroup for updated configuration.");
|
||||
doc.linter = LintGroup::new_curated(doc.dict.clone())
|
||||
doc.linter = LintGroup::new_curated(doc.dict.clone(), config_lock.dialect)
|
||||
.with_lint_config(config_lock.lint_config.clone());
|
||||
}
|
||||
|
||||
|
|
|
@ -2,7 +2,7 @@ use std::path::PathBuf;
|
|||
|
||||
use anyhow::{Result, bail};
|
||||
use dirs::{config_dir, data_local_dir};
|
||||
use harper_core::{linting::LintGroupConfig, parsers::MarkdownOptions};
|
||||
use harper_core::{Dialect, linting::LintGroupConfig, parsers::MarkdownOptions};
|
||||
use resolve_path::PathResolveExt;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use serde_json::Value;
|
||||
|
@ -70,6 +70,7 @@ pub struct Config {
|
|||
pub code_action_config: CodeActionConfig,
|
||||
pub isolate_english: bool,
|
||||
pub markdown_options: MarkdownOptions,
|
||||
pub dialect: Dialect,
|
||||
}
|
||||
|
||||
impl Config {
|
||||
|
@ -114,6 +115,10 @@ impl Config {
|
|||
base.diagnostic_severity = serde_json::from_value(v.clone())?;
|
||||
}
|
||||
|
||||
if let Some(v) = value.get("dialect") {
|
||||
base.dialect = serde_json::from_value(v.clone())?;
|
||||
}
|
||||
|
||||
if let Some(v) = value.get("codeActions") {
|
||||
base.code_action_config = CodeActionConfig::from_lsp_config(v.clone())?;
|
||||
}
|
||||
|
@ -148,6 +153,7 @@ impl Default for Config {
|
|||
code_action_config: CodeActionConfig::default(),
|
||||
isolate_english: false,
|
||||
markdown_options: MarkdownOptions::default(),
|
||||
dialect: Dialect::American,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
use harper_core::linting::{LintGroup, Linter};
|
||||
use harper_core::{Document, FstDictionary};
|
||||
use harper_core::{Dialect, Document, FstDictionary};
|
||||
use harper_typst::Typst;
|
||||
|
||||
/// Creates a unit test checking that the linting of a document in
|
||||
|
@ -19,7 +19,7 @@ macro_rules! create_test {
|
|||
let dict = FstDictionary::curated();
|
||||
let document = Document::new(&source, &Typst, &dict);
|
||||
|
||||
let mut linter = LintGroup::new_curated(dict);
|
||||
let mut linter = LintGroup::new_curated(dict, Dialect::American);
|
||||
let lints = linter.lint(&document);
|
||||
|
||||
dbg!(&lints);
|
||||
|
|
|
@ -62,6 +62,26 @@ impl Language {
|
|||
}
|
||||
}
|
||||
|
||||
#[wasm_bindgen]
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, Copy)]
|
||||
pub enum Dialect {
|
||||
American,
|
||||
British,
|
||||
Australian,
|
||||
Canadian,
|
||||
}
|
||||
|
||||
impl Into<harper_core::Dialect> for Dialect {
|
||||
fn into(self) -> harper_core::Dialect {
|
||||
match self {
|
||||
Dialect::American => harper_core::Dialect::American,
|
||||
Dialect::Canadian => harper_core::Dialect::Canadian,
|
||||
Dialect::Australian => harper_core::Dialect::Australian,
|
||||
Dialect::British => harper_core::Dialect::British,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[wasm_bindgen]
|
||||
pub struct Linter {
|
||||
lint_group: LintGroup,
|
||||
|
@ -71,6 +91,7 @@ pub struct Linter {
|
|||
user_dictionary: MutableDictionary,
|
||||
dictionary: Arc<MergedDictionary>,
|
||||
ignored_lints: IgnoredLints,
|
||||
dialect: Dialect,
|
||||
}
|
||||
|
||||
#[wasm_bindgen]
|
||||
|
@ -78,15 +99,16 @@ impl Linter {
|
|||
/// Construct a new `Linter`.
|
||||
/// Note that this can mean constructing the curated dictionary, which is the most expensive operation
|
||||
/// in Harper.
|
||||
pub fn new() -> Self {
|
||||
pub fn new(dialect: Dialect) -> Self {
|
||||
let dictionary = Self::construct_merged_dict(MutableDictionary::default());
|
||||
let lint_group = LintGroup::new_curated_empty_config(dictionary.clone());
|
||||
let lint_group = LintGroup::new_curated_empty_config(dictionary.clone(), dialect.into());
|
||||
|
||||
Self {
|
||||
lint_group,
|
||||
user_dictionary: MutableDictionary::new(),
|
||||
dictionary,
|
||||
ignored_lints: IgnoredLints::default(),
|
||||
dialect,
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -95,7 +117,8 @@ impl Linter {
|
|||
fn synchronize_lint_dict(&mut self) {
|
||||
let mut lint_config = self.lint_group.config.clone();
|
||||
self.dictionary = Self::construct_merged_dict(self.user_dictionary.clone());
|
||||
self.lint_group = LintGroup::new_curated_empty_config(self.dictionary.clone());
|
||||
self.lint_group =
|
||||
LintGroup::new_curated_empty_config(self.dictionary.clone(), self.dialect.into());
|
||||
self.lint_group.config.merge_from(&mut lint_config);
|
||||
}
|
||||
|
||||
|
@ -248,12 +271,6 @@ impl Linter {
|
|||
}
|
||||
}
|
||||
|
||||
impl Default for Linter {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
#[wasm_bindgen]
|
||||
pub fn to_title_case(text: String) -> String {
|
||||
harper_core::make_title_case_str(&text, &PlainEnglish, &FstDictionary::curated())
|
||||
|
@ -385,14 +402,16 @@ impl Lint {
|
|||
|
||||
#[wasm_bindgen]
|
||||
pub fn get_default_lint_config_as_json() -> String {
|
||||
let config = LintGroup::new_curated(MutableDictionary::new().into()).config;
|
||||
let config =
|
||||
LintGroup::new_curated(MutableDictionary::new().into(), Dialect::American.into()).config;
|
||||
|
||||
serde_json::to_string(&config).unwrap()
|
||||
}
|
||||
|
||||
#[wasm_bindgen]
|
||||
pub fn get_default_lint_config() -> JsValue {
|
||||
let config = LintGroup::new_curated(MutableDictionary::new().into()).config;
|
||||
let config =
|
||||
LintGroup::new_curated(MutableDictionary::new().into(), Dialect::American.into()).config;
|
||||
|
||||
// Important for downstream JSON serialization
|
||||
let serializer = serde_wasm_bindgen::Serializer::json_compatible();
|
||||
|
|
2
justfile
2
justfile
|
@ -99,6 +99,8 @@ test-vscode:
|
|||
mkdir "$bin_dir"
|
||||
fi
|
||||
|
||||
cargo build --release
|
||||
|
||||
cp "{{justfile_directory()}}/target/release/harper-ls"* "$bin_dir"
|
||||
|
||||
cd "$ext_dir"
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
import type { Lint, Span, Suggestion } from 'harper-wasm';
|
||||
import type { Dialect, Lint, Span, Suggestion } from 'harper-wasm';
|
||||
import { LintConfig, LintOptions } from './main';
|
||||
import { BinaryModule } from './binary';
|
||||
|
||||
|
@ -76,4 +76,6 @@ export default interface Linter {
|
|||
export interface LinterInit {
|
||||
/** The module or path to the WebAssembly binary. */
|
||||
binary: BinaryModule;
|
||||
/** The dialect of English Harper should use. If omitted, Harper will default to American English. */
|
||||
dialect?: Dialect;
|
||||
}
|
||||
|
|
|
@ -14,7 +14,7 @@ export default class LocalLinter implements Linter {
|
|||
this.binary = init.binary;
|
||||
this.inner = LazyPromise.from(async () => {
|
||||
await this.binary.setup();
|
||||
return this.binary.createLinter();
|
||||
return this.binary.createLinter(init.dialect);
|
||||
});
|
||||
}
|
||||
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
import type { Lint, Suggestion, Span } from 'harper-wasm';
|
||||
import type { Lint, Suggestion, Span, Dialect } from 'harper-wasm';
|
||||
import Linter, { LinterInit } from '../Linter';
|
||||
import Worker from './worker.ts?worker&inline';
|
||||
import { LintConfig, LintOptions } from '../main';
|
||||
|
@ -17,12 +17,14 @@ export interface RequestItem {
|
|||
* NOTE: This class will not work properly in Node. In that case, just use `LocalLinter`. */
|
||||
export default class WorkerLinter implements Linter {
|
||||
private binary: BinaryModule;
|
||||
private dialect?: Dialect;
|
||||
private worker: Worker;
|
||||
private requestQueue: RequestItem[];
|
||||
private working = true;
|
||||
|
||||
constructor(init: LinterInit) {
|
||||
this.binary = init.binary;
|
||||
this.dialect = init.dialect;
|
||||
this.worker = new Worker();
|
||||
this.requestQueue = [];
|
||||
|
||||
|
@ -30,7 +32,7 @@ export default class WorkerLinter implements Linter {
|
|||
this.worker.onmessage = () => {
|
||||
this.setupMainEventListeners();
|
||||
|
||||
this.worker.postMessage(this.binary.url);
|
||||
this.worker.postMessage([this.binary.url, this.dialect]);
|
||||
|
||||
this.working = false;
|
||||
this.submitRemainingRequests();
|
||||
|
|
|
@ -7,12 +7,12 @@ import LocalLinter from '../LocalLinter';
|
|||
self.postMessage('ready');
|
||||
|
||||
self.onmessage = (e) => {
|
||||
const binaryUrl = e.data;
|
||||
const [binaryUrl, dialect] = e.data;
|
||||
if (typeof binaryUrl !== 'string') {
|
||||
throw new TypeError(`Expected binary to be a string of url but got ${typeof binaryUrl}.`);
|
||||
}
|
||||
const binary = new BinaryModule(binaryUrl);
|
||||
const linter = new LocalLinter({ binary });
|
||||
const linter = new LocalLinter({ binary, dialect });
|
||||
|
||||
async function processRequest(v: SerializedRequest) {
|
||||
const { procName, args } = await binary.deserialize(v);
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
import { default as binaryUrl } from 'harper-wasm/harper_wasm_bg.wasm?no-inline';
|
||||
import { default as binaryInlinedUrl } from 'harper-wasm/harper_wasm_bg.wasm?inline';
|
||||
import type { InitInput, Span, Suggestion, Linter as WasmLinter } from 'harper-wasm';
|
||||
import { Dialect, InitInput, Span, Suggestion, Linter as WasmLinter } from 'harper-wasm';
|
||||
import pMemoize from 'p-memoize';
|
||||
import LazyPromise from 'p-lazy';
|
||||
import { assert } from './utils';
|
||||
|
@ -101,9 +101,9 @@ export class BinaryModule {
|
|||
exported.setup();
|
||||
}
|
||||
|
||||
async createLinter(): Promise<WasmLinter> {
|
||||
async createLinter(dialect?: Dialect): Promise<WasmLinter> {
|
||||
const exported = await this.inner;
|
||||
return exported.Linter.new();
|
||||
return exported.Linter.new(dialect ?? Dialect.American);
|
||||
}
|
||||
|
||||
async serializeArg(arg: any): Promise<RequestArg> {
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
export type { Lint, Span, Suggestion } from 'harper-wasm';
|
||||
export { SuggestionKind } from 'harper-wasm';
|
||||
export { SuggestionKind, Dialect } from 'harper-wasm';
|
||||
export type { default as Linter, LinterInit } from './Linter';
|
||||
export { default as LocalLinter } from './LocalLinter';
|
||||
export { default as WorkerLinter } from './WorkerLinter';
|
||||
|
|
|
@ -83,6 +83,18 @@
|
|||
"default": false,
|
||||
"description": "Make code actions appear in \"stable\" positions by placing code actions that should always be available, like adding misspelled words in the dictionary, first."
|
||||
},
|
||||
"harper.dialect": {
|
||||
"scope": "resource",
|
||||
"type": "string",
|
||||
"enum": [
|
||||
"British",
|
||||
"American",
|
||||
"Canadian",
|
||||
"Australian"
|
||||
],
|
||||
"default": "American",
|
||||
"description": "Set which dialect of English Harper should expect."
|
||||
},
|
||||
"harper.diagnosticSeverity": {
|
||||
"scope": "resource",
|
||||
"type": "string",
|
||||
|
|
|
@ -1,7 +1,8 @@
|
|||
{
|
||||
"files.associations": {
|
||||
"git-commit": "git-commit"
|
||||
},
|
||||
"harper.linters.SpellCheck": true,
|
||||
"harper.linters.RepeatedWords": true
|
||||
"files.associations": {
|
||||
"git-commit": "git-commit"
|
||||
},
|
||||
"harper.linters.SpellCheck": true,
|
||||
"harper.linters.RepeatedWords": true,
|
||||
"harper.dialect": "American"
|
||||
}
|
||||
|
|
1
packages/vscode-plugin/src/tests/fixtures/integrationBritish.md
vendored
Normal file
1
packages/vscode-plugin/src/tests/fixtures/integrationBritish.md
vendored
Normal file
|
@ -0,0 +1 @@
|
|||
This document uses British words, unlike color.
|
|
@ -1,3 +1,4 @@
|
|||
import exp from 'node:constants';
|
||||
import type { Diagnostic, Extension } from 'vscode';
|
||||
|
||||
import {
|
||||
|
@ -41,7 +42,10 @@ export function compareActualVsExpectedDiagnostics(
|
|||
actual: Diagnostic[],
|
||||
expected: Diagnostic[]
|
||||
): void {
|
||||
expect(actual.length).toBe(expected.length);
|
||||
if (actual.length != expected.length) {
|
||||
throw new Error(`Expected ${expected.length} diagnostics, got ${actual.length}.`);
|
||||
}
|
||||
|
||||
for (let i = 0; i < actual.length; i++) {
|
||||
expect(actual[i].source).toBe(expected[i].source);
|
||||
expect(actual[i].message).toBe(expected[i].message);
|
||||
|
|
|
@ -0,0 +1,55 @@
|
|||
import type { Extension } from 'vscode';
|
||||
|
||||
import { commands, ConfigurationTarget, Uri, workspace } from 'vscode';
|
||||
|
||||
import {
|
||||
activateHarper,
|
||||
compareActualVsExpectedDiagnostics,
|
||||
createExpectedDiagnostics,
|
||||
createRange,
|
||||
getActualDiagnostics,
|
||||
openFile,
|
||||
sleep
|
||||
} from './helper';
|
||||
|
||||
/**
 * Integration tests for the `harper.dialect` setting.
 *
 * The fixture `integrationBritish.md` contains the American spelling "color".
 * The suite checks the diagnostics produced under the default configuration,
 * then switches the workspace to the British dialect and expects "color" to be
 * reported as a spelling problem.
 */
describe('IntegrationDialect >', () => {
	let harper: Extension<void>;
	let markdownUri: Uri;

	beforeAll(async () => {
		harper = await activateHarper();
		// Open test file so diagnostics can occur
		markdownUri = await openFile('integrationBritish.md');
		// Wait for `harper-ls` to start
		await sleep(500);
	});

	it('runs', () => {
		expect(harper.isActive).toBe(true);
	});

	it('gives correct diagnostics for default config', () => {
		// NOTE(review): `createExpectedDiagnostics()` with no arguments presumably
		// yields an empty expectation list — confirm against `./helper`.
		compareActualVsExpectedDiagnostics(
			getActualDiagnostics(markdownUri),
			createExpectedDiagnostics()
		);
	});

	it('marks error when set to British English', async () => {
		const config = workspace.getConfiguration('harper');
		await config.update('dialect', 'British', ConfigurationTarget.Workspace);

		try {
			// Wait for `harper-ls` to update diagnostics
			await sleep(300);

			compareActualVsExpectedDiagnostics(
				getActualDiagnostics(markdownUri),
				createExpectedDiagnostics({
					message: 'Did you mean to spell “color” this way?',
					// "color" occupies columns 41-46 of the fixture's only line.
					range: createRange(0, 41, 0, 46)
				})
			);
		} finally {
			// Set config back to default value even when the assertions above
			// fail, so a failure here cannot leak the British dialect into
			// other suites that share this workspace.
			await config.update('dialect', 'American', ConfigurationTarget.Workspace);
		}
	});
});
|
|
@ -214,10 +214,11 @@ These configs are under the `markdown` key:
|
|||
|
||||
### Other Configs
|
||||
|
||||
| Config | Type | Default Value | Description |
|
||||
| -------------------- | ------------------------------------------------- | ------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| `diagnosticSeverity` | `"error"`, `"hint"`, `"information"`, `"warning"` | `"hint"` | Configures how severe diagnostics appear in your editor |
|
||||
| `isolateEnglish` | `boolean` | `false` | In documents that are a mixture of English and another language, only lint English text. This feature is incredibly new and unstable. Do not expect it to work perfectly. |
|
||||
| Config | Type | Default Value | Description |
|
||||
| -------------------- | ----------------------------------------------------- | ------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| `diagnosticSeverity` | `"error"`, `"hint"`, `"information"`, `"warning"` | `"hint"` | Configures how severe diagnostics appear in your editor |
|
||||
| `isolateEnglish` | `boolean` | `false` | In documents that are a mixture of English and another language, only lint English text. This feature is incredibly new and unstable. Do not expect it to work perfectly. |
|
||||
| `dialect` | `"American"`, `"British"`, `"Australian"`, `"Canadian"` | `"American"` | Set the dialect of English Harper should expect. |
|
||||
|
||||
## Supported Languages
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue