feat(core): add config for dialect and consume it from SpellCheck

Elijah Potter 2025-03-17 14:16:09 -06:00
parent 6f54df6f69
commit e00b554077
32 changed files with 292 additions and 87 deletions
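
In short, `LintGroup::new_curated` and `SpellCheck::new` now take a `Dialect`, the curated dictionary carries per-dialect word metadata, and the CLI, LSP, and WASM front ends thread a dialect setting through to the linter. A minimal sketch of the new core API, assuming only the types and constructors that appear in the diffs below:

use harper_core::linting::{LintGroup, Linter};
use harper_core::{Dialect, Document, FstDictionary};

fn main() {
    // Build the curated lint group for British English.
    let mut linter = LintGroup::new_curated(FstDictionary::curated(), Dialect::British);

    // "color" is tagged as American-only in the curated dictionary, so the
    // spell checker should now flag it under the British dialect.
    let doc = Document::new_plain_english_curated("Do you like the color?");
    let lints = linter.lint(&doc);
    println!("{} lint(s) produced", lints.len());
}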

Cargo.lock generated

@ -754,6 +754,8 @@ dependencies = [
"serde",
"serde_json",
"smallvec",
"strum",
"strum_macros",
"thiserror 2.0.12",
"unicode-blocks",
"unicode-script",
@ -1714,6 +1716,25 @@ version = "0.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f"
[[package]]
name = "strum"
version = "0.27.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f64def088c51c9510a8579e3c5d67c65349dcf755e5479ad3d010aa6454e2c32"
[[package]]
name = "strum_macros"
version = "0.27.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c77a8c5abcaf0f9ce05d62342b7d298c346515365c36b673df4ebe3ced01fde8"
dependencies = [
"heck",
"proc-macro2",
"quote",
"rustversion",
"syn 2.0.96",
]
[[package]]
name = "syn"
version = "1.0.109"


@ -11,7 +11,8 @@ use harper_comments::CommentParser;
use harper_core::linting::{LintGroup, Linter};
use harper_core::parsers::{Markdown, MarkdownOptions};
use harper_core::{
remove_overlaps, CharStringExt, Dictionary, Document, FstDictionary, TokenKind, TokenStringExt,
remove_overlaps, CharStringExt, Dialect, Dictionary, Document, FstDictionary, TokenKind,
TokenStringExt,
};
use harper_literate_haskell::LiterateHaskellParser;
use hashbrown::HashMap;
@ -33,6 +34,8 @@ enum Args {
/// If omitted, `harper-cli` will run every rule.
#[arg(short, long)]
only_lint_with: Option<Vec<String>>,
#[arg(short, long)]
dialect: Dialect,
},
/// Parse a provided document and print the detected symbols.
Parse {
@ -70,10 +73,11 @@ fn main() -> anyhow::Result<()> {
file,
count,
only_lint_with,
dialect,
} => {
let (doc, source) = load_file(&file, markdown_options)?;
let mut linter = LintGroup::new_curated(dictionary);
let mut linter = LintGroup::new_curated(dictionary, dialect);
if let Some(rules) = only_lint_with {
linter.set_all_rules_to(Some(false));
@ -201,7 +205,7 @@ fn main() -> anyhow::Result<()> {
description: String,
}
let linter = LintGroup::new_curated(dictionary);
let linter = LintGroup::new_curated(dictionary, Dialect::American);
let default_config: HashMap<String, bool> =
serde_json::from_str(&serde_json::to_string(&linter.config).unwrap()).unwrap();


@ -3,7 +3,7 @@ use std::path::Path;
use harper_comments::CommentParser;
use harper_core::linting::{LintGroup, Linter};
use harper_core::parsers::MarkdownOptions;
use harper_core::{Document, FstDictionary};
use harper_core::{Dialect, Document, FstDictionary};
/// Creates a unit test checking that the linting of a source file in
/// `language_support_sources` produces the expected number of lints.
@ -25,7 +25,7 @@ macro_rules! create_test {
let dict = FstDictionary::curated();
let document = Document::new(&source, &parser, &dict);
let mut linter = LintGroup::new_curated(dict);
let mut linter = LintGroup::new_curated(dict, Dialect::American);
let lints = linter.lint(&document);
dbg!(&lints);


@ -28,6 +28,8 @@ levenshtein_automata = { version = "0.2.1", features = ["fst_automaton"] }
cached = "0.55.1"
lru = "0.13.0"
foldhash = "0.1.4"
strum_macros = "0.27.1"
strum = "0.27.1"
[dev-dependencies]
criterion = { version = "0.5.1", default-features = false }


@ -753,6 +753,26 @@
"is_auxiliary": true
}
}
},
"<": {
"#": "American property",
"suffix": true,
"cross_product": true,
"replacements": [],
"adds_metadata": {},
"gifts_metadata": {
"dialect": "American"
}
},
"!": {
"#": "GB property",
"suffix": true,
"cross_product": true,
"replacements": [],
"adds_metadata": {},
"gifts_metadata": {
"dialect": "British"
}
}
}
}


@ -17545,8 +17545,9 @@ colonnade/~1MDS
colonoscopy/1SM
colony/~14SM
colophon/1SM
color's
color/~154AEGDS
color's/<
color/~154AEGDS<
colour/~154AEGDS!
colorant/1SM
coloration/~1EM
coloratura/15MS

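The two new affix flags gift dialect metadata to existing entries: `<` marks a spelling as American and `!` as British, which is how `color` and `colour` above end up dialect-tagged. A rough sketch of inspecting that metadata, assuming `get_word_metadata` takes a `&[char]` and exposes an optional `dialect` field, as the `SpellCheck` change further down suggests:

use harper_core::{Dialect, Dictionary, FstDictionary};

fn main() {
    let dict = FstDictionary::curated();
    let colour: Vec<char> = "colour".chars().collect();

    // The `!` flag on "colour" should gift `dialect: British` metadata.
    let metadata = dict.get_word_metadata(&colour).unwrap();
    assert_eq!(metadata.dialect, Some(Dialect::British));
}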

@ -66,7 +66,7 @@ mod tests {
use super::IgnoredLints;
use crate::{
Document, FstDictionary,
Dialect, Document, FstDictionary,
linting::{LintGroup, Linter},
};
@ -74,7 +74,8 @@ mod tests {
fn can_ignore_all(text: String) -> bool {
let document = Document::new_markdown_default_curated(&text);
let mut lints = LintGroup::new_curated(FstDictionary::curated()).lint(&document);
let mut lints =
LintGroup::new_curated(FstDictionary::curated(), Dialect::American).lint(&document);
let mut ignored = IgnoredLints::new();
@ -90,7 +91,8 @@ mod tests {
fn can_ignore_first(text: String) -> TestResult {
let document = Document::new_markdown_default_curated(&text);
let mut lints = LintGroup::new_curated(FstDictionary::curated()).lint(&document);
let mut lints =
LintGroup::new_curated(FstDictionary::curated(), Dialect::American).lint(&document);
let Some(first) = lints.first().cloned() else {
return TestResult::discard();
@ -108,7 +110,8 @@ mod tests {
fn assert_ignore_lint_reduction(source: &str, nth_lint: usize) {
let document = Document::new_markdown_default_curated(source);
let mut lints = LintGroup::new_curated(FstDictionary::curated()).lint(&document);
let mut lints =
LintGroup::new_curated(FstDictionary::curated(), Dialect::American).lint(&document);
let nth = lints.get(nth_lint).cloned().unwrap_or_else(|| {
panic!("If ignoring the lint at {nth_lint}, make sure there are enough problems.")


@ -46,7 +46,7 @@ pub use token_kind::TokenKind;
pub use token_string_ext::TokenStringExt;
pub use vec_ext::VecExt;
pub use word_metadata::{
AdverbData, ConjunctionData, NounData, PronounData, Tense, VerbData, WordMetadata,
AdverbData, ConjunctionData, Dialect, NounData, PronounData, Tense, VerbData, WordMetadata,
};
/// A utility function that removes overlapping lints in a vector,
@ -77,7 +77,7 @@ pub fn remove_overlaps(lints: &mut Vec<Lint>) {
#[cfg(test)]
mod tests {
use crate::{
Document, FstDictionary,
Dialect, Document, FstDictionary,
linting::{LintGroup, Linter},
remove_overlaps,
};
@ -86,7 +86,7 @@ mod tests {
fn keeps_space_lint() {
let doc = Document::new_plain_english_curated("Ths tet");
let mut linter = LintGroup::new_curated(FstDictionary::curated());
let mut linter = LintGroup::new_curated(FstDictionary::curated(), Dialect::American);
let mut lints = linter.lint(&doc);


@ -65,7 +65,7 @@ use super::wrong_quotes::WrongQuotes;
use super::{CurrencyPlacement, Linter, NoOxfordComma, OxfordComma};
use super::{Lint, PatternLinter};
use crate::linting::{closed_compounds, phrase_corrections};
use crate::{CharString, Document, TokenStringExt};
use crate::{CharString, Dialect, Document, TokenStringExt};
use crate::{Dictionary, MutableDictionary};
#[derive(Debug, Serialize, Deserialize, Default, Clone)]
@ -76,8 +76,8 @@ pub struct LintGroupConfig {
#[cached]
fn curated_config() -> LintGroupConfig {
// Dictionary does not matter, we're just after the config.
let group = LintGroup::new_curated(MutableDictionary::new().into());
// The Dictionary and Dialect do not matter, we're just after the config.
let group = LintGroup::new_curated(MutableDictionary::new().into(), Dialect::American);
group.config
}
@ -264,7 +264,7 @@ impl LintGroup {
self
}
pub fn new_curated(dictionary: Arc<impl Dictionary + 'static>) -> Self {
pub fn new_curated(dictionary: Arc<impl Dictionary + 'static>, dialect: Dialect) -> Self {
let mut out = Self::empty();
macro_rules! insert_struct_rule {
@ -342,15 +342,18 @@ impl LintGroup {
insert_pattern_rule!(ExpandTimeShorthands, true);
insert_pattern_rule!(ModalOf, true);
out.add("SpellCheck", Box::new(SpellCheck::new(dictionary)));
out.add("SpellCheck", Box::new(SpellCheck::new(dictionary, dialect)));
out.config.set_rule_enabled("SpellCheck", true);
out
}
/// Create a new curated group with all config values cleared out.
pub fn new_curated_empty_config(dictionary: Arc<impl Dictionary + 'static>) -> Self {
let mut group = Self::new_curated(dictionary);
pub fn new_curated_empty_config(
dictionary: Arc<impl Dictionary + 'static>,
dialect: Dialect,
) -> Self {
let mut group = Self::new_curated(dictionary, dialect);
group.config.clear();
group
}
@ -412,19 +415,20 @@ impl Linter for LintGroup {
mod tests {
use std::sync::Arc;
use crate::{Document, FstDictionary, MutableDictionary, linting::Linter};
use crate::{Dialect, Document, FstDictionary, MutableDictionary, linting::Linter};
use super::LintGroup;
#[test]
fn can_get_all_descriptions() {
let group = LintGroup::new_curated(Arc::new(MutableDictionary::default()));
let group =
LintGroup::new_curated(Arc::new(MutableDictionary::default()), Dialect::American);
group.all_descriptions();
}
#[test]
fn lint_descriptions_are_clean() {
let mut group = LintGroup::new_curated(FstDictionary::curated());
let mut group = LintGroup::new_curated(FstDictionary::curated(), Dialect::American);
let pairs: Vec<_> = group
.all_descriptions()
.into_iter()


@ -151,7 +151,12 @@ mod tests {
let test = Document::new_markdown_default_curated(text);
let lints = linter.lint(&test);
dbg!(&lints);
assert_eq!(lints.len(), count);
if lints.len() != count {
panic!(
"Expected \"{text}\" to create {count} lints, but it created {}.",
lints.len()
);
}
}
/// Assert the total number of suggestions produced by a [`Linter`], spread across all produced
@ -173,18 +178,26 @@ mod tests {
let test = Document::new_markdown_default_curated(text);
let lints = linter.lint(&test);
let mut text: Vec<char> = text.chars().collect();
let mut text_chars: Vec<char> = text.chars().collect();
if lints.is_empty() && expected_result != text {
panic!("Expected lints, but none were created.");
}
for lint in lints {
dbg!(&lint);
if let Some(sug) = lint.suggestions.first() {
sug.apply(lint.span, &mut text);
sug.apply(lint.span, &mut text_chars);
}
}
let transformed_str: String = text.iter().collect();
let transformed_str: String = text_chars.iter().collect();
assert_eq!(transformed_str.as_str(), expected_result);
if transformed_str.as_str() != expected_result {
panic!(
"Expected \"{transformed_str}\" to be \"{expected_result}\" after applying the computed suggestions."
);
}
// Applying the suggestions should fix all the lints.
assert_lint_count(&transformed_str, linter, 0);


@ -7,7 +7,7 @@ use super::Suggestion;
use super::{Lint, LintKind, Linter};
use crate::document::Document;
use crate::spell::suggest_correct_spelling;
use crate::{CharString, CharStringExt, Dictionary, TokenStringExt};
use crate::{CharString, CharStringExt, Dialect, Dictionary, TokenStringExt};
pub struct SpellCheck<T>
where
@ -15,13 +15,15 @@ where
{
dictionary: T,
word_cache: LruCache<CharString, Vec<CharString>>,
dialect: Dialect,
}
impl<T: Dictionary> SpellCheck<T> {
pub fn new(dictionary: T) -> Self {
pub fn new(dictionary: T, dialect: Dialect) -> Self {
Self {
dictionary,
word_cache: LruCache::new(NonZero::new(10000).unwrap()),
dialect,
}
}
}
@ -45,6 +47,15 @@ impl<T: Dictionary> SpellCheck<T> {
dist += 1;
}
// Remove entries outside the configured dialect
suggestions.retain(|v| {
self.dictionary
.get_word_metadata(v)
.unwrap()
.dialect
.is_none_or(|d| d == self.dialect)
});
self.word_cache.put(word.into(), suggestions.clone());
suggestions
@ -57,11 +68,15 @@ impl<T: Dictionary> Linter for SpellCheck<T> {
for word in document.iter_words() {
let word_chars = document.get_span_content(&word.span);
if self.dictionary.contains_exact_word(word_chars)
|| self.dictionary.contains_exact_word(&word_chars.to_lower())
{
continue;
}
if let Some(metadata) = word.kind.as_word().unwrap() {
if metadata.dialect.is_none_or(|d| d == self.dialect)
&& (self.dictionary.contains_exact_word(word_chars)
|| self.dictionary.contains_exact_word(&word_chars.to_lower()))
{
continue;
}
};
let mut possibilities = self.cached_suggest_correct_spelling(word_chars);
@ -115,7 +130,7 @@ impl<T: Dictionary> Linter for SpellCheck<T> {
#[cfg(test)]
mod tests {
use crate::{
FstDictionary,
Dialect, FstDictionary,
linting::tests::{assert_lint_count, assert_suggestion_result},
};
@ -125,7 +140,7 @@ mod tests {
fn markdown_capitalized() {
assert_suggestion_result(
"The word markdown should be capitalized.",
SpellCheck::new(FstDictionary::curated()),
SpellCheck::new(FstDictionary::curated(), Dialect::American),
"The word Markdown should be capitalized.",
);
}
@ -134,8 +149,17 @@ mod tests {
fn harper_automattic_capitalized() {
assert_lint_count(
"So should harper and automattic.",
SpellCheck::new(FstDictionary::curated()),
SpellCheck::new(FstDictionary::curated(), Dialect::American),
2,
);
}
#[test]
fn american_color_in_british_dialect() {
assert_lint_count(
"Do you like the color?",
SpellCheck::new(FstDictionary::curated(), Dialect::British),
1,
);
}
}

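The new `american_color_in_british_dialect` test covers one direction; the inverse case should hold as well, since a word whose metadata names a different dialect no longer passes the early `contains_exact_word` check. A hedged sketch using the same test helpers:

#[test]
fn british_colour_in_american_dialect() {
    assert_lint_count(
        "Do you like the colour?",
        SpellCheck::new(FstDictionary::curated(), Dialect::American),
        1,
    );
}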

@ -1,6 +1,7 @@
use is_macro::Is;
use paste::paste;
use serde::{Deserialize, Serialize};
use strum_macros::EnumString;
use crate::WordId;
@ -350,7 +351,9 @@ impl ConjunctionData {
}
/// A regional dialect.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Hash)]
#[derive(
Debug, Clone, Copy, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Hash, EnumString,
)]
pub enum Dialect {
American,
Canadian,

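The `EnumString` derive from strum gives `Dialect` a `std::str::FromStr` implementation keyed on the variant names, which is presumably what lets clap parse the new `--dialect` flag in `harper-cli`. A small sketch of the behavior that derive should provide:

use std::str::FromStr;

use harper_core::Dialect;

fn main() {
    // Variant names parse directly.
    assert_eq!(Dialect::from_str("British").unwrap(), Dialect::British);

    // Anything else is rejected rather than silently defaulting
    // ("Kiwi" is just an illustrative invalid value).
    assert!(Dialect::from_str("Kiwi").is_err());
}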

@ -1,5 +1,5 @@
use harper_core::linting::{LintGroup, Linter};
use harper_core::{Document, FstDictionary};
use harper_core::{Dialect, Document, FstDictionary};
/// Creates a unit test checking that the linting of a Markdown document (in
/// `tests_sources`) produces the expected number of lints.
@ -18,7 +18,7 @@ macro_rules! create_test {
let dict = FstDictionary::curated();
let document = Document::new_markdown_default(&source, &dict);
let mut linter = LintGroup::new_curated(dict);
let mut linter = LintGroup::new_curated(dict, Dialect::American);
let lints = linter.lint(&document);
dbg!(&lints);


@ -1,5 +1,5 @@
use harper_core::linting::{LintGroup, Linter};
use harper_core::{Document, FstDictionary};
use harper_core::{Dialect, Document, FstDictionary};
/// Creates a unit test checking that the linting of a Markdown document (in
/// `tests_sources`) produces the expected number of lints.
@ -18,7 +18,7 @@ macro_rules! create_test {
let dict = FstDictionary::curated();
let document = Document::new_markdown_default(&source, &dict);
let mut linter = LintGroup::new_curated(dict);
let mut linter = LintGroup::new_curated(dict, Dialect::American);
let lints = linter.lint(&document);
dbg!(&lints);


@ -1,6 +1,6 @@
use harper_core::linting::{LintGroup, Linter};
use harper_core::parsers::MarkdownOptions;
use harper_core::{Document, FstDictionary};
use harper_core::{Dialect, Document, FstDictionary};
use harper_literate_haskell::LiterateHaskellParser;
/// Creates a unit test checking that the linting of a Markdown document (in
@ -20,7 +20,7 @@ macro_rules! create_test {
let dict = FstDictionary::curated();
let document = Document::new_curated(&source, &LiterateHaskellParser::new_markdown(MarkdownOptions::default()));
let mut linter = LintGroup::new_curated(dict);
let mut linter = LintGroup::new_curated(dict, Dialect::American);
let lints = linter.lint(&document);
dbg!(&lints);


@ -7,7 +7,7 @@ use harper_comments::CommentParser;
use harper_core::linting::{LintGroup, LintGroupConfig};
use harper_core::parsers::{CollapseIdentifiers, IsolateEnglish, Markdown, Parser, PlainEnglish};
use harper_core::{
Dictionary, Document, FstDictionary, MergedDictionary, MutableDictionary, WordMetadata,
Dialect, Dictionary, Document, FstDictionary, MergedDictionary, MutableDictionary, WordMetadata,
};
use harper_html::HtmlParser;
use harper_literate_haskell::LiterateHaskellParser;
@ -140,12 +140,13 @@ impl Backend {
self.pull_config().await;
// Copy necessary configuration to avoid holding lock.
let (lint_config, markdown_options, isolate_english) = {
let (lint_config, markdown_options, isolate_english, dialect) = {
let config = self.config.read().await;
(
config.lint_config.clone(),
config.markdown_options,
config.isolate_english,
config.dialect,
)
};
@ -161,7 +162,8 @@ impl Backend {
info!("Constructing new LintGroup for new document.");
DocumentState {
linter: LintGroup::new_curated(dict.clone()).with_lint_config(lint_config.clone()),
linter: LintGroup::new_curated(dict.clone(), dialect)
.with_lint_config(lint_config.clone()),
language_id: language_id.map(|v| v.to_string()),
dict: dict.clone(),
url: url.clone(),
@ -173,7 +175,7 @@ impl Backend {
doc_state.dict = dict.clone();
info!("Constructing new linter because of modified dictionary.");
doc_state.linter =
LintGroup::new_curated(dict.clone()).with_lint_config(lint_config.clone());
LintGroup::new_curated(dict.clone(), dialect).with_lint_config(lint_config.clone());
}
let Some(language_id) = &doc_state.language_id else {
@ -188,6 +190,7 @@ impl Backend {
url: &'a Url,
doc_state: &'a mut DocumentState,
lint_config: &LintGroupConfig,
dialect: Dialect,
) -> Result<Box<dyn Parser>> {
if doc_state.ident_dict != new_dict {
info!("Constructing new linter because of modified ident dictionary.");
@ -197,8 +200,8 @@ impl Backend {
merged.add_dictionary(new_dict);
let merged = Arc::new(merged);
doc_state.linter =
LintGroup::new_curated(merged.clone()).with_lint_config(lint_config.clone());
doc_state.linter = LintGroup::new_curated(merged.clone(), dialect)
.with_lint_config(lint_config.clone());
doc_state.dict = merged.clone();
}
@ -223,6 +226,7 @@ impl Backend {
url,
doc_state,
&lint_config,
dialect,
)
.await?,
)
@ -244,6 +248,7 @@ impl Backend {
url,
doc_state,
&lint_config,
dialect,
)
.await?,
)
@ -597,7 +602,7 @@ impl LanguageServer for Backend {
for doc in doc_lock.values_mut() {
info!("Constructing new LintGroup for updated configuration.");
doc.linter = LintGroup::new_curated(doc.dict.clone())
doc.linter = LintGroup::new_curated(doc.dict.clone(), config_lock.dialect)
.with_lint_config(config_lock.lint_config.clone());
}


@ -2,7 +2,7 @@ use std::path::PathBuf;
use anyhow::{Result, bail};
use dirs::{config_dir, data_local_dir};
use harper_core::{linting::LintGroupConfig, parsers::MarkdownOptions};
use harper_core::{Dialect, linting::LintGroupConfig, parsers::MarkdownOptions};
use resolve_path::PathResolveExt;
use serde::{Deserialize, Serialize};
use serde_json::Value;
@ -70,6 +70,7 @@ pub struct Config {
pub code_action_config: CodeActionConfig,
pub isolate_english: bool,
pub markdown_options: MarkdownOptions,
pub dialect: Dialect,
}
impl Config {
@ -114,6 +115,10 @@ impl Config {
base.diagnostic_severity = serde_json::from_value(v.clone())?;
}
if let Some(v) = value.get("dialect") {
base.dialect = serde_json::from_value(v.clone())?;
}
if let Some(v) = value.get("codeActions") {
base.code_action_config = CodeActionConfig::from_lsp_config(v.clone())?;
}
@ -148,6 +153,7 @@ impl Default for Config {
code_action_config: CodeActionConfig::default(),
isolate_english: false,
markdown_options: MarkdownOptions::default(),
dialect: Dialect::American,
}
}
}

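Because the new `dialect` field is read with `serde_json::from_value`, the LSP setting is simply the variant name as a JSON string. A quick sketch of that deserialization, assuming `serde_json` is available:

use harper_core::Dialect;

fn main() {
    // "British" in an editor's harper-ls settings deserializes straight to the variant.
    let dialect: Dialect = serde_json::from_str("\"British\"").unwrap();
    assert_eq!(dialect, Dialect::British);
}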

@ -1,5 +1,5 @@
use harper_core::linting::{LintGroup, Linter};
use harper_core::{Document, FstDictionary};
use harper_core::{Dialect, Document, FstDictionary};
use harper_typst::Typst;
/// Creates a unit test checking that the linting of a document in
@ -19,7 +19,7 @@ macro_rules! create_test {
let dict = FstDictionary::curated();
let document = Document::new(&source, &Typst, &dict);
let mut linter = LintGroup::new_curated(dict);
let mut linter = LintGroup::new_curated(dict, Dialect::American);
let lints = linter.lint(&document);
dbg!(&lints);


@ -62,6 +62,26 @@ impl Language {
}
}
#[wasm_bindgen]
#[derive(Serialize, Deserialize, Debug, Clone, Copy)]
pub enum Dialect {
American,
British,
Australian,
Canadian,
}
impl Into<harper_core::Dialect> for Dialect {
fn into(self) -> harper_core::Dialect {
match self {
Dialect::American => harper_core::Dialect::American,
Dialect::Canadian => harper_core::Dialect::Canadian,
Dialect::Australian => harper_core::Dialect::Australian,
Dialect::British => harper_core::Dialect::British,
}
}
}
#[wasm_bindgen]
pub struct Linter {
lint_group: LintGroup,
@ -71,6 +91,7 @@ pub struct Linter {
user_dictionary: MutableDictionary,
dictionary: Arc<MergedDictionary>,
ignored_lints: IgnoredLints,
dialect: Dialect,
}
#[wasm_bindgen]
@ -78,15 +99,16 @@ impl Linter {
/// Construct a new `Linter`.
/// Note that this can mean constructing the curated dictionary, which is the most expensive operation
/// in Harper.
pub fn new() -> Self {
pub fn new(dialect: Dialect) -> Self {
let dictionary = Self::construct_merged_dict(MutableDictionary::default());
let lint_group = LintGroup::new_curated_empty_config(dictionary.clone());
let lint_group = LintGroup::new_curated_empty_config(dictionary.clone(), dialect.into());
Self {
lint_group,
user_dictionary: MutableDictionary::new(),
dictionary,
ignored_lints: IgnoredLints::default(),
dialect,
}
}
@ -95,7 +117,8 @@ impl Linter {
fn synchronize_lint_dict(&mut self) {
let mut lint_config = self.lint_group.config.clone();
self.dictionary = Self::construct_merged_dict(self.user_dictionary.clone());
self.lint_group = LintGroup::new_curated_empty_config(self.dictionary.clone());
self.lint_group =
LintGroup::new_curated_empty_config(self.dictionary.clone(), self.dialect.into());
self.lint_group.config.merge_from(&mut lint_config);
}
@ -248,12 +271,6 @@ impl Linter {
}
}
impl Default for Linter {
fn default() -> Self {
Self::new()
}
}
#[wasm_bindgen]
pub fn to_title_case(text: String) -> String {
harper_core::make_title_case_str(&text, &PlainEnglish, &FstDictionary::curated())
@ -385,14 +402,16 @@ impl Lint {
#[wasm_bindgen]
pub fn get_default_lint_config_as_json() -> String {
let config = LintGroup::new_curated(MutableDictionary::new().into()).config;
let config =
LintGroup::new_curated(MutableDictionary::new().into(), Dialect::American.into()).config;
serde_json::to_string(&config).unwrap()
}
#[wasm_bindgen]
pub fn get_default_lint_config() -> JsValue {
let config = LintGroup::new_curated(MutableDictionary::new().into()).config;
let config =
LintGroup::new_curated(MutableDictionary::new().into(), Dialect::American.into()).config;
// Important for downstream JSON serialization
let serializer = serde_wasm_bindgen::Serializer::json_compatible();


@ -99,6 +99,8 @@ test-vscode:
mkdir "$bin_dir"
fi
cargo build --release
cp "{{justfile_directory()}}/target/release/harper-ls"* "$bin_dir"
cd "$ext_dir"


@ -1,4 +1,4 @@
import type { Lint, Span, Suggestion } from 'harper-wasm';
import type { Dialect, Lint, Span, Suggestion } from 'harper-wasm';
import { LintConfig, LintOptions } from './main';
import { BinaryModule } from './binary';
@ -76,4 +76,6 @@ export default interface Linter {
export interface LinterInit {
/** The module or path to the WebAssembly binary. */
binary: BinaryModule;
/** The dialect of English Harper should use. If omitted, Harper will default to American English. */
dialect?: Dialect;
}


@ -14,7 +14,7 @@ export default class LocalLinter implements Linter {
this.binary = init.binary;
this.inner = LazyPromise.from(async () => {
await this.binary.setup();
return this.binary.createLinter();
return this.binary.createLinter(init.dialect);
});
}


@ -1,4 +1,4 @@
import type { Lint, Suggestion, Span } from 'harper-wasm';
import type { Lint, Suggestion, Span, Dialect } from 'harper-wasm';
import Linter, { LinterInit } from '../Linter';
import Worker from './worker.ts?worker&inline';
import { LintConfig, LintOptions } from '../main';
@ -17,12 +17,14 @@ export interface RequestItem {
* NOTE: This class will not work properly in Node. In that case, just use `LocalLinter`. */
export default class WorkerLinter implements Linter {
private binary: BinaryModule;
private dialect?: Dialect;
private worker: Worker;
private requestQueue: RequestItem[];
private working = true;
constructor(init: LinterInit) {
this.binary = init.binary;
this.dialect = init.dialect;
this.worker = new Worker();
this.requestQueue = [];
@ -30,7 +32,7 @@ export default class WorkerLinter implements Linter {
this.worker.onmessage = () => {
this.setupMainEventListeners();
this.worker.postMessage(this.binary.url);
this.worker.postMessage([this.binary.url, this.dialect]);
this.working = false;
this.submitRemainingRequests();


@ -7,12 +7,12 @@ import LocalLinter from '../LocalLinter';
self.postMessage('ready');
self.onmessage = (e) => {
const binaryUrl = e.data;
const [binaryUrl, dialect] = e.data;
if (typeof binaryUrl !== 'string') {
throw new TypeError(`Expected binary to be a string of url but got ${typeof binaryUrl}.`);
}
const binary = new BinaryModule(binaryUrl);
const linter = new LocalLinter({ binary });
const linter = new LocalLinter({ binary, dialect });
async function processRequest(v: SerializedRequest) {
const { procName, args } = await binary.deserialize(v);


@ -1,6 +1,6 @@
import { default as binaryUrl } from 'harper-wasm/harper_wasm_bg.wasm?no-inline';
import { default as binaryInlinedUrl } from 'harper-wasm/harper_wasm_bg.wasm?inline';
import type { InitInput, Span, Suggestion, Linter as WasmLinter } from 'harper-wasm';
import { Dialect, InitInput, Span, Suggestion, Linter as WasmLinter } from 'harper-wasm';
import pMemoize from 'p-memoize';
import LazyPromise from 'p-lazy';
import { assert } from './utils';
@ -101,9 +101,9 @@ export class BinaryModule {
exported.setup();
}
async createLinter(): Promise<WasmLinter> {
async createLinter(dialect?: Dialect): Promise<WasmLinter> {
const exported = await this.inner;
return exported.Linter.new();
return exported.Linter.new(dialect ?? Dialect.American);
}
async serializeArg(arg: any): Promise<RequestArg> {


@ -1,5 +1,5 @@
export type { Lint, Span, Suggestion } from 'harper-wasm';
export { SuggestionKind } from 'harper-wasm';
export { SuggestionKind, Dialect } from 'harper-wasm';
export type { default as Linter, LinterInit } from './Linter';
export { default as LocalLinter } from './LocalLinter';
export { default as WorkerLinter } from './WorkerLinter';


@ -83,6 +83,18 @@
"default": false,
"description": "Make code actions appear in \"stable\" positions by placing code actions that should always be available, like adding misspelled words in the dictionary, first."
},
"harper.dialect": {
"scope": "resource",
"type": "string",
"enum": [
"British",
"American",
"Canadian",
"Australian"
],
"default": "American",
"description": "Set which dialect of English Harper should expect."
},
"harper.diagnosticSeverity": {
"scope": "resource",
"type": "string",


@ -1,7 +1,8 @@
{
"files.associations": {
"git-commit": "git-commit"
},
"harper.linters.SpellCheck": true,
"harper.linters.RepeatedWords": true
"files.associations": {
"git-commit": "git-commit"
},
"harper.linters.SpellCheck": true,
"harper.linters.RepeatedWords": true,
"harper.dialect": "American"
}


@ -0,0 +1 @@
This document uses British words, unlike color.


@ -1,3 +1,4 @@
import type { Diagnostic, Extension } from 'vscode';
import {
@ -41,7 +42,10 @@ export function compareActualVsExpectedDiagnostics(
actual: Diagnostic[],
expected: Diagnostic[]
): void {
expect(actual.length).toBe(expected.length);
if (actual.length != expected.length) {
throw new Error(`Expected ${expected.length} diagnostics, got ${actual.length}.`);
}
for (let i = 0; i < actual.length; i++) {
expect(actual[i].source).toBe(expected[i].source);
expect(actual[i].message).toBe(expected[i].message);


@ -0,0 +1,55 @@
import type { Extension } from 'vscode';
import { commands, ConfigurationTarget, Uri, workspace } from 'vscode';
import {
activateHarper,
compareActualVsExpectedDiagnostics,
createExpectedDiagnostics,
createRange,
getActualDiagnostics,
openFile,
sleep
} from './helper';
describe('IntegrationDialect >', () => {
let harper: Extension<void>;
let markdownUri: Uri;
beforeAll(async () => {
harper = await activateHarper();
// Open test file so diagnostics can occur
markdownUri = await openFile('integrationBritish.md');
// Wait for `harper-ls` to start
await sleep(500);
});
it('runs', () => {
expect(harper.isActive).toBe(true);
});
it('gives correct diagnostics for default config', () => {
compareActualVsExpectedDiagnostics(
getActualDiagnostics(markdownUri),
createExpectedDiagnostics()
);
});
it('marks error when set to British English', async () => {
const config = workspace.getConfiguration('harper');
await config.update('dialect', 'British', ConfigurationTarget.Workspace);
// Wait for `harper-ls` to update diagnostics
await sleep(300);
compareActualVsExpectedDiagnostics(
getActualDiagnostics(markdownUri),
createExpectedDiagnostics({
message: 'Did you mean to spell “color” this way?',
range: createRange(0, 41, 0, 46)
})
);
// Set config back to default value
await config.update('dialect', 'American', ConfigurationTarget.Workspace);
});
});


@ -214,10 +214,11 @@ These configs are under the `markdown` key:
### Other Configs
| Config | Type | Default Value | Description |
| -------------------- | ------------------------------------------------- | ------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `diagnosticSeverity` | `"error"`, `"hint"`, `"information"`, `"warning"` | `"hint"` | Configures how severe diagnostics appear in your editor |
| `isolateEnglish` | `boolean` | `false` | In documents that are a mixture of English and another language, only lint English text. This feature is incredibly new and unstable. Do not expect it to work perfectly. |
| Config | Type | Default Value | Description |
| -------------------- | ----------------------------------------------------- | ------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `diagnosticSeverity` | `"error"`, `"hint"`, `"information"`, `"warning"` | `"hint"` | Configures how severe diagnostics appear in your editor |
| `isolateEnglish` | `boolean` | `false` | In documents that are a mixture of English and another language, only lint English text. This feature is incredibly new and unstable. Do not expect it to work perfectly. |
| `dialect`            | `"American"`, `"British"`, `"Australian"`, `"Canadian"` | `"American"`  | Set which dialect of English Harper should expect.                                                                                                                          |
## Supported Languages