feat(core): create tagged linting function (#1998)

This commit is contained in:
Elijah Potter 2025-09-29 09:18:46 -06:00 committed by GitHub
parent 7b8ad10fc5
commit 43fcaf47f8
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
21 changed files with 301 additions and 66 deletions

View file

@ -30,7 +30,7 @@ mod token_string_ext;
mod vec_ext;
use render_markdown::render_markdown;
use std::collections::VecDeque;
use std::collections::{BTreeMap, VecDeque};
pub use char_string::{CharString, CharStringExt};
pub use currency::Currency;
@ -83,6 +83,65 @@ pub fn remove_overlaps(lints: &mut Vec<Lint>) {
lints.remove_indices(remove_indices);
}
/// Remove overlapping lints from a map keyed by rule name, similar to [`remove_overlaps`].
///
/// The map is treated as if all contained lints were in a single flat collection, ensuring the
/// same lint would be kept regardless of whether it originated from `lint` or `organized_lints`.
pub fn remove_overlaps_map<K: Ord>(lint_map: &mut BTreeMap<K, Vec<Lint>>) {
let total: usize = lint_map.values().map(Vec::len).sum();
if total < 2 {
return;
}
struct IndexedSpan {
rule_idx: usize,
lint_idx: usize,
start: usize,
end: usize,
}
let mut removal_flags: Vec<Vec<bool>> = lint_map
.values()
.map(|lints| vec![false; lints.len()])
.collect();
let mut spans = Vec::with_capacity(total);
for (rule_idx, (_, lints)) in lint_map.iter().enumerate() {
for (lint_idx, lint) in lints.iter().enumerate() {
spans.push(IndexedSpan {
rule_idx,
lint_idx,
start: lint.span.start,
end: lint.span.end,
});
}
}
spans.sort_by_key(|span| (span.start, usize::MAX - span.end));
let mut cur = 0;
for span in spans {
if span.start < cur {
removal_flags[span.rule_idx][span.lint_idx] = true;
} else {
cur = span.end;
}
}
for (rule_idx, (_, lints)) in lint_map.iter_mut().enumerate() {
if removal_flags[rule_idx].iter().all(|flag| !*flag) {
continue;
}
let mut idx = 0;
lints.retain(|_| {
let remove = removal_flags[rule_idx][idx];
idx += 1;
!remove
});
}
}
#[cfg(test)]
mod tests {
use crate::spell::FstDictionary;

View file

@ -257,7 +257,7 @@ impl Hash for LintGroupConfig {
}
/// A struct for collecting the output of a number of individual [Linter]s.
/// Each child can be toggled via the public, mutable [Self::config] object.
/// Each child can be toggled via the public, mutable `Self::config` object.
pub struct LintGroup {
pub config: LintGroupConfig,
/// We use a binary map here so the ordering is stable.
@ -270,7 +270,7 @@ pub struct LintGroup {
///
/// Since the pattern linter results also depend on the config, we hash it and pass it as part
/// of the key.
chunk_expr_cache: LruCache<(CharString, u64), Vec<Lint>>,
chunk_expr_cache: LruCache<(CharString, u64), BTreeMap<String, Vec<Lint>>>,
hasher_builder: RandomState,
}
@ -588,22 +588,14 @@ impl LintGroup {
group.config.clear();
group
}
}
impl Default for LintGroup {
fn default() -> Self {
Self::empty()
}
}
impl Linter for LintGroup {
fn lint(&mut self, document: &Document) -> Vec<Lint> {
let mut results = Vec::new();
pub fn organized_lints(&mut self, document: &Document) -> BTreeMap<String, Vec<Lint>> {
let mut results = BTreeMap::new();
// Normal linters
for (key, linter) in &mut self.linters {
if self.config.is_rule_enabled(key) {
results.extend(linter.lint(document));
results.insert(key.clone(), linter.lint(document));
}
}
@ -615,38 +607,58 @@ impl Linter for LintGroup {
let chunk_chars = document.get_span_content(&chunk_span);
let config_hash = self.hasher_builder.hash_one(&self.config);
let key = (chunk_chars.into(), config_hash);
let cache_key = (chunk_chars.into(), config_hash);
let mut chunk_results = if let Some(hit) = self.chunk_expr_cache.get(&key) {
let mut chunk_results = if let Some(hit) = self.chunk_expr_cache.get(&cache_key) {
hit.clone()
} else {
let mut pattern_lints = Vec::new();
let mut pattern_lints = BTreeMap::new();
for (key, linter) in &mut self.expr_linters {
if self.config.is_rule_enabled(key) {
pattern_lints.extend(run_on_chunk(linter, chunk, document.get_source()));
let lints =
run_on_chunk(linter, chunk, document.get_source()).map(|mut l| {
l.span.pull_by(chunk_span.start);
l
});
pattern_lints.insert(key.clone(), lints.collect());
}
}
// Make the spans relative to the chunk start
for lint in &mut pattern_lints {
lint.span.pull_by(chunk_span.start);
}
self.chunk_expr_cache.put(key, pattern_lints.clone());
self.chunk_expr_cache.put(cache_key, pattern_lints.clone());
pattern_lints
};
// Bring the spans back into document-space
for lint in &mut chunk_results {
lint.span.push_by(chunk_span.start);
for value in chunk_results.values_mut() {
for lint in value {
lint.span.push_by(chunk_span.start);
}
}
results.append(&mut chunk_results);
for (key, mut vec) in chunk_results {
results.entry(key).or_default().append(&mut vec);
}
}
results
}
}
impl Default for LintGroup {
fn default() -> Self {
Self::empty()
}
}
impl Linter for LintGroup {
fn lint(&mut self, document: &Document) -> Vec<Lint> {
self.organized_lints(document)
.into_values()
.flatten()
.collect()
}
fn description(&self) -> &str {
"A collection of linters that can be run as one."

View file

@ -409,6 +409,7 @@ pub mod tests {
assert_lint_count(&transformed_str, linter, 0);
}
#[track_caller]
pub fn assert_top3_suggestion_result(
text: &str,
mut linter: impl Linter,

View file

@ -58,7 +58,7 @@ pub fn lint_group() -> LintGroup {
"Corrects `an` to `and` after `ahead`."
),
"AllOfASudden" => (
["all of the sudden"],
["all of the sudden", "all of sudden"],
["all of a sudden"],
"The phrase is `all of a sudden`, meaning `unexpectedly`.",
"Corrects `all of the sudden` to `all of a sudden`.",

View file

@ -7,6 +7,7 @@ use std::sync::Arc;
use harper_core::language_detection::is_doc_likely_english;
use harper_core::linting::{LintGroup, Linter as _};
use harper_core::parsers::{IsolateEnglish, Markdown, Parser, PlainEnglish};
use harper_core::remove_overlaps_map;
use harper_core::{
CharString, DictWordMetadata, Document, IgnoredLints, LintContext, Lrc, remove_overlaps,
spell::{Dictionary, FstDictionary, MergedDictionary, MutableDictionary},
@ -250,6 +251,41 @@ impl Linter {
ctx.default_hash()
}
pub fn organized_lints(&mut self, text: String, language: Language) -> Vec<OrganizedGroup> {
let source: Vec<_> = text.chars().collect();
let source = Lrc::new(source);
let parser = language.create_parser();
let document = Document::new_from_vec(source.clone(), &parser, &self.dictionary);
let temp = self.lint_group.config.clone();
self.lint_group.config.fill_with_curated();
let mut lints = self.lint_group.organized_lints(&document);
self.lint_group.config = temp;
remove_overlaps_map(&mut lints);
for value in lints.values_mut() {
self.ignored_lints.remove_ignored(value, &document);
}
lints
.into_iter()
.map(|(s, ls)| OrganizedGroup {
group: s,
lints: ls
.into_iter()
.map(|l| {
let problem_text = l.span.get_content_string(&source);
Lint::new(l, problem_text, language)
})
.collect(),
})
.collect()
}
/// Perform the configured linting on the provided text.
pub fn lint(&mut self, text: String, language: Language) -> Vec<Lint> {
let source: Vec<_> = text.chars().collect();
@ -267,7 +303,6 @@ impl Linter {
self.lint_group.config = temp;
remove_overlaps(&mut lints);
self.ignored_lints.remove_ignored(&mut lints, &document);
lints
@ -425,7 +460,7 @@ impl Suggestion {
/// An error found in provided text.
///
/// May include zero or more suggestions that may fix the problematic text.
#[derive(Debug, Deserialize, Serialize)]
#[derive(Debug, Deserialize, Serialize, Clone)]
#[wasm_bindgen]
pub struct Lint {
inner: harper_core::linting::Lint,
@ -546,3 +581,13 @@ impl From<harper_core::Span<char>> for Span {
Span::new(value.start, value.end)
}
}
/// Used exclusively for [`Linter::organized_lints`]
#[wasm_bindgen]
#[derive(Serialize, Deserialize, Clone)]
pub struct OrganizedGroup {
#[wasm_bindgen(getter_with_clone)]
pub group: String,
#[wasm_bindgen(getter_with_clone)]
pub lints: Vec<Lint>,
}

View file

@ -1,10 +1,10 @@
import type { Dialect, LintConfig } from 'harper.js';
import type { UnpackedLint } from 'lint-framework';
import type { UnpackedLintGroups } from 'lint-framework';
import { LRUCache } from 'lru-cache';
import type { ActivationKey } from './protocol';
export default class ProtocolClient {
private static readonly lintCache = new LRUCache<string, Promise<any>>({
private static readonly lintCache = new LRUCache<string, Promise<UnpackedLintGroups>>({
max: 5000,
ttl: 5_000,
});
@ -13,11 +13,13 @@ export default class ProtocolClient {
return `${domain}:${text}`;
}
public static async lint(text: string, domain: string): Promise<UnpackedLint[]> {
public static async lint(text: string, domain: string): Promise<UnpackedLintGroups> {
const key = this.cacheKey(text, domain);
let p = this.lintCache.get(key);
if (!p) {
p = chrome.runtime.sendMessage({ kind: 'lint', text, domain }).then((r) => r.lints);
p = chrome.runtime
.sendMessage({ kind: 'lint', text, domain })
.then((r) => r.lints as UnpackedLintGroups);
this.lintCache.set(key, p);
}
return p;

View file

@ -1,5 +1,5 @@
import { BinaryModule, Dialect, type LintConfig, LocalLinter } from 'harper.js';
import { unpackLint } from 'lint-framework';
import { type UnpackedLintGroups, unpackLint } from 'lint-framework';
import {
ActivationKey,
type AddToUserDictionaryRequest,
@ -148,12 +148,20 @@ function handleRequest(message: Request): Promise<Response> {
/** Handle a request for linting. */
async function handleLint(req: LintRequest): Promise<LintResponse> {
if (!(await enabledForDomain(req.domain))) {
return { kind: 'lints', lints: [] };
return { kind: 'lints', lints: {} };
}
const lints = await linter.lint(req.text);
const unpackedLints = await Promise.all(lints.map((l) => unpackLint(req.text, l, linter)));
return { kind: 'lints', lints: unpackedLints };
const grouped = await linter.organizedLints(req.text);
const unpackedEntries = await Promise.all(
Object.entries(grouped).map(async ([source, lints]) => {
const unpacked = await Promise.all(
lints.map((lint) => unpackLint(req.text, lint, linter, source)),
);
return [source, unpacked] as const;
}),
);
const unpackedBySource = Object.fromEntries(unpackedEntries) as UnpackedLintGroups;
return { kind: 'lints', lints: unpackedBySource };
}
async function handleGetConfig(req: GetConfigRequest): Promise<GetConfigResponse> {

View file

@ -1,5 +1,5 @@
import type { Dialect, LintConfig, Summary } from 'harper.js';
import type { UnpackedLint, UnpackedSuggestion } from 'lint-framework';
import type { UnpackedLintGroups } from 'lint-framework';
export type Request =
| LintRequest
@ -41,7 +41,7 @@ export type LintRequest = {
export type LintResponse = {
kind: 'lints';
lints: UnpackedLint[];
lints: UnpackedLintGroups;
};
export type GetConfigRequest = {

View file

@ -29,8 +29,8 @@ test('Wraps correctly', async ({ page }) => {
await page.waitForTimeout(6000);
await assertHarperHighlightBoxes(page, [
{ height: 18, width: 25.21875, x: 512.28125, y: 63 },
{ height: 18, width: 67.21875, x: 260.234375, y: 103 },
{ x: 260.234375, y: 103, width: 67.21875, height: 18 },
{ x: 512.28125, y: 63, width: 25.21875, height: 18 },
]);
});

View file

@ -26,15 +26,15 @@ test('Wraps correctly', async ({ page }, testInfo) => {
if (testInfo.project.name == 'chromium') {
await assertHarperHighlightBoxes(page, [
{ height: 19, width: 24, x: 241.90625, y: 27 },
{ x: 233.90625, y: 44, width: 48, height: 19 },
{ x: 281.90625, y: 44, width: 8, height: 19 },
{ x: 10, y: 61, width: 8, height: 19 },
{ x: 241.90625, y: 27, width: 24, height: 19 },
]);
} else {
await assertHarperHighlightBoxes(page, [
{ x: 218.8000030517578, y: 26, width: 21.600006103515625, height: 17 },
{ x: 10, y: 71, width: 57.599998474121094, height: 17 },
{ x: 218.8000030517578, y: 26, width: 21.600006103515625, height: 17 },
]);
}
});

View file

@ -17,6 +17,28 @@ for (const [linterName, Linter] of Object.entries(linters)) {
expect(lints.length).toBe(1);
});
test(`${linterName} emits organized lints the same as it emits normal lints`, async () => {
const linter = new Linter({ binary });
const source = 'The the problem is...';
const lints = await linter.lint(source);
expect(lints.length).toBeGreaterThan(0);
const organized = await linter.organizedLints(source);
const normal = await linter.lint(source);
const flattened = [];
for (const [_, value] of Object.entries(organized)) {
flattened.push(...value);
}
expect(flattened.length).toBe(1);
expect(flattened.length).toBe(normal.length);
const item = flattened[0];
expect(item.message().length).not.toBe(0);
});
test(`${linterName} detects repeated words with multiple synchronous requests`, async () => {
const linter = new Linter({ binary });

View file

@ -13,6 +13,9 @@ export default interface Linter {
/** Lint the provided text. */
lint(text: string, options?: LintOptions): Promise<Lint[]>;
/** Lint the provided text, maintaining the relationship with the source rule. */
organizedLints(text: string, options?: LintOptions): Promise<Record<string, Lint[]>>;
/** Apply a suggestion from a lint to text, returning the changed text. */
applySuggestion(text: string, lint: Lint, suggestion: Suggestion): Promise<string>;

View file

@ -38,6 +38,20 @@ export default class LocalLinter implements Linter {
return lints;
}
async organizedLints(text: string, options?: LintOptions): Promise<Record<string, Lint[]>> {
const inner = await this.inner;
const language = options?.language === 'plaintext' ? Language.Plain : Language.Markdown;
const lintGroups = inner.organized_lints(text, language);
const output: Record<string, Lint[]> = {};
for (const { group, lints } of lintGroups) {
output[group] = lints;
}
return output;
}
async applySuggestion(text: string, lint: Lint, suggestion: Suggestion): Promise<string> {
const inner = await this.inner;
return inner.apply_suggestion(text, lint, suggestion);

View file

@ -69,6 +69,10 @@ export default class WorkerLinter implements Linter {
return this.rpc('lint', [text, options]);
}
organizedLints(text: string, options?: LintOptions): Promise<Record<string, Lint[]>> {
return this.rpc('organizedLints', [text, options]);
}
applySuggestion(text: string, lint: Lint, suggestion: Suggestion): Promise<string> {
return this.rpc('applySuggestion', [text, lint, suggestion]);
}

View file

@ -144,7 +144,14 @@ export class BinaryModule {
}
if (argType == 'object') {
return { json: JSON.stringify(arg), type: 'object' };
return {
json: JSON.stringify(
await Promise.all(
Object.entries(arg).map(([key, value]) => this.serializeArg([key, value])),
),
),
type: 'object',
};
}
throw new Error(`Unhandled case: ${arg}`);
@ -182,7 +189,9 @@ export class BinaryModule {
}
case 'object': {
const parsed = JSON.parse(requestArg.json);
return parsed;
return Object.fromEntries(
await Promise.all(parsed.map((val: any) => this.deserializeArg(val))),
);
}
default:
throw new Error(`Unhandled case: ${requestArg.type}`);

View file

@ -21,6 +21,8 @@ export type LintBox = Box & {
};
export type IgnorableLintBox = LintBox & {
/** The rule that produced the lint */
rule: string;
ignoreLint?: () => Promise<void>;
};

View file

@ -2,7 +2,7 @@ import computeLintBoxes from './computeLintBoxes';
import { isVisible } from './domUtils';
import Highlights from './Highlights';
import PopupHandler from './PopupHandler';
import type { UnpackedLint } from './unpackLint';
import type { UnpackedLintGroups } from './unpackLint';
type ActivationKey = 'off' | 'shift' | 'control';
@ -19,13 +19,13 @@ export default class LintFramework {
private scrollableAncestors: Set<HTMLElement>;
private lintRequested = false;
private renderRequested = false;
private lastLints: { target: HTMLElement; lints: UnpackedLint[] }[] = [];
private lastLints: { target: HTMLElement; lints: UnpackedLintGroups }[] = [];
/** The function to be called to re-render the highlights. This is a variable because it is used to register/deregister event listeners. */
private updateEventCallback: () => void;
/** Function used to fetch lints for a given text/domain. */
private lintProvider: (text: string, domain: string) => Promise<UnpackedLint[]>;
private lintProvider: (text: string, domain: string) => Promise<UnpackedLintGroups>;
/** Actions wired by host environment (extension/app). */
private actions: {
ignoreLint?: (hash: string) => Promise<void>;
@ -35,7 +35,7 @@ export default class LintFramework {
};
constructor(
lintProvider: (text: string, domain: string) => Promise<UnpackedLint[]>,
lintProvider: (text: string, domain: string) => Promise<UnpackedLintGroups>,
actions: {
ignoreLint?: (hash: string) => Promise<void>;
getActivationKey?: () => Promise<ActivationKey>;
@ -100,7 +100,7 @@ export default class LintFramework {
this.onScreenTargets().map(async (target) => {
if (!document.contains(target)) {
this.targets.delete(target);
return { target: null as HTMLElement | null, lints: [] as UnpackedLint[] };
return { target: null as HTMLElement | null, lints: {} };
}
const text =
@ -109,11 +109,11 @@ export default class LintFramework {
: target.textContent;
if (!text || text.length > 120000) {
return { target: null as HTMLElement | null, lints: [] as UnpackedLint[] };
return { target: null as HTMLElement | null, lints: {} };
}
const lints = await this.lintProvider(text, window.location.hostname);
return { target: target as HTMLElement, lints };
const lintsBySource = await this.lintProvider(text, window.location.hostname);
return { target: target as HTMLElement, lints: lintsBySource };
}),
);
@ -189,8 +189,12 @@ export default class LintFramework {
requestAnimationFrame(() => {
const boxes = this.lastLints.flatMap(({ target, lints }) =>
target
? lints.flatMap((l) =>
computeLintBoxes(target, l as any, { ignoreLint: this.actions.ignoreLint }),
? Object.entries(lints).flatMap(([ruleName, ls]) =>
ls.flatMap((l) =>
computeLintBoxes(target, l as any, ruleName, {
ignoreLint: this.actions.ignoreLint,
}),
),
)
: [],
);

View file

@ -50,6 +50,7 @@ function header(
color: string,
onClose: () => void,
openOptions?: () => Promise<void>,
rule?: string,
): any {
const closeButton = h(
'button',
@ -79,7 +80,23 @@ function header(
const controlsChildren = settingsButton ? [settingsButton, closeButton] : [closeButton];
const controls = h('div', { className: 'harper-controls' }, controlsChildren);
const titleEl = h('span', {}, title);
const trimmedRule = rule?.trim();
const titleChildren = [title] as any[];
if (trimmedRule) {
titleChildren.push(
h(
'span',
{
className: 'harper-info-icon',
title: trimmedRule,
'aria-label': `Grammar rule: ${trimmedRule}`,
role: 'img',
},
'i',
),
);
}
const titleEl = h('span', { className: 'harper-title' }, titleChildren);
return h(
'div',
@ -201,6 +218,24 @@ function styleTag() {
margin-bottom:4px;
user-select:none
}
.harper-title{
display:flex;
align-items:center;
gap:6px;
}
.harper-info-icon{
display:inline-flex;
align-items:center;
justify-content:center;
width:16px;
height:16px;
border-radius:50%;
background:#eaeef2;
color:#1f2328;
font-size:11px;
font-weight:700;
cursor:default;
}
.harper-body{
font-size:14px;
line-height:20px;
@ -286,6 +321,7 @@ function styleTag() {
box-shadow:0 4px 12px rgba(1,4,9,0.85)
}
.harper-header{color:#e6edf3}
.harper-info-icon{background:#30363d;color:#c9d1d9}
.harper-body{color:#8b949e}
.harper-btn{
background:#21262d;
@ -355,6 +391,7 @@ export default function SuggestionBox(
lintKindColor(box.lint.lint_kind),
close,
actions.openOptions,
box.rule,
),
body(box.lint.message_html),
footer(

View file

@ -18,6 +18,7 @@ function isFormEl(el: HTMLElement): el is HTMLTextAreaElement | HTMLInputElement
export default function computeLintBoxes(
el: HTMLElement,
lint: UnpackedLint,
rule: string,
opts: { ignoreLint?: (hash: string) => Promise<void> },
): IgnorableLintBox[] {
try {
@ -67,6 +68,7 @@ export default function computeLintBoxes(
height: shrunkBox.height,
lint,
source,
rule,
range: range instanceof Range ? range : undefined,
applySuggestion: (sug: UnpackedSuggestion) => {
const current = isFormEl(el)

View file

@ -14,8 +14,11 @@ export type UnpackedLint = {
lint_kind_pretty: string;
suggestions: UnpackedSuggestion[];
context_hash: string;
source: string;
};
export type UnpackedLintGroups = Record<string, UnpackedLint[]>;
export type UnpackedSuggestion = {
kind: SuggestionKind;
/// An empty string if replacement text is not applicable.
@ -23,7 +26,7 @@ export type UnpackedSuggestion = {
};
export default async function unpackLint(
source: string,
text: string,
lint: Lint,
linter: Linter,
): Promise<UnpackedLint> {
@ -38,7 +41,8 @@ export default async function unpackLint(
suggestions: lint.suggestions().map((sug) => {
return { kind: sug.kind(), replacement_text: sug.get_replacement_text() };
}),
context_hash: (await linter.contextHash(source, lint)).toString(),
context_hash: (await linter.contextHash(text, lint)).toString(),
source: text,
};
}

View file

@ -6,6 +6,7 @@ import {
applySuggestion,
LintFramework,
type UnpackedLint,
type UnpackedLintGroups,
type UnpackedSuggestion,
unpackLint,
} from 'lint-framework';
@ -23,15 +24,21 @@ let openSet: Set<number> = new Set();
let lfw = new LintFramework(
async (text) => {
if (!linter) return [];
if (!linter) return {};
const raw = await linter.lint(text);
// The framework expects "unpacked" lints with plain fields
const unpacked = await Promise.all(raw.map((lint) => unpackLint(text, lint, linter)));
const raw = await linter.organizedLints(text);
// The framework expects grouped lints keyed by source
const entries = await Promise.all(
Object.entries(raw).map(async ([source, lintGroup]) => {
const unpacked = await Promise.all(lintGroup.map((lint) => unpackLint(text, lint, linter)));
return [source, unpacked] as const;
}),
);
lints = unpacked;
const grouped: UnpackedLintGroups = Object.fromEntries(entries);
lints = Object.values(grouped).flat();
return unpacked;
return grouped;
},
{
ignoreLint: async (hash: string) => {