mirror of
https://github.com/Automattic/harper.git
synced 2025-08-04 18:48:02 +00:00
feat: Separated out the TreeSitterParser
to own crate for portability
This commit is contained in:
parent
4fb5ad8c82
commit
c02383c6a1
17 changed files with 101 additions and 38 deletions
35
Cargo.lock
generated
35
Cargo.lock
generated
|
@ -550,6 +550,7 @@ dependencies = [
|
|||
"ariadne",
|
||||
"clap",
|
||||
"harper-core",
|
||||
"harper-tree-sitter",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
@ -581,6 +582,7 @@ dependencies = [
|
|||
"dirs 5.0.1",
|
||||
"futures",
|
||||
"harper-core",
|
||||
"harper-tree-sitter",
|
||||
"itertools",
|
||||
"once_cell",
|
||||
"open",
|
||||
|
@ -591,19 +593,6 @@ dependencies = [
|
|||
"tower-lsp",
|
||||
"tracing",
|
||||
"tracing-subscriber",
|
||||
"tree-sitter",
|
||||
"tree-sitter-c",
|
||||
"tree-sitter-c-sharp",
|
||||
"tree-sitter-cpp",
|
||||
"tree-sitter-go",
|
||||
"tree-sitter-javascript",
|
||||
"tree-sitter-lua",
|
||||
"tree-sitter-python",
|
||||
"tree-sitter-ruby",
|
||||
"tree-sitter-rust",
|
||||
"tree-sitter-swift",
|
||||
"tree-sitter-toml",
|
||||
"tree-sitter-typescript",
|
||||
"unicode-width",
|
||||
]
|
||||
|
||||
|
@ -620,6 +609,26 @@ dependencies = [
|
|||
"tracing-subscriber",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "harper-tree-sitter"
|
||||
version = "0.8.0"
|
||||
dependencies = [
|
||||
"harper-core",
|
||||
"tree-sitter",
|
||||
"tree-sitter-c",
|
||||
"tree-sitter-c-sharp",
|
||||
"tree-sitter-cpp",
|
||||
"tree-sitter-go",
|
||||
"tree-sitter-javascript",
|
||||
"tree-sitter-lua",
|
||||
"tree-sitter-python",
|
||||
"tree-sitter-ruby",
|
||||
"tree-sitter-rust",
|
||||
"tree-sitter-swift",
|
||||
"tree-sitter-toml",
|
||||
"tree-sitter-typescript",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "harper-wasm"
|
||||
version = "0.1.0"
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
[workspace]
|
||||
members = [ "harper-cli", "harper-core", "harper-ls", "harper-serve", "harper-wasm"]
|
||||
members = [ "harper-cli", "harper-core", "harper-ls", "harper-serve", "harper-tree-sitter", "harper-wasm"]
|
||||
resolver = "2"
|
||||
|
||||
[profile.release]
|
||||
|
|
|
@ -11,3 +11,4 @@ anyhow = "1.0.83"
|
|||
ariadne = "0.4.1"
|
||||
clap = { version = "4.5.4", features = ["derive"] }
|
||||
harper-core = { path = "../harper-core", version = "0.8.0" }
|
||||
harper-tree-sitter = { path = "../harper-tree-sitter", version = "0.8.0" }
|
||||
|
|
|
@ -5,7 +5,7 @@
|
|||
`harper-cli` is a small, experimental frontend for Harper.
|
||||
It can be used in any situation where you might need to check a large number of files automatically (like in continuous integration).
|
||||
|
||||
Right now it is quite feature barren, mainly because the use-case has not been defined yet.
|
||||
Right now it is quite light on features, mainly because an external use case has not been defined yet.
|
||||
If you have any thoughts, feel free to reach out.
|
||||
|
||||
## Possible Future Features
|
||||
|
|
|
@ -1,20 +1,32 @@
|
|||
use std::path::PathBuf;
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
use anyhow::format_err;
|
||||
use ariadne::{Color, Label, Report, ReportKind, Source};
|
||||
use clap::Parser;
|
||||
use harper_core::{remove_overlaps, Document, FullDictionary, LintGroup, LintGroupConfig, Linter};
|
||||
use harper_tree_sitter::TreeSitterParser;
|
||||
|
||||
#[derive(Debug, Parser)]
|
||||
struct Args {
|
||||
/// The Markdown file you wish to grammar check.
|
||||
file: PathBuf
|
||||
/// The file you wish to grammar check.
|
||||
file: PathBuf,
|
||||
/// Whether to merely print out the number of errors encountered, without
|
||||
/// further details.
|
||||
#[arg(short, long)]
|
||||
count: bool
|
||||
}
|
||||
|
||||
fn main() -> anyhow::Result<()> {
|
||||
let args = Args::parse();
|
||||
|
||||
let source = std::fs::read_to_string(&args.file)?;
|
||||
let doc = Document::new_markdown(&source);
|
||||
|
||||
let parser = TreeSitterParser::new_from_language_id(
|
||||
filename_to_filetype(&args.file).ok_or(format_err!("Could not detect filetype."))?
|
||||
)
|
||||
.ok_or(format_err!("Could not detect language ID."))?;
|
||||
|
||||
let doc = Document::new(&source, Box::new(parser));
|
||||
|
||||
let mut linter = LintGroup::new(
|
||||
LintGroupConfig::default(),
|
||||
|
@ -22,6 +34,11 @@ fn main() -> anyhow::Result<()> {
|
|||
);
|
||||
let mut lints = linter.lint(&doc);
|
||||
|
||||
if args.count {
|
||||
println!("{}", lints.len());
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
if lints.is_empty() {
|
||||
println!("No lints found");
|
||||
return Ok(());
|
||||
|
@ -52,3 +69,28 @@ fn main() -> anyhow::Result<()> {
|
|||
|
||||
std::process::exit(1);
|
||||
}
|
||||
|
||||
/// Convert a provided path to a corresponding Language Server Protocol file
/// type (a "language identifier" such as `"rust"` or `"typescriptreact"`).
///
/// The decision is based solely on the path's extension, compared
/// case-sensitively. Returns `None` when the path has no extension, when the
/// extension is not valid UTF-8, or when the extension is not in the table
/// below.
///
/// Note to contributors: try to keep this in sync with
/// [`TreeSitterParser::new_from_language_id`]
fn filename_to_filetype(path: &Path) -> Option<&'static str> {
    let extension = path.extension()?.to_str()?;

    let language_id = match extension {
        "rs" => "rust",
        "ts" => "typescript",
        "tsx" => "typescriptreact",
        "js" => "javascript",
        "jsx" => "javascriptreact",
        "go" => "go",
        "c" => "c",
        // Headers are treated as C++ so that both C and C++ headers get a
        // language ID; the common C++ source/header spellings follow suit.
        "cpp" | "cc" | "cxx" | "h" | "hpp" => "cpp",
        // "python" is the LSP language identifier for Python.
        // NOTE(review): confirm `TreeSitterParser::new_from_language_id`
        // accepts "python"; if it does not, the caller still reports an error.
        "py" => "python",
        "rb" => "ruby",
        "swift" => "swift",
        "cs" => "csharp",
        "toml" => "toml",
        "lua" => "lua",
        _ => return None
    };

    Some(language_id)
}
|
||||
|
|
|
@ -67,8 +67,6 @@ fn to_lower_word(word: &[char]) -> Cow<'_, [char]> {
|
|||
fn starts_with_vowel(word: &[char]) -> bool {
|
||||
let is_likely_initialism = word.iter().all(|c| c.is_uppercase());
|
||||
|
||||
dbg!(word, is_likely_initialism);
|
||||
|
||||
if is_likely_initialism && !word.is_empty() {
|
||||
return matches!(
|
||||
word[0],
|
||||
|
|
|
@ -27,7 +27,7 @@ impl RepeatedWords {
|
|||
|
||||
add_set!(
|
||||
"the", "be", "to", "of", "and", "a", "in", "that", "have", "I", "it", "for", "not",
|
||||
"on", "with", "he", "as", "you", "do", "at", "this", "but", "his", "by", "from",
|
||||
"on", "with", "he", "as", "you", "do", "at", "this", "is", "but", "his", "by", "from",
|
||||
"they", "we", "say", "her", "she", "or", "an", "will", "my", "one", "all", "would",
|
||||
"there", "their", "what", "so", "up", "out", "if", "about", "who", "get", "which",
|
||||
"go", "me", "when", "make", "can", "like", "time", "no", "just", "him", "know", "take",
|
||||
|
|
|
@ -9,23 +9,11 @@ repository = "https://github.com/elijah-potter/harper"
|
|||
|
||||
[dependencies]
|
||||
harper-core = { path = "../harper-core", version = "0.8.0" }
|
||||
harper-tree-sitter = { path = "../harper-tree-sitter", version = "0.8.0" }
|
||||
tower-lsp = "0.20.0"
|
||||
tokio = { version = "1.36.0", features = ["fs", "rt", "rt-multi-thread", "macros", "io-std", "io-util", "net"] }
|
||||
clap = { version = "4.5.1", features = ["derive"] }
|
||||
once_cell = "1.19.0"
|
||||
tree-sitter = "0.20.10"
|
||||
tree-sitter-rust = "0.20.4"
|
||||
tree-sitter-typescript = "0.20.3"
|
||||
tree-sitter-python = "0.20.4"
|
||||
tree-sitter-javascript = "0.20.1"
|
||||
tree-sitter-go = "0.20.0"
|
||||
tree-sitter-c = "0.20.7"
|
||||
tree-sitter-cpp = "0.20.5"
|
||||
tree-sitter-ruby = "0.20.1"
|
||||
tree-sitter-swift = "=0.4.0"
|
||||
tree-sitter-c-sharp = "0.20.0"
|
||||
tree-sitter-toml = "0.20.0"
|
||||
tree-sitter-lua = "0.0.19"
|
||||
dirs = "5.0.1"
|
||||
anyhow = "1.0.80"
|
||||
serde_json = "1.0.114"
|
||||
|
|
|
@ -13,6 +13,7 @@ use harper_core::{
|
|||
Token,
|
||||
TokenKind
|
||||
};
|
||||
use harper_tree_sitter::TreeSitterParser;
|
||||
use serde_json::Value;
|
||||
use tokio::sync::{Mutex, RwLock};
|
||||
use tower_lsp::jsonrpc::Result;
|
||||
|
@ -52,7 +53,6 @@ use crate::diagnostics::{lint_to_code_actions, lints_to_diagnostics};
|
|||
use crate::dictionary_io::{load_dict, save_dict};
|
||||
use crate::git_commit_parser::GitCommitParser;
|
||||
use crate::pos_conv::range_to_span;
|
||||
use crate::tree_sitter_parser::TreeSitterParser;
|
||||
|
||||
#[derive(Default)]
|
||||
struct DocumentState {
|
||||
|
|
|
@ -3,13 +3,11 @@ use std::io::stderr;
|
|||
use config::Config;
|
||||
use tokio::net::TcpListener;
|
||||
mod backend;
|
||||
mod comment_parsers;
|
||||
mod config;
|
||||
mod diagnostics;
|
||||
mod dictionary_io;
|
||||
mod git_commit_parser;
|
||||
mod pos_conv;
|
||||
mod tree_sitter_parser;
|
||||
|
||||
use backend::Backend;
|
||||
use clap::Parser;
|
||||
|
|
20
harper-tree-sitter/Cargo.toml
Normal file
20
harper-tree-sitter/Cargo.toml
Normal file
|
@ -0,0 +1,20 @@
|
|||
[package]
|
||||
name = "harper-tree-sitter"
|
||||
version = "0.8.0"
|
||||
edition = "2021"
|
||||
|
||||
[dependencies]
|
||||
harper-core = { path = "../harper-core", version = "0.8.0" }
|
||||
tree-sitter = "0.20.10"
|
||||
tree-sitter-rust = "0.20.4"
|
||||
tree-sitter-typescript = "0.20.3"
|
||||
tree-sitter-python = "0.20.4"
|
||||
tree-sitter-javascript = "0.20.1"
|
||||
tree-sitter-go = "0.20.0"
|
||||
tree-sitter-c = "0.20.7"
|
||||
tree-sitter-cpp = "0.20.5"
|
||||
tree-sitter-ruby = "0.20.1"
|
||||
tree-sitter-swift = "=0.4.0"
|
||||
tree-sitter-c-sharp = "0.20.0"
|
||||
tree-sitter-toml = "0.20.0"
|
||||
tree-sitter-lua = "0.0.19"
|
3
harper-tree-sitter/README.md
Normal file
3
harper-tree-sitter/README.md
Normal file
|
@ -0,0 +1,3 @@
|
|||
# `harper-tree-sitter`
|
||||
|
||||
This crate is a wrapper around `tree-sitter` that allows Harper to parse the comments of a wide variety of programming languages.
|
4
harper-tree-sitter/src/lib.rs
Normal file
4
harper-tree-sitter/src/lib.rs
Normal file
|
@ -0,0 +1,4 @@
|
|||
mod comment_parsers;
|
||||
mod tree_sitter_parser;
|
||||
|
||||
pub use tree_sitter_parser::TreeSitterParser;
|
Loading…
Add table
Add a link
Reference in a new issue