feat: Separated out the TreeSitterParser into its own crate for portability

This commit is contained in:
Elijah Potter 2024-06-22 17:07:09 -06:00
parent 4fb5ad8c82
commit c02383c6a1
17 changed files with 101 additions and 38 deletions

35
Cargo.lock generated
View file

@ -550,6 +550,7 @@ dependencies = [
"ariadne",
"clap",
"harper-core",
"harper-tree-sitter",
]
[[package]]
@ -581,6 +582,7 @@ dependencies = [
"dirs 5.0.1",
"futures",
"harper-core",
"harper-tree-sitter",
"itertools",
"once_cell",
"open",
@ -591,19 +593,6 @@ dependencies = [
"tower-lsp",
"tracing",
"tracing-subscriber",
"tree-sitter",
"tree-sitter-c",
"tree-sitter-c-sharp",
"tree-sitter-cpp",
"tree-sitter-go",
"tree-sitter-javascript",
"tree-sitter-lua",
"tree-sitter-python",
"tree-sitter-ruby",
"tree-sitter-rust",
"tree-sitter-swift",
"tree-sitter-toml",
"tree-sitter-typescript",
"unicode-width",
]
@ -620,6 +609,26 @@ dependencies = [
"tracing-subscriber",
]
[[package]]
name = "harper-tree-sitter"
version = "0.8.0"
dependencies = [
"harper-core",
"tree-sitter",
"tree-sitter-c",
"tree-sitter-c-sharp",
"tree-sitter-cpp",
"tree-sitter-go",
"tree-sitter-javascript",
"tree-sitter-lua",
"tree-sitter-python",
"tree-sitter-ruby",
"tree-sitter-rust",
"tree-sitter-swift",
"tree-sitter-toml",
"tree-sitter-typescript",
]
[[package]]
name = "harper-wasm"
version = "0.1.0"

View file

@ -1,5 +1,5 @@
[workspace]
members = [ "harper-cli", "harper-core", "harper-ls", "harper-serve", "harper-wasm"]
members = [ "harper-cli", "harper-core", "harper-ls", "harper-serve", "harper-tree-sitter", "harper-wasm"]
resolver = "2"
[profile.release]

View file

@ -11,3 +11,4 @@ anyhow = "1.0.83"
ariadne = "0.4.1"
clap = { version = "4.5.4", features = ["derive"] }
harper-core = { path = "../harper-core", version = "0.8.0" }
harper-tree-sitter = { path = "../harper-tree-sitter", version = "0.8.0" }

View file

@ -5,7 +5,7 @@
`harper-cli` is a small, experimental frontend for Harper.
It can be used in any situation where you might need to check a large number of files automatically (like in continuous integration).
Right now it is quite feature barren, mainly because the use-case has not been defined yet.
Right now it is quite feature-barren, mainly because an external use case has not been defined yet.
If you have any thoughts, feel free to reach out.
## Possible Future Features

View file

@ -1,20 +1,32 @@
use std::path::PathBuf;
use std::path::{Path, PathBuf};
use anyhow::format_err;
use ariadne::{Color, Label, Report, ReportKind, Source};
use clap::Parser;
use harper_core::{remove_overlaps, Document, FullDictionary, LintGroup, LintGroupConfig, Linter};
use harper_tree_sitter::TreeSitterParser;
#[derive(Debug, Parser)]
struct Args {
/// The Markdown file you wish to grammar check.
file: PathBuf
/// The file you wish to grammar check.
file: PathBuf,
/// Whether to merely print out the number of errors encountered, without
/// further details.
#[arg(short, long)]
count: bool
}
fn main() -> anyhow::Result<()> {
let args = Args::parse();
let source = std::fs::read_to_string(&args.file)?;
let doc = Document::new_markdown(&source);
let parser = TreeSitterParser::new_from_language_id(
filename_to_filetype(&args.file).ok_or(format_err!("Could not detect filetype."))?
)
.ok_or(format_err!("Could not detect language ID."))?;
let doc = Document::new(&source, Box::new(parser));
let mut linter = LintGroup::new(
LintGroupConfig::default(),
@ -22,6 +34,11 @@ fn main() -> anyhow::Result<()> {
);
let mut lints = linter.lint(&doc);
if args.count {
println!("{}", lints.len());
return Ok(());
}
if lints.is_empty() {
println!("No lints found");
return Ok(());
@ -52,3 +69,28 @@ fn main() -> anyhow::Result<()> {
std::process::exit(1);
}
/// Convert a provided path to a corresponding Language Server Protocol file
/// type (language identifier).
///
/// The lookup is based solely on the path's extension and is case-sensitive.
/// Returns `None` when the path has no extension, when the extension is not
/// valid UTF-8, or when the extension is not one of the recognized ones.
///
/// Note to contributors: try to keep this in sync with
/// [`TreeSitterParser::new_from_language_id`]
///
/// NOTE(review): Python (`py`) is not mapped here even though
/// `harper-tree-sitter` depends on `tree-sitter-python`. Confirm which
/// language ID `new_from_language_id` expects for Python before adding it.
fn filename_to_filetype(path: &Path) -> Option<&'static str> {
    let extension = path.extension()?.to_str()?;

    let filetype = match extension {
        "rs" => "rust",
        // `mts`/`cts` are the ES-module/CommonJS flavours of TypeScript.
        "ts" | "mts" | "cts" => "typescript",
        "tsx" => "typescriptreact",
        // `mjs`/`cjs` are the ES-module/CommonJS flavours of JavaScript.
        "js" | "mjs" | "cjs" => "javascript",
        "jsx" => "javascriptreact",
        "go" => "go",
        "c" => "c",
        // Alternate C++ source and header extensions. A plain `h` header is
        // ambiguous between C and C++; it keeps its existing C++ mapping.
        "cpp" | "cc" | "cxx" | "h" | "hpp" | "hh" | "hxx" => "cpp",
        "rb" => "ruby",
        "swift" => "swift",
        "cs" => "csharp",
        "toml" => "toml",
        "lua" => "lua",
        _ => return None
    };

    Some(filetype)
}

View file

@ -67,8 +67,6 @@ fn to_lower_word(word: &[char]) -> Cow<'_, [char]> {
fn starts_with_vowel(word: &[char]) -> bool {
let is_likely_initialism = word.iter().all(|c| c.is_uppercase());
dbg!(word, is_likely_initialism);
if is_likely_initialism && !word.is_empty() {
return matches!(
word[0],

View file

@ -27,7 +27,7 @@ impl RepeatedWords {
add_set!(
"the", "be", "to", "of", "and", "a", "in", "that", "have", "I", "it", "for", "not",
"on", "with", "he", "as", "you", "do", "at", "this", "but", "his", "by", "from",
"on", "with", "he", "as", "you", "do", "at", "this", "is", "but", "his", "by", "from",
"they", "we", "say", "her", "she", "or", "an", "will", "my", "one", "all", "would",
"there", "their", "what", "so", "up", "out", "if", "about", "who", "get", "which",
"go", "me", "when", "make", "can", "like", "time", "no", "just", "him", "know", "take",

View file

@ -9,23 +9,11 @@ repository = "https://github.com/elijah-potter/harper"
[dependencies]
harper-core = { path = "../harper-core", version = "0.8.0" }
harper-tree-sitter = { path = "../harper-tree-sitter", version = "0.8.0" }
tower-lsp = "0.20.0"
tokio = { version = "1.36.0", features = ["fs", "rt", "rt-multi-thread", "macros", "io-std", "io-util", "net"] }
clap = { version = "4.5.1", features = ["derive"] }
once_cell = "1.19.0"
tree-sitter = "0.20.10"
tree-sitter-rust = "0.20.4"
tree-sitter-typescript = "0.20.3"
tree-sitter-python = "0.20.4"
tree-sitter-javascript = "0.20.1"
tree-sitter-go = "0.20.0"
tree-sitter-c = "0.20.7"
tree-sitter-cpp = "0.20.5"
tree-sitter-ruby = "0.20.1"
tree-sitter-swift = "=0.4.0"
tree-sitter-c-sharp = "0.20.0"
tree-sitter-toml = "0.20.0"
tree-sitter-lua = "0.0.19"
dirs = "5.0.1"
anyhow = "1.0.80"
serde_json = "1.0.114"

View file

@ -13,6 +13,7 @@ use harper_core::{
Token,
TokenKind
};
use harper_tree_sitter::TreeSitterParser;
use serde_json::Value;
use tokio::sync::{Mutex, RwLock};
use tower_lsp::jsonrpc::Result;
@ -52,7 +53,6 @@ use crate::diagnostics::{lint_to_code_actions, lints_to_diagnostics};
use crate::dictionary_io::{load_dict, save_dict};
use crate::git_commit_parser::GitCommitParser;
use crate::pos_conv::range_to_span;
use crate::tree_sitter_parser::TreeSitterParser;
#[derive(Default)]
struct DocumentState {

View file

@ -3,13 +3,11 @@ use std::io::stderr;
use config::Config;
use tokio::net::TcpListener;
mod backend;
mod comment_parsers;
mod config;
mod diagnostics;
mod dictionary_io;
mod git_commit_parser;
mod pos_conv;
mod tree_sitter_parser;
use backend::Backend;
use clap::Parser;

View file

@ -0,0 +1,20 @@
[package]
name = "harper-tree-sitter"
version = "0.8.0"
edition = "2021"
[dependencies]
harper-core = { path = "../harper-core", version = "0.8.0" }
tree-sitter = "0.20.10"
tree-sitter-rust = "0.20.4"
tree-sitter-typescript = "0.20.3"
tree-sitter-python = "0.20.4"
tree-sitter-javascript = "0.20.1"
tree-sitter-go = "0.20.0"
tree-sitter-c = "0.20.7"
tree-sitter-cpp = "0.20.5"
tree-sitter-ruby = "0.20.1"
tree-sitter-swift = "=0.4.0"
tree-sitter-c-sharp = "0.20.0"
tree-sitter-toml = "0.20.0"
tree-sitter-lua = "0.0.19"

View file

@ -0,0 +1,3 @@
# `harper-tree-sitter`
This crate is a wrapper around `tree-sitter` that allows Harper to parse the comments of a wide variety of programming languages.

View file

@ -0,0 +1,4 @@
mod comment_parsers;
mod tree_sitter_parser;
pub use tree_sitter_parser::TreeSitterParser;