feat: standalone linter (#50)

Allows the server to act as a standalone linter for query files. Useful
for CI.

Note that the linter ignores Rust-binding specific predicate errors,
since predicate usage is largely dependent on the implementation on the
consumer's side.
This commit is contained in:
Riley Bruins 2025-03-10 15:56:10 -07:00 committed by GitHub
parent d5330a005b
commit 5e25fd7d55
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 133 additions and 49 deletions

View file

@ -72,12 +72,38 @@ vim.api.nvim_create_autocmd('FileType', {
## Standalone Tool
### Formatter
The language server can be used as a standalone formatter by passing the
`format` argument, e.g. `ts_query_ls format ./queries --mode write`. The command
can accept multiple directories to format, and must be passed a "mode" of either
`write` or `check`. The mode determines whether the files will be overwritten or
just checked for proper formatting.
```sh
# use this command for the full documentation
ts_query_ls format --help
```
### Linter
The language server can also be used as a standalone linter by passing the
`check` argument, e.g.:
```sh
ts_query_ls check ./queries --config \
'{"parser_install_directories": ["/home/jdoe/Documents/parsers/"]}'
```
The command expects a list of directories to search for queries, as well as a
flag to pass JSON configuration to the server (needed to detect parser
locations).
```sh
# use this command for the full documentation
ts_query_ls check --help
```
## Checklist
- [x] References for captures

View file

@ -1,14 +1,13 @@
use std::collections::{BTreeSet, HashMap, HashSet};
use log::info;
use regex::Regex;
use ropey::Rope;
use tower_lsp::lsp_types::DidOpenTextDocumentParams;
use tree_sitter::Parser;
use crate::{
util::{get_diagnostics, get_language, TextProviderRope},
Backend, SymbolInfo, ENGINE, QUERY_LANGUAGE,
Backend, SymbolInfo, QUERY_LANGUAGE,
};
pub async fn did_open(backend: &Backend, params: DidOpenTextDocumentParams) {
@ -25,50 +24,19 @@ pub async fn did_open(backend: &Backend, params: DidOpenTextDocumentParams) {
.cst_map
.insert(uri.clone(), parser.parse(&contents, None).unwrap());
// Get language, if it exists
let mut lang = None;
if let Ok(options) = backend.options.read() {
let mut language_retrieval_regexes: Vec<Regex> = options
.language_retrieval_patterns
.clone()
.unwrap_or(vec![])
.iter()
.map(|r| Regex::new(r).unwrap())
.collect();
language_retrieval_regexes.push(Regex::new(r"queries/([^/]+)/[^/]+\.scm$").unwrap());
language_retrieval_regexes
.push(Regex::new(r"tree-sitter-([^/]+)/queries/[^/]+\.scm$").unwrap());
let mut captures = None;
for re in language_retrieval_regexes {
if let Some(caps) = re.captures(uri.as_str()) {
captures = Some(caps);
break;
}
}
lang = captures
.and_then(|captures| captures.get(1))
.and_then(|cap| {
let cap_str = cap.as_str();
get_language(
options
.parser_aliases
.as_ref()
.and_then(|map| map.get(cap_str))
.unwrap_or(&cap_str.to_owned())
.as_str(),
&options.parser_install_directories,
&ENGINE,
)
});
}
// Initialize language info
let mut symbols_vec: Vec<SymbolInfo> = vec![];
let mut symbols_set: HashSet<SymbolInfo> = HashSet::new();
let mut fields_vec: Vec<String> = vec![];
let mut fields_set: HashSet<String> = HashSet::new();
let mut supertype_map: HashMap<SymbolInfo, BTreeSet<SymbolInfo>> = HashMap::new();
if let Some(lang) = lang {
if let Some(lang) = &backend
.options
.read()
.ok()
.as_ref()
.and_then(|options| get_language(uri, options))
{
let error_symbol = SymbolInfo {
label: "ERROR".to_owned(),
named: true,

View file

@ -28,7 +28,7 @@ use tower_lsp::{
},
Client, LanguageServer, LspService, Server,
};
use tree_sitter::{wasmtime::Engine, Language, Tree};
use tree_sitter::{wasmtime::Engine, Language, Query, QueryErrorKind, Tree};
use handlers::*;
@ -180,7 +180,7 @@ impl LanguageServer for Backend {
)]
struct Arguments {
#[command(subcommand)]
format: Option<Commands>,
commands: Option<Commands>,
}
#[derive(clap::ValueEnum, Clone, Debug)]
@ -200,6 +200,15 @@ enum Commands {
#[arg(long, short)]
mode: Mode,
},
/// Check the query files in the given directories for errors
Check {
/// List of directories to check
directories: Vec<PathBuf>,
/// String representing server's JSON configuration
#[arg(long, short)]
config: String,
},
}
fn get_scm_files(directories: &[PathBuf]) -> Vec<PathBuf> {
@ -253,11 +262,49 @@ fn format_directories(directories: &[PathBuf], mode: Mode) -> i32 {
}
} else {
eprintln!("Failed to read {:?}", path.canonicalize().unwrap());
exit_code.store(1, std::sync::atomic::Ordering::Relaxed);
}
});
exit_code.load(std::sync::atomic::Ordering::Relaxed)
}
/// Lints every query file found under `directories` and returns a process exit code.
///
/// `config` is the server's JSON configuration; it is deserialized into [`Options`] and
/// used to resolve the language of each query file.
///
/// Returns `0` when every file compiles as a valid query for its language, and `1` when
/// the configuration cannot be parsed or when at least one file could not be resolved,
/// could not be read, has no retrievable language, or contains a (non-predicate) query
/// error. Every problem is reported on stderr.
fn check_directories(directories: &[PathBuf], config: String) -> i32 {
    let Ok(options) = serde_json::from_str::<Options>(&config) else {
        eprintln!("Could not parse the provided configuration");
        return 1;
    };
    let exit_code = AtomicI32::new(0);
    let scm_files = get_scm_files(directories);
    scm_files.par_iter().for_each(|path| {
        // Resolve the path once; a failure here is reported like any other lint failure
        // instead of panicking inside the parallel iterator.
        let Ok(absolute_path) = path.canonicalize() else {
            eprintln!("Failed to resolve {:?}", path);
            exit_code.store(1, std::sync::atomic::Ordering::Relaxed);
            return;
        };
        let Ok(uri) = Url::from_file_path(&absolute_path) else {
            eprintln!("Could not convert {:?} to a file URL", absolute_path);
            exit_code.store(1, std::sync::atomic::Ordering::Relaxed);
            return;
        };
        let Some(lang) = util::get_language(&uri, &options) else {
            eprintln!("Could not retrieve language for {:?}", absolute_path);
            exit_code.store(1, std::sync::atomic::Ordering::Relaxed);
            return;
        };
        let Ok(source) = fs::read_to_string(path) else {
            eprintln!("Failed to read {:?}", absolute_path);
            exit_code.store(1, std::sync::atomic::Ordering::Relaxed);
            return;
        };
        match Query::new(&lang, source.as_str()) {
            Ok(_) => {}
            // Ignore predicate errors, which depend on the implementation.
            Err(err) if matches!(err.kind, QueryErrorKind::Predicate) => {}
            Err(err) => {
                eprintln!("In {:?}:\n{}\n", absolute_path, err);
                exit_code.store(1, std::sync::atomic::Ordering::Relaxed);
            }
        }
    });
    exit_code.load(std::sync::atomic::Ordering::Relaxed)
}
#[tokio::main]
async fn main() {
tracing_subscriber::fmt()
@ -266,8 +313,15 @@ async fn main() {
.init();
let args = Arguments::parse();
if let Some(Commands::Format { directories, mode }) = args.format {
std::process::exit(format_directories(&directories, mode));
match args.commands {
Some(Commands::Format { directories, mode }) => {
std::process::exit(format_directories(&directories, mode));
}
Some(Commands::Check {
directories,
config,
}) => std::process::exit(check_directories(&directories, config)),
_ => {}
}
let stdin = tokio::io::stdin();

View file

@ -12,14 +12,14 @@ use ropey::Rope;
use serde_json::Value;
use streaming_iterator::StreamingIterator;
use tower_lsp::lsp_types::{
Diagnostic, DiagnosticSeverity, Position, Range, TextDocumentContentChangeEvent, TextEdit,
Diagnostic, DiagnosticSeverity, Position, Range, TextDocumentContentChangeEvent, TextEdit, Url,
};
use tree_sitter::{
wasmtime::Engine, InputEdit, Language, Node, Point, Query, QueryCursor, QueryMatch,
QueryPredicateArg, TextProvider, Tree, TreeCursor, WasmStore,
};
use crate::{Backend, Options, SymbolInfo, QUERY_LANGUAGE};
use crate::{Backend, Options, SymbolInfo, ENGINE, QUERY_LANGUAGE};
lazy_static! {
static ref LINE_START: Regex = Regex::new(r"^([^\S\r\n]*)").unwrap();
@ -453,7 +453,43 @@ pub fn lsp_textdocchange_to_ts_inputedit(
const DYLIB_EXTENSIONS: [&str; 3] = [".so", ".dll", ".dylib"];
pub fn get_language(
/// Determines the language a query file targets from its URI and loads that language.
///
/// The URI is matched against the user-supplied `language_retrieval_patterns` first (in
/// order), then against two built-in patterns (`queries/<lang>/<file>.scm` and
/// `tree-sitter-<lang>/queries/<file>.scm`). The first pattern that matches decides the
/// result: its first capture group is taken as the language name, mapped through
/// `parser_aliases` when an alias exists, and handed to [`get_language_object`] together
/// with the configured parser install directories.
///
/// Returns `None` when no pattern matches, when the matching pattern has no first capture
/// group, or when the language object cannot be loaded. A user-supplied pattern that is
/// not a valid regular expression is skipped with a warning on stderr.
pub fn get_language(uri: &Url, options: &Options) -> Option<Language> {
    let builtin_patterns = [
        r"queries/([^/]+)/[^/]+\.scm$",
        r"tree-sitter-([^/]+)/queries/[^/]+\.scm$",
    ];
    let uri_str = uri.as_str();
    let language_name = options
        .language_retrieval_patterns
        .iter()
        .flatten()
        .map(String::as_str)
        .chain(builtin_patterns)
        .filter_map(|pattern| match Regex::new(pattern) {
            Ok(re) => Some(re),
            Err(err) => {
                eprintln!("Ignoring invalid language retrieval pattern {pattern:?}: {err}");
                None
            }
        })
        // The first matching pattern wins, even if it yields no first capture group.
        .find_map(|re| {
            re.captures(uri_str)
                .map(|caps| caps.get(1).map(|group| group.as_str()))
        })
        .flatten()?;
    let parser_name = options
        .parser_aliases
        .as_ref()
        .and_then(|aliases| aliases.get(language_name))
        .map_or(language_name, |alias| alias.as_str());
    get_language_object(parser_name, &options.parser_install_directories, &ENGINE)
}
pub fn get_language_object(
name: &str,
directories: &Option<Vec<String>>,
engine: &Engine,
@ -478,7 +514,7 @@ pub fn get_language(
return Some(language);
}
}
if let Some(lang) = get_language_wasm(name.as_str(), directory, engine) {
if let Some(lang) = get_language_object_wasm(name.as_str(), directory, engine) {
return Some(lang);
}
}
@ -486,7 +522,7 @@ pub fn get_language(
None
}
fn get_language_wasm(name: &str, directory: &String, engine: &Engine) -> Option<Language> {
fn get_language_object_wasm(name: &str, directory: &String, engine: &Engine) -> Option<Language> {
let object_name = format!("tree-sitter-{name}.wasm");
// NOTE: If WasmStore could be passed around threads safely, we could just create one global
// store and put all of the WASM modules in there.