mirror of
https://github.com/Automattic/harper.git
synced 2025-12-23 08:48:15 +00:00
feat(core): Add support for Python docstrings (#2038)
* feat(core): Add support for Python docstrings
* Remove unused dependency
* Revert "Remove unused dependency"
This reverts commit 5720b2eced.
* Fix for harper-ls
* Fix handling of multiline strings
* Fix merge artifact
* Formatting fix
* Do not pass quotes for linting
---------
Co-authored-by: Elijah Potter <me@elijahpotter.dev>
This commit is contained in:
parent
84a52e3988
commit
041d5a0b16
15 changed files with 209 additions and 5 deletions
14
Cargo.lock
generated
14
Cargo.lock
generated
|
|
@ -2302,6 +2302,7 @@ dependencies = [
|
|||
"harper-ink",
|
||||
"harper-literate-haskell",
|
||||
"harper-pos-utils",
|
||||
"harper-python",
|
||||
"harper-stats",
|
||||
"harper-typst",
|
||||
"hashbrown 0.16.0",
|
||||
|
|
@ -2336,7 +2337,6 @@ dependencies = [
|
|||
"tree-sitter-lua",
|
||||
"tree-sitter-nix",
|
||||
"tree-sitter-php",
|
||||
"tree-sitter-python",
|
||||
"tree-sitter-ruby",
|
||||
"tree-sitter-rust",
|
||||
"tree-sitter-scala",
|
||||
|
|
@ -2430,6 +2430,7 @@ dependencies = [
|
|||
"harper-html",
|
||||
"harper-ink",
|
||||
"harper-literate-haskell",
|
||||
"harper-python",
|
||||
"harper-stats",
|
||||
"harper-typst",
|
||||
"indexmap",
|
||||
|
|
@ -2464,6 +2465,17 @@ dependencies = [
|
|||
"strum_macros 0.27.2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "harper-python"
|
||||
version = "0.66.0"
|
||||
dependencies = [
|
||||
"harper-core",
|
||||
"harper-tree-sitter",
|
||||
"paste",
|
||||
"tree-sitter",
|
||||
"tree-sitter-python",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "harper-stats"
|
||||
version = "0.66.0"
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
[workspace]
|
||||
members = ["harper-cli", "harper-core", "harper-ls", "harper-comments", "harper-wasm", "harper-tree-sitter", "harper-html", "harper-literate-haskell", "harper-typst", "harper-stats", "harper-pos-utils", "harper-brill", "harper-ink"]
|
||||
members = ["harper-cli", "harper-core", "harper-ls", "harper-comments", "harper-wasm", "harper-tree-sitter", "harper-html", "harper-literate-haskell", "harper-typst", "harper-stats", "harper-pos-utils", "harper-brill", "harper-ink", "harper-python"]
|
||||
resolver = "2"
|
||||
|
||||
# Comment out the below lines if you plan to use a debugger.
|
||||
|
|
|
|||
|
|
@ -12,6 +12,7 @@ clap = { version = "4.5.48", features = ["derive", "string"] }
|
|||
harper-stats = { path = "../harper-stats", version = "0.66.0" }
|
||||
dirs = "6.0.0"
|
||||
harper-literate-haskell = { path = "../harper-literate-haskell", version = "0.66.0" }
|
||||
harper-python = { path = "../harper-python", version = "0.66.0" }
|
||||
harper-core = { path = "../harper-core", version = "0.66.0" }
|
||||
harper-pos-utils = { path = "../harper-pos-utils", version = "0.66.0", features = ["threaded"] }
|
||||
harper-comments = { path = "../harper-comments", version = "0.66.0" }
|
||||
|
|
|
|||
|
|
@ -24,6 +24,7 @@ use harper_ink::InkParser;
|
|||
use harper_literate_haskell::LiterateHaskellParser;
|
||||
#[cfg(feature = "training")]
|
||||
use harper_pos_utils::{BrillChunker, BrillTagger, BurnChunkerCpu};
|
||||
use harper_python::PythonParser;
|
||||
|
||||
use harper_stats::Stats;
|
||||
use serde::Serialize;
|
||||
|
|
@ -826,6 +827,7 @@ fn load_file(
|
|||
)),
|
||||
Some("org") => Box::new(OrgMode),
|
||||
Some("typ") => Box::new(harper_typst::Typst),
|
||||
Some("py") | Some("pyi") => Box::new(PythonParser::default()),
|
||||
_ => {
|
||||
if let Some(comment_parser) = CommentParser::new_from_filename(file, markdown_options) {
|
||||
Box::new(comment_parser)
|
||||
|
|
|
|||
|
|
@ -26,7 +26,6 @@ tree-sitter-kotlin-ng = "1.1.0"
|
|||
tree-sitter-lua = "0.2.0"
|
||||
tree-sitter-nix = "0.3.0"
|
||||
tree-sitter-php = "0.24.2"
|
||||
tree-sitter-python = "0.25.0"
|
||||
tree-sitter-ruby = "0.23.1"
|
||||
tree-sitter-rust = "0.24.0"
|
||||
tree-sitter-scala = "0.24.0"
|
||||
|
|
|
|||
|
|
@ -37,7 +37,6 @@ impl CommentParser {
|
|||
"lua" => tree_sitter_lua::LANGUAGE,
|
||||
"nix" => tree_sitter_nix::LANGUAGE,
|
||||
"php" => tree_sitter_php::LANGUAGE_PHP,
|
||||
"python" => tree_sitter_python::LANGUAGE,
|
||||
"ruby" => tree_sitter_ruby::LANGUAGE,
|
||||
"rust" => tree_sitter_rust::LANGUAGE,
|
||||
"scala" => tree_sitter_scala::LANGUAGE,
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
use super::{Lint, LintKind, Linter, Suggestion};
|
||||
use crate::Document;
|
||||
use crate::TokenStringExt;
|
||||
use crate::{Document, TokenKind};
|
||||
|
||||
#[derive(Debug, Default)]
|
||||
pub struct NoFrenchSpaces;
|
||||
|
|
@ -13,6 +13,9 @@ impl Linter for NoFrenchSpaces {
|
|||
if let Some(space_idx) = sentence.iter_space_indices().next() {
|
||||
let space = &sentence[space_idx];
|
||||
|
||||
if matches!(space.kind, TokenKind::Space(0)) {
|
||||
continue;
|
||||
}
|
||||
if space_idx == 0 && space.span.len() != 1 {
|
||||
output.push(Lint {
|
||||
span: space.span,
|
||||
|
|
|
|||
|
|
@ -14,6 +14,7 @@ harper-core = { path = "../harper-core", version = "0.66.0", features = ["concur
|
|||
harper-comments = { path = "../harper-comments", version = "0.66.0" }
|
||||
harper-typst = { path = "../harper-typst", version = "0.66.0" }
|
||||
harper-html = { path = "../harper-html", version = "0.66.0" }
|
||||
harper-python = { path = "../harper-python", version = "0.66.0" }
|
||||
tower-lsp-server = "0.22.1"
|
||||
tokio = { version = "1.47.1", features = ["fs", "rt", "rt-multi-thread", "macros", "io-std", "io-util", "net"] }
|
||||
clap = { version = "4.5.48", features = ["derive"] }
|
||||
|
|
|
|||
|
|
@ -22,6 +22,7 @@ use harper_core::{Dialect, DictWordMetadata, Document, IgnoredLints};
|
|||
use harper_html::HtmlParser;
|
||||
use harper_ink::InkParser;
|
||||
use harper_literate_haskell::LiterateHaskellParser;
|
||||
use harper_python::PythonParser;
|
||||
use harper_stats::{Record, Stats};
|
||||
use harper_typst::Typst;
|
||||
use serde_json::Value;
|
||||
|
|
@ -388,6 +389,7 @@ impl Backend {
|
|||
"mail" | "plaintext" | "text" => Some(Box::new(PlainEnglish)),
|
||||
"typst" => Some(Box::new(Typst)),
|
||||
"org" => Some(Box::new(OrgMode)),
|
||||
"python" => Some(Box::new(PythonParser::default())),
|
||||
_ => None,
|
||||
};
|
||||
|
||||
|
|
|
|||
16
harper-python/Cargo.toml
Normal file
16
harper-python/Cargo.toml
Normal file
|
|
@ -0,0 +1,16 @@
|
|||
[package]
|
||||
name = "harper-python"
|
||||
version = "0.66.0"
|
||||
edition = "2024"
|
||||
description = "The language checker for developers."
|
||||
license = "Apache-2.0"
|
||||
repository = "https://github.com/automattic/harper"
|
||||
|
||||
[dependencies]
|
||||
harper-core = { path = "../harper-core", version = "0.66.0" }
|
||||
harper-tree-sitter = { path = "../harper-tree-sitter", version = "0.66.0" }
|
||||
tree-sitter-python = "0.25.0"
|
||||
tree-sitter = "0.25.10"
|
||||
|
||||
[dev-dependencies]
|
||||
paste = "1.0.15"
|
||||
94
harper-python/src/lib.rs
Normal file
94
harper-python/src/lib.rs
Normal file
|
|
@ -0,0 +1,94 @@
|
|||
use harper_core::parsers::{self, Parser, PlainEnglish};
|
||||
use harper_core::{Token, TokenKind};
|
||||
use harper_tree_sitter::TreeSitterMasker;
|
||||
use tree_sitter::Node;
|
||||
|
||||
pub struct PythonParser {
|
||||
/// Used to grab the text nodes.
|
||||
inner: parsers::Mask<TreeSitterMasker, PlainEnglish>,
|
||||
}
|
||||
|
||||
impl PythonParser {
|
||||
fn node_condition(n: &Node) -> bool {
|
||||
if n.kind().contains("comment") {
|
||||
return true;
|
||||
}
|
||||
if n.kind() == "string_content"
|
||||
&& let Some(expr_stmt) = parent_is_expression_statement(n)
|
||||
&& (is_module_level_docstring(&expr_stmt)
|
||||
|| is_fn_or_class_docstrings(&expr_stmt)
|
||||
|| is_attribute_docstring(&expr_stmt))
|
||||
{
|
||||
return true;
|
||||
}
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for PythonParser {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
inner: parsers::Mask::new(
|
||||
TreeSitterMasker::new(tree_sitter_python::LANGUAGE.into(), Self::node_condition),
|
||||
PlainEnglish,
|
||||
),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Parser for PythonParser {
|
||||
fn parse(&self, source: &[char]) -> Vec<Token> {
|
||||
let mut tokens = self.inner.parse(source);
|
||||
|
||||
let mut prev_kind: Option<&TokenKind> = None;
|
||||
|
||||
for token in &mut tokens {
|
||||
if let TokenKind::Space(v) = &mut token.kind {
|
||||
if let Some(TokenKind::Newline(_)) = &prev_kind {
|
||||
// Lines in multiline docstrings are indented with spaces to match the current level.
|
||||
// We need to remove such spaces to avoid triggering French spaces rule.
|
||||
*v = 0;
|
||||
} else {
|
||||
*v = (*v).clamp(0, 1);
|
||||
}
|
||||
}
|
||||
|
||||
prev_kind = Some(&token.kind);
|
||||
}
|
||||
|
||||
tokens
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the `expression_statement` grandparent of `node`.
///
/// Yields `Some` only when the direct parent of `node` is a `string` node and that string's
/// parent is an `expression_statement`; otherwise returns `None`.
fn parent_is_expression_statement<'a>(node: &Node<'a>) -> Option<Node<'a>> {
    let string_node = node.parent()?;
    if string_node.kind() != "string" {
        return None;
    }

    let statement = string_node.parent()?;
    (statement.kind() == "expression_statement").then_some(statement)
}
|
||||
|
||||
#[inline]
|
||||
fn is_module_level_docstring(expr_stmt: &Node) -> bool {
|
||||
// (module . (expression_statement (string)))
|
||||
expr_stmt.parent().is_some_and(|n| n.kind() == "module")
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn is_fn_or_class_docstrings(expr_stmt: &Node) -> bool {
|
||||
// (class/func_definition body: (block . (expression_statement (string))))
|
||||
expr_stmt
|
||||
.parent()
|
||||
.filter(|n| n.kind() == "block")
|
||||
.and_then(|n| n.parent())
|
||||
.is_some_and(|n| n.kind() == "function_definition" || n.kind() == "class_definition")
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn is_attribute_docstring(expr_stmt: &Node) -> bool {
|
||||
// ((expression_statement (assignment)) . (expression_statement (string)))
|
||||
expr_stmt
|
||||
.prev_sibling()
|
||||
.filter(|s| s.kind() == "expression_statement")
|
||||
.and_then(|s| s.child(0))
|
||||
.is_some_and(|c| c.kind() == "assignment")
|
||||
}
|
||||
41
harper-python/tests/run_tests.rs
Normal file
41
harper-python/tests/run_tests.rs
Normal file
|
|
@ -0,0 +1,41 @@
|
|||
use harper_core::linting::{LintGroup, Linter};
|
||||
use harper_core::spell::FstDictionary;
|
||||
use harper_core::{Dialect, Document};
|
||||
use harper_python::PythonParser;
|
||||
|
||||
/// Creates a unit test checking Python source code parsing.
///
/// `create_test!(name.ext, n)` expands to a `#[test]` function named
/// `lints_<ext>_<name>_correctly`. The generated test:
///
/// 1. embeds `./test_sources/<name>.<ext>` at compile time,
/// 2. builds a `Document` from it with `PythonParser` and the curated dictionary,
/// 3. runs the curated American-dialect `LintGroup` and asserts that exactly `n` lints are
///    produced,
/// 4. checks that every token's span resolves to content inside the document source.
macro_rules! create_test {
    ($filename:ident.$ext:ident, $correct_expected:expr) => {
        paste::paste! {
            #[test]
            fn [<lints_$ext _ $filename _correctly>]() {
                let source = include_str!(concat!(
                    "./test_sources/",
                    stringify!($filename),
                    ".",
                    stringify!($ext)
                ));

                let parser = PythonParser::default();
                let dict = FstDictionary::curated();
                let document = Document::new(source, &parser, &dict);

                let mut linter = LintGroup::new_curated(dict, Dialect::American);
                let lints = linter.lint(&document);

                // Show the produced lints only when the count is wrong, rather than dumping
                // them unconditionally on every run.
                assert_eq!(
                    lints.len(),
                    $correct_expected,
                    "unexpected lints: {:#?}",
                    lints
                );

                // Make sure that all generated tokens span real characters
                for token in document.tokens() {
                    assert!(token.span.try_get_content(document.get_source()).is_some());
                }
            }
        }
    };
}
|
||||
|
||||
// Expects four lints; presumably one per misspelled single-line docstring (module, function,
// class, method), with the multiline docstring linting cleanly — verify against the fixture.
create_test!(docstrings.py, 4);
|
||||
// Expects two lints; presumably one per misspelled attribute docstring in the fixture.
create_test!(field_docstrings.py, 2);
|
||||
// Expects one lint; presumably only the `#` comment is checked, since the misspelled string
// literals in this fixture are ordinary values rather than docstrings — verify.
create_test!(comments.py, 1);
|
||||
7
harper-python/tests/test_sources/comments.py
Normal file
7
harper-python/tests/test_sources/comments.py
Normal file
|
|
@ -0,0 +1,7 @@
|
|||
|
||||
# This is a camment.
|
||||
|
||||
header = "This is a haeder."
|
||||
|
||||
def main():
|
||||
welcome_message = "Hellom World!"
|
||||
22
harper-python/tests/test_sources/docstrings.py
Normal file
22
harper-python/tests/test_sources/docstrings.py
Normal file
|
|
@ -0,0 +1,22 @@
|
|||
"""Errors should never passs silently"""
|
||||
def main():
|
||||
"""Beautifull is better than ugly."""
|
||||
|
||||
|
||||
class Main:
|
||||
"""Explicit is better than implicet."""
|
||||
|
||||
def __init__(self):
|
||||
"""Flat is bettter than nested."""
|
||||
pass
|
||||
|
||||
|
||||
|
||||
def multiline_docstring(action_name: str):
|
||||
"""Perform the specified action.
|
||||
|
||||
Available actions:
|
||||
- stop
|
||||
- start
|
||||
- pause
|
||||
"""
|
||||
5
harper-python/tests/test_sources/field_docstrings.py
Normal file
5
harper-python/tests/test_sources/field_docstrings.py
Normal file
|
|
@ -0,0 +1,5 @@
|
|||
class Result:
|
||||
output_path: str
|
||||
"""The path to the autput file."""
|
||||
status: str
|
||||
"""The stotus of the job."""
|
||||
Loading…
Add table
Add a link
Reference in a new issue