mirror of
https://github.com/Automattic/harper.git
synced 2025-12-23 08:48:15 +00:00
feat(core): Add support for Python docstrings (#2038)
* feat(core): Add support for Python docstrings
* Remove unused dependency
* Revert "Remove unused dependency"
This reverts commit 5720b2eced.
* Fix for harper-ls
* Fix handling of multiline strings
* Fix merge artifact
* Formatting fix
* Do not pass quotes for linting
---------
Co-authored-by: Elijah Potter <me@elijahpotter.dev>
This commit is contained in:
parent
84a52e3988
commit
041d5a0b16
15 changed files with 209 additions and 5 deletions
16
harper-python/Cargo.toml
Normal file
16
harper-python/Cargo.toml
Normal file
|
|
@ -0,0 +1,16 @@
|
|||
[package]
|
||||
name = "harper-python"
|
||||
version = "0.66.0"
|
||||
edition = "2024"
|
||||
description = "The language checker for developers."
|
||||
license = "Apache-2.0"
|
||||
repository = "https://github.com/automattic/harper"
|
||||
|
||||
[dependencies]
|
||||
harper-core = { path = "../harper-core", version = "0.66.0" }
|
||||
harper-tree-sitter = { path = "../harper-tree-sitter", version = "0.66.0" }
|
||||
tree-sitter-python = "0.25.0"
|
||||
tree-sitter = "0.25.10"
|
||||
|
||||
[dev-dependencies]
|
||||
paste = "1.0.15"
|
||||
94
harper-python/src/lib.rs
Normal file
94
harper-python/src/lib.rs
Normal file
|
|
@ -0,0 +1,94 @@
|
|||
use harper_core::parsers::{self, Parser, PlainEnglish};
|
||||
use harper_core::{Token, TokenKind};
|
||||
use harper_tree_sitter::TreeSitterMasker;
|
||||
use tree_sitter::Node;
|
||||
|
||||
pub struct PythonParser {
|
||||
/// Used to grab the text nodes.
|
||||
inner: parsers::Mask<TreeSitterMasker, PlainEnglish>,
|
||||
}
|
||||
|
||||
impl PythonParser {
|
||||
fn node_condition(n: &Node) -> bool {
|
||||
if n.kind().contains("comment") {
|
||||
return true;
|
||||
}
|
||||
if n.kind() == "string_content"
|
||||
&& let Some(expr_stmt) = parent_is_expression_statement(n)
|
||||
&& (is_module_level_docstring(&expr_stmt)
|
||||
|| is_fn_or_class_docstrings(&expr_stmt)
|
||||
|| is_attribute_docstring(&expr_stmt))
|
||||
{
|
||||
return true;
|
||||
}
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for PythonParser {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
inner: parsers::Mask::new(
|
||||
TreeSitterMasker::new(tree_sitter_python::LANGUAGE.into(), Self::node_condition),
|
||||
PlainEnglish,
|
||||
),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Parser for PythonParser {
|
||||
fn parse(&self, source: &[char]) -> Vec<Token> {
|
||||
let mut tokens = self.inner.parse(source);
|
||||
|
||||
let mut prev_kind: Option<&TokenKind> = None;
|
||||
|
||||
for token in &mut tokens {
|
||||
if let TokenKind::Space(v) = &mut token.kind {
|
||||
if let Some(TokenKind::Newline(_)) = &prev_kind {
|
||||
// Lines in multiline docstrings are indented with spaces to match the current level.
|
||||
// We need to remove such spaces to avoid triggering French spaces rule.
|
||||
*v = 0;
|
||||
} else {
|
||||
*v = (*v).clamp(0, 1);
|
||||
}
|
||||
}
|
||||
|
||||
prev_kind = Some(&token.kind);
|
||||
}
|
||||
|
||||
tokens
|
||||
}
|
||||
}
|
||||
|
||||
/// Finds the `expression_statement` that directly wraps the string containing `node`.
///
/// Returns `Some(statement)` only when `node`'s parent has kind `string` and that
/// string's parent has kind `expression_statement`; otherwise returns `None`.
fn parent_is_expression_statement<'a>(node: &Node<'a>) -> Option<Node<'a>> {
    let string_node = node.parent()?;
    if string_node.kind() != "string" {
        return None;
    }

    let statement = string_node.parent()?;
    (statement.kind() == "expression_statement").then_some(statement)
}
|
||||
|
||||
#[inline]
|
||||
fn is_module_level_docstring(expr_stmt: &Node) -> bool {
|
||||
// (module . (expression_statement (string)))
|
||||
expr_stmt.parent().is_some_and(|n| n.kind() == "module")
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn is_fn_or_class_docstrings(expr_stmt: &Node) -> bool {
|
||||
// (class/func_definition body: (block . (expression_statement (string))))
|
||||
expr_stmt
|
||||
.parent()
|
||||
.filter(|n| n.kind() == "block")
|
||||
.and_then(|n| n.parent())
|
||||
.is_some_and(|n| n.kind() == "function_definition" || n.kind() == "class_definition")
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn is_attribute_docstring(expr_stmt: &Node) -> bool {
|
||||
// ((expression_statement (assignment)) . (expression_statement (string)))
|
||||
expr_stmt
|
||||
.prev_sibling()
|
||||
.filter(|s| s.kind() == "expression_statement")
|
||||
.and_then(|s| s.child(0))
|
||||
.is_some_and(|c| c.kind() == "assignment")
|
||||
}
|
||||
41
harper-python/tests/run_tests.rs
Normal file
41
harper-python/tests/run_tests.rs
Normal file
|
|
@ -0,0 +1,41 @@
|
|||
use harper_core::linting::{LintGroup, Linter};
|
||||
use harper_core::spell::FstDictionary;
|
||||
use harper_core::{Dialect, Document};
|
||||
use harper_python::PythonParser;
|
||||
|
||||
/// Creates a unit test checking Python source code parsing.
///
/// `create_test!(name.ext, n)` expands to a test named `lints_<ext>_<name>_correctly`
/// that:
/// 1. embeds `./test_sources/<name>.<ext>` at compile time,
/// 2. builds a [`Document`] from it with [`PythonParser`] and the curated dictionary,
/// 3. runs the curated American-dialect [`LintGroup`] and asserts that exactly `n`
///    lints are produced,
/// 4. checks that every token's span resolves to content in the document source.
macro_rules! create_test {
    ($filename:ident.$ext:ident, $correct_expected:expr) => {
        paste::paste! {
            #[test]
            fn [<lints_$ext _ $filename _correctly>]() {
                let source = include_str!(concat!(
                    "./test_sources/",
                    stringify!($filename),
                    ".",
                    stringify!($ext)
                ));

                let dict = FstDictionary::curated();
                let parser = PythonParser::default();
                let document = Document::new(&source, &parser, &dict);

                let mut linter = LintGroup::new_curated(dict, Dialect::American);
                let lints = linter.lint(&document);

                // Printed so a failing count can be diagnosed from the test output.
                dbg!(&lints);
                assert_eq!(lints.len(), $correct_expected);

                // Make sure that all generated tokens span real characters
                for token in document.tokens() {
                    assert!(token.span.try_get_content(document.get_source()).is_some());
                }
            }
        }
    };
}
|
||||
|
||||
create_test!(docstrings.py, 4);
|
||||
create_test!(field_docstrings.py, 2);
|
||||
create_test!(comments.py, 1);
|
||||
7
harper-python/tests/test_sources/comments.py
Normal file
7
harper-python/tests/test_sources/comments.py
Normal file
|
|
@ -0,0 +1,7 @@
|
|||
|
||||
# This is a camment.
|
||||
|
||||
header = "This is a haeder."
|
||||
|
||||
def main():
|
||||
welcome_message = "Hellom World!"
|
||||
22
harper-python/tests/test_sources/docstrings.py
Normal file
22
harper-python/tests/test_sources/docstrings.py
Normal file
|
|
@ -0,0 +1,22 @@
|
|||
"""Errors should never passs silently"""
|
||||
def main():
|
||||
"""Beautifull is better than ugly."""
|
||||
|
||||
|
||||
class Main:
|
||||
"""Explicit is better than implicet."""
|
||||
|
||||
def __init__(self):
|
||||
"""Flat is bettter than nested."""
|
||||
pass
|
||||
|
||||
|
||||
|
||||
def multiline_docstring(action_name: str):
|
||||
"""Perform the specified action.
|
||||
|
||||
Available actions:
|
||||
- stop
|
||||
- start
|
||||
- pause
|
||||
"""
|
||||
5
harper-python/tests/test_sources/field_docstrings.py
Normal file
5
harper-python/tests/test_sources/field_docstrings.py
Normal file
|
|
@ -0,0 +1,5 @@
|
|||
class Result:
|
||||
output_path: str
|
||||
"""The path to the autput file."""
|
||||
status: str
|
||||
"""The stotus of the job."""
|
||||
Loading…
Add table
Add a link
Reference in a new issue