ruff/crates/ruff_python_formatter/src/comments/mod.rs
2023-06-05 08:24:00 +00:00

889 lines
24 KiB
Rust

//! Types for extracting and representing comments of a syntax tree.
//!
//! Most programming languages support comments allowing programmers to document their programs.
//! Comments are different from other syntax because programming languages allow comments in almost any position,
//! giving programmers great flexibility on where they can write comments:
//!
//! ```javascript
//! /**
//! * Documentation comment
//! */
//! async /* comment */ function Test () // line comment
//! {/*inline*/}
//! ```
//!
//! This flexibility makes formatting comments challenging because:
//! * The formatter must consistently place comments so that re-formatting the output yields the same result,
//! and does not create invalid syntax (line comments).
//! * It is essential that formatters place comments close to the syntax the programmer intended to document.
//! However, the lack of rules regarding where comments are allowed and what syntax they document requires
//! the use of heuristics to infer the documented syntax.
//!
//! This module tries to strike a balance between placing comments as closely as possible to their source location
//! and reducing the complexity of formatting comments. It does so by associating comments per node rather than a token.
//! This greatly reduces the combinations of possible comment positions, but turns out to be, in practice,
//! sufficiently precise to keep comments close to their source location.
//!
//! Luckily, Python doesn't support inline comments, which simplifying the problem significantly.
//!
//! ## Node comments
//!
//! Comments are associated per node but get further distinguished on their location related to that node:
//!
//! ### Leading Comments
//!
//! A comment at the start of a node
//!
//! ```python
//! # Leading comment of the statement
//! print("test");
//!
//! [ # Leading comment of a
//! a
//! ];
//! ```
//!
//! ### Dangling Comments
//!
//! A comment that is neither at the start nor the end of a node.
//!
//! ```python
//! [
//! # I'm between two brackets. There are no nodes
//! ];
//! ```
//!
//! ### Trailing Comments
//!
//! A comment at the end of a node.
//!
//! ```python
//! [
//! a, # trailing comment of a
//! b, c
//! ];
//! ```
//!
//! ## Limitations
//! Limiting the placement of comments to leading, dangling, or trailing node comments reduces complexity inside the formatter but means,
//! that the formatter's possibility of where comments can be formatted depends on the AST structure.
//!
//! For example, *`RustPython`* doesn't create a node for the `/` operator separating positional only arguments from the other arguments.
//!
//! ```python
//! def test(
//! a,
//! /, # The following arguments are positional or named arguments
//! b
//! ):
//! pass
//! ```
//!
//! Because *`RustPython`* doesn't create a Node for the `/` argument, it is impossible to associate any
//! comments with it. Meaning, the default behaviour is to associate the `# The following ...` comment
//! with the `b` argument, which is incorrect. This limitation can be worked around by implementing
//! a custom rule to associate comments for `/` as *dangling comments* of the `Arguments` node and then
//! implement custom formatting inside of the arguments formatter.
//!
//! It is possible to add an additional optional label to [`SourceComment`] If ever the need arises to distinguish two *dangling comments* in the formatting logic,
use rustpython_parser::ast::Mod;
use std::cell::Cell;
use std::fmt::Debug;
use std::rc::Rc;
mod debug;
mod format;
mod map;
mod node_key;
mod placement;
mod visitor;
use crate::comments::debug::{DebugComment, DebugComments};
use crate::comments::map::MultiMap;
use crate::comments::node_key::NodeRefEqualityKey;
use crate::comments::visitor::CommentsVisitor;
pub(crate) use format::{
dangling_node_comments, leading_alternate_branch_comments, leading_node_comments,
trailing_comments, trailing_node_comments,
};
use ruff_formatter::{SourceCode, SourceCodeSlice};
use ruff_python_ast::node::AnyNodeRef;
use ruff_python_ast::source_code::CommentRanges;
/// A comment in the source document.
#[derive(Debug, Clone)]
pub(crate) struct SourceComment {
/// The location of the comment in the source document.
slice: SourceCodeSlice,
/// Whether the comment has been formatted or not.
#[cfg(debug_assertions)]
formatted: Cell<bool>,
position: CommentTextPosition,
}
impl SourceComment {
/// Returns the location of the comment in the original source code.
/// Allows retrieving the text of the comment.
pub(crate) fn slice(&self) -> &SourceCodeSlice {
&self.slice
}
pub(crate) fn position(&self) -> CommentTextPosition {
self.position
}
#[cfg(not(debug_assertions))]
#[inline(always)]
pub fn mark_formatted(&self) {}
/// Marks the comment as formatted
#[cfg(debug_assertions)]
pub(crate) fn mark_formatted(&self) {
self.formatted.set(true);
}
}
impl SourceComment {
/// Returns a nice debug representation that prints the source code for every comment (and not just the range).
pub(crate) fn debug<'a>(&'a self, source_code: SourceCode<'a>) -> DebugComment<'a> {
DebugComment::new(self, source_code)
}
}
/// The position of a comment in the source text.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub(crate) enum CommentTextPosition {
/// A comment that is on the same line as the preceding token and is separated by at least one line break from the following token.
///
/// # Examples
///
/// ## End of line
///
/// ```python
/// a; # comment
/// b;
/// ```
///
/// `# comment` is an end of line comments because it is separated by at least one line break from the following token `b`.
/// Comments that not only end, but also start on a new line are [`OwnLine`](CommentTextPosition::OwnLine) comments.
EndOfLine,
/// A Comment that is separated by at least one line break from the preceding token.
///
/// # Examples
///
/// ```python
/// a;
/// # comment
/// b;
/// ```
///
/// `# comment` line comments because they are separated by one line break from the preceding token `a`.
OwnLine,
}
impl CommentTextPosition {
pub(crate) const fn is_own_line(self) -> bool {
matches!(self, CommentTextPosition::OwnLine)
}
pub(crate) const fn is_end_of_line(self) -> bool {
matches!(self, CommentTextPosition::EndOfLine)
}
}
type CommentsMap<'a> = MultiMap<NodeRefEqualityKey<'a>, SourceComment>;
/// The comments of a syntax tree stored by node.
///
/// Cloning `comments` is cheap as it only involves bumping a reference counter.
#[derive(Clone, Default)]
pub(crate) struct Comments<'a> {
/// The implementation uses an [Rc] so that [Comments] has a lifetime independent from the [crate::Formatter].
/// Independent lifetimes are necessary to support the use case where a (formattable object)[crate::Format]
/// iterates over all comments, and writes them into the [crate::Formatter] (mutably borrowing the [crate::Formatter] and in turn its context).
///
/// ```block
/// for leading in f.context().comments().leading_comments(node) {
/// ^
/// |- Borrows comments
/// write!(f, [comment(leading.piece.text())])?;
/// ^
/// |- Mutably borrows the formatter, state, context, and comments (if comments aren't cloned)
/// }
/// ```
///
/// The use of an `Rc` solves this problem because we can cheaply clone `comments` before iterating.
///
/// ```block
/// let comments = f.context().comments().clone();
/// for leading in comments.leading_comments(node) {
/// write!(f, [comment(leading.piece.text())])?;
/// }
/// ```
data: Rc<CommentsData<'a>>,
}
#[allow(unused)]
// TODO(micha): Remove after using the new comments infrastructure in the formatter.
impl<'a> Comments<'a> {
fn new(comments: CommentsMap<'a>) -> Self {
Self {
data: Rc::new(CommentsData { comments }),
}
}
/// Extracts the comments from the AST.
pub(crate) fn from_ast(
root: &'a Mod,
source_code: SourceCode<'a>,
comment_ranges: &'a CommentRanges,
) -> Self {
let map = if comment_ranges.is_empty() {
CommentsMap::new()
} else {
CommentsVisitor::new(source_code, comment_ranges).visit(root)
};
Self::new(map)
}
#[inline]
pub(crate) fn has_comments(&self, node: AnyNodeRef) -> bool {
self.data.comments.has(&NodeRefEqualityKey::from_ref(node))
}
/// Returns `true` if the given `node` has any [leading comments](self#leading-comments).
#[inline]
pub(crate) fn has_leading_comments(&self, node: AnyNodeRef) -> bool {
!self.leading_comments(node).is_empty()
}
/// Returns the `node`'s [leading comments](self#leading-comments).
#[inline]
pub(crate) fn leading_comments(&self, node: AnyNodeRef<'a>) -> &[SourceComment] {
self.data
.comments
.leading(&NodeRefEqualityKey::from_ref(node))
}
/// Returns `true` if node has any [dangling comments](self#dangling-comments).
pub(crate) fn has_dangling_comments(&self, node: AnyNodeRef<'a>) -> bool {
!self.dangling_comments(node).is_empty()
}
/// Returns the [dangling comments](self#dangling-comments) of `node`
pub(crate) fn dangling_comments(&self, node: AnyNodeRef<'a>) -> &[SourceComment] {
self.data
.comments
.dangling(&NodeRefEqualityKey::from_ref(node))
}
/// Returns the `node`'s [trailing comments](self#trailing-comments).
#[inline]
pub(crate) fn trailing_comments(&self, node: AnyNodeRef<'a>) -> &[SourceComment] {
self.data
.comments
.trailing(&NodeRefEqualityKey::from_ref(node))
}
/// Returns `true` if the given `node` has any [trailing comments](self#trailing-comments).
#[inline]
pub(crate) fn has_trailing_comments(&self, node: AnyNodeRef) -> bool {
!self.trailing_comments(node).is_empty()
}
/// Returns an iterator over the [leading](self#leading-comments) and [trailing comments](self#trailing-comments) of `node`.
pub(crate) fn leading_trailing_comments(
&self,
node: AnyNodeRef<'a>,
) -> impl Iterator<Item = &SourceComment> {
self.leading_comments(node)
.iter()
.chain(self.trailing_comments(node).iter())
}
/// Returns an iterator over the [leading](self#leading-comments), [dangling](self#dangling-comments), and [trailing](self#trailing) comments of `node`.
pub(crate) fn leading_dangling_trailing_comments(
&self,
node: AnyNodeRef<'a>,
) -> impl Iterator<Item = &SourceComment> {
self.data
.comments
.parts(&NodeRefEqualityKey::from_ref(node))
}
#[inline(always)]
#[cfg(not(debug_assertions))]
pub(crate) fn assert_formatted_all_comments(&self, _source_code: SourceCode) {}
#[cfg(debug_assertions)]
pub(crate) fn assert_formatted_all_comments(&self, source_code: SourceCode) {
use std::fmt::Write;
let mut output = String::new();
let unformatted_comments = self
.data
.comments
.all_parts()
.filter(|c| !c.formatted.get());
for comment in unformatted_comments {
// SAFETY: Writing to a string never fails.
writeln!(output, "{:#?}", comment.debug(source_code)).unwrap();
}
assert!(
output.is_empty(),
"The following comments have not been formatted.\n{output}"
);
}
/// Returns an object that implements [Debug] for nicely printing the [`Comments`].
pub(crate) fn debug(&'a self, source_code: SourceCode<'a>) -> DebugComments<'a> {
DebugComments::new(&self.data.comments, source_code)
}
}
#[derive(Default)]
struct CommentsData<'a> {
comments: CommentsMap<'a>,
}
#[cfg(test)]
mod tests {
use crate::comments::Comments;
use insta::assert_debug_snapshot;
use ruff_formatter::SourceCode;
use ruff_python_ast::prelude::*;
use ruff_python_ast::source_code::{CommentRanges, CommentRangesBuilder};
use rustpython_parser::lexer::lex;
use rustpython_parser::{parse_tokens, Mode};
struct CommentsTestCase<'a> {
module: Mod,
comment_ranges: CommentRanges,
source_code: SourceCode<'a>,
}
impl<'a> CommentsTestCase<'a> {
fn from_code(code: &'a str) -> Self {
let source_code = SourceCode::new(code);
let tokens: Vec<_> = lex(code, Mode::Module).collect();
let mut comment_ranges = CommentRangesBuilder::default();
for (token, range) in tokens.iter().flatten() {
comment_ranges.visit_token(token, *range);
}
let comment_ranges = comment_ranges.finish();
let parsed = parse_tokens(tokens.into_iter(), Mode::Module, "test.py")
.expect("Expect source to be valid Python");
CommentsTestCase {
source_code,
module: parsed,
comment_ranges,
}
}
fn to_comments(&self) -> Comments {
Comments::from_ast(&self.module, self.source_code, &self.comment_ranges)
}
}
#[test]
fn base_test() {
let source = r#"
# Function Leading comment
def test(x, y):
if x == y: # if statement end of line comment
print("Equal")
# Leading comment
elif x < y:
print("Less")
else:
print("Greater")
# own line comment
test(10, 20)
"#;
let test_case = CommentsTestCase::from_code(source);
let comments = test_case.to_comments();
assert_debug_snapshot!(comments.debug(test_case.source_code));
}
#[test]
fn only_comments() {
let source = r#"
# Some comment
# another comment
"#;
let test_case = CommentsTestCase::from_code(source);
let comments = test_case.to_comments();
assert_debug_snapshot!(comments.debug(test_case.source_code));
}
#[test]
fn empty_file() {
let source = r#""#;
let test_case = CommentsTestCase::from_code(source);
let comments = test_case.to_comments();
assert_debug_snapshot!(comments.debug(test_case.source_code));
}
#[test]
fn dangling_comment() {
let source = r#"
def test(
# Some comment
):
pass
"#;
let test_case = CommentsTestCase::from_code(source);
let comments = test_case.to_comments();
assert_debug_snapshot!(comments.debug(test_case.source_code));
}
#[test]
fn parenthesized_expression() {
let source = r#"
a = ( # Trailing comment
10 + # More comments
3
)
"#;
let test_case = CommentsTestCase::from_code(source);
let comments = test_case.to_comments();
assert_debug_snapshot!(comments.debug(test_case.source_code));
}
#[test]
fn parenthesized_trailing_comment() {
let source = r#"(
a
# comment
)
"#;
let test_case = CommentsTestCase::from_code(source);
let comments = test_case.to_comments();
assert_debug_snapshot!(comments.debug(test_case.source_code));
}
#[test]
fn trailing_function_comment() {
let source = r#"
def test(x, y):
if x == y:
pass
elif x < y:
print("Less")
else:
print("Greater")
# trailing `else` comment
# Trailing `if` statement comment
def other(y, z):
if y == z:
pass
# Trailing `if` comment
# Trailing `other` function comment
test(10, 20)
"#;
let test_case = CommentsTestCase::from_code(source);
let comments = test_case.to_comments();
assert_debug_snapshot!(comments.debug(test_case.source_code));
}
#[test]
fn trailing_comment_after_single_statement_body() {
let source = r#"
if x == y: pass
# Test
print("test")
"#;
let test_case = CommentsTestCase::from_code(source);
let comments = test_case.to_comments();
assert_debug_snapshot!(comments.debug(test_case.source_code));
}
#[test]
fn if_elif_else_comments() {
let source = r#"
if x == y:
pass # trailing `pass` comment
# Root `if` trailing comment
# Leading elif comment
elif x < y:
pass
# `elif` trailing comment
# Leading else comment
else:
pass
# `else` trailing comment
"#;
let test_case = CommentsTestCase::from_code(source);
let comments = test_case.to_comments();
assert_debug_snapshot!(comments.debug(test_case.source_code));
}
#[test]
fn if_elif_if_else_comments() {
let source = r#"
if x == y:
pass
elif x < y:
if x < 10:
pass
# `elif` trailing comment
# Leading else comment
else:
pass
"#;
let test_case = CommentsTestCase::from_code(source);
let comments = test_case.to_comments();
assert_debug_snapshot!(comments.debug(test_case.source_code));
}
#[test]
fn try_except_finally_else() {
let source = r#"
try:
pass
# trailing try comment
# leading handler comment
except Exception as ex:
pass
# Trailing except comment
# leading else comment
else:
pass
# trailing else comment
# leading finally comment
finally:
print("Finally!")
# Trailing finally comment
"#;
let test_case = CommentsTestCase::from_code(source);
let comments = test_case.to_comments();
assert_debug_snapshot!(comments.debug(test_case.source_code));
}
#[test]
fn try_except() {
let source = r#"
def test():
try:
pass
# trailing try comment
# leading handler comment
except Exception as ex:
pass
# Trailing except comment
# Trailing function comment
print("Next statement");
"#;
let test_case = CommentsTestCase::from_code(source);
let comments = test_case.to_comments();
assert_debug_snapshot!(comments.debug(test_case.source_code));
}
// Issue: Match cases
#[test]
fn match_cases() {
let source = r#"def make_point_3d(pt):
match pt:
# Leading `case(x, y)` comment
case (x, y):
return Point3d(x, y, 0)
# Trailing `case(x, y) comment
# Leading `case (x, y, z)` comment
case (x, y, z):
if x < y:
print("if")
else:
print("else")
# Trailing else comment
# trailing case comment
case Point2d(x, y):
return Point3d(x, y, 0)
case _:
raise TypeError("not a point we support")
# Trailing last case comment
# Trailing match comment
# After match comment
print("other")
"#;
let test_case = CommentsTestCase::from_code(source);
let comments = test_case.to_comments();
assert_debug_snapshot!(comments.debug(test_case.source_code));
}
#[test]
fn leading_most_outer() {
let source = r#"
# leading comment
x
"#;
let test_case = CommentsTestCase::from_code(source);
let comments = test_case.to_comments();
assert_debug_snapshot!(comments.debug(test_case.source_code));
}
// Comment should be attached to the statement
#[test]
fn trailing_most_outer() {
let source = r#"
x # trailing comment
y # trailing last node
"#;
let test_case = CommentsTestCase::from_code(source);
let comments = test_case.to_comments();
assert_debug_snapshot!(comments.debug(test_case.source_code));
}
#[test]
fn trailing_most_outer_nested() {
let source = r#"
x + (
3 # trailing comment
) # outer
"#;
let test_case = CommentsTestCase::from_code(source);
let comments = test_case.to_comments();
assert_debug_snapshot!(comments.debug(test_case.source_code));
}
#[test]
fn trailing_after_comma() {
let source = r#"
def test(
a, # Trailing comment for argument `a`
b,
): pass
"#;
let test_case = CommentsTestCase::from_code(source);
let comments = test_case.to_comments();
assert_debug_snapshot!(comments.debug(test_case.source_code));
}
#[test]
fn positional_argument_only_comment() {
let source = r#"
def test(
a, # trailing positional comment
# Positional arguments only after here
/, # trailing positional argument comment.
# leading b comment
b,
): pass
"#;
let test_case = CommentsTestCase::from_code(source);
let comments = test_case.to_comments();
assert_debug_snapshot!(comments.debug(test_case.source_code));
}
#[test]
fn positional_argument_only_leading_comma_comment() {
let source = r#"
def test(
a # trailing positional comment
# Positional arguments only after here
,/, # trailing positional argument comment.
# leading b comment
b,
): pass
"#;
let test_case = CommentsTestCase::from_code(source);
let comments = test_case.to_comments();
assert_debug_snapshot!(comments.debug(test_case.source_code));
}
#[test]
fn positional_argument_only_comment_without_following_node() {
let source = r#"
def test(
a, # trailing positional comment
# Positional arguments only after here
/, # trailing positional argument comment.
# Trailing on new line
): pass
"#;
let test_case = CommentsTestCase::from_code(source);
let comments = test_case.to_comments();
assert_debug_snapshot!(comments.debug(test_case.source_code));
}
#[test]
fn non_positional_arguments_with_defaults() {
let source = r#"
def test(
a=10 # trailing positional comment
# Positional arguments only after here
,/, # trailing positional argument comment.
# leading comment for b
b=20
): pass
"#;
let test_case = CommentsTestCase::from_code(source);
let comments = test_case.to_comments();
assert_debug_snapshot!(comments.debug(test_case.source_code));
}
#[test]
fn non_positional_arguments_slash_on_same_line() {
let source = r#"
def test(a=10,/, # trailing positional argument comment.
# leading comment for b
b=20
): pass
"#;
let test_case = CommentsTestCase::from_code(source);
let comments = test_case.to_comments();
assert_debug_snapshot!(comments.debug(test_case.source_code));
}
#[test]
fn binary_expression_left_operand_comment() {
let source = r#"
a = (
5
# trailing left comment
+
# leading right comment
3
)
"#;
let test_case = CommentsTestCase::from_code(source);
let comments = test_case.to_comments();
assert_debug_snapshot!(comments.debug(test_case.source_code));
}
#[test]
fn binary_expression_left_operand_trailing_end_of_line_comment() {
let source = r#"
a = (
5 # trailing left comment
+ # trailing operator comment
# leading right comment
3
)
"#;
let test_case = CommentsTestCase::from_code(source);
let comments = test_case.to_comments();
assert_debug_snapshot!(comments.debug(test_case.source_code));
}
#[test]
fn nested_binary_expression() {
let source = r#"
a = (
(5 # trailing left comment
*
2)
+ # trailing operator comment
# leading right comment
3
)
"#;
let test_case = CommentsTestCase::from_code(source);
let comments = test_case.to_comments();
assert_debug_snapshot!(comments.debug(test_case.source_code));
}
#[test]
fn while_trailing_end_of_line_comment() {
let source = r#"while True:
if something.changed:
do.stuff() # trailing comment
"#;
let test_case = CommentsTestCase::from_code(source);
let comments = test_case.to_comments();
assert_debug_snapshot!(comments.debug(test_case.source_code));
}
#[test]
fn while_trailing_else_end_of_line_comment() {
let source = r#"while True:
pass
else: # trailing comment
pass
"#;
let test_case = CommentsTestCase::from_code(source);
let comments = test_case.to_comments();
assert_debug_snapshot!(comments.debug(test_case.source_code));
}
}