ruff/crates/ruff_python_importer/src/insertion.rs
Andrew Gallant da5eb85087 [ruff] Add API for splicing into an existing import statement
Basically, given a `from module import name1, name2, ...` statement,
we'd like to be able to insert another name in that list.

This new `Insertion::existing_import` API provides such
functionality. There isn't much to it, although we are careful
to try and avoid inserting nonsense for import statements
that are already invalid.
2025-09-17 13:59:28 -04:00

809 lines
22 KiB
Rust

//! Insert statements into Python code.
use std::ops::Add;
use ruff_diagnostics::Edit;
use ruff_python_ast::Stmt;
use ruff_python_ast::helpers::is_docstring_stmt;
use ruff_python_codegen::Stylist;
use ruff_python_parser::{TokenKind, Tokens};
use ruff_python_trivia::is_python_whitespace;
use ruff_python_trivia::{PythonWhitespace, textwrap::indent};
use ruff_source_file::{LineRanges, UniversalNewlineIterator};
use ruff_text_size::{Ranged, TextSize};
#[derive(Debug, Clone, PartialEq, Eq)]
pub(super) enum Placement<'a> {
/// The content will be inserted inline with the existing code (i.e., within semicolon-delimited
/// statements).
Inline,
/// The content will be inserted on its own line.
OwnLine,
/// The content will be inserted as an indented block.
Indented(&'a str),
}
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Insertion<'a> {
/// The content to add before the insertion.
prefix: &'a str,
/// The location at which to insert.
location: TextSize,
/// The content to add after the insertion.
suffix: &'a str,
/// The line placement of insertion.
placement: Placement<'a>,
}
impl<'a> Insertion<'a> {
/// Create an [`Insertion`] to insert (e.g.) an import statement at the start of a given
/// file, along with a prefix and suffix to use for the insertion.
///
/// For example, given the following code:
///
/// ```python
/// """Hello, world!"""
///
/// import os
/// ```
///
/// The insertion returned will begin at the start of the `import os` statement, and will
/// include a trailing newline.
pub fn start_of_file(body: &[Stmt], contents: &str, stylist: &Stylist) -> Insertion<'static> {
// Skip over any docstrings.
let mut location = if let Some(mut location) = match_docstring_end(body) {
// If the first token after the docstring is a semicolon, insert after the semicolon as
// an inline statement.
if let Some(offset) = match_semicolon(&contents[location.to_usize()..]) {
return Insertion::inline(" ", location.add(offset).add(TextSize::of(';')), ";");
}
// While the first token after the docstring is a continuation character (i.e. "\"), advance
// additional rows to prevent inserting in the same logical line.
while match_continuation(&contents[location.to_usize()..]).is_some() {
location = contents.full_line_end(location);
}
// Otherwise, advance to the next row.
contents.full_line_end(location)
} else {
contents.bom_start_offset()
};
// Skip over commented lines, with whitespace separation.
for line in
UniversalNewlineIterator::with_offset(&contents[location.to_usize()..], location)
{
let trimmed_line = line.trim_whitespace_start();
if trimmed_line.is_empty() {
continue;
}
if trimmed_line.starts_with('#') {
location = line.full_end();
} else {
break;
}
}
Insertion::own_line("", location, stylist.line_ending().as_str())
}
/// Create an [`Insertion`] to insert (e.g.) an import after the end of the given
/// [`Stmt`], along with a prefix and suffix to use for the insertion.
///
/// For example, given the following code:
///
/// ```python
/// """Hello, world!"""
///
/// import os
/// import math
///
///
/// def foo():
/// pass
/// ```
///
/// The insertion returned will begin after the newline after the last import statement, which
/// in this case is the line after `import math`, and will include a trailing newline.
///
/// The statement itself is assumed to be at the top-level of the module.
pub fn end_of_statement(stmt: &Stmt, contents: &str, stylist: &Stylist) -> Insertion<'static> {
let location = stmt.end();
if let Some(offset) = match_semicolon(&contents[location.to_usize()..]) {
// If the first token after the statement is a semicolon, insert after the semicolon as
// an inline statement.
Insertion::inline(" ", location.add(offset).add(TextSize::of(';')), ";")
} else if match_continuation(&contents[location.to_usize()..]).is_some() {
// If the first token after the statement is a continuation, insert after the statement
// with a semicolon.
Insertion::inline("; ", location, "")
} else {
// Otherwise, insert on the next line.
Insertion::own_line(
"",
contents.full_line_end(location),
stylist.line_ending().as_str(),
)
}
}
/// Create an [`Insertion`] to insert an additional member to import
/// into a `from <module> import member1, member2, ...` statement.
///
/// For example, given the following code:
///
/// ```python
/// """Hello, world!"""
///
/// from collections import Counter
///
///
/// def foo():
/// pass
/// ```
///
/// The insertion returned will begin after `Counter` but before the
/// newline terminator. Callers can then call [`Insertion::into_edit`]
/// with the additional member to add. A comma delimiter is handled
/// automatically.
///
/// The statement itself is assumed to be at the top-level of the module.
///
/// This returns `None` when `stmt` isn't a `from ... import ...`
/// statement.
pub fn existing_import(stmt: &Stmt, tokens: &Tokens) -> Option<Insertion<'static>> {
let Stmt::ImportFrom(ref import_from) = *stmt else {
return None;
};
if let Some(at) = import_from.names.last().map(Ranged::end) {
return Some(Insertion::inline(", ", at, ""));
}
// Our AST can deal with partial `from ... import`
// statements, so we might not have any members
// yet. In this case, we don't need the comma.
//
// ... however, unless we can be certain that
// inserting this name leads to a valid AST, we
// give up.
let at = import_from.end();
if !matches!(
tokens
.before(at)
.last()
.map(ruff_python_parser::Token::kind),
Some(TokenKind::Import)
) {
return None;
}
Some(Insertion::inline(" ", at, ""))
}
/// Create an [`Insertion`] to insert (e.g.) an import statement at the start of a given
/// block, along with a prefix and suffix to use for the insertion.
///
/// For example, given the following code:
///
/// ```python
/// if TYPE_CHECKING:
/// import os
/// ```
///
/// The insertion returned will begin at the start of the `import os` statement, and will
/// include a trailing newline.
///
/// The block itself is assumed to be at the top-level of the module.
pub fn start_of_block(
mut location: TextSize,
contents: &'a str,
stylist: &Stylist,
tokens: &Tokens,
) -> Insertion<'a> {
enum Awaiting {
Colon(u32),
Newline,
Indent,
}
let mut state = Awaiting::Colon(0);
for token in tokens.after(location) {
match state {
// Iterate until we find the colon indicating the start of the block body.
Awaiting::Colon(depth) => match token.kind() {
TokenKind::Colon if depth == 0 => {
state = Awaiting::Newline;
}
TokenKind::Lpar | TokenKind::Lbrace | TokenKind::Lsqb => {
state = Awaiting::Colon(depth.saturating_add(1));
}
TokenKind::Rpar | TokenKind::Rbrace | TokenKind::Rsqb => {
state = Awaiting::Colon(depth.saturating_sub(1));
}
_ => {}
},
// Once we've seen the colon, we're looking for a newline; otherwise, there's no
// block body (e.g. `if True: pass`).
Awaiting::Newline => match token.kind() {
TokenKind::Comment => {}
TokenKind::Newline => {
state = Awaiting::Indent;
}
_ => {
location = token.start();
break;
}
},
// Once we've seen the newline, we're looking for the indentation of the block body.
Awaiting::Indent => match token.kind() {
TokenKind::Comment => {}
TokenKind::NonLogicalNewline => {}
TokenKind::Indent => {
// This is like:
// ```python
// if True:
// pass
// ```
// Where `range` is the indentation before the `pass` token.
return Insertion::indented(
"",
token.start(),
stylist.line_ending().as_str(),
&contents[token.range()],
);
}
_ => {
location = token.start();
break;
}
},
}
}
// This is like: `if True: pass`, where `location` is the start of the `pass` token.
Insertion::inline("", location, "; ")
}
/// Convert this [`Insertion`] into an [`Edit`] that inserts the given content.
pub fn into_edit(self, content: &str) -> Edit {
let Insertion {
prefix,
location,
suffix,
placement,
} = self;
let content = format!("{prefix}{content}{suffix}");
Edit::insertion(
match placement {
Placement::Indented(indentation) if !indentation.is_empty() => {
indent(&content, indentation).to_string()
}
_ => content,
},
location,
)
}
/// Returns `true` if this [`Insertion`] is inline.
pub fn is_inline(&self) -> bool {
matches!(self.placement, Placement::Inline)
}
/// Create an [`Insertion`] that inserts content inline (i.e., within semicolon-delimited
/// statements).
fn inline(prefix: &'a str, location: TextSize, suffix: &'a str) -> Self {
Self {
prefix,
location,
suffix,
placement: Placement::Inline,
}
}
/// Create an [`Insertion`] that starts on its own line.
fn own_line(prefix: &'a str, location: TextSize, suffix: &'a str) -> Self {
Self {
prefix,
location,
suffix,
placement: Placement::OwnLine,
}
}
/// Create an [`Insertion`] that starts on its own line, with the given indentation.
fn indented(
prefix: &'a str,
location: TextSize,
suffix: &'a str,
indentation: &'a str,
) -> Self {
Self {
prefix,
location,
suffix,
placement: Placement::Indented(indentation),
}
}
}
/// Find the end of the docstring (first string statement).
fn match_docstring_end(body: &[Stmt]) -> Option<TextSize> {
let stmt = body.first()?;
if !is_docstring_stmt(stmt) {
return None;
}
Some(stmt.end())
}
/// If the next token is a semicolon, return its offset.
fn match_semicolon(s: &str) -> Option<TextSize> {
for (offset, c) in s.char_indices() {
match c {
_ if is_python_whitespace(c) => continue,
';' => return Some(TextSize::try_from(offset).unwrap()),
_ => break,
}
}
None
}
/// If the next token is a continuation (`\`), return its offset.
fn match_continuation(s: &str) -> Option<TextSize> {
for (offset, c) in s.char_indices() {
match c {
_ if is_python_whitespace(c) => continue,
'\\' => return Some(TextSize::try_from(offset).unwrap()),
_ => break,
}
}
None
}
#[cfg(test)]
mod tests {
use anyhow::Result;
use ruff_python_codegen::Stylist;
use ruff_python_parser::parse_module;
use ruff_source_file::LineEnding;
use ruff_text_size::{Ranged, TextSize};
use super::Insertion;
#[test]
fn start_of_file() -> Result<()> {
fn insert(contents: &str) -> Result<Insertion<'_>> {
let parsed = parse_module(contents)?;
let stylist = Stylist::from_tokens(parsed.tokens(), contents);
Ok(Insertion::start_of_file(parsed.suite(), contents, &stylist))
}
let contents = "";
assert_eq!(
insert(contents)?,
Insertion::own_line("", TextSize::from(0), LineEnding::default().as_str())
);
let contents = r#"
"""Hello, world!""""#
.trim_start();
assert_eq!(
insert(contents)?,
Insertion::own_line("", TextSize::from(19), LineEnding::default().as_str())
);
let contents = r#"
"""Hello, world!"""
"#
.trim_start();
assert_eq!(
insert(contents)?,
Insertion::own_line("", TextSize::from(20), "\n")
);
let contents = r#"
"""Hello, world!"""
"""Hello, world!"""
"#
.trim_start();
assert_eq!(
insert(contents)?,
Insertion::own_line("", TextSize::from(20), "\n")
);
let contents = r#"
"""Hello, world!"""\
"#
.trim_start();
assert_eq!(
insert(contents)?,
Insertion::own_line("", TextSize::from(22), "\n")
);
let contents = r#"
"""Hello, world!"""\
\
"#
.trim_start();
assert_eq!(
insert(contents)?,
Insertion::own_line("", TextSize::from(24), "\n")
);
let contents = r"
x = 1
"
.trim_start();
assert_eq!(
insert(contents)?,
Insertion::own_line("", TextSize::from(0), "\n")
);
let contents = r"
#!/usr/bin/env python3
"
.trim_start();
assert_eq!(
insert(contents)?,
Insertion::own_line("", TextSize::from(23), "\n")
);
let contents = r#"
#!/usr/bin/env python3
"""Hello, world!"""
"#
.trim_start();
assert_eq!(
insert(contents)?,
Insertion::own_line("", TextSize::from(43), "\n")
);
let contents = r#"
"""Hello, world!"""
#!/usr/bin/env python3
"#
.trim_start();
assert_eq!(
insert(contents)?,
Insertion::own_line("", TextSize::from(43), "\n")
);
let contents = r#"
"""%s""" % "Hello, world!"
"#
.trim_start();
assert_eq!(
insert(contents)?,
Insertion::own_line("", TextSize::from(0), "\n")
);
let contents = r#"
"""Hello, world!"""; x = 1
"#
.trim_start();
assert_eq!(
insert(contents)?,
Insertion::inline(" ", TextSize::from(20), ";")
);
let contents = r#"
"""Hello, world!"""; x = 1; y = \
2
"#
.trim_start();
assert_eq!(
insert(contents)?,
Insertion::inline(" ", TextSize::from(20), ";")
);
Ok(())
}
#[test]
fn start_of_block() {
fn insert(contents: &str, offset: TextSize) -> Insertion<'_> {
let parsed = parse_module(contents).unwrap();
let stylist = Stylist::from_tokens(parsed.tokens(), contents);
Insertion::start_of_block(offset, contents, &stylist, parsed.tokens())
}
let contents = "if True: pass";
assert_eq!(
insert(contents, TextSize::from(0)),
Insertion::inline("", TextSize::from(9), "; ")
);
let contents = r"
if True:
pass
"
.trim_start();
assert_eq!(
insert(contents, TextSize::from(0)),
Insertion::indented("", TextSize::from(9), "\n", " ")
);
}
#[test]
fn existing_import_works() {
fn snapshot(content: &str, member: &str) -> String {
let parsed = parse_module(content).unwrap();
let edit = Insertion::existing_import(parsed.suite().first().unwrap(), parsed.tokens())
.unwrap()
.into_edit(member);
let insert_text = edit.content().expect("edit should be non-empty");
let mut content = content.to_string();
content.replace_range(edit.range().to_std_range(), insert_text);
content
}
let source = r#"
from collections import Counter
"#;
insta::assert_snapshot!(
snapshot(source, "defaultdict"),
@r"
from collections import Counter, defaultdict
",
);
let source = r#"
from collections import Counter, OrderedDict
"#;
insta::assert_snapshot!(
snapshot(source, "defaultdict"),
@r"
from collections import Counter, OrderedDict, defaultdict
",
);
let source = r#"
from collections import (Counter)
"#;
insta::assert_snapshot!(
snapshot(source, "defaultdict"),
@"from collections import (Counter, defaultdict)",
);
let source = r#"
from collections import (Counter, OrderedDict)
"#;
insta::assert_snapshot!(
snapshot(source, "defaultdict"),
@"from collections import (Counter, OrderedDict, defaultdict)",
);
let source = r#"
from collections import (Counter,)
"#;
insta::assert_snapshot!(
snapshot(source, "defaultdict"),
@"from collections import (Counter, defaultdict,)",
);
let source = r#"
from collections import (Counter, OrderedDict,)
"#;
insta::assert_snapshot!(
snapshot(source, "defaultdict"),
@"from collections import (Counter, OrderedDict, defaultdict,)",
);
let source = r#"
from collections import (
Counter
)
"#;
insta::assert_snapshot!(
snapshot(source, "defaultdict"),
@r"
from collections import (
Counter, defaultdict
)
",
);
let source = r#"
from collections import (
Counter,
)
"#;
insta::assert_snapshot!(
snapshot(source, "defaultdict"),
@r"
from collections import (
Counter, defaultdict,
)
",
);
let source = r#"
from collections import (
Counter,
OrderedDict
)
"#;
insta::assert_snapshot!(
snapshot(source, "defaultdict"),
@r"
from collections import (
Counter,
OrderedDict, defaultdict
)
",
);
let source = r#"
from collections import (
Counter,
OrderedDict,
)
"#;
insta::assert_snapshot!(
snapshot(source, "defaultdict"),
@r"
from collections import (
Counter,
OrderedDict, defaultdict,
)
",
);
let source = r#"
from collections import \
Counter
"#;
insta::assert_snapshot!(
snapshot(source, "defaultdict"),
@r"
from collections import \
Counter, defaultdict
",
);
let source = r#"
from collections import \
Counter, OrderedDict
"#;
insta::assert_snapshot!(
snapshot(source, "defaultdict"),
@r"
from collections import \
Counter, OrderedDict, defaultdict
",
);
let source = r#"
from collections import \
Counter, \
OrderedDict
"#;
insta::assert_snapshot!(
snapshot(source, "defaultdict"),
@r"
from collections import \
Counter, \
OrderedDict, defaultdict
",
);
/*
from collections import (
Collector # comment
)
from collections import (
Collector, # comment
)
from collections import (
Collector # comment
,
)
from collections import (
Collector
# comment
,
)
*/
let source = r#"
from collections import (
Counter # comment
)
"#;
insta::assert_snapshot!(
snapshot(source, "defaultdict"),
@r"
from collections import (
Counter, defaultdict # comment
)
",
);
let source = r#"
from collections import (
Counter, # comment
)
"#;
insta::assert_snapshot!(
snapshot(source, "defaultdict"),
@r"
from collections import (
Counter, defaultdict, # comment
)
",
);
let source = r#"
from collections import (
Counter # comment
,
)
"#;
insta::assert_snapshot!(
snapshot(source, "defaultdict"),
@r"
from collections import (
Counter, defaultdict # comment
,
)
",
);
let source = r#"
from collections import (
Counter
# comment
,
)
"#;
insta::assert_snapshot!(
snapshot(source, "defaultdict"),
@r"
from collections import (
Counter, defaultdict
# comment
,
)
",
);
let source = r#"
from collections import (
# comment 1
Counter # comment 2
# comment 3
)
"#;
insta::assert_snapshot!(
snapshot(source, "defaultdict"),
@r"
from collections import (
# comment 1
Counter, defaultdict # comment 2
# comment 3
)
",
);
let source = r#"
from collections import Counter # comment
"#;
insta::assert_snapshot!(
snapshot(source, "defaultdict"),
@"from collections import Counter, defaultdict # comment",
);
let source = r#"
from collections import Counter, OrderedDict # comment
"#;
insta::assert_snapshot!(
snapshot(source, "defaultdict"),
@"from collections import Counter, OrderedDict, defaultdict # comment",
);
}
}