Detect line endings and use them during code generation (#1482)

This commit is contained in:
Reiner Gerecke 2022-12-30 18:59:40 +01:00 committed by GitHub
parent a7dc491ff1
commit 080f99b908
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
13 changed files with 231 additions and 38 deletions

View file

@ -0,0 +1 @@
from long_module_name import member_one, member_two, member_three, member_four, member_five

View file

@ -0,0 +1,2 @@
from long_module_name import member_one, member_two, member_three, member_four, member_five

View file

@ -0,0 +1,2 @@
from long_module_name import member_one, member_two, member_three, member_four, member_five

View file

@ -11,11 +11,13 @@ use crate::isort;
use crate::isort::track::ImportTracker;
use crate::settings::{flags, Settings};
use crate::source_code_locator::SourceCodeLocator;
use crate::source_code_style::SourceCodeStyleDetector;
fn check_import_blocks(
tracker: ImportTracker,
locator: &SourceCodeLocator,
settings: &Settings,
stylist: &SourceCodeStyleDetector,
autofix: flags::Autofix,
package: Option<&Path>,
) -> Vec<Check> {
@ -23,7 +25,7 @@ fn check_import_blocks(
for block in tracker.into_iter() {
if !block.imports.is_empty() {
if let Some(check) =
isort::plugins::check_imports(&block, locator, settings, autofix, package)
isort::plugins::check_imports(&block, locator, settings, stylist, autofix, package)
{
checks.push(check);
}
@ -32,11 +34,13 @@ fn check_import_blocks(
checks
}
#[allow(clippy::too_many_arguments)]
pub fn check_imports(
python_ast: &Suite,
locator: &SourceCodeLocator,
directives: &IsortDirectives,
settings: &Settings,
stylist: &SourceCodeStyleDetector,
autofix: flags::Autofix,
path: &Path,
package: Option<&Path>,
@ -45,5 +49,5 @@ pub fn check_imports(
for stmt in python_ast {
tracker.visit_stmt(stmt);
}
check_import_blocks(tracker, locator, settings, autofix, package)
check_import_blocks(tracker, locator, settings, stylist, autofix, package)
}

View file

@ -1,4 +1,5 @@
use crate::isort::types::{AliasData, CommentSet, ImportFromData, Importable};
use crate::source_code_style::SourceCodeStyleDetector;
// Hard-code four-space indentation for the imports themselves, to match Black.
const INDENT: &str = " ";
@ -7,14 +8,19 @@ const INDENT: &str = " ";
const CAPACITY: usize = 200;
/// Add a plain import statement to the `RopeBuilder`.
pub fn format_import(alias: &AliasData, comments: &CommentSet, is_first: bool) -> String {
pub fn format_import(
alias: &AliasData,
comments: &CommentSet,
is_first: bool,
stylist: &SourceCodeStyleDetector,
) -> String {
let mut output = String::with_capacity(CAPACITY);
if !is_first && !comments.atop.is_empty() {
output.push('\n');
output.push_str(stylist.line_ending());
}
for comment in &comments.atop {
output.push_str(comment);
output.push('\n');
output.push_str(stylist.line_ending());
}
if let Some(asname) = alias.asname {
output.push_str("import ");
@ -29,16 +35,18 @@ pub fn format_import(alias: &AliasData, comments: &CommentSet, is_first: bool) -
output.push_str(" ");
output.push_str(comment);
}
output.push('\n');
output.push_str(stylist.line_ending());
output
}
/// Add an import-from statement to the `RopeBuilder`.
#[allow(clippy::too_many_arguments)]
pub fn format_import_from(
import_from: &ImportFromData,
comments: &CommentSet,
aliases: &[(AliasData, CommentSet)],
line_length: usize,
stylist: &SourceCodeStyleDetector,
force_wrap_aliases: bool,
is_first: bool,
trailing_comma: bool,
@ -48,7 +56,8 @@ pub fn format_import_from(
.iter()
.all(|(alias, _)| alias.name == "*" && alias.asname.is_none())
{
let (single_line, ..) = format_single_line(import_from, comments, aliases, is_first);
let (single_line, ..) =
format_single_line(import_from, comments, aliases, is_first, stylist);
return single_line;
}
@ -68,13 +77,13 @@ pub fn format_import_from(
|| aliases.iter().all(|(alias, _)| alias.asname.is_none()))
{
let (single_line, import_length) =
format_single_line(import_from, comments, aliases, is_first);
format_single_line(import_from, comments, aliases, is_first, stylist);
if import_length <= line_length || aliases.iter().any(|(alias, _)| alias.name == "*") {
return single_line;
}
}
format_multi_line(import_from, comments, aliases, is_first)
format_multi_line(import_from, comments, aliases, is_first, stylist)
}
/// Format an import-from statement in single-line format.
@ -85,16 +94,17 @@ fn format_single_line(
comments: &CommentSet,
aliases: &[(AliasData, CommentSet)],
is_first: bool,
stylist: &SourceCodeStyleDetector,
) -> (String, usize) {
let mut output = String::with_capacity(CAPACITY);
let mut line_length = 0;
if !is_first && !comments.atop.is_empty() {
output.push('\n');
output.push_str(stylist.line_ending());
}
for comment in &comments.atop {
output.push_str(comment);
output.push('\n');
output.push_str(stylist.line_ending());
}
let module_name = import_from.module_name();
@ -133,7 +143,7 @@ fn format_single_line(
line_length += 2 + comment.len();
}
output.push('\n');
output.push_str(stylist.line_ending());
(output, line_length)
}
@ -144,15 +154,16 @@ fn format_multi_line(
comments: &CommentSet,
aliases: &[(AliasData, CommentSet)],
is_first: bool,
stylist: &SourceCodeStyleDetector,
) -> String {
let mut output = String::with_capacity(CAPACITY);
if !is_first && !comments.atop.is_empty() {
output.push('\n');
output.push_str(stylist.line_ending());
}
for comment in &comments.atop {
output.push_str(comment);
output.push('\n');
output.push_str(stylist.line_ending());
}
output.push_str("from ");
@ -164,13 +175,13 @@ fn format_multi_line(
output.push(' ');
output.push_str(comment);
}
output.push('\n');
output.push_str(stylist.line_ending());
for (AliasData { name, asname }, comments) in aliases {
for comment in &comments.atop {
output.push_str(INDENT);
output.push_str(comment);
output.push('\n');
output.push_str(stylist.line_ending());
}
output.push_str(INDENT);
if let Some(asname) = asname {
@ -187,11 +198,11 @@ fn format_multi_line(
output.push(' ');
output.push_str(comment);
}
output.push('\n');
output.push_str(stylist.line_ending());
}
output.push(')');
output.push('\n');
output.push_str(stylist.line_ending());
output
}

View file

@ -17,6 +17,7 @@ use crate::isort::types::{
AliasData, CommentSet, ImportBlock, ImportFromData, Importable, OrderedImportBlock,
TrailingComma,
};
use crate::source_code_style::SourceCodeStyleDetector;
use crate::SourceCodeLocator;
mod categorize;
@ -549,6 +550,7 @@ pub fn format_imports(
comments: Vec<Comment>,
locator: &SourceCodeLocator,
line_length: usize,
stylist: &SourceCodeStyleDetector,
src: &[PathBuf],
package: Option<&Path>,
known_first_party: &BTreeSet<String>,
@ -590,14 +592,19 @@ pub fn format_imports(
if is_first_block {
is_first_block = false;
} else {
output.append("\n");
output.append(stylist.line_ending());
}
let mut is_first_statement = true;
// Format `StmtKind::Import` statements.
for (alias, comments) in &import_block.import {
output.append(&format::format_import(alias, comments, is_first_statement));
output.append(&format::format_import(
alias,
comments,
is_first_statement,
stylist,
));
is_first_statement = false;
}
@ -608,6 +615,7 @@ pub fn format_imports(
comments,
aliases,
line_length,
stylist,
force_wrap_aliases,
is_first_statement,
split_on_trailing_comma && matches!(trailing_comma, TrailingComma::Present),
@ -618,11 +626,11 @@ pub fn format_imports(
match trailer {
None => {}
Some(Trailer::Sibling) => {
output.append("\n");
output.append(stylist.line_ending());
}
Some(Trailer::FunctionDef | Trailer::ClassDef) => {
output.append("\n");
output.append("\n");
output.append(stylist.line_ending());
output.append(stylist.line_ending());
}
}
output.finish().to_string()
@ -672,6 +680,9 @@ mod tests {
#[test_case(Path::new("trailing_suffix.py"))]
#[test_case(Path::new("type_comments.py"))]
#[test_case(Path::new("magic_trailing_comma.py"))]
#[test_case(Path::new("line_ending_lf.py"))]
#[test_case(Path::new("line_ending_crlf.py"))]
#[test_case(Path::new("line_ending_cr.py"))]
fn default(path: &Path) -> Result<()> {
let snapshot = format!("{}", path.to_string_lossy());
let mut checks = test_path(

View file

@ -13,6 +13,7 @@ use crate::checks::CheckKind;
use crate::isort::track::Block;
use crate::isort::{comments, format_imports};
use crate::settings::flags;
use crate::source_code_style::SourceCodeStyleDetector;
use crate::{Check, Settings, SourceCodeLocator};
fn extract_range(body: &[&Stmt]) -> Range {
@ -37,6 +38,7 @@ pub fn check_imports(
block: &Block,
locator: &SourceCodeLocator,
settings: &Settings,
stylist: &SourceCodeStyleDetector,
autofix: flags::Autofix,
package: Option<&Path>,
) -> Option<Check> {
@ -74,6 +76,7 @@ pub fn check_imports(
comments,
locator,
settings.line_length - indentation.len(),
stylist,
&settings.src,
package,
&settings.isort.known_first_party,

View file

@ -0,0 +1,21 @@
---
source: src/isort/mod.rs
expression: checks
---
- kind: UnsortedImports
location:
row: 1
column: 0
end_location:
row: 2
column: 0
fix:
content: "from long_module_name import (\r member_five,\r member_four,\r member_one,\r member_three,\r member_two,\r)\r"
location:
row: 1
column: 0
end_location:
row: 2
column: 0
parent: ~

View file

@ -0,0 +1,21 @@
---
source: src/isort/mod.rs
expression: checks
---
- kind: UnsortedImports
location:
row: 1
column: 0
end_location:
row: 2
column: 0
fix:
content: "from long_module_name import (\r\n member_five,\r\n member_four,\r\n member_one,\r\n member_three,\r\n member_two,\r\n)\r\n"
location:
row: 1
column: 0
end_location:
row: 2
column: 0
parent: ~

View file

@ -0,0 +1,21 @@
---
source: src/isort/mod.rs
expression: checks
---
- kind: UnsortedImports
location:
row: 1
column: 0
end_location:
row: 2
column: 0
fix:
content: "from long_module_name import (\n member_five,\n member_four,\n member_one,\n member_three,\n member_two,\n)\n"
location:
row: 1
column: 0
end_location:
row: 2
column: 0
parent: ~

View file

@ -110,6 +110,7 @@ pub(crate) fn check_path(
locator,
&directives.isort,
settings,
stylist,
autofix,
path,
package,
@ -285,6 +286,7 @@ pub fn add_noqa_to_path(path: &Path, settings: &Settings) -> Result<usize> {
&contents,
&directives.noqa_line_for,
&settings.external,
stylist.line_ending(),
)
}

View file

@ -9,6 +9,7 @@ use regex::Regex;
use rustc_hash::{FxHashMap, FxHashSet};
use crate::checks::{Check, CheckCode, CODE_REDIRECTS};
use crate::source_code_style::LineEnding;
static NOQA_LINE_REGEX: Lazy<Regex> = Lazy::new(|| {
Regex::new(
@ -84,8 +85,9 @@ pub fn add_noqa(
contents: &str,
noqa_line_for: &IntMap<usize, usize>,
external: &FxHashSet<String>,
line_ending: &LineEnding,
) -> Result<usize> {
let (count, output) = add_noqa_inner(checks, contents, noqa_line_for, external);
let (count, output) = add_noqa_inner(checks, contents, noqa_line_for, external, line_ending);
fs::write(path, output)?;
Ok(count)
}
@ -95,6 +97,7 @@ fn add_noqa_inner(
contents: &str,
noqa_line_for: &IntMap<usize, usize>,
external: &FxHashSet<String>,
line_ending: &LineEnding,
) -> (usize, String) {
let mut matches_by_line: FxHashMap<usize, FxHashSet<&CheckCode>> = FxHashMap::default();
for (lineno, line) in contents.lines().enumerate() {
@ -131,7 +134,7 @@ fn add_noqa_inner(
match matches_by_line.get(&lineno) {
None => {
output.push_str(line);
output.push('\n');
output.push_str(line_ending);
}
Some(codes) => {
match extract_noqa_directive(line) {
@ -146,7 +149,7 @@ fn add_noqa_inner(
let codes: Vec<&str> = codes.iter().map(AsRef::as_ref).collect();
let suffix = codes.join(", ");
output.push_str(&suffix);
output.push('\n');
output.push_str(line_ending);
count += 1;
}
Directive::All(_, start, _) => {
@ -161,7 +164,7 @@ fn add_noqa_inner(
codes.iter().map(AsRef::as_ref).sorted_unstable().collect();
let suffix = codes.join(", ");
output.push_str(&suffix);
output.push('\n');
output.push_str(line_ending);
count += 1;
}
Directive::Codes(_, start, _, existing) => {
@ -186,7 +189,7 @@ fn add_noqa_inner(
formatted.push_str(&suffix);
output.push_str(&formatted);
output.push('\n');
output.push_str(line_ending);
// Only count if the new line is an actual edit.
if formatted != line {
@ -211,6 +214,7 @@ mod tests {
use crate::ast::types::Range;
use crate::checks::{Check, CheckKind};
use crate::noqa::{add_noqa_inner, NOQA_LINE_REGEX};
use crate::source_code_style::LineEnding;
#[test]
fn regex() {
@ -232,9 +236,15 @@ mod tests {
let contents = "x = 1";
let noqa_line_for = IntMap::default();
let external = FxHashSet::default();
let (count, output) = add_noqa_inner(&checks, contents, &noqa_line_for, &external);
let (count, output) = add_noqa_inner(
&checks,
contents,
&noqa_line_for,
&external,
&LineEnding::Lf,
);
assert_eq!(count, 0);
assert_eq!(output.trim(), contents.trim());
assert_eq!(output, format!("{contents}\n"));
let checks = vec![Check::new(
CheckKind::UnusedVariable("x".to_string()),
@ -246,9 +256,15 @@ mod tests {
let contents = "x = 1";
let noqa_line_for = IntMap::default();
let external = FxHashSet::default();
let (count, output) = add_noqa_inner(&checks, contents, &noqa_line_for, &external);
let (count, output) = add_noqa_inner(
&checks,
contents,
&noqa_line_for,
&external,
&LineEnding::Lf,
);
assert_eq!(count, 1);
assert_eq!(output.trim(), "x = 1 # noqa: F841".trim());
assert_eq!(output, "x = 1 # noqa: F841\n");
let checks = vec![
Check::new(
@ -266,12 +282,18 @@ mod tests {
},
),
];
let contents = "x = 1 # noqa: E741";
let contents = "x = 1 # noqa: E741\n";
let noqa_line_for = IntMap::default();
let external = FxHashSet::default();
let (count, output) = add_noqa_inner(&checks, contents, &noqa_line_for, &external);
let (count, output) = add_noqa_inner(
&checks,
contents,
&noqa_line_for,
&external,
&LineEnding::Lf,
);
assert_eq!(count, 1);
assert_eq!(output.trim(), "x = 1 # noqa: E741, F841".trim());
assert_eq!(output, "x = 1 # noqa: E741, F841\n");
let checks = vec![
Check::new(
@ -292,8 +314,14 @@ mod tests {
let contents = "x = 1 # noqa";
let noqa_line_for = IntMap::default();
let external = FxHashSet::default();
let (count, output) = add_noqa_inner(&checks, contents, &noqa_line_for, &external);
let (count, output) = add_noqa_inner(
&checks,
contents,
&noqa_line_for,
&external,
&LineEnding::Lf,
);
assert_eq!(count, 1);
assert_eq!(output.trim(), "x = 1 # noqa: E741, F841".trim());
assert_eq!(output, "x = 1 # noqa: E741, F841\n");
}
}

View file

@ -17,6 +17,7 @@ pub struct SourceCodeStyleDetector<'a> {
locator: &'a SourceCodeLocator<'a>,
indentation: OnceCell<Indentation>,
quote: OnceCell<Quote>,
line_ending: OnceCell<LineEnding>,
}
impl<'a> SourceCodeStyleDetector<'a> {
@ -30,12 +31,18 @@ impl<'a> SourceCodeStyleDetector<'a> {
.get_or_init(|| detect_quote(self.contents, self.locator).unwrap_or_default())
}
pub fn line_ending(&'a self) -> &'a LineEnding {
self.line_ending
.get_or_init(|| detect_line_ending(self.contents).unwrap_or_default())
}
pub fn from_contents(contents: &'a str, locator: &'a SourceCodeLocator<'a>) -> Self {
Self {
contents,
locator,
indentation: OnceCell::default(),
quote: OnceCell::default(),
line_ending: OnceCell::default(),
}
}
}
@ -86,6 +93,33 @@ impl Deref for Indentation {
}
}
/// The line ending style used in Python source code.
/// See <https://docs.python.org/3/reference/lexical_analysis.html#physical-lines>
#[derive(Debug, PartialEq, Eq)]
pub enum LineEnding {
Lf,
Cr,
CrLf,
}
impl Default for LineEnding {
fn default() -> Self {
LineEnding::Lf
}
}
impl Deref for LineEnding {
type Target = str;
fn deref(&self) -> &Self::Target {
match &self {
LineEnding::CrLf => "\r\n",
LineEnding::Lf => "\n",
LineEnding::Cr => "\r",
}
}
}
/// Detect the indentation style of the given tokens.
fn detect_indentation(contents: &str, locator: &SourceCodeLocator) -> Option<Indentation> {
for (_start, tok, end) in lexer::make_tokenizer(contents).flatten() {
@ -122,9 +156,26 @@ fn detect_quote(contents: &str, locator: &SourceCodeLocator) -> Option<Quote> {
None
}
/// Detect the line ending style of the given contents.
fn detect_line_ending(contents: &str) -> Option<LineEnding> {
if let Some(position) = contents.find('\n') {
let position = position.saturating_sub(1);
return if let Some('\r') = contents.chars().nth(position) {
Some(LineEnding::CrLf)
} else {
Some(LineEnding::Lf)
};
} else if contents.find('\r').is_some() {
return Some(LineEnding::Cr);
}
None
}
#[cfg(test)]
mod tests {
use crate::source_code_style::{detect_indentation, detect_quote, Indentation, Quote};
use crate::source_code_style::{
detect_indentation, detect_line_ending, detect_quote, Indentation, LineEnding, Quote,
};
use crate::SourceCodeLocator;
#[test]
@ -197,4 +248,19 @@ def f():
let locator = SourceCodeLocator::new(contents);
assert_eq!(detect_quote(contents, &locator), Some(Quote::Double));
}
#[test]
fn line_ending() {
let contents = "x = 1";
assert_eq!(detect_line_ending(contents), None);
let contents = "x = 1\n";
assert_eq!(detect_line_ending(contents), Some(LineEnding::Lf));
let contents = "x = 1\r";
assert_eq!(detect_line_ending(contents), Some(LineEnding::Cr));
let contents = "x = 1\r\n";
assert_eq!(detect_line_ending(contents), Some(LineEnding::CrLf));
}
}