Avoid printing continuations within import identifiers (#7744)

## Summary

It turns out that _some_ identifiers can contain newlines --
specifically, dot-delimited import identifiers, like:
```python
import foo\
    .bar
```

At present, we print all identifiers verbatim, which causes us to retain
the `\` in the formatted output. This also leads to violating some debug
assertions (see the linked issue, though that's a symptom of this
formatting failure).

This PR adds detection for import identifiers that contain newlines, and
formats them via `text` (slow) rather than `source_code_slice` (fast) in
those cases.

Closes https://github.com/astral-sh/ruff/issues/7734.

## Test Plan

`cargo test`
This commit is contained in:
Charlie Marsh 2023-10-02 09:51:07 -04:00 committed by GitHub
parent 0df27375ba
commit c71ff7eae1
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 63 additions and 2 deletions

View file

@ -2,6 +2,13 @@ from a import aksjdhflsakhdflkjsadlfajkslhfdkjsaldajlahflashdfljahlfksajlhfajfjf
from a import aksjdhflsakhdflkjsadlfajkslhfdkjsaldajlahflashdfljahlfksajlhfajfjfsaahflakjslhdfkjalhdskjfa, aksjdhflsakhdflkjsadlfajkslhfdkjsaldajlahflashdfljahlfksajlhfajfjfsaahflakjslhdfkjalhdskjfa
from a import aksjdhflsakhdflkjsadlfajkslhfdkjsaldajlahflashdfljahlfksajlhfajfjfsaahflakjslhdfkjalhdskjfa as dfgsdfgsd, aksjdhflsakhdflkjsadlfajkslhfdkjsaldajlahflashdfljahlfksajlhfajfjfsaahflakjslhdfkjalhdskjfa as sdkjflsdjlahlfd
# Continuations.
import foo\
.bar
from foo\
.bar import baz
# At the top-level, force one empty line after an import, but allow up to two empty
# lines.
import os

View file

@ -1,6 +1,7 @@
use ruff_formatter::write;
use ruff_python_ast::Alias;
use crate::other::identifier::DotDelimitedIdentifier;
use crate::prelude::*;
#[derive(Default)]
@ -13,7 +14,7 @@ impl FormatNodeRule<Alias> for FormatAlias {
name,
asname,
} = item;
name.format().fmt(f)?;
DotDelimitedIdentifier::new(name).fmt(f)?;
if let Some(asname) = asname {
write!(f, [space(), token("as"), space(), asname.format()])?;
}

View file

@ -27,3 +27,43 @@ impl<'ast> IntoFormat<PyFormatContext<'ast>> for Identifier {
FormatOwnedWithRule::new(self, FormatIdentifier)
}
}
/// A formatter for a dot-delimited identifier, as seen in import statements:
/// ```python
/// import foo.bar
/// ```
///
/// Dot-delimited identifiers can contain newlines via continuations (backslashes) after the
/// dot-delimited segment, as in:
/// ```python
/// import foo\
/// .bar
/// ```
///
/// While identifiers can typically be formatted via verbatim source code slices, dot-delimited
/// identifiers with newlines must be formatted via `text`. This struct implements both the fast
/// and slow paths for such identifiers.
#[derive(Debug)]
pub(crate) struct DotDelimitedIdentifier<'a>(&'a Identifier);
impl<'a> DotDelimitedIdentifier<'a> {
pub(crate) fn new(identifier: &'a Identifier) -> Self {
Self(identifier)
}
}
impl Format<PyFormatContext<'_>> for DotDelimitedIdentifier<'_> {
fn fmt(&self, f: &mut PyFormatter) -> FormatResult<()> {
// An import identifier can contain newlines by inserting continuations (backslashes) after
// a dot-delimited segment, as in:
// ```python
// import foo\
// .bar
// ```
if memchr::memchr(b'\\', f.context().source()[self.0.range()].as_bytes()).is_some() {
text(self.0.as_str(), Some(self.0.start())).fmt(f)
} else {
source_text_slice(self.0.range()).fmt(f)
}
}
}

View file

@ -6,6 +6,7 @@ use ruff_text_size::Ranged;
use crate::builders::{parenthesize_if_expands, PyFormatterExtensions, TrailingComma};
use crate::comments::{SourceComment, SuppressionKind};
use crate::expression::parentheses::parenthesized;
use crate::other::identifier::DotDelimitedIdentifier;
use crate::prelude::*;
#[derive(Default)]
@ -31,7 +32,7 @@ impl FormatNodeRule<StmtImportFrom> for FormatStmtImportFrom {
}
Ok(())
}),
module.as_ref().map(AsFormat::format),
module.as_ref().map(DotDelimitedIdentifier::new),
space(),
token("import"),
space(),

View file

@ -8,6 +8,13 @@ from a import aksjdhflsakhdflkjsadlfajkslhfdkjsaldajlahflashdfljahlfksajlhfajfjf
from a import aksjdhflsakhdflkjsadlfajkslhfdkjsaldajlahflashdfljahlfksajlhfajfjfsaahflakjslhdfkjalhdskjfa, aksjdhflsakhdflkjsadlfajkslhfdkjsaldajlahflashdfljahlfksajlhfajfjfsaahflakjslhdfkjalhdskjfa
from a import aksjdhflsakhdflkjsadlfajkslhfdkjsaldajlahflashdfljahlfksajlhfajfjfsaahflakjslhdfkjalhdskjfa as dfgsdfgsd, aksjdhflsakhdflkjsadlfajkslhfdkjsaldajlahflashdfljahlfksajlhfajfjfsaahflakjslhdfkjalhdskjfa as sdkjflsdjlahlfd
# Continuations.
import foo\
.bar
from foo\
.bar import baz
# At the top-level, force one empty line after an import, but allow up to two empty
# lines.
import os
@ -98,6 +105,11 @@ from a import (
aksjdhflsakhdflkjsadlfajkslhfdkjsaldajlahflashdfljahlfksajlhfajfjfsaahflakjslhdfkjalhdskjfa as sdkjflsdjlahlfd,
)
# Continuations.
import foo.bar
from foo.bar import baz
# At the top-level, force one empty line after an import, but allow up to two empty
# lines.
import os