From 1ce07d65bdce46f7a0118343aaf41f1af63ec6d5 Mon Sep 17 00:00:00 2001 From: Micha Reiser Date: Fri, 9 Feb 2024 21:41:36 +0100 Subject: [PATCH] Use `usize` instead of `TextSize` for `indent_len` (#9903) --- .../src/string/docstring.rs | 54 ++++++++++--------- 1 file changed, 29 insertions(+), 25 deletions(-) diff --git a/crates/ruff_python_formatter/src/string/docstring.rs b/crates/ruff_python_formatter/src/string/docstring.rs index ba73519a60..b09324a10f 100644 --- a/crates/ruff_python_formatter/src/string/docstring.rs +++ b/crates/ruff_python_formatter/src/string/docstring.rs @@ -240,9 +240,9 @@ struct DocstringLinePrinter<'ast, 'buf, 'fmt, 'src> { /// printed. offset: TextSize, - /// Indentation alignment based on the least indented line in the + /// Indentation alignment (in columns) based on the least indented line in the /// docstring. - stripped_indentation_length: TextSize, + stripped_indentation_length: usize, /// Whether the docstring is overall already considered normalized. When it /// is, the formatter can take a fast path. @@ -345,7 +345,7 @@ impl<'ast, 'buf, 'fmt, 'src> DocstringLinePrinter<'ast, 'buf, 'fmt, 'src> { }; // This looks suspicious, but it's consistent with the whitespace // normalization that will occur anyway. - let indent = " ".repeat(min_indent.to_usize()); + let indent = " ".repeat(min_indent); for docline in formatted_lines { self.print_one( &docline.map(|line| std::format!("{indent}{line}")), @@ -355,7 +355,7 @@ impl<'ast, 'buf, 'fmt, 'src> DocstringLinePrinter<'ast, 'buf, 'fmt, 'src> { CodeExampleKind::Markdown(fenced) => { // This looks suspicious, but it's consistent with the whitespace // normalization that will occur anyway. 
- let indent = " ".repeat(fenced.opening_fence_indent.to_usize()); + let indent = " ".repeat(fenced.opening_fence_indent); for docline in formatted_lines { self.print_one( &docline.map(|line| std::format!("{indent}{line}")), @@ -401,18 +401,21 @@ impl<'ast, 'buf, 'fmt, 'src> DocstringLinePrinter<'ast, 'buf, 'fmt, 'src> { // (see example in [`format_docstring`] doc comment). We then // prepend the in-docstring indentation to the string. let indent_len = indentation_length(trim_end) - self.stripped_indentation_length; - let in_docstring_indent = " ".repeat(usize::from(indent_len)) + trim_end.trim_start(); + let in_docstring_indent = " ".repeat(indent_len) + trim_end.trim_start(); text(&in_docstring_indent).fmt(self.f)?; } else { + // It's guaranteed that the `indent` is all spaces because `tab_or_non_ascii_space` is + // `false` (indent contains neither tabs nor non-space whitespace). + // Take the string with the trailing whitespace removed, then also // skip the leading whitespace. let trimmed_line_range = TextRange::at(line.offset, trim_end.text_len()) - .add_start(self.stripped_indentation_length); + .add_start(TextSize::try_from(self.stripped_indentation_length).unwrap()); if self.already_normalized { source_text_slice(trimmed_line_range).fmt(self.f)?; } else { // All indents are ascii spaces, so the slicing is correct. - text(&trim_end[usize::from(self.stripped_indentation_length)..]).fmt(self.f)?; + text(&trim_end[self.stripped_indentation_length..]).fmt(self.f)?; } } @@ -896,7 +899,7 @@ struct CodeExampleRst<'src> { lines: Vec<CodeExampleLine<'src>>, /// The indent of the line "opening" this block measured via - /// `indentation_length`. + /// `indentation_length` (in columns). /// /// It can either be the indent of a line ending with `::` (for a literal /// block) or the indent of a line starting with `.. ` (a directive). 
@@ -904,7 +907,7 @@ struct CodeExampleRst<'src> { /// The content body of a block needs to be indented more than the line /// opening the block, so we use this indentation to look for indentation /// that is "more than" it. - opening_indent: TextSize, + opening_indent: usize, /// The minimum indent of the block measured via `indentation_length`. /// @@ -923,7 +926,7 @@ struct CodeExampleRst<'src> { /// When the code snippet has been extracted, it is re-built before being /// reformatted. The minimum indent is stripped from each line when it is /// re-built. - min_indent: Option<TextSize>, + min_indent: Option<usize>, /// Whether this is a directive block or not. When not a directive, this is /// a literal block. The main difference between them is that they start @@ -1216,11 +1219,11 @@ struct CodeExampleMarkdown<'src> { lines: Vec<CodeExampleLine<'src>>, /// The indent of the line "opening" fence of this block measured via - /// `indentation_length`. + /// `indentation_length` (in columns). /// /// This indentation is trimmed from the indentation of every line in the /// body of the code block, - opening_fence_indent: TextSize, + opening_fence_indent: usize, /// The kind of fence, backticks or tildes, used for this block. We need to /// keep track of which kind was used to open the block in order to look @@ -1534,23 +1537,25 @@ fn needs_chaperone_space(normalized: &NormalizedString, trim_end: &str) -> bool || trim_end.chars().rev().take_while(|c| *c == '\\').count() % 2 == 1 } +/// Returns the indentation's visual width in columns/spaces. +/// /// For docstring indentation, black counts spaces as 1 and tabs by increasing the indentation up /// to the next multiple of 8. This is effectively a port of /// [`str.expandtabs`](https://docs.python.org/3/library/stdtypes.html#str.expandtabs), /// which black [calls with the default tab width of 8](https://github.com/psf/black/blob/c36e468794f9256d5e922c399240d49782ba04f1/src/black/strings.py#L61). 
-fn indentation_length(line: &str) -> TextSize { - let mut indentation = 0u32; +fn indentation_length(line: &str) -> usize { + let mut indentation = 0usize; for char in line.chars() { if char == '\t' { // Pad to the next multiple of tab_width indentation += 8 - (indentation.rem_euclid(8)); } else if char.is_whitespace() { - indentation += u32::from(char.text_len()); + indentation += char.len_utf8(); } else { break; } } - TextSize::new(indentation) + indentation } /// Trims at most `indent_len` indentation from the beginning of `line`. @@ -1560,11 +1565,11 @@ fn indentation_length(line: &str) -> TextSize { /// `indentation_length`. This is useful when one needs to trim some minimum /// level of indentation from a code snippet collected from a docstring before /// attempting to reformat it. -fn indentation_trim(indent_len: TextSize, line: &str) -> &str { - let mut seen_indent_len = 0u32; +fn indentation_trim(indent_len: usize, line: &str) -> &str { + let mut seen_indent_len = 0; let mut trimmed = line; for char in line.chars() { - if seen_indent_len >= indent_len.to_u32() { + if seen_indent_len >= indent_len { return trimmed; } if char == '\t' { @@ -1572,7 +1577,7 @@ fn indentation_trim(indent_len: TextSize, line: &str) -> &str { seen_indent_len += 8 - (seen_indent_len.rem_euclid(8)); trimmed = &trimmed[1..]; } else if char.is_whitespace() { - seen_indent_len += u32::from(char.text_len()); + seen_indent_len += char.len_utf8(); trimmed = &trimmed[char.len_utf8()..]; } else { break; @@ -1608,15 +1613,14 @@ fn is_rst_option(line: &str) -> bool { #[cfg(test)] mod tests { - use ruff_text_size::TextSize; use super::indentation_length; #[test] fn test_indentation_like_black() { - assert_eq!(indentation_length("\t \t \t"), TextSize::new(24)); - assert_eq!(indentation_length("\t \t"), TextSize::new(24)); - assert_eq!(indentation_length("\t\t\t"), TextSize::new(24)); - assert_eq!(indentation_length(" "), TextSize::new(4)); + assert_eq!(indentation_length("\t \t \t"), 24); + 
assert_eq!(indentation_length("\t \t"), 24); + assert_eq!(indentation_length("\t\t\t"), 24); + assert_eq!(indentation_length(" "), 4); } }