mirror of
https://github.com/astral-sh/ruff.git
synced 2025-08-04 02:38:25 +00:00
Memoize text width (#6552)
This commit is contained in:
parent
fa6bff0078
commit
5f59101811
14 changed files with 213 additions and 184 deletions
|
@ -129,7 +129,7 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn test_nesting() {
|
||||
let mut context = FormatState::new(());
|
||||
let mut context = FormatState::new(SimpleFormatContext::default());
|
||||
let mut buffer = VecBuffer::new(&mut context);
|
||||
|
||||
write!(
|
||||
|
|
|
@ -9,7 +9,9 @@ use Tag::*;
|
|||
use crate::format_element::tag::{Condition, Tag};
|
||||
use crate::prelude::tag::{DedentMode, GroupMode, LabelId};
|
||||
use crate::prelude::*;
|
||||
use crate::{format_element, write, Argument, Arguments, FormatContext, GroupId, TextSize};
|
||||
use crate::{
|
||||
format_element, write, Argument, Arguments, FormatContext, FormatOptions, GroupId, TextSize,
|
||||
};
|
||||
use crate::{Buffer, VecBuffer};
|
||||
|
||||
/// A line break that only gets printed if the enclosing `Group` doesn't fit on a single line.
|
||||
|
@ -348,7 +350,10 @@ pub struct Text<'a> {
|
|||
position: Option<TextSize>,
|
||||
}
|
||||
|
||||
impl<Context> Format<Context> for Text<'_> {
|
||||
impl<Context> Format<Context> for Text<'_>
|
||||
where
|
||||
Context: FormatContext,
|
||||
{
|
||||
fn fmt(&self, f: &mut Formatter<Context>) -> FormatResult<()> {
|
||||
if let Some(source_position) = self.position {
|
||||
f.write_element(FormatElement::SourcePosition(source_position));
|
||||
|
@ -356,6 +361,7 @@ impl<Context> Format<Context> for Text<'_> {
|
|||
|
||||
f.write_element(FormatElement::Text {
|
||||
text: self.text.to_string().into_boxed_str(),
|
||||
text_width: TextWidth::from_text(self.text, f.options().tab_width()),
|
||||
});
|
||||
|
||||
Ok(())
|
||||
|
@ -369,31 +375,13 @@ impl std::fmt::Debug for Text<'_> {
|
|||
}
|
||||
|
||||
/// Emits a text as it is written in the source document. Optimized to avoid allocations.
|
||||
pub const fn source_text_slice(
|
||||
range: TextRange,
|
||||
newlines: ContainsNewlines,
|
||||
) -> SourceTextSliceBuilder {
|
||||
SourceTextSliceBuilder {
|
||||
range,
|
||||
new_lines: newlines,
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Copy, Clone, Eq, PartialEq, Debug)]
|
||||
pub enum ContainsNewlines {
|
||||
/// The string contains newline characters
|
||||
Yes,
|
||||
/// The string contains no newline characters
|
||||
No,
|
||||
|
||||
/// The string may contain newline characters, search the string to determine if there are any newlines.
|
||||
Detect,
|
||||
pub const fn source_text_slice(range: TextRange) -> SourceTextSliceBuilder {
|
||||
SourceTextSliceBuilder { range }
|
||||
}
|
||||
|
||||
#[derive(Eq, PartialEq, Debug)]
|
||||
pub struct SourceTextSliceBuilder {
|
||||
range: TextRange,
|
||||
new_lines: ContainsNewlines,
|
||||
}
|
||||
|
||||
impl<Context> Format<Context> for SourceTextSliceBuilder
|
||||
|
@ -405,28 +393,10 @@ where
|
|||
let slice = source_code.slice(self.range);
|
||||
debug_assert_no_newlines(slice.text(source_code));
|
||||
|
||||
let contains_newlines = match self.new_lines {
|
||||
ContainsNewlines::Yes => {
|
||||
debug_assert!(
|
||||
slice.text(source_code).contains('\n'),
|
||||
"Text contains no new line characters but the caller specified that it does."
|
||||
);
|
||||
true
|
||||
}
|
||||
ContainsNewlines::No => {
|
||||
debug_assert!(
|
||||
!slice.text(source_code).contains('\n'),
|
||||
"Text contains new line characters but the caller specified that it does not."
|
||||
);
|
||||
false
|
||||
}
|
||||
ContainsNewlines::Detect => slice.text(source_code).contains('\n'),
|
||||
};
|
||||
let text_width =
|
||||
TextWidth::from_text(slice.text(source_code), f.context().options().tab_width());
|
||||
|
||||
f.write_element(FormatElement::SourceCodeSlice {
|
||||
slice,
|
||||
contains_newlines,
|
||||
});
|
||||
f.write_element(FormatElement::SourceCodeSlice { slice, text_width });
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
|
|
@ -3,12 +3,14 @@ pub mod tag;
|
|||
|
||||
use std::borrow::Cow;
|
||||
use std::hash::{Hash, Hasher};
|
||||
use std::num::NonZeroU32;
|
||||
use std::ops::Deref;
|
||||
use std::rc::Rc;
|
||||
use unicode_width::UnicodeWidthChar;
|
||||
|
||||
use crate::format_element::tag::{GroupMode, LabelId, Tag};
|
||||
use crate::source_code::SourceCodeSlice;
|
||||
use crate::TagKind;
|
||||
use crate::{TabWidth, TagKind};
|
||||
use ruff_text_size::TextSize;
|
||||
|
||||
/// Language agnostic IR for formatting source code.
|
||||
|
@ -37,13 +39,13 @@ pub enum FormatElement {
|
|||
Text {
|
||||
/// There's no need for the text to be mutable, using `Box<str>` safes 8 bytes over `String`.
|
||||
text: Box<str>,
|
||||
text_width: TextWidth,
|
||||
},
|
||||
|
||||
/// Text that gets emitted as it is in the source code. Optimized to avoid any allocations.
|
||||
SourceCodeSlice {
|
||||
slice: SourceCodeSlice,
|
||||
/// Whether the string contains any new line characters
|
||||
contains_newlines: bool,
|
||||
text_width: TextWidth,
|
||||
},
|
||||
|
||||
/// Prevents that line suffixes move past this boundary. Forces the printer to print any pending
|
||||
|
@ -73,13 +75,10 @@ impl std::fmt::Debug for FormatElement {
|
|||
FormatElement::ExpandParent => write!(fmt, "ExpandParent"),
|
||||
FormatElement::Token { text } => fmt.debug_tuple("Token").field(text).finish(),
|
||||
FormatElement::Text { text, .. } => fmt.debug_tuple("DynamicText").field(text).finish(),
|
||||
FormatElement::SourceCodeSlice {
|
||||
slice,
|
||||
contains_newlines,
|
||||
} => fmt
|
||||
FormatElement::SourceCodeSlice { slice, text_width } => fmt
|
||||
.debug_tuple("Text")
|
||||
.field(slice)
|
||||
.field(contains_newlines)
|
||||
.field(text_width)
|
||||
.finish(),
|
||||
FormatElement::LineSuffixBoundary => write!(fmt, "LineSuffixBoundary"),
|
||||
FormatElement::BestFitting { variants, mode } => fmt
|
||||
|
@ -255,11 +254,8 @@ impl FormatElements for FormatElement {
|
|||
FormatElement::ExpandParent => true,
|
||||
FormatElement::Tag(Tag::StartGroup(group)) => !group.mode().is_flat(),
|
||||
FormatElement::Line(line_mode) => matches!(line_mode, LineMode::Hard | LineMode::Empty),
|
||||
|
||||
FormatElement::Text { text, .. } => text.contains('\n'),
|
||||
FormatElement::SourceCodeSlice {
|
||||
contains_newlines, ..
|
||||
} => *contains_newlines,
|
||||
FormatElement::Text { text_width, .. } => text_width.is_multiline(),
|
||||
FormatElement::SourceCodeSlice { text_width, .. } => text_width.is_multiline(),
|
||||
FormatElement::Interned(interned) => interned.will_break(),
|
||||
// Traverse into the most flat version because the content is guaranteed to expand when even
|
||||
// the most flat version contains some content that forces a break.
|
||||
|
@ -403,6 +399,67 @@ pub trait FormatElements {
|
|||
fn end_tag(&self, kind: TagKind) -> Option<&Tag>;
|
||||
}
|
||||
|
||||
/// New-type wrapper for a single-line text unicode width.
|
||||
/// Mainly to prevent access to the inner value.
|
||||
///
|
||||
/// ## Representation
|
||||
///
|
||||
/// Represents the width by adding 1 to the actual width so that the width can be represented by a [`NonZeroU32`],
|
||||
/// allowing [`TextWidth`] or [`Option<Width>`] fit in 4 bytes rather than 8.
|
||||
///
|
||||
/// This means that 2^32 can not be precisely represented and instead has the same value as 2^32-1.
|
||||
/// This imprecision shouldn't matter in practice because either text are longer than any configured line width
|
||||
/// and thus, the text should break.
|
||||
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
|
||||
pub struct Width(NonZeroU32);
|
||||
|
||||
impl Width {
|
||||
pub(crate) const fn new(width: u32) -> Self {
|
||||
Width(NonZeroU32::MIN.saturating_add(width))
|
||||
}
|
||||
|
||||
pub const fn value(self) -> u32 {
|
||||
self.0.get() - 1
|
||||
}
|
||||
}
|
||||
|
||||
/// The pre-computed unicode width of a text if it is a single-line text or a marker
|
||||
/// that it is a multiline text if it contains a line feed.
|
||||
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
|
||||
pub enum TextWidth {
|
||||
Width(Width),
|
||||
Multiline,
|
||||
}
|
||||
|
||||
impl TextWidth {
|
||||
pub fn from_text(text: &str, tab_width: TabWidth) -> TextWidth {
|
||||
let mut width = 0u32;
|
||||
|
||||
for c in text.chars() {
|
||||
let char_width = match c {
|
||||
'\t' => tab_width.value(),
|
||||
'\n' => return TextWidth::Multiline,
|
||||
#[allow(clippy::cast_possible_truncation)]
|
||||
c => c.width().unwrap_or(0) as u32,
|
||||
};
|
||||
width += char_width;
|
||||
}
|
||||
|
||||
Self::Width(Width::new(width))
|
||||
}
|
||||
|
||||
pub const fn width(self) -> Option<Width> {
|
||||
match self {
|
||||
TextWidth::Width(width) => Some(width),
|
||||
TextWidth::Multiline => None,
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) const fn is_multiline(self) -> bool {
|
||||
matches!(self, TextWidth::Multiline)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
|
||||
|
@ -430,19 +487,21 @@ mod sizes {
|
|||
// be recomputed at a later point in time?
|
||||
// You reduced the size of a format element? Excellent work!
|
||||
|
||||
use super::{BestFittingVariants, Interned, TextWidth};
|
||||
use static_assertions::assert_eq_size;
|
||||
|
||||
assert_eq_size!(ruff_text_size::TextRange, [u8; 8]);
|
||||
assert_eq_size!(crate::prelude::tag::VerbatimKind, [u8; 8]);
|
||||
assert_eq_size!(crate::prelude::Interned, [u8; 16]);
|
||||
assert_eq_size!(crate::format_element::BestFittingVariants, [u8; 16]);
|
||||
assert_eq_size!(TextWidth, [u8; 4]);
|
||||
assert_eq_size!(super::tag::VerbatimKind, [u8; 8]);
|
||||
assert_eq_size!(Interned, [u8; 16]);
|
||||
assert_eq_size!(BestFittingVariants, [u8; 16]);
|
||||
|
||||
#[cfg(not(debug_assertions))]
|
||||
assert_eq_size!(crate::SourceCodeSlice, [u8; 8]);
|
||||
|
||||
#[cfg(not(debug_assertions))]
|
||||
assert_eq_size!(crate::format_element::Tag, [u8; 16]);
|
||||
assert_eq_size!(super::Tag, [u8; 16]);
|
||||
|
||||
#[cfg(not(debug_assertions))]
|
||||
assert_eq_size!(crate::FormatElement, [u8; 24]);
|
||||
assert_eq_size!(super::FormatElement, [u8; 24]);
|
||||
}
|
||||
|
|
|
@ -104,10 +104,11 @@ impl Document {
|
|||
expands = false;
|
||||
continue;
|
||||
}
|
||||
FormatElement::Text { text, .. } => text.contains('\n'),
|
||||
FormatElement::SourceCodeSlice {
|
||||
contains_newlines, ..
|
||||
} => *contains_newlines,
|
||||
FormatElement::Text {
|
||||
text: _,
|
||||
text_width,
|
||||
} => text_width.is_multiline(),
|
||||
FormatElement::SourceCodeSlice { text_width, .. } => text_width.is_multiline(),
|
||||
FormatElement::ExpandParent
|
||||
| FormatElement::Line(LineMode::Hard | LineMode::Empty) => true,
|
||||
_ => false,
|
||||
|
@ -259,11 +260,16 @@ impl Format<IrFormatContext<'_>> for &[FormatElement] {
|
|||
| FormatElement::Text { .. }
|
||||
| FormatElement::SourceCodeSlice { .. }) => {
|
||||
fn write_escaped(element: &FormatElement, f: &mut Formatter<IrFormatContext>) {
|
||||
let text = match element {
|
||||
FormatElement::Token { text } => text,
|
||||
FormatElement::Text { text } => text.as_ref(),
|
||||
FormatElement::SourceCodeSlice { slice, .. } => {
|
||||
slice.text(f.context().source_code())
|
||||
let (text, text_width) = match element {
|
||||
#[allow(clippy::cast_possible_truncation)]
|
||||
FormatElement::Token { text } => {
|
||||
(*text, TextWidth::Width(Width::new(text.len() as u32)))
|
||||
}
|
||||
FormatElement::Text { text, text_width } => {
|
||||
(text.as_ref(), *text_width)
|
||||
}
|
||||
FormatElement::SourceCodeSlice { slice, text_width } => {
|
||||
(slice.text(f.context().source_code()), *text_width)
|
||||
}
|
||||
_ => unreachable!(),
|
||||
};
|
||||
|
@ -271,6 +277,7 @@ impl Format<IrFormatContext<'_>> for &[FormatElement] {
|
|||
if text.contains('"') {
|
||||
f.write_element(FormatElement::Text {
|
||||
text: text.replace('"', r#"\""#).into(),
|
||||
text_width,
|
||||
});
|
||||
} else {
|
||||
f.write_element(element.clone());
|
||||
|
@ -854,15 +861,9 @@ mod tests {
|
|||
[group(&format_args![
|
||||
token("("),
|
||||
soft_block_indent(&format_args![
|
||||
source_text_slice(
|
||||
TextRange::at(TextSize::new(0), TextSize::new(19)),
|
||||
ContainsNewlines::No
|
||||
),
|
||||
source_text_slice(TextRange::at(TextSize::new(0), TextSize::new(19)),),
|
||||
space(),
|
||||
source_text_slice(
|
||||
TextRange::at(TextSize::new(20), TextSize::new(28)),
|
||||
ContainsNewlines::No
|
||||
),
|
||||
source_text_slice(TextRange::at(TextSize::new(20), TextSize::new(28)),),
|
||||
])
|
||||
])]
|
||||
)
|
||||
|
|
|
@ -343,15 +343,15 @@ mod tests {
|
|||
|
||||
struct TestFormat;
|
||||
|
||||
impl Format<()> for TestFormat {
|
||||
fn fmt(&self, f: &mut Formatter<()>) -> FormatResult<()> {
|
||||
impl Format<SimpleFormatContext> for TestFormat {
|
||||
fn fmt(&self, f: &mut Formatter<SimpleFormatContext>) -> FormatResult<()> {
|
||||
write!(f, [token("test")])
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_single_element() {
|
||||
let mut state = FormatState::new(());
|
||||
let mut state = FormatState::new(SimpleFormatContext::default());
|
||||
let mut buffer = VecBuffer::new(&mut state);
|
||||
|
||||
write![&mut buffer, [TestFormat]].unwrap();
|
||||
|
@ -364,7 +364,7 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn test_multiple_elements() {
|
||||
let mut state = FormatState::new(());
|
||||
let mut state = FormatState::new(SimpleFormatContext::default());
|
||||
let mut buffer = VecBuffer::new(&mut state);
|
||||
|
||||
write![
|
||||
|
|
|
@ -9,8 +9,8 @@ use ruff_text_size::{Ranged, TextLen, TextSize};
|
|||
use crate::format_element::document::Document;
|
||||
use crate::format_element::tag::{Condition, GroupMode};
|
||||
use crate::format_element::{BestFittingMode, BestFittingVariants, LineMode, PrintMode};
|
||||
use crate::prelude::tag;
|
||||
use crate::prelude::tag::{DedentMode, Tag, TagKind, VerbatimKind};
|
||||
use crate::prelude::{tag, TextWidth};
|
||||
use crate::printer::call_stack::{
|
||||
CallStack, FitsCallStack, PrintCallStack, PrintElementArgs, StackFrame,
|
||||
};
|
||||
|
@ -96,10 +96,22 @@ impl<'a> Printer<'a> {
|
|||
match element {
|
||||
FormatElement::Space => self.print_text(Text::Token(" "), None),
|
||||
FormatElement::Token { text } => self.print_text(Text::Token(text), None),
|
||||
FormatElement::Text { text } => self.print_text(Text::Text(text), None),
|
||||
FormatElement::SourceCodeSlice { slice, .. } => {
|
||||
FormatElement::Text { text, text_width } => self.print_text(
|
||||
Text::Text {
|
||||
text,
|
||||
text_width: *text_width,
|
||||
},
|
||||
None,
|
||||
),
|
||||
FormatElement::SourceCodeSlice { slice, text_width } => {
|
||||
let text = slice.text(self.source_code);
|
||||
self.print_text(Text::Text(text), Some(slice.range()));
|
||||
self.print_text(
|
||||
Text::Text {
|
||||
text,
|
||||
text_width: *text_width,
|
||||
},
|
||||
Some(slice.range()),
|
||||
);
|
||||
}
|
||||
FormatElement::Line(line_mode) => {
|
||||
if args.mode().is_flat()
|
||||
|
@ -395,9 +407,17 @@ impl<'a> Printer<'a> {
|
|||
self.state.buffer.push_str(token);
|
||||
self.state.line_width += token.len() as u32;
|
||||
}
|
||||
Text::Text(text) => {
|
||||
for char in text.chars() {
|
||||
self.print_char(char);
|
||||
Text::Text {
|
||||
text,
|
||||
text_width: width,
|
||||
} => {
|
||||
if let Some(width) = width.width() {
|
||||
self.state.buffer.push_str(text);
|
||||
self.state.line_width += width.value();
|
||||
} else {
|
||||
for char in text.chars() {
|
||||
self.print_char(char);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1086,10 +1106,24 @@ impl<'a, 'print> FitsMeasurer<'a, 'print> {
|
|||
}
|
||||
|
||||
FormatElement::Token { text } => return Ok(self.fits_text(Text::Token(text), args)),
|
||||
FormatElement::Text { text, .. } => return Ok(self.fits_text(Text::Text(text), args)),
|
||||
FormatElement::SourceCodeSlice { slice, .. } => {
|
||||
FormatElement::Text { text, text_width } => {
|
||||
return Ok(self.fits_text(
|
||||
Text::Text {
|
||||
text,
|
||||
text_width: *text_width,
|
||||
},
|
||||
args,
|
||||
))
|
||||
}
|
||||
FormatElement::SourceCodeSlice { slice, text_width } => {
|
||||
let text = slice.text(self.printer.source_code);
|
||||
return Ok(self.fits_text(Text::Text(text), args));
|
||||
return Ok(self.fits_text(
|
||||
Text::Text {
|
||||
text,
|
||||
text_width: *text_width,
|
||||
},
|
||||
args,
|
||||
));
|
||||
}
|
||||
FormatElement::LineSuffixBoundary => {
|
||||
if self.state.has_line_suffix {
|
||||
|
@ -1307,27 +1341,31 @@ impl<'a, 'print> FitsMeasurer<'a, 'print> {
|
|||
Text::Token(token) => {
|
||||
self.state.line_width += token.len() as u32;
|
||||
}
|
||||
Text::Text(text) => {
|
||||
for c in text.chars() {
|
||||
let char_width = match c {
|
||||
'\t' => self.options().tab_width.value(),
|
||||
'\n' => {
|
||||
if self.must_be_flat {
|
||||
return Fits::No;
|
||||
}
|
||||
match args.measure_mode() {
|
||||
MeasureMode::FirstLine => return Fits::Yes,
|
||||
MeasureMode::AllLines => {
|
||||
self.state.line_width = 0;
|
||||
continue;
|
||||
Text::Text { text, text_width } => {
|
||||
if let Some(width) = text_width.width() {
|
||||
self.state.line_width += width.value();
|
||||
} else {
|
||||
for c in text.chars() {
|
||||
let char_width = match c {
|
||||
'\t' => self.options().tab_width.value(),
|
||||
'\n' => {
|
||||
if self.must_be_flat {
|
||||
return Fits::No;
|
||||
}
|
||||
match args.measure_mode() {
|
||||
MeasureMode::FirstLine => return Fits::Yes,
|
||||
MeasureMode::AllLines => {
|
||||
self.state.line_width = 0;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// SAFETY: A u32 is sufficient to format files <= 4GB
|
||||
#[allow(clippy::cast_possible_truncation)]
|
||||
c => c.width().unwrap_or(0) as u32,
|
||||
};
|
||||
self.state.line_width += char_width;
|
||||
// SAFETY: A u32 is sufficient to format files <= 4GB
|
||||
#[allow(clippy::cast_possible_truncation)]
|
||||
c => c.width().unwrap_or(0) as u32,
|
||||
};
|
||||
self.state.line_width += char_width;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1451,7 +1489,10 @@ enum Text<'a> {
|
|||
/// ASCII only text that contains no line breaks or tab characters.
|
||||
Token(&'a str),
|
||||
/// Arbitrary text. May contain `\n` line breaks, tab characters, or unicode characters.
|
||||
Text(&'a str),
|
||||
Text {
|
||||
text: &'a str,
|
||||
text_width: TextWidth,
|
||||
},
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
@ -1669,7 +1710,7 @@ two lines`,
|
|||
|
||||
#[test]
|
||||
fn test_fill_breaks() {
|
||||
let mut state = FormatState::new(());
|
||||
let mut state = FormatState::new(SimpleFormatContext::default());
|
||||
let mut buffer = VecBuffer::new(&mut state);
|
||||
let mut formatter = Formatter::new(&mut buffer);
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue