Memoize text width (#6552)

This commit is contained in:
Micha Reiser 2023-09-06 09:10:13 +02:00 committed by GitHub
parent fa6bff0078
commit 5f59101811
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
14 changed files with 213 additions and 184 deletions

View file

@ -129,7 +129,7 @@ mod tests {
#[test]
fn test_nesting() {
let mut context = FormatState::new(());
let mut context = FormatState::new(SimpleFormatContext::default());
let mut buffer = VecBuffer::new(&mut context);
write!(

View file

@ -9,7 +9,9 @@ use Tag::*;
use crate::format_element::tag::{Condition, Tag};
use crate::prelude::tag::{DedentMode, GroupMode, LabelId};
use crate::prelude::*;
use crate::{format_element, write, Argument, Arguments, FormatContext, GroupId, TextSize};
use crate::{
format_element, write, Argument, Arguments, FormatContext, FormatOptions, GroupId, TextSize,
};
use crate::{Buffer, VecBuffer};
/// A line break that only gets printed if the enclosing `Group` doesn't fit on a single line.
@ -348,7 +350,10 @@ pub struct Text<'a> {
position: Option<TextSize>,
}
impl<Context> Format<Context> for Text<'_> {
impl<Context> Format<Context> for Text<'_>
where
Context: FormatContext,
{
fn fmt(&self, f: &mut Formatter<Context>) -> FormatResult<()> {
if let Some(source_position) = self.position {
f.write_element(FormatElement::SourcePosition(source_position));
@ -356,6 +361,7 @@ impl<Context> Format<Context> for Text<'_> {
f.write_element(FormatElement::Text {
text: self.text.to_string().into_boxed_str(),
text_width: TextWidth::from_text(self.text, f.options().tab_width()),
});
Ok(())
@ -369,31 +375,13 @@ impl std::fmt::Debug for Text<'_> {
}
/// Emits a text as it is written in the source document. Optimized to avoid allocations.
pub const fn source_text_slice(
range: TextRange,
newlines: ContainsNewlines,
) -> SourceTextSliceBuilder {
SourceTextSliceBuilder {
range,
new_lines: newlines,
}
}
#[derive(Copy, Clone, Eq, PartialEq, Debug)]
pub enum ContainsNewlines {
/// The string contains newline characters
Yes,
/// The string contains no newline characters
No,
/// The string may contain newline characters, search the string to determine if there are any newlines.
Detect,
pub const fn source_text_slice(range: TextRange) -> SourceTextSliceBuilder {
SourceTextSliceBuilder { range }
}
#[derive(Eq, PartialEq, Debug)]
pub struct SourceTextSliceBuilder {
range: TextRange,
new_lines: ContainsNewlines,
}
impl<Context> Format<Context> for SourceTextSliceBuilder
@ -405,28 +393,10 @@ where
let slice = source_code.slice(self.range);
debug_assert_no_newlines(slice.text(source_code));
let contains_newlines = match self.new_lines {
ContainsNewlines::Yes => {
debug_assert!(
slice.text(source_code).contains('\n'),
"Text contains no new line characters but the caller specified that it does."
);
true
}
ContainsNewlines::No => {
debug_assert!(
!slice.text(source_code).contains('\n'),
"Text contains new line characters but the caller specified that it does not."
);
false
}
ContainsNewlines::Detect => slice.text(source_code).contains('\n'),
};
let text_width =
TextWidth::from_text(slice.text(source_code), f.context().options().tab_width());
f.write_element(FormatElement::SourceCodeSlice {
slice,
contains_newlines,
});
f.write_element(FormatElement::SourceCodeSlice { slice, text_width });
Ok(())
}

View file

@ -3,12 +3,14 @@ pub mod tag;
use std::borrow::Cow;
use std::hash::{Hash, Hasher};
use std::num::NonZeroU32;
use std::ops::Deref;
use std::rc::Rc;
use unicode_width::UnicodeWidthChar;
use crate::format_element::tag::{GroupMode, LabelId, Tag};
use crate::source_code::SourceCodeSlice;
use crate::TagKind;
use crate::{TabWidth, TagKind};
use ruff_text_size::TextSize;
/// Language agnostic IR for formatting source code.
@ -37,13 +39,13 @@ pub enum FormatElement {
Text {
/// There's no need for the text to be mutable, using `Box<str>` safes 8 bytes over `String`.
text: Box<str>,
text_width: TextWidth,
},
/// Text that gets emitted as it is in the source code. Optimized to avoid any allocations.
SourceCodeSlice {
slice: SourceCodeSlice,
/// Whether the string contains any new line characters
contains_newlines: bool,
text_width: TextWidth,
},
/// Prevents that line suffixes move past this boundary. Forces the printer to print any pending
@ -73,13 +75,10 @@ impl std::fmt::Debug for FormatElement {
FormatElement::ExpandParent => write!(fmt, "ExpandParent"),
FormatElement::Token { text } => fmt.debug_tuple("Token").field(text).finish(),
FormatElement::Text { text, .. } => fmt.debug_tuple("DynamicText").field(text).finish(),
FormatElement::SourceCodeSlice {
slice,
contains_newlines,
} => fmt
FormatElement::SourceCodeSlice { slice, text_width } => fmt
.debug_tuple("Text")
.field(slice)
.field(contains_newlines)
.field(text_width)
.finish(),
FormatElement::LineSuffixBoundary => write!(fmt, "LineSuffixBoundary"),
FormatElement::BestFitting { variants, mode } => fmt
@ -255,11 +254,8 @@ impl FormatElements for FormatElement {
FormatElement::ExpandParent => true,
FormatElement::Tag(Tag::StartGroup(group)) => !group.mode().is_flat(),
FormatElement::Line(line_mode) => matches!(line_mode, LineMode::Hard | LineMode::Empty),
FormatElement::Text { text, .. } => text.contains('\n'),
FormatElement::SourceCodeSlice {
contains_newlines, ..
} => *contains_newlines,
FormatElement::Text { text_width, .. } => text_width.is_multiline(),
FormatElement::SourceCodeSlice { text_width, .. } => text_width.is_multiline(),
FormatElement::Interned(interned) => interned.will_break(),
// Traverse into the most flat version because the content is guaranteed to expand when even
// the most flat version contains some content that forces a break.
@ -403,6 +399,67 @@ pub trait FormatElements {
fn end_tag(&self, kind: TagKind) -> Option<&Tag>;
}
/// New-type wrapper for a single-line text unicode width.
/// Mainly to prevent access to the inner value.
///
/// ## Representation
///
/// Represents the width by adding 1 to the actual width so that the width can be represented by a [`NonZeroU32`],
/// allowing [`TextWidth`] or [`Option<Width>`] fit in 4 bytes rather than 8.
///
/// This means that 2^32 can not be precisely represented and instead has the same value as 2^32-1.
/// This imprecision shouldn't matter in practice because either text are longer than any configured line width
/// and thus, the text should break.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub struct Width(NonZeroU32);
impl Width {
pub(crate) const fn new(width: u32) -> Self {
Width(NonZeroU32::MIN.saturating_add(width))
}
pub const fn value(self) -> u32 {
self.0.get() - 1
}
}
/// The pre-computed unicode width of a text if it is a single-line text or a marker
/// that it is a multiline text if it contains a line feed.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub enum TextWidth {
Width(Width),
Multiline,
}
impl TextWidth {
pub fn from_text(text: &str, tab_width: TabWidth) -> TextWidth {
let mut width = 0u32;
for c in text.chars() {
let char_width = match c {
'\t' => tab_width.value(),
'\n' => return TextWidth::Multiline,
#[allow(clippy::cast_possible_truncation)]
c => c.width().unwrap_or(0) as u32,
};
width += char_width;
}
Self::Width(Width::new(width))
}
pub const fn width(self) -> Option<Width> {
match self {
TextWidth::Width(width) => Some(width),
TextWidth::Multiline => None,
}
}
pub(crate) const fn is_multiline(self) -> bool {
matches!(self, TextWidth::Multiline)
}
}
#[cfg(test)]
mod tests {
@ -430,19 +487,21 @@ mod sizes {
// be recomputed at a later point in time?
// You reduced the size of a format element? Excellent work!
use super::{BestFittingVariants, Interned, TextWidth};
use static_assertions::assert_eq_size;
assert_eq_size!(ruff_text_size::TextRange, [u8; 8]);
assert_eq_size!(crate::prelude::tag::VerbatimKind, [u8; 8]);
assert_eq_size!(crate::prelude::Interned, [u8; 16]);
assert_eq_size!(crate::format_element::BestFittingVariants, [u8; 16]);
assert_eq_size!(TextWidth, [u8; 4]);
assert_eq_size!(super::tag::VerbatimKind, [u8; 8]);
assert_eq_size!(Interned, [u8; 16]);
assert_eq_size!(BestFittingVariants, [u8; 16]);
#[cfg(not(debug_assertions))]
assert_eq_size!(crate::SourceCodeSlice, [u8; 8]);
#[cfg(not(debug_assertions))]
assert_eq_size!(crate::format_element::Tag, [u8; 16]);
assert_eq_size!(super::Tag, [u8; 16]);
#[cfg(not(debug_assertions))]
assert_eq_size!(crate::FormatElement, [u8; 24]);
assert_eq_size!(super::FormatElement, [u8; 24]);
}

View file

@ -104,10 +104,11 @@ impl Document {
expands = false;
continue;
}
FormatElement::Text { text, .. } => text.contains('\n'),
FormatElement::SourceCodeSlice {
contains_newlines, ..
} => *contains_newlines,
FormatElement::Text {
text: _,
text_width,
} => text_width.is_multiline(),
FormatElement::SourceCodeSlice { text_width, .. } => text_width.is_multiline(),
FormatElement::ExpandParent
| FormatElement::Line(LineMode::Hard | LineMode::Empty) => true,
_ => false,
@ -259,11 +260,16 @@ impl Format<IrFormatContext<'_>> for &[FormatElement] {
| FormatElement::Text { .. }
| FormatElement::SourceCodeSlice { .. }) => {
fn write_escaped(element: &FormatElement, f: &mut Formatter<IrFormatContext>) {
let text = match element {
FormatElement::Token { text } => text,
FormatElement::Text { text } => text.as_ref(),
FormatElement::SourceCodeSlice { slice, .. } => {
slice.text(f.context().source_code())
let (text, text_width) = match element {
#[allow(clippy::cast_possible_truncation)]
FormatElement::Token { text } => {
(*text, TextWidth::Width(Width::new(text.len() as u32)))
}
FormatElement::Text { text, text_width } => {
(text.as_ref(), *text_width)
}
FormatElement::SourceCodeSlice { slice, text_width } => {
(slice.text(f.context().source_code()), *text_width)
}
_ => unreachable!(),
};
@ -271,6 +277,7 @@ impl Format<IrFormatContext<'_>> for &[FormatElement] {
if text.contains('"') {
f.write_element(FormatElement::Text {
text: text.replace('"', r#"\""#).into(),
text_width,
});
} else {
f.write_element(element.clone());
@ -854,15 +861,9 @@ mod tests {
[group(&format_args![
token("("),
soft_block_indent(&format_args![
source_text_slice(
TextRange::at(TextSize::new(0), TextSize::new(19)),
ContainsNewlines::No
),
source_text_slice(TextRange::at(TextSize::new(0), TextSize::new(19)),),
space(),
source_text_slice(
TextRange::at(TextSize::new(20), TextSize::new(28)),
ContainsNewlines::No
),
source_text_slice(TextRange::at(TextSize::new(20), TextSize::new(28)),),
])
])]
)

View file

@ -343,15 +343,15 @@ mod tests {
struct TestFormat;
impl Format<()> for TestFormat {
fn fmt(&self, f: &mut Formatter<()>) -> FormatResult<()> {
impl Format<SimpleFormatContext> for TestFormat {
fn fmt(&self, f: &mut Formatter<SimpleFormatContext>) -> FormatResult<()> {
write!(f, [token("test")])
}
}
#[test]
fn test_single_element() {
let mut state = FormatState::new(());
let mut state = FormatState::new(SimpleFormatContext::default());
let mut buffer = VecBuffer::new(&mut state);
write![&mut buffer, [TestFormat]].unwrap();
@ -364,7 +364,7 @@ mod tests {
#[test]
fn test_multiple_elements() {
let mut state = FormatState::new(());
let mut state = FormatState::new(SimpleFormatContext::default());
let mut buffer = VecBuffer::new(&mut state);
write![

View file

@ -9,8 +9,8 @@ use ruff_text_size::{Ranged, TextLen, TextSize};
use crate::format_element::document::Document;
use crate::format_element::tag::{Condition, GroupMode};
use crate::format_element::{BestFittingMode, BestFittingVariants, LineMode, PrintMode};
use crate::prelude::tag;
use crate::prelude::tag::{DedentMode, Tag, TagKind, VerbatimKind};
use crate::prelude::{tag, TextWidth};
use crate::printer::call_stack::{
CallStack, FitsCallStack, PrintCallStack, PrintElementArgs, StackFrame,
};
@ -96,10 +96,22 @@ impl<'a> Printer<'a> {
match element {
FormatElement::Space => self.print_text(Text::Token(" "), None),
FormatElement::Token { text } => self.print_text(Text::Token(text), None),
FormatElement::Text { text } => self.print_text(Text::Text(text), None),
FormatElement::SourceCodeSlice { slice, .. } => {
FormatElement::Text { text, text_width } => self.print_text(
Text::Text {
text,
text_width: *text_width,
},
None,
),
FormatElement::SourceCodeSlice { slice, text_width } => {
let text = slice.text(self.source_code);
self.print_text(Text::Text(text), Some(slice.range()));
self.print_text(
Text::Text {
text,
text_width: *text_width,
},
Some(slice.range()),
);
}
FormatElement::Line(line_mode) => {
if args.mode().is_flat()
@ -395,9 +407,17 @@ impl<'a> Printer<'a> {
self.state.buffer.push_str(token);
self.state.line_width += token.len() as u32;
}
Text::Text(text) => {
for char in text.chars() {
self.print_char(char);
Text::Text {
text,
text_width: width,
} => {
if let Some(width) = width.width() {
self.state.buffer.push_str(text);
self.state.line_width += width.value();
} else {
for char in text.chars() {
self.print_char(char);
}
}
}
}
@ -1086,10 +1106,24 @@ impl<'a, 'print> FitsMeasurer<'a, 'print> {
}
FormatElement::Token { text } => return Ok(self.fits_text(Text::Token(text), args)),
FormatElement::Text { text, .. } => return Ok(self.fits_text(Text::Text(text), args)),
FormatElement::SourceCodeSlice { slice, .. } => {
FormatElement::Text { text, text_width } => {
return Ok(self.fits_text(
Text::Text {
text,
text_width: *text_width,
},
args,
))
}
FormatElement::SourceCodeSlice { slice, text_width } => {
let text = slice.text(self.printer.source_code);
return Ok(self.fits_text(Text::Text(text), args));
return Ok(self.fits_text(
Text::Text {
text,
text_width: *text_width,
},
args,
));
}
FormatElement::LineSuffixBoundary => {
if self.state.has_line_suffix {
@ -1307,27 +1341,31 @@ impl<'a, 'print> FitsMeasurer<'a, 'print> {
Text::Token(token) => {
self.state.line_width += token.len() as u32;
}
Text::Text(text) => {
for c in text.chars() {
let char_width = match c {
'\t' => self.options().tab_width.value(),
'\n' => {
if self.must_be_flat {
return Fits::No;
}
match args.measure_mode() {
MeasureMode::FirstLine => return Fits::Yes,
MeasureMode::AllLines => {
self.state.line_width = 0;
continue;
Text::Text { text, text_width } => {
if let Some(width) = text_width.width() {
self.state.line_width += width.value();
} else {
for c in text.chars() {
let char_width = match c {
'\t' => self.options().tab_width.value(),
'\n' => {
if self.must_be_flat {
return Fits::No;
}
match args.measure_mode() {
MeasureMode::FirstLine => return Fits::Yes,
MeasureMode::AllLines => {
self.state.line_width = 0;
continue;
}
}
}
}
// SAFETY: A u32 is sufficient to format files <= 4GB
#[allow(clippy::cast_possible_truncation)]
c => c.width().unwrap_or(0) as u32,
};
self.state.line_width += char_width;
// SAFETY: A u32 is sufficient to format files <= 4GB
#[allow(clippy::cast_possible_truncation)]
c => c.width().unwrap_or(0) as u32,
};
self.state.line_width += char_width;
}
}
}
}
@ -1451,7 +1489,10 @@ enum Text<'a> {
/// ASCII only text that contains no line breaks or tab characters.
Token(&'a str),
/// Arbitrary text. May contain `\n` line breaks, tab characters, or unicode characters.
Text(&'a str),
Text {
text: &'a str,
text_width: TextWidth,
},
}
#[cfg(test)]
@ -1669,7 +1710,7 @@ two lines`,
#[test]
fn test_fill_breaks() {
let mut state = FormatState::new(());
let mut state = FormatState::new(SimpleFormatContext::default());
let mut buffer = VecBuffer::new(&mut state);
let mut formatter = Formatter::new(&mut buffer);