Add rome_formatter fork as ruff_formatter (#2872)

The Ruff autoformatter is going to be based on an intermediate representation (IR) formatted via [Wadler's algorithm](https://homepages.inf.ed.ac.uk/wadler/papers/prettier/prettier.pdf). This is architecturally similar to [Rome](https://github.com/rome/tools), Prettier, [Skip](https://github.com/skiplang/skip/blob/master/src/tools/printer/printer.sk), and others.

This PR adds a fork of the `rome_formatter` crate from [Rome](https://github.com/rome/tools), renamed here to `ruff_formatter`, which provides generic definitions for a formatter IR as well as a generic IR printer. (We've also pulled in `rome_rowan`, `rome_text_size`, and `rome_text_edit`, though some of these will be removed in future PRs.)

Why fork? `rome_formatter` contains code that's specific to Rome's AST representation (e.g., it relies on a fork of rust-analyzer's `rowan`), and we'll likely want to support different abstractions and formatting capabilities (there are already a few changes coming in future PRs). Once we've dropped `ruff_rowan` and trimmed down `ruff_formatter` to the code we currently need, it's also not a huge surface area to maintain and update.
This commit is contained in:
Charlie Marsh 2023-02-14 19:22:55 -05:00 committed by GitHub
parent ac028cd9f8
commit 3ef1c2e303
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
83 changed files with 27547 additions and 1 deletions

View file

@ -0,0 +1,160 @@
use super::{Buffer, Format, Formatter};
use crate::FormatResult;
use std::ffi::c_void;
use std::marker::PhantomData;
/// Monomorphized handle used to format a single object. Used by the [crate::format!],
/// [crate::format_args!], and [crate::write!] macros.
///
/// This struct is similar to dynamic dispatch (using `dyn Format`) because it stores a type-erased
/// pointer to the value. However, it doesn't store a pointer to `dyn Format`'s vtable; instead it
/// statically resolves the function pointer of `Format::fmt` for the concrete type and stores it
/// in `formatter`.
pub struct Argument<'fmt, Context> {
/// The value to format stored as a raw pointer where `lifetime` stores the value's lifetime.
value: *const c_void,
/// Stores the lifetime of the value so that the borrow checker keeps tracking the borrow even
/// though `value` itself is a raw pointer.
lifetime: PhantomData<&'fmt ()>,
/// The function pointer to `value`'s `Format::fmt` method
formatter: fn(*const c_void, &mut Formatter<'_, Context>) -> FormatResult<()>,
}
// `Clone` and `Copy` are written by hand rather than derived: a derive would add
// `Context: Clone` / `Context: Copy` bounds, although no `Context` value is stored in `Argument`.
impl<Context> Clone for Argument<'_, Context> {
fn clone(&self) -> Self {
// `Argument` is `Copy` (see below), so cloning is a plain bitwise copy.
*self
}
}
impl<Context> Copy for Argument<'_, Context> {}
impl<'fmt, Context> Argument<'fmt, Context> {
/// Called by the [ruff_formatter::format_args] macro. Creates a monomorphized value for formatting
/// an object.
///
/// Erases the type of `value` to a `*const c_void` and pairs it with a function pointer
/// instantiated for `F` that casts the pointer back to `&F` and calls `F::fmt` on it.
#[doc(hidden)]
#[inline]
pub fn new<F: Format<Context>>(value: &'fmt F) -> Self {
// Trampoline instantiated once per concrete `F`; this is what gets stored in `formatter`.
#[inline(always)]
fn formatter<F: Format<Context>, Context>(
ptr: *const c_void,
fmt: &mut Formatter<Context>,
) -> FormatResult<()> {
// SAFETY: `value` and `formatter` are only ever set together in the struct literal below,
// where the pointer is derived from a `&'fmt F` and this function is instantiated with that
// same `F`, so the cast restores the original type. The `'fmt` lifetime is captured by the
// `lifetime` field, which keeps the borrow of the value alive while the argument exists.
F::fmt(unsafe { &*(ptr as *const F) }, fmt)
}
Self {
value: value as *const F as *const c_void,
lifetime: PhantomData,
formatter: formatter::<F, Context>,
}
}
/// Formats the value stored by this argument using the given formatter.
///
/// Calls the stored function pointer with the stored type-erased value pointer.
#[inline(always)]
pub(super) fn format(&self, f: &mut Formatter<Context>) -> FormatResult<()> {
(self.formatter)(self.value, f)
}
}
/// Sequence of objects that should be formatted in the specified order.
///
/// The [`format_args!`] macro will safely create an instance of this structure.
///
/// You can use the `Arguments` value that [`format_args!`] returns in a `Format` context as seen below.
/// Formatting it formats each of its objects in order.
///
/// ```rust
/// use ruff_formatter::prelude::*;
/// use ruff_formatter::{format, format_args};
///
/// # fn main() -> FormatResult<()> {
/// let formatted = format!(SimpleFormatContext::default(), [
/// format_args!(text("a"), space(), text("b"))
/// ])?;
///
/// assert_eq!("a b", formatted.print()?.as_code());
/// # Ok(())
/// # }
/// ```
pub struct Arguments<'fmt, Context>(pub &'fmt [Argument<'fmt, Context>]);
impl<'fmt, Context> Arguments<'fmt, Context> {
/// Wraps a borrowed slice of arguments without copying it.
#[doc(hidden)]
#[inline(always)]
pub fn new(arguments: &'fmt [Argument<'fmt, Context>]) -> Self {
Self(arguments)
}
/// Returns the wrapped slice of arguments, borrowed for the full `'fmt` lifetime.
#[inline]
pub(super) fn items(&self) -> &'fmt [Argument<'fmt, Context>] {
self.0
}
}
// Implemented manually so that no `Clone`/`Copy` bound is placed on `Context`; the wrapper
// only holds a shared slice reference.
impl<Context> Clone for Arguments<'_, Context> {
    fn clone(&self) -> Self {
        *self
    }
}
impl<Context> Copy for Arguments<'_, Context> {}
/// Formatting an `Arguments` value writes all of its arguments through the formatter's
/// `write_fmt`.
impl<Context> Format<Context> for Arguments<'_, Context> {
#[inline(always)]
fn fmt(&self, formatter: &mut Formatter<Context>) -> FormatResult<()> {
formatter.write_fmt(*self)
}
}
/// Opaque debug representation: the wrapped arguments are type-erased, so only a fixed
/// placeholder string is printed.
impl<Context> std::fmt::Debug for Arguments<'_, Context> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.write_str("Arguments[...]")
}
}
impl<'fmt, Context> From<&'fmt Argument<'fmt, Context>> for Arguments<'fmt, Context> {
fn from(argument: &'fmt Argument<'fmt, Context>) -> Self {
Arguments::new(std::slice::from_ref(argument))
}
}
#[cfg(test)]
mod tests {
use crate::format_element::tag::Tag;
use crate::prelude::*;
use crate::{format_args, write, FormatState, VecBuffer};
/// Writes a `format_args!` nested inside a `group` and checks that its elements end up inline
/// in the buffer, wrapped by the group's start and end tags.
#[test]
fn test_nesting() {
let mut context = FormatState::new(());
let mut buffer = VecBuffer::new(&mut context);
write!(
&mut buffer,
[
text("function"),
space(),
text("a"),
space(),
group(&format_args!(text("("), text(")")))
]
)
.unwrap();
assert_eq!(
buffer.into_vec(),
vec![
FormatElement::StaticText { text: "function" },
FormatElement::Space,
FormatElement::StaticText { text: "a" },
FormatElement::Space,
// Group
FormatElement::Tag(Tag::StartGroup(tag::Group::new())),
FormatElement::StaticText { text: "(" },
FormatElement::StaticText { text: ")" },
FormatElement::Tag(Tag::EndGroup)
]
);
}
}

View file

@ -0,0 +1,723 @@
use super::{write, Arguments, FormatElement};
use crate::format_element::Interned;
use crate::prelude::LineMode;
use crate::{Format, FormatResult, FormatState};
use rustc_hash::FxHashMap;
use std::any::{Any, TypeId};
use std::fmt::Debug;
use std::ops::{Deref, DerefMut};
/// A trait for writing or formatting into [FormatElement]-accepting buffers or streams.
pub trait Buffer {
/// The context used during formatting
type Context;
/// Writes a [crate::FormatElement] into this buffer, returning whether the write succeeded.
///
/// # Errors
/// This function will return an instance of [crate::FormatError] on error.
///
/// # Examples
///
/// ```
/// use ruff_formatter::{Buffer, FormatElement, FormatState, SimpleFormatContext, VecBuffer};
///
/// let mut state = FormatState::new(SimpleFormatContext::default());
/// let mut buffer = VecBuffer::new(&mut state);
///
/// buffer.write_element(FormatElement::StaticText { text: "test"}).unwrap();
///
/// assert_eq!(buffer.into_vec(), vec![FormatElement::StaticText { text: "test" }]);
/// ```
///
fn write_element(&mut self, element: FormatElement) -> FormatResult<()>;
/// Returns a slice containing all elements written into this buffer.
///
/// Prefer using [BufferExtensions::start_recording] over accessing [Buffer::elements] directly.
#[doc(hidden)]
fn elements(&self) -> &[FormatElement];
/// Glue for usage of the [`write!`] macro with implementors of this trait.
///
/// This method should generally not be invoked manually, but rather through the [`write!`] macro itself.
///
/// # Examples
///
/// ```
/// use ruff_formatter::prelude::*;
/// use ruff_formatter::{Buffer, FormatState, SimpleFormatContext, VecBuffer, format_args};
///
/// let mut state = FormatState::new(SimpleFormatContext::default());
/// let mut buffer = VecBuffer::new(&mut state);
///
/// buffer.write_fmt(format_args!(text("Hello World"))).unwrap();
///
/// assert_eq!(buffer.into_vec(), vec![FormatElement::StaticText{ text: "Hello World" }]);
/// ```
// `self` is bound as `mut` so that `&mut self` (a reference to the `&mut Self` binding) can be
// handed to `write`. NOTE(review): presumably `write` takes a `&mut dyn Buffer`, which `&mut Self`
// satisfies through the `&mut W` impl even when `Self` is unsized — confirm against `write`.
fn write_fmt(mut self: &mut Self, arguments: Arguments<Self::Context>) -> FormatResult<()> {
write(&mut self, arguments)
}
/// Returns the formatting state relevant for this formatting session.
fn state(&self) -> &FormatState<Self::Context>;
/// Returns the mutable formatting state relevant for this formatting session.
fn state_mut(&mut self) -> &mut FormatState<Self::Context>;
/// Takes a snapshot of the buffer's state, excluding the formatter state.
fn snapshot(&self) -> BufferSnapshot;
/// Restores the buffer to the state captured in `snapshot`.
///
/// ## Panics
/// If the passed snapshot id is a snapshot of another buffer OR
/// if the snapshot is restored out of order
fn restore_snapshot(&mut self, snapshot: BufferSnapshot);
}
/// Snapshot of a buffer state that can be restored at a later point.
///
/// Used in cases where the formatting of an object fails but a parent formatter knows an alternative
/// strategy on how to format the object that might succeed.
#[derive(Debug)]
pub enum BufferSnapshot {
    /// Stores an absolute position of a buffer's state, for example, the offset of the last written element.
    Position(usize),
    /// Generic structure for custom buffers that need to store more complex data. Slightly more
    /// expensive because it requires allocating the buffer state on the heap.
    Any(Box<dyn Any>),
}

impl BufferSnapshot {
    /// Creates a new buffer snapshot that points to the specified position.
    pub const fn position(index: usize) -> Self {
        Self::Position(index)
    }

    /// Unwraps the position value.
    ///
    /// # Panics
    ///
    /// If `self` is not a [`BufferSnapshot::Position`].
    pub fn unwrap_position(&self) -> usize {
        match self {
            BufferSnapshot::Position(index) => *index,
            BufferSnapshot::Any(_) => panic!("Tried to unwrap Any snapshot as a position."),
        }
    }

    /// Unwraps the any value, downcasting it to `T`.
    ///
    /// # Panics
    ///
    /// If `self` is not a [`BufferSnapshot::Any`], or if the stored value is not of type `T`.
    /// In the latter case the panic message names the [`TypeId`] of the value that is actually
    /// stored as well as the requested one.
    pub fn unwrap_any<T: 'static>(self) -> T {
        match self {
            BufferSnapshot::Position(_) => {
                panic!("Tried to unwrap Position snapshot as Any snapshot.")
            }
            BufferSnapshot::Any(value) => match value.downcast::<T>() {
                Ok(snapshot) => *snapshot,
                Err(err) => {
                    panic!(
                        "Tried to unwrap snapshot of type {:?} as {:?}",
                        // Dereference first: `err.type_id()` would resolve to the `Any` impl of
                        // `Box<dyn Any>` itself and report the box's type id rather than the
                        // type id of the value stored inside it.
                        (*err).type_id(),
                        TypeId::of::<T>()
                    )
                }
            },
        }
    }
}
/// Implements the [`Buffer`] trait for all mutable references of objects implementing [`Buffer`].
///
/// Every method simply forwards to the referenced buffer.
impl<W: Buffer<Context = Context> + ?Sized, Context> Buffer for &mut W {
type Context = Context;
fn write_element(&mut self, element: FormatElement) -> FormatResult<()> {
(**self).write_element(element)
}
fn elements(&self) -> &[FormatElement] {
(**self).elements()
}
// Forwarded explicitly (instead of using the trait's default) so that the referenced buffer's
// own `write_fmt` is the one that runs.
fn write_fmt(&mut self, args: Arguments<Context>) -> FormatResult<()> {
(**self).write_fmt(args)
}
fn state(&self) -> &FormatState<Self::Context> {
(**self).state()
}
fn state_mut(&mut self) -> &mut FormatState<Self::Context> {
(**self).state_mut()
}
fn snapshot(&self) -> BufferSnapshot {
(**self).snapshot()
}
fn restore_snapshot(&mut self, snapshot: BufferSnapshot) {
(**self).restore_snapshot(snapshot)
}
}
/// Vector backed [`Buffer`] implementation.
///
/// The buffer writes all elements into the internal elements buffer.
#[derive(Debug)]
pub struct VecBuffer<'a, Context> {
/// The formatting state shared with the formatting session that created this buffer.
state: &'a mut FormatState<Context>,
/// The elements written so far, in write order.
elements: Vec<FormatElement>,
}
impl<'a, Context> VecBuffer<'a, Context> {
    /// Creates an empty buffer that uses `state` as its formatting state.
    pub fn new(state: &'a mut FormatState<Context>) -> Self {
        Self {
            state,
            elements: Vec::new(),
        }
    }

    /// Creates a buffer that appends to the already existing `elements` vector.
    pub fn new_with_vec(state: &'a mut FormatState<Context>, elements: Vec<FormatElement>) -> Self {
        Self { state, elements }
    }

    /// Creates a buffer with the specified capacity
    pub fn with_capacity(capacity: usize, state: &'a mut FormatState<Context>) -> Self {
        Self::new_with_vec(state, Vec::with_capacity(capacity))
    }

    /// Consumes the buffer and returns the written [`FormatElement`]s as a vector.
    pub fn into_vec(self) -> Vec<FormatElement> {
        self.elements
    }

    /// Takes the elements without consuming self, leaving an empty vector behind.
    pub fn take_vec(&mut self) -> Vec<FormatElement> {
        std::mem::take(&mut self.elements)
    }
}
/// Gives read access to the written elements as a slice.
impl<Context> Deref for VecBuffer<'_, Context> {
    type Target = [FormatElement];

    fn deref(&self) -> &Self::Target {
        self.elements.as_slice()
    }
}

/// Gives in-place mutable access to the written elements as a slice.
impl<Context> DerefMut for VecBuffer<'_, Context> {
    fn deref_mut(&mut self) -> &mut Self::Target {
        self.elements.as_mut_slice()
    }
}
impl<Context> Buffer for VecBuffer<'_, Context> {
    type Context = Context;

    /// Appends `element` to the internal vector; never fails.
    fn write_element(&mut self, element: FormatElement) -> FormatResult<()> {
        self.elements.push(element);
        Ok(())
    }

    fn elements(&self) -> &[FormatElement] {
        &self.elements
    }

    fn state(&self) -> &FormatState<Self::Context> {
        &*self.state
    }

    fn state_mut(&mut self) -> &mut FormatState<Self::Context> {
        &mut *self.state
    }

    /// The snapshot is the number of elements written so far.
    fn snapshot(&self) -> BufferSnapshot {
        let written = self.elements.len();
        BufferSnapshot::position(written)
    }

    /// Drops every element that was written after the snapshot was taken.
    ///
    /// Panics if the buffer is shorter than it was when the snapshot was taken, or if the
    /// snapshot is not a position snapshot.
    fn restore_snapshot(&mut self, snapshot: BufferSnapshot) {
        let target_len = snapshot.unwrap_position();
        assert!(
            target_len <= self.elements.len(),
            r#"Outdated snapshot. This buffer contains fewer elements than at the time the snapshot was taken.
Make sure that you take and restore the snapshot in order and that this snapshot belongs to the current buffer."#
        );
        self.elements.truncate(target_len);
    }
}
/// This struct wraps an existing buffer and emits a preamble when the first element is written.
///
/// This can be useful if you, for example, want to write some content if what gets written next isn't empty.
///
/// # Examples
///
/// ```
/// use ruff_formatter::{FormatState, Formatted, PreambleBuffer, SimpleFormatContext, VecBuffer, write};
/// use ruff_formatter::prelude::*;
///
/// struct Preamble;
///
/// impl Format<SimpleFormatContext> for Preamble {
/// fn fmt(&self, f: &mut Formatter<SimpleFormatContext>) -> FormatResult<()> {
/// write!(f, [text("# heading"), hard_line_break()])
/// }
/// }
///
/// # fn main() -> FormatResult<()> {
/// let mut state = FormatState::new(SimpleFormatContext::default());
/// let mut buffer = VecBuffer::new(&mut state);
///
/// {
/// let mut with_preamble = PreambleBuffer::new(&mut buffer, Preamble);
///
/// write!(&mut with_preamble, [text("this text will be on a new line")])?;
/// }
///
/// let formatted = Formatted::new(Document::from(buffer.into_vec()), SimpleFormatContext::default());
/// assert_eq!("# heading\nthis text will be on a new line", formatted.print()?.as_code());
///
/// # Ok(())
/// # }
/// ```
///
/// The pre-amble does not get written if no content is written to the buffer.
///
/// ```
/// use ruff_formatter::{FormatState, Formatted, PreambleBuffer, SimpleFormatContext, VecBuffer, write};
/// use ruff_formatter::prelude::*;
///
/// struct Preamble;
///
/// impl Format<SimpleFormatContext> for Preamble {
/// fn fmt(&self, f: &mut Formatter<SimpleFormatContext>) -> FormatResult<()> {
/// write!(f, [text("# heading"), hard_line_break()])
/// }
/// }
///
/// # fn main() -> FormatResult<()> {
/// let mut state = FormatState::new(SimpleFormatContext::default());
/// let mut buffer = VecBuffer::new(&mut state);
/// {
/// let mut with_preamble = PreambleBuffer::new(&mut buffer, Preamble);
/// }
///
/// let formatted = Formatted::new(Document::from(buffer.into_vec()), SimpleFormatContext::default());
/// assert_eq!("", formatted.print()?.as_code());
/// # Ok(())
/// # }
/// ```
pub struct PreambleBuffer<'buf, Preamble, Context> {
/// The wrapped buffer
inner: &'buf mut dyn Buffer<Context = Context>,
/// The pre-amble to write once the first content gets written to this buffer.
preamble: Preamble,
/// `true` as long as nothing (neither the pre-amble nor any content) has been written through
/// this buffer; set to `false` once the pre-amble has been written.
empty: bool,
}
impl<'buf, Preamble, Context> PreambleBuffer<'buf, Preamble, Context> {
/// Wraps `inner` so that `preamble` is written right before the first element.
///
/// The new buffer starts out empty, i.e. the pre-amble has not been written yet.
pub fn new(inner: &'buf mut dyn Buffer<Context = Context>, preamble: Preamble) -> Self {
Self {
inner,
preamble,
empty: true,
}
}
/// Returns `true` if the preamble has been written, `false` otherwise.
pub fn did_write_preamble(&self) -> bool {
!self.empty
}
}
impl<Preamble, Context> Buffer for PreambleBuffer<'_, Preamble, Context>
where
    Preamble: Format<Context>,
{
    type Context = Context;

    /// Writes `element` to the wrapped buffer, preceded by the pre-amble if this is the first
    /// element written through this buffer.
    fn write_element(&mut self, element: FormatElement) -> FormatResult<()> {
        if !self.empty {
            return self.inner.write_element(element);
        }

        // First write: emit the pre-amble, and only mark the buffer as non-empty if that succeeded.
        write!(self.inner, [&self.preamble])?;
        self.empty = false;
        self.inner.write_element(element)
    }

    fn elements(&self) -> &[FormatElement] {
        self.inner.elements()
    }

    fn state(&self) -> &FormatState<Self::Context> {
        self.inner.state()
    }

    fn state_mut(&mut self) -> &mut FormatState<Self::Context> {
        self.inner.state_mut()
    }

    /// Captures the wrapped buffer's snapshot together with the `empty` flag.
    fn snapshot(&self) -> BufferSnapshot {
        let captured = PreambleBufferSnapshot {
            inner: self.inner.snapshot(),
            empty: self.empty,
        };
        BufferSnapshot::Any(Box::new(captured))
    }

    /// Restores the `empty` flag first and then the wrapped buffer.
    fn restore_snapshot(&mut self, snapshot: BufferSnapshot) {
        let PreambleBufferSnapshot { inner, empty } =
            snapshot.unwrap_any::<PreambleBufferSnapshot>();
        self.empty = empty;
        self.inner.restore_snapshot(inner);
    }
}
/// State captured by `PreambleBuffer::snapshot` and boxed into a [`BufferSnapshot::Any`].
struct PreambleBufferSnapshot {
/// Snapshot of the wrapped buffer.
inner: BufferSnapshot,
/// Value of the `PreambleBuffer`'s `empty` flag at the time the snapshot was taken.
empty: bool,
}
/// Buffer that allows you to inspect elements as they get written to the formatter.
pub struct Inspect<'inner, Context, Inspector> {
/// The wrapped buffer that receives every element.
inner: &'inner mut dyn Buffer<Context = Context>,
/// Callback that is handed a reference to each element before it is forwarded to `inner`.
inspector: Inspector,
}
impl<'inner, Context, Inspector> Inspect<'inner, Context, Inspector> {
/// Creates an inspecting wrapper around `inner`. Private; see `BufferExtensions::inspect`.
fn new(inner: &'inner mut dyn Buffer<Context = Context>, inspector: Inspector) -> Self {
Self { inner, inspector }
}
}
/// Forwards everything to the wrapped buffer; `write_element` additionally calls the inspector.
impl<'inner, Context, Inspector> Buffer for Inspect<'inner, Context, Inspector>
where
Inspector: FnMut(&FormatElement),
{
type Context = Context;
/// Shows `element` to the inspector and then writes it to the wrapped buffer.
fn write_element(&mut self, element: FormatElement) -> FormatResult<()> {
(self.inspector)(&element);
self.inner.write_element(element)
}
fn elements(&self) -> &[FormatElement] {
self.inner.elements()
}
fn state(&self) -> &FormatState<Self::Context> {
self.inner.state()
}
fn state_mut(&mut self) -> &mut FormatState<Self::Context> {
self.inner.state_mut()
}
// Snapshots are delegated unchanged: this wrapper stores no snapshot state of its own.
fn snapshot(&self) -> BufferSnapshot {
self.inner.snapshot()
}
fn restore_snapshot(&mut self, snapshot: BufferSnapshot) {
self.inner.restore_snapshot(snapshot)
}
}
/// A Buffer that removes any soft line breaks.
///
/// * Removes [`lines`](FormatElement::Line) with the mode [`Soft`](LineMode::Soft).
/// * Replaces [`lines`](FormatElement::Line) with the mode [`SoftOrSpace`](LineMode::SoftOrSpace) with a [`Space`](FormatElement::Space)
///
/// # Examples
///
/// ```
/// use ruff_formatter::prelude::*;
/// use ruff_formatter::{format, write};
///
/// # fn main() -> FormatResult<()> {
/// use ruff_formatter::{RemoveSoftLinesBuffer, SimpleFormatContext, VecBuffer};
/// use ruff_formatter::prelude::format_with;
/// let formatted = format!(
/// SimpleFormatContext::default(),
/// [format_with(|f| {
/// let mut buffer = RemoveSoftLinesBuffer::new(f);
///
/// write!(
/// buffer,
/// [
/// text("The next soft line or space gets replaced by a space"),
/// soft_line_break_or_space(),
/// text("and the line here"),
/// soft_line_break(),
/// text("is removed entirely.")
/// ]
/// )
/// })]
/// )?;
///
/// assert_eq!(
/// formatted.document().as_ref(),
/// &[
/// FormatElement::StaticText { text: "The next soft line or space gets replaced by a space" },
/// FormatElement::Space,
/// FormatElement::StaticText { text: "and the line here" },
/// FormatElement::StaticText { text: "is removed entirely." }
/// ]
/// );
///
/// # Ok(())
/// # }
/// ```
pub struct RemoveSoftLinesBuffer<'a, Context> {
/// The wrapped buffer that receives the cleaned elements.
inner: &'a mut dyn Buffer<Context = Context>,
/// Caches the interned elements after the soft line breaks have been removed.
///
/// The `key` is the [Interned] element as it has been passed to [Self::write_element] or the child of another
/// [Interned] element. The `value` is the matching document of the key where all soft line breaks have been removed.
///
/// It's fine to not snapshot the cache. The worst that can happen is that it holds on to interned elements
/// that are now unused. But there's little harm in that and the cache is cleaned when dropping the buffer.
interned_cache: FxHashMap<Interned, Interned>,
}
impl<'a, Context> RemoveSoftLinesBuffer<'a, Context> {
/// Creates a new buffer that removes the soft line breaks from elements before writing them into `inner`.
///
/// The cache of cleaned interned elements starts out empty.
pub fn new(inner: &'a mut dyn Buffer<Context = Context>) -> Self {
Self {
inner,
interned_cache: FxHashMap::default(),
}
}
/// Removes the soft line breaks from an interned element.
///
/// Delegates to the free function `clean_interned`, passing this buffer's cache.
fn clean_interned(&mut self, interned: &Interned) -> Interned {
clean_interned(interned, &mut self.interned_cache)
}
}
/// Returns a version of `interned` that contains no soft line breaks, using `interned_cache` to
/// avoid cleaning the same interned element twice.
///
/// [`LineMode::Soft`] lines are dropped, [`LineMode::SoftOrSpace`] lines become
/// [`FormatElement::Space`], and nested [`FormatElement::Interned`] elements are cleaned
/// recursively. If nothing has to change, a clone of `interned` itself is returned. The result is
/// stored in the cache under `interned` in either case.
// Extracted to function to avoid monomorphization
fn clean_interned(
interned: &Interned,
interned_cache: &mut FxHashMap<Interned, Interned>,
) -> Interned {
match interned_cache.get(interned) {
// Already cleaned earlier: reuse the cached result.
Some(cleaned) => cleaned.clone(),
None => {
// Find the first soft line break element or interned element that must be changed
// and copy everything in front of it. `rest` is what still has to be processed.
let result = interned
.iter()
.enumerate()
.find_map(|(index, element)| match element {
FormatElement::Line(LineMode::Soft | LineMode::SoftOrSpace) => {
let mut cleaned = Vec::new();
cleaned.extend_from_slice(&interned[..index]);
// `rest` starts at the soft line itself so that the loop below removes or replaces it.
Some((cleaned, &interned[index..]))
}
FormatElement::Interned(inner) => {
let cleaned_inner = clean_interned(inner, interned_cache);
// Only start copying if cleaning the nested element actually changed it.
if &cleaned_inner != inner {
let mut cleaned = Vec::with_capacity(interned.len());
cleaned.extend_from_slice(&interned[..index]);
cleaned.push(FormatElement::Interned(cleaned_inner));
// The nested element has already been pushed, so `rest` starts after it.
Some((cleaned, &interned[index + 1..]))
} else {
None
}
}
_ => None,
});
let result = match result {
// At least one element must change: finish the copy, cleaning the remaining elements on the way.
Some((mut cleaned, rest)) => {
for element in rest {
let element = match element {
FormatElement::Line(LineMode::Soft) => continue,
FormatElement::Line(LineMode::SoftOrSpace) => FormatElement::Space,
FormatElement::Interned(interned) => {
FormatElement::Interned(clean_interned(interned, interned_cache))
}
element => element.clone(),
};
cleaned.push(element)
}
Interned::new(cleaned)
}
// No change necessary, return existing interned element
None => interned.clone(),
};
// Remember the outcome (changed or not) so that later lookups hit the cache.
interned_cache.insert(interned.clone(), result.clone());
result
}
}
}
impl<Context> Buffer for RemoveSoftLinesBuffer<'_, Context> {
    type Context = Context;

    /// Writes `element` to the wrapped buffer after stripping soft line breaks:
    /// soft lines are skipped, soft-or-space lines become spaces, and interned elements are
    /// replaced by their cleaned counterpart. Everything else is forwarded untouched.
    fn write_element(&mut self, element: FormatElement) -> FormatResult<()> {
        match element {
            FormatElement::Line(LineMode::Soft) => Ok(()),
            FormatElement::Line(LineMode::SoftOrSpace) => {
                self.inner.write_element(FormatElement::Space)
            }
            FormatElement::Interned(interned) => {
                let cleaned = self.clean_interned(&interned);
                self.inner.write_element(FormatElement::Interned(cleaned))
            }
            other => self.inner.write_element(other),
        }
    }

    fn elements(&self) -> &[FormatElement] {
        self.inner.elements()
    }

    fn state(&self) -> &FormatState<Self::Context> {
        self.inner.state()
    }

    fn state_mut(&mut self) -> &mut FormatState<Self::Context> {
        self.inner.state_mut()
    }

    // The interned cache is intentionally not part of the snapshot; only the wrapped buffer is.
    fn snapshot(&self) -> BufferSnapshot {
        self.inner.snapshot()
    }

    fn restore_snapshot(&mut self, snapshot: BufferSnapshot) {
        self.inner.restore_snapshot(snapshot)
    }
}
/// Convenience methods that are available on every sized [`Buffer`].
pub trait BufferExtensions: Buffer + Sized {
    /// Returns a new buffer that calls the passed inspector for every element that gets written to the output
    #[must_use]
    fn inspect<F>(&mut self, inspector: F) -> Inspect<Self::Context, F>
    where
        F: FnMut(&FormatElement),
    {
        Inspect::new(self, inspector)
    }

    /// Starts a recording that gives you access to all elements that have been written between the start
    /// and end of the recording
    ///
    /// # Examples
    ///
    /// ```
    /// use std::ops::Deref;
    /// use ruff_formatter::prelude::*;
    /// use ruff_formatter::{write, format, SimpleFormatContext};
    ///
    /// # fn main() -> FormatResult<()> {
    /// let formatted = format!(SimpleFormatContext::default(), [format_with(|f| {
    /// let mut recording = f.start_recording();
    ///
    /// write!(recording, [text("A")])?;
    /// write!(recording, [text("B")])?;
    ///
    /// write!(recording, [format_with(|f| write!(f, [text("C"), text("D")]))])?;
    ///
    /// let recorded = recording.stop();
    /// assert_eq!(
    /// recorded.deref(),
    /// &[
    /// FormatElement::StaticText{ text: "A" },
    /// FormatElement::StaticText{ text: "B" },
    /// FormatElement::StaticText{ text: "C" },
    /// FormatElement::StaticText{ text: "D" }
    /// ]
    /// );
    ///
    /// Ok(())
    /// })])?;
    ///
    /// assert_eq!(formatted.print()?.as_code(), "ABCD");
    /// # Ok(())
    /// # }
    /// ```
    #[must_use]
    fn start_recording(&mut self) -> Recording<Self> {
        Recording::new(self)
    }

    /// Writes a sequence of elements into this buffer, stopping at the first write that fails.
    fn write_elements<I>(&mut self, elements: I) -> FormatResult<()>
    where
        I: IntoIterator<Item = FormatElement>,
    {
        elements
            .into_iter()
            .try_for_each(|element| self.write_element(element))
    }
}

impl<T: Buffer> BufferExtensions for T {}
/// An in-progress recording of the elements written to a buffer; created by
/// `BufferExtensions::start_recording` and finished with `Recording::stop`.
#[derive(Debug)]
pub struct Recording<'buf, Buffer> {
/// Number of elements the buffer contained when the recording started.
start: usize,
/// The buffer that is being recorded; all writes are forwarded to it.
buffer: &'buf mut Buffer,
}
impl<'buf, B> Recording<'buf, B>
where
    B: Buffer,
{
    /// Starts recording at the buffer's current length.
    fn new(buffer: &'buf mut B) -> Self {
        let start = buffer.elements().len();
        Self { start, buffer }
    }

    /// Forwards to the recorded buffer's `write_fmt`; lets the `write!` macro target a recording.
    #[inline(always)]
    pub fn write_fmt(&mut self, arguments: Arguments<B::Context>) -> FormatResult<()> {
        self.buffer.write_fmt(arguments)
    }

    /// Forwards `element` to the recorded buffer.
    #[inline(always)]
    pub fn write_element(&mut self, element: FormatElement) -> FormatResult<()> {
        self.buffer.write_element(element)
    }

    /// Ends the recording and returns the elements written since it started.
    ///
    /// The result is empty if the buffer is now shorter than it was at the start of the
    /// recording, which may happen if the buffer was rewound in the meantime.
    pub fn stop(self) -> Recorded<'buf> {
        let Self { start, buffer } = self;
        // Downgrade to a shared borrow that lives for the whole `'buf` lifetime.
        let buffer: &'buf B = buffer;
        Recorded(buffer.elements().get(start..).unwrap_or_default())
    }
}
/// The elements captured by a finished `Recording`, borrowed from the recorded buffer.
#[derive(Debug, Copy, Clone)]
pub struct Recorded<'a>(&'a [FormatElement]);
/// Exposes the recorded elements as a slice.
impl Deref for Recorded<'_> {
type Target = [FormatElement];
fn deref(&self) -> &Self::Target {
self.0
}
}

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,632 @@
use super::{
map::CommentsMap, CommentPlacement, CommentStyle, CommentTextPosition, DecoratedComment,
SourceComment, TransformSourceMap,
};
use crate::source_map::{DeletedRangeEntry, DeletedRanges};
use crate::{TextRange, TextSize};
use ruff_rowan::syntax::SyntaxElementKey;
use ruff_rowan::{
Direction, Language, SyntaxElement, SyntaxKind, SyntaxNode, SyntaxToken, WalkEvent,
};
use rustc_hash::FxHashSet;
/// Extracts all comments from a syntax tree.
pub(super) struct CommentsBuilderVisitor<'a, Style: CommentStyle> {
/// Receives the placement of every flushed comment and produces the final result in `visit`.
builder: CommentsBuilder<Style::Language>,
/// Decides the placement of each comment when pending comments are flushed.
style: &'a Style,
/// Built from the optional source map passed to `new`.
parentheses: SourceParentheses<'a>,
// State
/// Comments that have been queued but not yet flushed to the builder.
pending_comments: Vec<DecoratedComment<Style::Language>>,
/// The node that was left most recently; cleared when the first token of a node is visited.
preceding_node: Option<SyntaxNode<Style::Language>>,
/// Index into `parents` of the node that follows the comments collected next, if any.
following_node_index: Option<usize>,
/// Stack of the non-list nodes that have been entered but not yet left.
parents: Vec<SyntaxNode<Style::Language>>,
/// The most recently visited token; its trailing trivia is processed together with the next token.
last_token: Option<SyntaxToken<Style::Language>>,
}
impl<'a, Style> CommentsBuilderVisitor<'a, Style>
where
Style: CommentStyle,
{
/// Creates a visitor with empty traversal state that places comments according to `style`.
///
/// The optional `source_map` is only used to build the parentheses lookup.
pub(super) fn new(style: &'a Style, source_map: Option<&'a TransformSourceMap>) -> Self {
    let parentheses = SourceParentheses::from_source_map(source_map);

    Self {
        builder: Default::default(),
        style,
        parentheses,
        pending_comments: Vec::new(),
        preceding_node: None,
        following_node_index: None,
        parents: Vec::new(),
        last_token: None,
    }
}
/// Walks `root` in pre-order (including tokens), collects and places every comment, and
/// returns whatever the builder's `finish` produces.
///
/// # Panics
///
/// If some entered node has not been left again once the traversal is done.
pub(super) fn visit(
mut self,
root: &SyntaxNode<Style::Language>,
) -> (
CommentsMap<SyntaxElementKey, SourceComment<Style::Language>>,
FxHashSet<SyntaxElementKey>,
) {
for event in root.preorder_with_tokens(Direction::Next) {
match event {
WalkEvent::Enter(SyntaxElement::Node(node)) => {
self.visit_node(WalkEvent::Enter(node))
}
WalkEvent::Leave(SyntaxElement::Node(node)) => {
self.visit_node(WalkEvent::Leave(node))
}
WalkEvent::Enter(SyntaxElement::Token(token)) => self.visit_token(token),
WalkEvent::Leave(SyntaxElement::Token(_)) => {
// Handled as part of enter
}
}
}
assert!(
self.parents.is_empty(),
"Expected all enclosing nodes to have been processed but contains {:#?}",
self.parents
);
// Process any comments attached to the last token.
// Important for range formatting where it isn't guaranteed that the
// last token is an EOF token.
if let Some(last_token) = self.last_token.take() {
// `parents` is empty at this point; push the root so that the trailing comments have an
// enclosing node.
self.parents.push(root.clone());
let (comments_start, lines_before, position, trailing_end) =
self.visit_trailing_comments(last_token, None);
Self::update_comments(
&mut self.pending_comments[comments_start..],
position,
lines_before,
trailing_end,
);
}
// Flush whatever is still pending; there is no following node anymore.
self.flush_comments(None);
self.builder.finish()
}
/// Updates the traversal state when a node is entered or left. List nodes are skipped entirely.
///
/// On enter, the node is pushed onto `parents` and, if no following node is set yet (or it is
/// the root), pending comments are flushed with this node as their following node. On leave,
/// the node is popped, pending comments are flushed, and the node becomes the preceding node.
fn visit_node(&mut self, event: WalkEvent<SyntaxNode<Style::Language>>) {
match event {
WalkEvent::Enter(node) => {
// Lists cannot have comments attached. They either belong to the entire parent or to
// the first child. So we ignore lists all together
if node.kind().is_list() {
return;
}
// A following-node index of 0 points at the bottom of the `parents` stack.
let is_root = matches!(self.following_node_index, Some(0));
// Associate comments with the most outer node
// Set following here because it is the "following node" of the next token's leading trivia.
if self.following_node_index.is_none() || is_root {
// Flush in case the node doesn't have any tokens.
self.flush_comments(Some(&node));
// The node is pushed right below, so this is the index it will have in `parents`.
self.following_node_index = Some(self.parents.len());
}
self.parents.push(node);
}
WalkEvent::Leave(node) => {
if node.kind().is_list() {
return;
}
self.parents.pop().unwrap();
// We're past this node, flush any pending comments for its children
self.following_node_index = None;
self.flush_comments(None);
// We're past this node, so it must precede the sibling that comes next.
self.preceding_node = Some(node);
}
}
}
/// Queues the comments found in the trailing trivia of the previous token and in the leading
/// trivia of `token`, then records `token` as the last visited token.
///
/// The trailing trivia of `token` itself is processed when the next token is visited (or at the
/// end of `visit`).
fn visit_token(&mut self, token: SyntaxToken<Style::Language>) {
// Process the trailing trivia of the last token
let (comments_start, mut lines_before, mut position, mut trailing_end) =
if let Some(last_token) = self.last_token.take() {
self.visit_trailing_comments(last_token, Some(&token))
} else {
// No previous token: the comments of this token start at the current end of the queue.
(
self.pending_comments.len(),
0,
CommentTextPosition::SameLine,
None,
)
};
// Process the leading trivia of the current token. the trailing trivia is handled as part of the next token
for leading in token.leading_trivia().pieces() {
if leading.is_newline() {
lines_before += 1;
// All comments following from here are own line comments
position = CommentTextPosition::OwnLine;
// Remember where the comments queued before the first line break end.
if trailing_end.is_none() {
trailing_end = Some(self.pending_comments.len());
}
} else if leading.is_skipped() {
// Skipped token trivia: flag the token on the builder and stop looking at the
// remaining leading trivia pieces.
self.builder.mark_has_skipped(&token);
lines_before = 0;
break;
} else if let Some(comment) = leading.as_comments() {
let kind = Style::get_comment_kind(&comment);
self.queue_comment(DecoratedComment {
enclosing: self.enclosing_node().clone(),
preceding: self.preceding_node.clone(),
// Filled in by `flush_comments`.
following: None,
following_token: Some(token.clone()),
lines_before,
// Filled in by `update_comments` below.
lines_after: 0,
text_position: position,
kind,
comment,
});
lines_before = 0;
}
}
self.last_token = Some(token);
// Fix up `lines_after` and the text position of the comments queued for this token.
Self::update_comments(
&mut self.pending_comments[comments_start..],
position,
lines_before,
trailing_end,
);
// Set following node to `None` because it now becomes the enclosing node.
if let Some(following_node) = self.following_node() {
self.flush_comments(Some(&following_node.clone()));
self.following_node_index = None;
// The following node is only set after entering a node
// That means, following node is only set for the first token of a node.
// Unset preceding node if this is the first token because the preceding node belongs to the parent.
self.preceding_node = None;
}
}
/// Returns the node that encloses the comments that are currently being collected.
///
/// Without a following node this is the innermost entered node. Otherwise it is the parent of
/// the following node, or the following node itself if that is the bottom of the stack.
fn enclosing_node(&self) -> &SyntaxNode<Style::Language> {
    let position = match self.following_node_index {
        None => {
            return self
                .parents
                .last()
                .expect("Expected enclosing nodes to at least contain the root node.");
        }
        Some(0) => 0,
        Some(index) => index - 1,
    };

    &self.parents[position]
}
/// Returns the node stored at `following_node_index` in `parents`, or `None` if no following
/// node is set.
///
/// Panics if the index is set but does not point into `parents`.
fn following_node(&self) -> Option<&SyntaxNode<Style::Language>> {
    let index = self.following_node_index?;
    let node = self
        .parents
        .get(index)
        .expect("Expected following node index to point to a valid parent node");

    Some(node)
}
/// Appends `comment` to the pending comments; it is handed to the builder by a later
/// `flush_comments` call.
fn queue_comment(&mut self, comment: DecoratedComment<Style::Language>) {
self.pending_comments.push(comment);
}
/// Finalizes a run of freshly queued comments.
///
/// * `lines_after` of each comment becomes the `lines_before` of the comment that follows it;
///   the last comment gets the passed `lines_before`.
/// * If `position` is an own-line position, every comment in front of `trailing_end` (all
///   comments if it is `None`) is turned into an end-of-line comment, because a line break has
///   been seen since.
fn update_comments(
    comments: &mut [DecoratedComment<Style::Language>],
    position: CommentTextPosition,
    lines_before: u32,
    trailing_end: Option<usize>,
) {
    let trailing_end = trailing_end.unwrap_or(comments.len());

    for index in 0..comments.len() {
        if index < trailing_end && position.is_own_line() {
            comments[index].text_position = CommentTextPosition::EndOfLine;
        }

        let lines_after = comments
            .get(index + 1)
            .map_or(lines_before, |next| next.lines_before);
        comments[index].lines_after = lines_after;
    }
}
/// Drains all pending comments, assigns `following` as their following node, asks the comment
/// style where to place each of them, and registers the resulting placement with the builder.
fn flush_comments(&mut self, following: Option<&SyntaxNode<Style::Language>>) {
    for mut pending in self.pending_comments.drain(..) {
        pending.following = following.cloned();
        self.builder.add_comment(self.style.place_comment(pending));
    }
}
/// Walks the trailing trivia of `token` and queues every comment piece as a pending comment.
///
/// `following_token` is stored as the `following_token` of every queued comment.
///
/// Returns the tuple `(comments_start, lines_before, position, trailing_end)` describing the state
/// after the last trivia piece:
/// * `comments_start`: always `0` in this function (it is only ever assigned `0`).
/// * `lines_before`: number of newline pieces seen since the last comment or flush.
/// * `position`: `OwnLine` if a newline piece was seen since the last flush, `SameLine` otherwise.
/// * `trailing_end`: length of `pending_comments` at the first newline piece since the last flush,
///   or `None` if no newline piece was seen.
fn visit_trailing_comments(
&mut self,
token: SyntaxToken<Style::Language>,
following_token: Option<&SyntaxToken<Style::Language>>,
) -> (usize, u32, CommentTextPosition, Option<usize>) {
let mut comments_start = 0;
// Exclusive end index of the trailing comments in `pending_comments`: the number of comments
// queued before the first line break. `None` as long as no line break has been seen.
let mut trailing_end: Option<usize> = None;
// Number of lines before the next comment, token, or skipped token trivia
let mut lines_before = 0;
// Trailing comments are all `SameLine` comments EXCEPT if any is followed by a line break,
// a leading comment (that always have line breaks), or there's a line break before the token.
let mut position = CommentTextPosition::SameLine;
// Process the trailing trivia of the last token
for piece in token.trailing_trivia().pieces() {
if piece.is_newline() {
lines_before += 1;
// All comments following from here are own line comments
position = CommentTextPosition::OwnLine;
// Only the first line break marks the end of the trailing comments.
if trailing_end.is_none() {
trailing_end = Some(self.pending_comments.len());
}
} else if let Some(comment) = piece.as_comments() {
self.queue_comment(DecoratedComment {
enclosing: self.enclosing_node().clone(),
preceding: self.preceding_node.clone(),
following: None,
following_token: following_token.cloned(),
lines_before,
lines_after: 0, // Will be initialized after
text_position: position,
kind: Style::get_comment_kind(&comment),
comment,
});
lines_before = 0;
}
// The source document had a `)` at the end of this piece: flush the comments queued so far
// and start over with fresh state.
if let Some(parens_source_range) = self
.parentheses
.r_paren_source_range(piece.text_range().end())
{
self.flush_before_r_paren_comments(
parens_source_range,
&token,
position,
lines_before,
comments_start,
trailing_end,
);
lines_before = 0;
position = CommentTextPosition::SameLine;
comments_start = 0;
trailing_end = None;
}
}
(comments_start, lines_before, position, trailing_end)
}
/// Flushes the pending comments that appear right before the `)` of a parenthesized expression.
///
/// Every pending comment starting at index `start` gets the outer most parenthesized node as its
/// preceding node and that node's parent as its enclosing node. `lines_after` and the text
/// position are finalized the same way as in `update_comments`. All pending comments are flushed
/// afterwards without a following node.
#[cold]
fn flush_before_r_paren_comments(
    &mut self,
    parens_source_range: TextRange,
    last_token: &SyntaxToken<Style::Language>,
    position: CommentTextPosition,
    lines_before: u32,
    start: usize,
    trailing_end: Option<usize>,
) {
    let fallback_enclosing = self.enclosing_node().clone();
    let pending = &mut self.pending_comments[start..];
    let pending_len = pending.len();
    let trailing_end = trailing_end.unwrap_or(pending_len);

    let preceding = self
        .parentheses
        .outer_most_parenthesized_node(last_token, parens_source_range);

    // The fallback is only used when the parenthesized node has no parent, which means that a
    // parenthesized expression is the root of the formatted tree.
    let enclosing = preceding.parent().unwrap_or(fallback_enclosing);

    for index in 0..pending_len {
        let lines_after = pending
            .get(index + 1)
            .map_or(lines_before, |next| next.lines_before);

        let comment = &mut pending[index];

        // A line break has been seen since: trailing comments end their line.
        if index < trailing_end && position.is_own_line() {
            comment.text_position = CommentTextPosition::EndOfLine;
        }

        comment.preceding = Some(preceding.clone());
        comment.enclosing = enclosing.clone();
        comment.lines_after = lines_after;
    }

    self.flush_comments(None);
}
}
/// Accumulates the placed comments and the tokens marked as having skipped content.
struct CommentsBuilder<L: Language> {
/// Leading, dangling, and trailing comments, keyed by the key of the node they are attached to.
comments: CommentsMap<SyntaxElementKey, SourceComment<L>>,
/// Keys of the tokens that were passed to `mark_has_skipped`.
skipped: FxHashSet<SyntaxElementKey>,
}
impl<L: Language> CommentsBuilder<L> {
    /// Stores the comment of `placement`.
    ///
    /// Explicit placements (`Leading`, `Trailing`, `Dangling`) are stored as requested.
    /// `Default` placements are resolved from the comment's text position and its preceding and
    /// following nodes; the comment becomes a dangling comment of its enclosing node if it has
    /// neither a preceding nor a following node.
    fn add_comment(&mut self, placement: CommentPlacement<L>) {
        /// Side of a node a comment with the default placement gets attached to.
        enum Side {
            Leading,
            Trailing,
        }

        let mut comment = match placement {
            CommentPlacement::Leading { node, comment } => {
                return self.push_leading_comment(&node, comment);
            }
            CommentPlacement::Trailing { node, comment } => {
                return self.push_trailing_comment(&node, comment);
            }
            CommentPlacement::Dangling { node, comment } => {
                return self.push_dangling_comment(&node, comment);
            }
            CommentPlacement::Default(comment) => comment,
        };

        let preceding = comment.take_preceding_node();
        let following = comment.take_following_node();

        let attachment = match comment.text_position {
            // The preceding node wins because a line break separates the comment from the
            // following node:
            // ```javascript
            // a; // comment
            // b
            // ```
            CommentTextPosition::EndOfLine => match (preceding, following) {
                (Some(preceding), _) => Some((Side::Trailing, preceding)),
                (None, following) => following.map(|node| (Side::Leading, node)),
            },
            // The following node wins for comments on their own line:
            // ```javascript
            // a;
            // // comment
            // b
            // ```
            CommentTextPosition::OwnLine => match (preceding, following) {
                (_, Some(following)) => Some((Side::Leading, following)),
                (preceding, None) => preceding.map(|node| (Side::Trailing, node)),
            },
            CommentTextPosition::SameLine => match (preceding, following) {
                (Some(preceding), Some(following)) => {
                    // Trailing only if the comment directly follows the preceding node and isn't
                    // separated from it by one or more tokens:
                    // ```javascript
                    // a /* comment */ b; // Comment is a trailing comment
                    // a, /* comment */ b; // Comment should be a leading comment
                    // ```
                    let comment_token_end =
                        comment.piece().as_piece().token().text_range().end();

                    if preceding.text_range().end() == comment_token_end {
                        Some((Side::Trailing, preceding))
                    } else {
                        Some((Side::Leading, following))
                    }
                }
                (Some(preceding), None) => Some((Side::Trailing, preceding)),
                (None, following) => following.map(|node| (Side::Leading, node)),
            },
        };

        match attachment {
            Some((Side::Leading, node)) => self.push_leading_comment(&node, comment),
            Some((Side::Trailing, node)) => self.push_trailing_comment(&node, comment),
            None => {
                let enclosing = comment.enclosing_node().clone();
                self.push_dangling_comment(&enclosing, comment);
            }
        }
    }

    /// Remembers the key of `token` in the set of skipped tokens.
    fn mark_has_skipped(&mut self, token: &SyntaxToken<L>) {
        self.skipped.insert(token.key());
    }

    /// Stores `comment` as a leading comment of `node`.
    fn push_leading_comment(&mut self, node: &SyntaxNode<L>, comment: impl Into<SourceComment<L>>) {
        self.comments.push_leading(node.key(), comment.into());
    }

    /// Stores `comment` as a dangling comment of `node`.
    fn push_dangling_comment(
        &mut self,
        node: &SyntaxNode<L>,
        comment: impl Into<SourceComment<L>>,
    ) {
        self.comments.push_dangling(node.key(), comment.into());
    }

    /// Stores `comment` as a trailing comment of `node`.
    fn push_trailing_comment(
        &mut self,
        node: &SyntaxNode<L>,
        comment: impl Into<SourceComment<L>>,
    ) {
        self.comments.push_trailing(node.key(), comment.into());
    }

    /// Consumes the builder and returns the collected comments and the skipped token keys.
    fn finish(
        self,
    ) -> (
        CommentsMap<SyntaxElementKey, SourceComment<L>>,
        FxHashSet<SyntaxElementKey>,
    ) {
        let Self { comments, skipped } = self;
        (comments, skipped)
    }
}
impl<L: Language> Default for CommentsBuilder<L> {
    /// Creates a builder without any comments and without any skipped tokens.
    fn default() -> Self {
        let comments = CommentsMap::new();
        let skipped = FxHashSet::default();

        Self { comments, skipped }
    }
}
/// Gives access to the parentheses of the source document that have been recorded as
/// deleted ranges in a [TransformSourceMap].
enum SourceParentheses<'a> {
/// No source map is available. Created by `from_source_map` when it is passed `None`.
Empty,
/// Parentheses are looked up in the deleted ranges of a source map.
SourceMap {
/// The source map the deleted ranges are taken from.
map: &'a TransformSourceMap,
/// The deleted range that is inspected next by `r_paren_source_range`, `None` once all
/// ranges have been consumed.
next: Option<DeletedRangeEntry<'a>>,
/// The deleted ranges coming after `next`.
tail: DeletedRanges<'a>,
},
}
impl<'a> SourceParentheses<'a> {
    /// Creates the parentheses lookup for an optional source map.
    ///
    /// Returns [SourceParentheses::Empty] without a source map. Otherwise the first deleted range
    /// is pre-fetched into `next` and the remaining ranges are kept in `tail`.
    fn from_source_map(source_map: Option<&'a TransformSourceMap>) -> Self {
        if let Some(map) = source_map {
            let mut tail = map.deleted_ranges();
            let next = tail.next();

            SourceParentheses::SourceMap { map, next, tail }
        } else {
            Self::Empty
        }
    }

    /// Returns the range of `node` including its parentheses if any. Otherwise returns the range as is
    fn parenthesized_range<L: Language>(&self, node: &SyntaxNode<L>) -> TextRange {
        match self {
            SourceParentheses::SourceMap { map, .. } => map.trimmed_source_range(node),
            SourceParentheses::Empty => node.text_trimmed_range(),
        }
    }

    /// Tests if the source document used to have a `)` at the passed `offset`.
    ///
    /// Must be called with offsets in increasing order because deleted ranges located before
    /// `offset` are discarded.
    ///
    /// Returns the source range of the first `)` in the deleted range at this offset, or `None`
    /// if there's no deleted range at this offset or if it doesn't contain a `)`.
    fn r_paren_source_range(&mut self, offset: TextSize) -> Option<TextRange> {
        let (next, tail) = match self {
            SourceParentheses::Empty => return None,
            SourceParentheses::SourceMap { next, tail, .. } => (next, tail),
        };

        loop {
            // All deleted ranges have been consumed.
            let range = next.as_ref()?;

            if range.transformed == offset {
                // A deleted range can contain multiple tokens. See if there's any `)` in the deleted
                // range and compute its source range.
                let r_paren_position = range.text.find(')')?;
                let start = range.source + TextSize::from(r_paren_position as u32);

                return Some(TextRange::at(start, TextSize::from(1)));
            }

            // The range is located after the offset; keep it for a later call.
            if range.transformed > offset {
                return None;
            }

            // The range is located before the offset; move on to the next one.
            *next = tail.next();
        }
    }

    /// Searches the outer most node that still is inside of the parentheses specified by the `parentheses_source_range`.
    ///
    /// # Panics
    ///
    /// Panics if `token` has no parent node.
    fn outer_most_parenthesized_node<L: Language>(
        &self,
        token: &SyntaxToken<L>,
        parentheses_source_range: TextRange,
    ) -> SyntaxNode<L> {
        match self {
            SourceParentheses::Empty => token.parent().unwrap(),
            SourceParentheses::SourceMap { map, .. } => {
                debug_assert_eq!(&map.text()[parentheses_source_range], ")");

                // The search looks for the outer most node that, in the source document, ends
                // right after the `)`. Multiple nodes can end right after the `)`:
                //
                // ```javascript
                // !(
                //   a
                //   /* comment */
                // )
                // ```
                //
                // In the transformed document, the `ReferenceIdentifier`, `IdentifierExpression`,
                // `UnaryExpression`, and `ExpressionStatement` all end at the end position of `)`.
                // However, not all of them start at the same position, which is why the start is
                // tracked too: First find the closest node that ends at the position of the right
                // paren, then continue upwards for as long as the ancestors stay within the range
                // spanning from the start of that node to the end of the `)`
                // (in this case, `ReferenceIdentifier` -> `IdentifierExpression`).
                let mut start_offset = None;
                let r_paren_source_end = parentheses_source_range.end();

                let ancestors = token.ancestors().take_while(|node| {
                    let source_range = self.parenthesized_range(node);

                    if let Some(start) = start_offset {
                        TextRange::new(start, r_paren_source_end).contains_range(source_range)
                    }
                    // Greater than or equal to guarantee that at least one node is returned AND
                    // to handle the case where a node is wrapped in multiple parentheses.
                    // Take the first node that fully encloses the parentheses
                    else if source_range.end() >= r_paren_source_end {
                        start_offset = Some(source_range.start());
                        true
                    } else {
                        source_range.end() < r_paren_source_end
                    }
                });

                // SAFETY:
                // * The builder starts with a node which guarantees that every token has a parent node.
                // * The above `take_while` guarantees to return `true` for the parent of the token.
                // Thus, there's always at least one node
                ancestors.last().unwrap()
            }
        }
    }
}

View file

@ -0,0 +1,836 @@
use countme::Count;
use rustc_hash::FxHashMap;
use std::fmt::{Debug, Formatter};
use std::iter::FusedIterator;
use std::num::NonZeroU32;
use std::ops::Range;
/// An optimized multi-map implementation for storing leading, dangling, and trailing parts for a key.
///
/// A naive implementation using three multimaps, one to store the leading, dangling, and trailing parts,
/// requires between `keys < allocations < keys * 3` vec allocations.
///
/// This map implementation optimises for the use case where:
/// * Parts belonging to the same key are inserted together. For example, all parts for the key `a` are inserted
/// before inserting any parts for the key `b`.
/// * The parts per key are inserted in the following order: leading, dangling, and then trailing parts.
///
/// Parts inserted in the above mentioned order are stored in a `Vec` shared by all keys to reduce the number
/// of allocations and to increase cache locality. The implementation falls back to
/// storing the leading, dangling, and trailing parts of a key in dedicated `Vec`s if the parts
/// aren't inserted in the above described order. However, this comes with a slight performance penalty due to:
/// * Requiring up to three [Vec] allocations, one for the leading, dangling, and trailing parts.
/// * Requiring to copy the already inserted parts for that key (by cloning) into the newly allocated [Vec]s.
/// * Resolving the slices for every part requires an extra level of indirection.
///
/// ## Limitations
///
/// The map supports storing up to `u32::MAX - 1` parts. Inserting the `u32::MAX`nth part panics.
///
/// ## Comments
///
/// Storing the leading, dangling, and trailing comments is an exemplary use case for this map implementation because
/// it is generally desired to keep the comments in the same order as in the source document. This translates to
/// inserting the comments per node and for every node in leading, dangling, trailing order (same order as this map optimises for).
///
/// Running Rome formatter on real world use cases showed that more than 99.99% of comments get inserted in
/// the described order.
///
/// The size limitation isn't a concern for comments because Rome supports source documents with a size up to 4GB (`u32::MAX`)
/// and every comment has at least a size of 2 bytes:
/// * 1 byte for the start sequence, e.g. `#`
/// * 1 byte for the end sequence, e.g. `\n`
///
/// Meaning, the upper bound for comments parts in a document are `u32::MAX / 2`.
pub(super) struct CommentsMap<K, V> {
/// Lookup table to retrieve the entry for a key.
index: FxHashMap<K, Entry>,
/// Flat array storing all the parts that have been inserted in order.
///
/// The parts of entries that have been converted to out of order entries remain in this vector;
/// the conversion clones them into `out_of_order`.
parts: Vec<V>,
/// Vector containing the leading, dangling, and trailing vectors for out of order entries.
///
/// The length of `out_of_order` is a multiple of 3 where:
/// * `index % 3 == 0`: Leading parts
/// * `index % 3 == 1`: Dangling parts
/// * `index % 3 == 2`: Trailing parts
out_of_order: Vec<Vec<V>>,
}
impl<K: std::hash::Hash + Eq, V> CommentsMap<K, V> {
    /// Creates an empty map.
    pub fn new() -> Self {
        let index = FxHashMap::default();
        let parts = Vec::new();
        let out_of_order = Vec::new();

        Self {
            index,
            parts,
            out_of_order,
        }
    }

    /// Appends `part` to the leading parts of `key`.
    ///
    /// The part is stored in the shared `parts` vector if the key is new or if the key only has
    /// leading parts that are located at the very end of `parts`. Otherwise the entry is stored
    /// out of order.
    pub fn push_leading(&mut self, key: K, part: V)
    where
        V: Clone,
    {
        let parts_len = self.parts.len();

        match self.index.get_mut(&key) {
            None => {
                self.parts.push(part);
                let entry = InOrderEntry::leading(parts_len..parts_len + 1);
                self.index.insert(key, Entry::InOrder(entry));
            }
            Some(Entry::OutOfOrder(entry)) => {
                self.out_of_order[entry.leading_index()].push(part);
            }
            // Only leading parts so far and nothing has been pushed since.
            Some(Entry::InOrder(entry))
                if entry.trailing_start.is_none() && entry.range().end == parts_len =>
            {
                self.parts.push(part);
                entry.increment_leading_range();
            }
            Some(entry) => {
                let converted =
                    Self::entry_to_out_of_order(entry, &self.parts, &mut self.out_of_order);
                let slot = converted.leading_index();
                self.out_of_order[slot].push(part);
            }
        }
    }

    /// Appends `part` to the dangling parts of `key`.
    ///
    /// The part is stored in the shared `parts` vector if the key is new or if the key has no
    /// trailing end yet and its parts are located at the very end of `parts`. Otherwise the entry
    /// is stored out of order.
    pub fn push_dangling(&mut self, key: K, part: V)
    where
        V: Clone,
    {
        let parts_len = self.parts.len();

        match self.index.get_mut(&key) {
            None => {
                self.parts.push(part);
                let entry = InOrderEntry::dangling(parts_len..parts_len + 1);
                self.index.insert(key, Entry::InOrder(entry));
            }
            Some(Entry::OutOfOrder(entry)) => {
                self.out_of_order[entry.dangling_index()].push(part);
            }
            // No trailing end recorded yet and nothing has been pushed since.
            Some(Entry::InOrder(entry))
                if entry.trailing_end.is_none() && entry.range().end == parts_len =>
            {
                self.parts.push(part);
                entry.increment_dangling_range();
            }
            Some(entry) => {
                let converted =
                    Self::entry_to_out_of_order(entry, &self.parts, &mut self.out_of_order);
                let slot = converted.dangling_index();
                self.out_of_order[slot].push(part);
            }
        }
    }

    /// Appends `part` to the trailing parts of `key`.
    ///
    /// The part is stored in the shared `parts` vector if the key is new or if the key's parts
    /// are located at the very end of `parts`. Otherwise the entry is stored out of order.
    pub fn push_trailing(&mut self, key: K, part: V)
    where
        V: Clone,
    {
        let parts_len = self.parts.len();

        match self.index.get_mut(&key) {
            None => {
                self.parts.push(part);
                let entry = InOrderEntry::trailing(parts_len..parts_len + 1);
                self.index.insert(key, Entry::InOrder(entry));
            }
            Some(Entry::OutOfOrder(entry)) => {
                self.out_of_order[entry.trailing_index()].push(part);
            }
            // Nothing has been pushed since the last part of this key.
            Some(Entry::InOrder(entry)) if entry.range().end == parts_len => {
                self.parts.push(part);
                entry.increment_trailing_range();
            }
            Some(entry) => {
                let converted =
                    Self::entry_to_out_of_order(entry, &self.parts, &mut self.out_of_order);
                let slot = converted.trailing_index();
                self.out_of_order[slot].push(part);
            }
        }
    }

    /// Converts `entry` into an out of order entry (if it isn't one already) and returns it.
    ///
    /// The conversion clones the leading, dangling, and trailing parts of the in order entry into
    /// three new vectors that are appended to `out_of_order`.
    #[cold]
    fn entry_to_out_of_order<'a>(
        entry: &'a mut Entry,
        parts: &[V],
        out_of_order: &mut Vec<Vec<V>>,
    ) -> &'a mut OutOfOrderEntry
    where
        V: Clone,
    {
        match entry {
            Entry::InOrder(in_order) => {
                let index = out_of_order.len();

                out_of_order.push(parts[in_order.leading_range()].to_vec());
                out_of_order.push(parts[in_order.dangling_range()].to_vec());
                out_of_order.push(parts[in_order.trailing_range()].to_vec());

                *entry = Entry::OutOfOrder(OutOfOrderEntry {
                    leading_index: index,
                    _count: Count::new(),
                });

                // Match again to get a reference to the entry that was just written.
                match entry {
                    Entry::InOrder(_) => unreachable!(),
                    Entry::OutOfOrder(out_of_order) => out_of_order,
                }
            }
            Entry::OutOfOrder(entry) => entry,
        }
    }

    /// Returns the leading parts of `key`, or an empty slice if the key is unknown.
    pub fn leading(&self, key: &K) -> &[V] {
        if let Some(entry) = self.index.get(key) {
            match entry {
                Entry::InOrder(in_order) => &self.parts[in_order.leading_range()],
                Entry::OutOfOrder(out_of_order) => {
                    self.out_of_order[out_of_order.leading_index()].as_slice()
                }
            }
        } else {
            &[]
        }
    }

    /// Returns the dangling parts of `key`, or an empty slice if the key is unknown.
    pub fn dangling(&self, key: &K) -> &[V] {
        if let Some(entry) = self.index.get(key) {
            match entry {
                Entry::InOrder(in_order) => &self.parts[in_order.dangling_range()],
                Entry::OutOfOrder(out_of_order) => {
                    self.out_of_order[out_of_order.dangling_index()].as_slice()
                }
            }
        } else {
            &[]
        }
    }

    /// Returns the trailing parts of `key`, or an empty slice if the key is unknown.
    pub fn trailing(&self, key: &K) -> &[V] {
        if let Some(entry) = self.index.get(key) {
            match entry {
                Entry::InOrder(in_order) => &self.parts[in_order.trailing_range()],
                Entry::OutOfOrder(out_of_order) => {
                    self.out_of_order[out_of_order.trailing_index()].as_slice()
                }
            }
        } else {
            &[]
        }
    }

    /// Returns `true` if `key` has any leading, dangling, or trailing part.
    pub fn has(&self, key: &K) -> bool {
        self.index.contains_key(key)
    }

    /// Returns an iterator over the leading, dangling, and trailing parts of `key` (in that order).
    pub fn parts(&self, key: &K) -> PartsIterator<V> {
        if let Some(entry) = self.index.get(key) {
            PartsIterator::from_entry(entry, self)
        } else {
            PartsIterator::Slice([].iter())
        }
    }

    /// Returns an iterator over the parts of all keys.
    ///
    /// The keys are visited in the iteration order of the underlying hash map.
    #[allow(unused)]
    pub fn all_parts(&self) -> impl Iterator<Item = &V> {
        self.index
            .values()
            .flat_map(|entry| PartsIterator::from_entry(entry, self))
    }
}
impl<K: std::hash::Hash + Eq, V> Default for CommentsMap<K, V> {
/// Creates an empty map; delegates to [CommentsMap::new].
fn default() -> Self {
Self::new()
}
}
impl<K, V> std::fmt::Debug for CommentsMap<K, V>
where
    K: std::fmt::Debug,
    V: std::fmt::Debug,
{
    /// Formats the map as a debug map from each key to the list of its parts.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let entries = self
            .index
            .iter()
            .map(|(key, entry)| (key, DebugEntry { entry, map: self }));

        f.debug_map().entries(entries).finish()
    }
}
/// Iterator to iterate over all leading, dangling, and trailing parts of a key.
pub(super) enum PartsIterator<'a, V> {
/// The slice into the [CommentsMap::parts] [Vec] if this is an in-order entry or the trailing parts
/// of an out-of-order entry.
Slice(std::slice::Iter<'a, V>),
/// Iterator over the leading parts of an out-of-order entry. Returns the dangling parts, and then the
/// trailing parts once the leading iterator is fully consumed.
Leading {
leading: std::slice::Iter<'a, V>,
dangling: &'a [V],
trailing: &'a [V],
},
/// Iterator over the dangling parts of an out-of-order entry. Returns the trailing parts
/// once the dangling iterator is fully consumed.
Dangling {
dangling: std::slice::Iter<'a, V>,
trailing: &'a [V],
},
}
impl<'a, V> PartsIterator<'a, V> {
    /// Creates the iterator over all parts of `entry`, resolving the parts from `map`.
    fn from_entry<K>(entry: &Entry, map: &'a CommentsMap<K, V>) -> Self {
        match entry {
            // All parts of an in order entry form one contiguous slice.
            Entry::InOrder(in_order) => {
                let slice = &map.parts[in_order.range()];
                PartsIterator::Slice(slice.iter())
            }
            Entry::OutOfOrder(out_of_order) => {
                let vectors = &map.out_of_order;

                PartsIterator::Leading {
                    leading: vectors[out_of_order.leading_index()].iter(),
                    dangling: &vectors[out_of_order.dangling_index()],
                    trailing: &vectors[out_of_order.trailing_index()],
                }
            }
        }
    }
}
impl<'a, V> Iterator for PartsIterator<'a, V> {
type Item = &'a V;
/// Returns the next part.
///
/// The iterator replaces itself with the variant for the next group of parts as soon as the
/// current group is exhausted: `Leading` -> `Dangling` -> `Slice` (trailing parts).
fn next(&mut self) -> Option<Self::Item> {
match self {
PartsIterator::Slice(inner) => inner.next(),
PartsIterator::Leading {
leading,
dangling,
trailing,
} => match leading.next() {
Some(next) => Some(next),
// Leading parts are exhausted: continue with the dangling parts.
None if !dangling.is_empty() => {
let mut dangling_iterator = dangling.iter();
// Can't fail because the guard verified that `dangling` isn't empty.
let next = dangling_iterator.next().unwrap();
*self = PartsIterator::Dangling {
dangling: dangling_iterator,
trailing,
};
Some(next)
}
// Neither leading nor dangling parts are left: continue with the trailing parts.
None => {
let mut trailing_iterator = trailing.iter();
let next = trailing_iterator.next();
*self = PartsIterator::Slice(trailing_iterator);
next
}
},
PartsIterator::Dangling { dangling, trailing } => match dangling.next() {
Some(next) => Some(next),
// Dangling parts are exhausted: continue with the trailing parts.
None => {
let mut trailing_iterator = trailing.iter();
let next = trailing_iterator.next();
*self = PartsIterator::Slice(trailing_iterator);
next
}
},
}
}
/// Returns the exact number of remaining parts as both the lower and the upper bound.
fn size_hint(&self) -> (usize, Option<usize>) {
match self {
PartsIterator::Slice(slice) => slice.size_hint(),
PartsIterator::Leading {
leading,
dangling,
trailing,
} => {
let len = leading.len() + dangling.len() + trailing.len();
(len, Some(len))
}
PartsIterator::Dangling { dangling, trailing } => {
let len = dangling.len() + trailing.len();
(len, Some(len))
}
}
}
/// Returns the last part without walking over the parts of the groups coming before it.
///
/// Tests the groups in reverse order (trailing, dangling, leading) and returns the last part
/// of the first non-empty group.
fn last(self) -> Option<Self::Item>
where
Self: Sized,
{
match self {
PartsIterator::Slice(slice) => slice.last(),
PartsIterator::Leading {
leading,
dangling,
trailing,
} => trailing
.last()
.or_else(|| dangling.last())
.or_else(|| leading.last()),
PartsIterator::Dangling { dangling, trailing } => {
trailing.last().or_else(|| dangling.last())
}
}
}
}
// `PartsIterator::size_hint` returns the exact number of remaining parts for every variant.
impl<V> ExactSizeIterator for PartsIterator<'_, V> {}
// Every variant ends up delegating to a slice iterator, which keeps returning `None` once exhausted.
impl<V> FusedIterator for PartsIterator<'_, V> {}
/// Describes where the parts of a key are stored.
#[derive(Debug)]
enum Entry {
/// The parts are stored contiguously in the [CommentsMap::parts] vector.
InOrder(InOrderEntry),
/// The parts are stored in three dedicated vectors inside of [CommentsMap::out_of_order].
OutOfOrder(OutOfOrderEntry),
}
/// Helper for the [Debug] implementation of [CommentsMap] that formats the parts of a single entry.
struct DebugEntry<'a, K, V> {
/// The entry to format.
entry: &'a Entry,
/// The map that stores the parts of the entry.
map: &'a CommentsMap<K, V>,
}
impl<K, V> Debug for DebugEntry<'_, K, V>
where
    K: Debug,
    V: Debug,
{
    /// Formats the entry as a list of its leading, dangling, and trailing parts (in that order),
    /// each wrapped in the matching [DebugValue] variant.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        let map = self.map;

        let (leading, dangling, trailing) = match self.entry {
            Entry::OutOfOrder(entry) => (
                map.out_of_order[entry.leading_index()].as_slice(),
                map.out_of_order[entry.dangling_index()].as_slice(),
                map.out_of_order[entry.trailing_index()].as_slice(),
            ),
            Entry::InOrder(entry) => (
                &map.parts[entry.leading_range()],
                &map.parts[entry.dangling_range()],
                &map.parts[entry.trailing_range()],
            ),
        };

        f.debug_list()
            .entries(leading.iter().map(DebugValue::Leading))
            .entries(dangling.iter().map(DebugValue::Dangling))
            .entries(trailing.iter().map(DebugValue::Trailing))
            .finish()
    }
}
/// Wraps a part so that its debug representation tells whether it is a leading, dangling, or
/// trailing part.
enum DebugValue<'a, V> {
    Leading(&'a V),
    Dangling(&'a V),
    Trailing(&'a V),
}

impl<V> Debug for DebugValue<'_, V>
where
    V: Debug,
{
    /// Formats the value as a tuple struct named after the variant, e.g. `Leading(1)`.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        let (label, value) = match self {
            DebugValue::Leading(value) => ("Leading", value),
            DebugValue::Dangling(value) => ("Dangling", value),
            DebugValue::Trailing(value) => ("Trailing", value),
        };

        f.debug_tuple(label).field(value).finish()
    }
}
/// Entry of a key whose parts are stored contiguously in the [CommentsMap::parts] vector, in
/// leading, dangling, trailing order.
#[derive(Debug)]
struct InOrderEntry {
/// Index into the [CommentsMap::parts] vector where the leading parts of this entry start
leading_start: PartIndex,
/// Index into the [CommentsMap::parts] vector where the dangling parts start (and, thus, the leading parts end).
dangling_start: PartIndex,
/// Index into the [CommentsMap::parts] vector where the trailing parts of this entry start (and, thus, the dangling parts end).
/// `None` as long as the entry only has leading parts.
trailing_start: Option<PartIndex>,
/// Index into the [CommentsMap::parts] vector where the trailing parts of this entry end.
/// `None` as long as the entry has no trailing parts.
trailing_end: Option<PartIndex>,
_count: Count<InOrderEntry>,
}
impl InOrderEntry {
    /// Creates an entry whose leading parts span `range` and that has no other parts.
    fn leading(range: Range<usize>) -> Self {
        Self {
            leading_start: PartIndex::from_len(range.start),
            dangling_start: PartIndex::from_len(range.end),
            trailing_start: None,
            trailing_end: None,
            _count: Count::new(),
        }
    }

    /// Creates an entry whose dangling parts span `range` and that has no other parts.
    fn dangling(range: Range<usize>) -> Self {
        let first = PartIndex::from_len(range.start);

        Self {
            leading_start: first,
            dangling_start: first,
            trailing_start: Some(PartIndex::from_len(range.end)),
            trailing_end: None,
            _count: Count::new(),
        }
    }

    /// Creates an entry whose trailing parts span `range` and that has no other parts.
    fn trailing(range: Range<usize>) -> Self {
        let first = PartIndex::from_len(range.start);

        Self {
            leading_start: first,
            dangling_start: first,
            trailing_start: Some(first),
            trailing_end: Some(PartIndex::from_len(range.end)),
            _count: Count::new(),
        }
    }

    /// Extends the leading range by one part.
    ///
    /// # Panics
    ///
    /// Panics if the entry already has a trailing start.
    fn increment_leading_range(&mut self) {
        assert!(
            self.trailing_start.is_none(),
            "Can't extend the leading range for an in order entry with dangling comments."
        );

        self.dangling_start = self.dangling_start.incremented();
    }

    /// Extends the dangling range by one part.
    ///
    /// # Panics
    ///
    /// Panics if the entry already has a trailing end.
    fn increment_dangling_range(&mut self) {
        assert!(
            self.trailing_end.is_none(),
            "Can't extend the dangling range for an in order entry with trailing comments."
        );

        // The dangling parts end where they start as long as no trailing start is set.
        let dangling_end = self.trailing_start.unwrap_or(self.dangling_start);
        self.trailing_start = Some(dangling_end.incremented());
    }

    /// Extends the trailing range by one part.
    fn increment_trailing_range(&mut self) {
        let trailing_start = match (self.trailing_start, self.trailing_end) {
            // Has dangling or trailing parts already
            (Some(start), _) => start,
            // Has leading parts only: the trailing parts start where the leading parts end
            (None, None) => {
                self.trailing_start = Some(self.dangling_start);
                self.dangling_start
            }
            (None, Some(_)) => {
                unreachable!()
            }
        };

        // An empty trailing range ends where it starts.
        let trailing_end = self.trailing_end.unwrap_or(trailing_start);
        self.trailing_end = Some(trailing_end.incremented());
    }

    /// Returns the range of the leading parts in the [CommentsMap::parts] vector.
    fn leading_range(&self) -> Range<usize> {
        let start = self.leading_start.value();
        let end = self.dangling_start.value();

        start..end
    }

    /// Returns the range of the dangling parts in the [CommentsMap::parts] vector.
    fn dangling_range(&self) -> Range<usize> {
        // Without a trailing start there are no dangling parts and the range is empty.
        let end = self.trailing_start.unwrap_or(self.dangling_start);

        self.dangling_start.value()..end.value()
    }

    /// Returns the range of the trailing parts in the [CommentsMap::parts] vector.
    ///
    /// # Panics
    ///
    /// Panics if the trailing end is set but the trailing start isn't.
    fn trailing_range(&self) -> Range<usize> {
        let start = match self.trailing_start {
            Some(start) => start,
            None if self.trailing_end.is_some() => {
                panic!("Trailing end shouldn't be set if trailing start is none");
            }
            // Leading parts only: the empty trailing range sits right after them
            None => self.dangling_start,
        };

        // Without a trailing end there are no trailing parts and the range is empty.
        let end = self.trailing_end.unwrap_or(start);

        start.value()..end.value()
    }

    /// Returns the range spanning all parts of this entry in the [CommentsMap::parts] vector.
    fn range(&self) -> Range<usize> {
        let end = self
            .trailing_end
            .or(self.trailing_start)
            .unwrap_or(self.dangling_start);

        self.leading_start.value()..end.value()
    }
}
/// Entry of a key whose parts are stored in three consecutive vectors of [CommentsMap::out_of_order].
#[derive(Debug)]
struct OutOfOrderEntry {
/// Index into the [CommentsMap::out_of_order] vector at which offset the leading vec is stored.
leading_index: usize,
_count: Count<OutOfOrderEntry>,
}
impl OutOfOrderEntry {
/// Index of the vector storing the leading parts.
const fn leading_index(&self) -> usize {
self.leading_index
}
/// Index of the vector storing the dangling parts; it directly follows the leading vector.
const fn dangling_index(&self) -> usize {
self.leading_index + 1
}
/// Index of the vector storing the trailing parts; it directly follows the dangling vector.
const fn trailing_index(&self) -> usize {
self.leading_index + 2
}
}
/// Index into the [CommentsMap::parts] vector.
///
/// Stores the index as a [NonZeroU32], starting at 1 instead of 0 so that
/// `size_of::<PartIndex>() == size_of::<Option<PartIndex>>()`.
///
/// This means, that only `u32::MAX - 1` parts can be stored. This should be sufficient for storing comments
/// because: Comments have length of two or more bytes because they consist of a start and end character sequence (`#` + new line, `/*` and `*/`).
/// Thus, a document with length `u32::MAX` can have at most `u32::MAX / 2` comment-parts.
#[repr(transparent)]
#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
struct PartIndex(NonZeroU32);

impl PartIndex {
    /// Creates the index for the zero-based position `value`.
    ///
    /// # Panics
    ///
    /// Panics if `value` is `u32::MAX` or larger. Larger values are rejected explicitly instead
    /// of being truncated to 32 bits, which would silently yield an index pointing to an
    /// unrelated part.
    fn from_len(value: usize) -> Self {
        let stored = u32::try_from(value)
            .ok()
            // Shift by one because the index is stored one-based.
            .and_then(|index| index.checked_add(1))
            .and_then(NonZeroU32::new)
            .expect("The comments map supports at most `u32::MAX - 1` parts");

        Self(stored)
    }

    /// Returns the zero-based position represented by this index.
    fn value(&self) -> usize {
        (self.0.get() - 1) as usize
    }

    /// Moves the index to the next position.
    ///
    /// # Panics
    ///
    /// Panics if the next position can't be represented.
    fn increment(&mut self) {
        *self = self.incremented();
    }

    /// Returns the index of the next position.
    ///
    /// # Panics
    ///
    /// Panics if the next position can't be represented.
    fn incremented(&self) -> PartIndex {
        let next = self
            .0
            .get()
            .checked_add(1)
            .and_then(NonZeroU32::new)
            .expect("The comments map supports at most `u32::MAX - 1` parts");

        PartIndex(next)
    }
}
#[cfg(test)]
mod tests {
use crate::comments::map::CommentsMap;
// Typed empty array to compare against the empty slices returned by the map.
static EMPTY: [i32; 0] = [];
// Parts pushed in leading, dangling, trailing order are all stored in the shared `parts` vector.
#[test]
fn leading_dangling_trailing() {
let mut map = CommentsMap::new();
map.push_leading("a", 1);
map.push_dangling("a", 2);
map.push_dangling("a", 3);
map.push_trailing("a", 4);
assert_eq!(map.parts, vec![1, 2, 3, 4]);
assert_eq!(map.leading(&"a"), &[1]);
assert_eq!(map.dangling(&"a"), &[2, 3]);
assert_eq!(map.trailing(&"a"), &[4]);
assert!(map.has(&"a"));
assert_eq!(
map.parts(&"a").copied().collect::<Vec<_>>(),
vec![1, 2, 3, 4]
);
}
// A key without leading parts stays in order when dangling parts come before trailing parts.
#[test]
fn dangling_trailing() {
let mut map = CommentsMap::new();
map.push_dangling("a", 1);
map.push_dangling("a", 2);
map.push_trailing("a", 3);
assert_eq!(map.parts, vec![1, 2, 3]);
assert_eq!(map.leading(&"a"), &EMPTY);
assert_eq!(map.dangling(&"a"), &[1, 2]);
assert_eq!(map.trailing(&"a"), &[3]);
assert!(map.has(&"a"));
assert_eq!(map.parts(&"a").copied().collect::<Vec<_>>(), vec![1, 2, 3]);
}
// A key that only has trailing parts.
#[test]
fn trailing() {
let mut map = CommentsMap::new();
map.push_trailing("a", 1);
map.push_trailing("a", 2);
assert_eq!(map.parts, vec![1, 2]);
assert_eq!(map.leading(&"a"), &EMPTY);
assert_eq!(map.dangling(&"a"), &EMPTY);
assert_eq!(map.trailing(&"a"), &[1, 2]);
assert!(map.has(&"a"));
assert_eq!(map.parts(&"a").copied().collect::<Vec<_>>(), vec![1, 2]);
}
// Lookups on an empty map return empty slices and iterators.
#[test]
fn empty() {
let map = CommentsMap::<&str, i32>::default();
assert_eq!(map.parts, Vec::<i32>::new());
assert_eq!(map.leading(&"a"), &EMPTY);
assert_eq!(map.dangling(&"a"), &EMPTY);
assert_eq!(map.trailing(&"a"), &EMPTY);
assert!(!map.has(&"a"));
assert_eq!(
map.parts(&"a").copied().collect::<Vec<_>>(),
Vec::<i32>::new()
);
}
// Multiple keys inserted one after the other share the `parts` vector.
#[test]
fn multiple_keys() {
let mut map = CommentsMap::new();
map.push_leading("a", 1);
map.push_dangling("b", 2);
map.push_trailing("c", 3);
map.push_leading("d", 4);
map.push_dangling("d", 5);
map.push_trailing("d", 6);
assert_eq!(map.parts, &[1, 2, 3, 4, 5, 6]);
assert_eq!(map.leading(&"a"), &[1]);
assert_eq!(map.dangling(&"a"), &EMPTY);
assert_eq!(map.trailing(&"a"), &EMPTY);
assert_eq!(map.parts(&"a").copied().collect::<Vec<_>>(), vec![1]);
assert_eq!(map.leading(&"b"), &EMPTY);
assert_eq!(map.dangling(&"b"), &[2]);
assert_eq!(map.trailing(&"b"), &EMPTY);
assert_eq!(map.parts(&"b").copied().collect::<Vec<_>>(), vec![2]);
assert_eq!(map.leading(&"c"), &EMPTY);
assert_eq!(map.dangling(&"c"), &EMPTY);
assert_eq!(map.trailing(&"c"), &[3]);
assert_eq!(map.parts(&"c").copied().collect::<Vec<_>>(), vec![3]);
assert_eq!(map.leading(&"d"), &[4]);
assert_eq!(map.dangling(&"d"), &[5]);
assert_eq!(map.trailing(&"d"), &[6]);
assert_eq!(map.parts(&"d").copied().collect::<Vec<_>>(), vec![4, 5, 6]);
}
// Pushing a leading part after a dangling part is out of order.
#[test]
fn dangling_leading() {
let mut map = CommentsMap::new();
map.push_dangling("a", 1);
map.push_leading("a", 2);
map.push_dangling("a", 3);
map.push_trailing("a", 4);
assert_eq!(map.leading(&"a"), [2]);
assert_eq!(map.dangling(&"a"), [1, 3]);
assert_eq!(map.trailing(&"a"), [4]);
assert_eq!(
map.parts(&"a").copied().collect::<Vec<_>>(),
vec![2, 1, 3, 4]
);
assert!(map.has(&"a"));
}
// Pushing a leading part after a trailing part is out of order.
#[test]
fn trailing_leading() {
let mut map = CommentsMap::new();
map.push_trailing("a", 1);
map.push_leading("a", 2);
map.push_dangling("a", 3);
map.push_trailing("a", 4);
assert_eq!(map.leading(&"a"), [2]);
assert_eq!(map.dangling(&"a"), [3]);
assert_eq!(map.trailing(&"a"), [1, 4]);
assert_eq!(
map.parts(&"a").copied().collect::<Vec<_>>(),
vec![2, 3, 1, 4]
);
assert!(map.has(&"a"));
}
// Pushing a dangling part after a trailing part is out of order.
#[test]
fn trailing_dangling() {
let mut map = CommentsMap::new();
map.push_trailing("a", 1);
map.push_dangling("a", 2);
map.push_trailing("a", 3);
assert_eq!(map.leading(&"a"), &EMPTY);
assert_eq!(map.dangling(&"a"), &[2]);
assert_eq!(map.trailing(&"a"), &[1, 3]);
assert_eq!(map.parts(&"a").copied().collect::<Vec<_>>(), vec![2, 1, 3]);
assert!(map.has(&"a"));
}
// Parts of the same key that are interleaved with parts of other keys are out of order.
#[test]
fn keys_out_of_order() {
let mut map = CommentsMap::new();
map.push_leading("a", 1);
map.push_dangling("b", 2);
map.push_leading("a", 3);
map.push_trailing("c", 4);
map.push_dangling("b", 5);
map.push_leading("d", 6);
map.push_trailing("c", 7);
assert_eq!(map.leading(&"a"), &[1, 3]);
assert_eq!(map.dangling(&"b"), &[2, 5]);
assert_eq!(map.trailing(&"c"), &[4, 7]);
assert!(map.has(&"a"));
assert!(map.has(&"b"));
assert!(map.has(&"c"));
}
}

View file

@ -0,0 +1,176 @@
use crate::prelude::TagKind;
use ruff_rowan::{SyntaxError, TextRange};
use std::error::Error;
#[derive(Debug, PartialEq, Eq, Copy, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
/// Errors that can be encountered while formatting.
pub enum FormatError {
/// A node can't be formatted because it is either missing a required child element or
/// a child is present that should not be there (e.g. a trailing comma after a rest element).
SyntaxError,
/// Range formatting failed because the provided range (`input`) was larger
/// than the range of the formatted syntax tree (`tree`).
RangeError { input: TextRange, tree: TextRange },
/// Printing the document failed because it has an invalid structure.
InvalidDocument(InvalidDocumentError),
/// Formatting failed because some content encountered a situation where a layout
/// choice by an enclosing [crate::Format] resulted in a poor layout for a child [crate::Format].
///
/// It's up to an enclosing [crate::Format] to handle the error and pick another layout.
/// This error should not be raised if there's no outer [crate::Format] handling the poor layout error,
/// to avoid failing the formatting of the whole document.
PoorLayout,
}
impl std::fmt::Display for FormatError {
fn fmt(&self, fmt: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
FormatError::SyntaxError => fmt.write_str("syntax error"),
FormatError::RangeError { input, tree } => std::write!(
fmt,
"formatting range {input:?} is larger than syntax tree {tree:?}"
),
FormatError::InvalidDocument(error) => std::write!(fmt, "Invalid document: {error}\n\n This is an internal Rome error. Please report if necessary."),
FormatError::PoorLayout => {
std::write!(fmt, "Poor layout: The formatter wasn't able to pick a good layout for your document. This is an internal Rome error. Please report if necessary.")
}
}
}
}
impl Error for FormatError {}
impl From<SyntaxError> for FormatError {
fn from(error: SyntaxError) -> Self {
FormatError::from(&error)
}
}
impl From<&SyntaxError> for FormatError {
fn from(syntax_error: &SyntaxError) -> Self {
match syntax_error {
SyntaxError::MissingRequiredChild => FormatError::SyntaxError,
}
}
}
impl From<PrintError> for FormatError {
fn from(error: PrintError) -> Self {
FormatError::from(&error)
}
}
impl From<&PrintError> for FormatError {
fn from(error: &PrintError) -> Self {
match error {
PrintError::InvalidDocument(reason) => FormatError::InvalidDocument(*reason),
}
}
}
/// Describes why the structure of a document (its sequence of start/end tags) is invalid.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub enum InvalidDocumentError {
/// The kinds of a start tag and its end tag don't match.
///
/// ```plain
/// StartIndent
/// ...
/// EndGroup
/// ```
StartEndTagMismatch {
start_kind: TagKind,
end_kind: TagKind,
},
/// End tag without a corresponding start tag.
///
/// ```plain
/// Text
/// EndGroup
/// ```
StartTagMissing { kind: TagKind },
/// Expected a specific start tag but instead the document is:
/// * at the end of the document
/// * at another start tag
/// * at an end tag
/// * at an element that is not a tag
ExpectedStart {
expected_start: TagKind,
actual: ActualStart,
},
}
/// What was found in place of the start tag expected by
/// [InvalidDocumentError::ExpectedStart].
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub enum ActualStart {
/// The actual element is not a tag.
Content,
/// The actual element was a start tag of another kind.
Start(TagKind),
/// The actual element is an end tag instead of a start tag.
End(TagKind),
/// Reached the end of the document.
EndOfDocument,
}
impl std::fmt::Display for InvalidDocumentError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
InvalidDocumentError::StartEndTagMismatch {
start_kind,
end_kind,
} => {
std::write!(
f,
"Expected end tag of kind {start_kind:?} but found {end_kind:?}."
)
}
InvalidDocumentError::StartTagMissing { kind } => {
std::write!(f, "End tag of kind {kind:?} without matching start tag.")
}
InvalidDocumentError::ExpectedStart {
expected_start,
actual,
} => {
match actual {
ActualStart::EndOfDocument => {
std::write!(f, "Expected start tag of kind {expected_start:?} but at the end of document.")
}
ActualStart::Start(start) => {
std::write!(f, "Expected start tag of kind {expected_start:?} but found start tag of kind {start:?}.")
}
ActualStart::End(end) => {
std::write!(f, "Expected start tag of kind {expected_start:?} but found end tag of kind {end:?}.")
}
ActualStart::Content => {
std::write!(f, "Expected start tag of kind {expected_start:?} but found non-tag element.")
}
}
}
}
}
}
/// Errors that can be raised while printing a document.
#[derive(Debug, Clone, Eq, PartialEq)]
pub enum PrintError {
    /// The document has an invalid structure; the payload describes the reason.
    InvalidDocument(InvalidDocumentError),
}

impl Error for PrintError {}

impl std::fmt::Display for PrintError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // `PrintError` has a single variant, so the pattern is irrefutable.
        let Self::InvalidDocument(inner) = self;
        f.write_fmt(std::format_args!("Invalid document: {inner}"))
    }
}

View file

@ -0,0 +1,395 @@
pub mod document;
pub mod tag;
use crate::format_element::tag::{LabelId, Tag};
use std::borrow::Cow;
use crate::{TagKind, TextSize};
#[cfg(target_pointer_width = "64")]
use ruff_rowan::static_assert;
use ruff_rowan::SyntaxTokenText;
use std::hash::{Hash, Hasher};
use std::ops::Deref;
use std::rc::Rc;
/// Language agnostic IR for formatting source code.
///
/// Use the helper functions like [crate::builders::space], [crate::builders::soft_line_break] etc. to create elements.
#[derive(Clone, Eq, PartialEq)]
pub enum FormatElement {
/// A space token, see [crate::builders::space] for documentation.
Space,
/// A new line, see [crate::builders::soft_line_break], [crate::builders::hard_line_break], and [crate::builders::soft_line_break_or_space] for documentation.
Line(LineMode),
/// Forces the parent group to print in expanded mode.
ExpandParent,
/// Token constructed by the formatter from a static string.
StaticText { text: &'static str },
/// Token constructed from the input source as a dynamic
/// string with its start position in the input document.
DynamicText {
/// There's no need for the text to be mutable, using `Box<str>` saves 8 bytes over `String`.
text: Box<str>,
/// The start position of the dynamic token in the unformatted source code
source_position: TextSize,
},
/// A token for a text that is taken as is from the source code (input text and formatted representation are identical).
/// Implemented by taking a slice from a `SyntaxToken` to avoid allocating a new string.
SyntaxTokenTextSlice {
/// The start position of the token in the unformatted source code
source_position: TextSize,
/// The token text
slice: SyntaxTokenText,
},
/// Prevents line suffixes from moving past this boundary. Forces the printer to print any pending
/// line suffixes, potentially by inserting a hard line break.
LineSuffixBoundary,
/// An interned format element. Useful when the same content must be emitted multiple times to avoid
/// deep cloning the IR when using the `best_fitting!` macro or `if_group_fits_on_line` and `if_group_breaks`.
Interned(Interned),
/// A list of different variants representing the same content. The printer picks the best fitting content.
/// Line breaks inside of a best fitting don't propagate to parent groups.
BestFitting(BestFitting),
/// A [Tag] that marks the start/end of some content to which some special formatting is applied.
Tag(Tag),
}
impl std::fmt::Debug for FormatElement {
fn fmt(&self, fmt: &mut std::fmt::Formatter) -> std::fmt::Result {
match self {
FormatElement::Space => write!(fmt, "Space"),
FormatElement::Line(mode) => fmt.debug_tuple("Line").field(mode).finish(),
FormatElement::ExpandParent => write!(fmt, "ExpandParent"),
FormatElement::StaticText { text } => {
fmt.debug_tuple("StaticText").field(text).finish()
}
FormatElement::DynamicText { text, .. } => {
fmt.debug_tuple("DynamicText").field(text).finish()
}
FormatElement::SyntaxTokenTextSlice { slice, .. } => fmt
.debug_tuple("SyntaxTokenTextSlice")
.field(slice)
.finish(),
FormatElement::LineSuffixBoundary => write!(fmt, "LineSuffixBoundary"),
FormatElement::BestFitting(best_fitting) => {
fmt.debug_tuple("BestFitting").field(&best_fitting).finish()
}
FormatElement::Interned(interned) => {
fmt.debug_list().entries(interned.deref()).finish()
}
FormatElement::Tag(tag) => fmt.debug_tuple("Tag").field(tag).finish(),
}
}
}
/// The kind of line break represented by a line element.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub enum LineMode {
    /// See [crate::builders::soft_line_break_or_space] for documentation.
    SoftOrSpace,
    /// See [crate::builders::soft_line_break] for documentation.
    Soft,
    /// See [crate::builders::hard_line_break] for documentation.
    Hard,
    /// See [crate::builders::empty_line] for documentation.
    Empty,
}

impl LineMode {
    /// Returns `true` only for [LineMode::Hard].
    pub const fn is_hard(&self) -> bool {
        match self {
            LineMode::Hard => true,
            LineMode::SoftOrSpace | LineMode::Soft | LineMode::Empty => false,
        }
    }
}
/// The mode in which soft line breaks are printed.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub enum PrintMode {
    /// Omits any soft line breaks
    Flat,
    /// Prints soft line breaks as line breaks
    Expanded,
}

impl PrintMode {
    /// Returns `true` for [PrintMode::Flat].
    pub const fn is_flat(&self) -> bool {
        match self {
            PrintMode::Flat => true,
            PrintMode::Expanded => false,
        }
    }

    /// Returns `true` for [PrintMode::Expanded].
    pub const fn is_expanded(&self) -> bool {
        match self {
            PrintMode::Expanded => true,
            PrintMode::Flat => false,
        }
    }
}
#[derive(Clone)]
pub struct Interned(Rc<[FormatElement]>);
impl Interned {
pub(super) fn new(content: Vec<FormatElement>) -> Self {
Self(content.into())
}
}
impl PartialEq for Interned {
fn eq(&self, other: &Interned) -> bool {
Rc::ptr_eq(&self.0, &other.0)
}
}
impl Eq for Interned {}
impl Hash for Interned {
fn hash<H>(&self, hasher: &mut H)
where
H: Hasher,
{
Rc::as_ptr(&self.0).hash(hasher);
}
}
impl std::fmt::Debug for Interned {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
self.0.fmt(f)
}
}
impl Deref for Interned {
type Target = [FormatElement];
fn deref(&self) -> &Self::Target {
self.0.deref()
}
}
/// Unicode `LINE SEPARATOR` character.
const LINE_SEPARATOR: char = '\u{2028}';
/// Unicode `PARAGRAPH SEPARATOR` character.
const PARAGRAPH_SEPARATOR: char = '\u{2029}';
/// The line terminators, other than `\n`, that [normalize_newlines] is commonly asked to replace.
pub const LINE_TERMINATORS: [char; 3] = ['\r', LINE_SEPARATOR, PARAGRAPH_SEPARATOR];

/// Replace the line terminators matching the provided list with "\n"
/// since it's the only line break type supported by the printer.
///
/// A `\r` that is directly followed by `\n` is replaced together with that `\n` by a
/// single "\n". This also holds if `terminators` itself contains `'\n'`.
///
/// Returns [Cow::Borrowed] with the unchanged input if `text` contains none of the
/// `terminators`, and [Cow::Owned] with the normalized text otherwise.
pub fn normalize_newlines<const N: usize>(text: &str, terminators: [char; N]) -> Cow<str> {
    let mut result = String::new();
    let mut last_end = 0;

    for (start, part) in text.match_indices(terminators) {
        // This match is the `\n` of a `\r\n` sequence that was already consumed together
        // with its `\r` (only possible if `terminators` contains `'\n'`). Slicing
        // `text[last_end..start]` would panic because `start < last_end`.
        if start < last_end {
            continue;
        }

        result.push_str(&text[last_end..start]);
        result.push('\n');

        last_end = start + part.len();

        // If the current character is \r and the
        // next is \n, skip over the entire sequence
        if part == "\r" && text[last_end..].starts_with('\n') {
            last_end += 1;
        }
    }

    // Every match pushes at least a "\n". An empty result therefore means that no line
    // terminator was matched: return the input text without allocating a new String.
    if result.is_empty() {
        Cow::Borrowed(text)
    } else {
        result.push_str(&text[last_end..]);
        Cow::Owned(result)
    }
}
impl FormatElement {
    /// Returns `true` if self is a [FormatElement::Tag]
    pub const fn is_tag(&self) -> bool {
        match self {
            FormatElement::Tag(_) => true,
            _ => false,
        }
    }

    /// Returns `true` if self is a [FormatElement::Tag] and [Tag::is_start] is `true`.
    pub const fn is_start_tag(&self) -> bool {
        if let FormatElement::Tag(tag) = self {
            tag.is_start()
        } else {
            false
        }
    }

    /// Returns `true` if self is a [FormatElement::Tag] and [Tag::is_end] is `true`.
    pub const fn is_end_tag(&self) -> bool {
        if let FormatElement::Tag(tag) = self {
            tag.is_end()
        } else {
            false
        }
    }

    /// Returns `true` if self is one of the text variants: [FormatElement::StaticText],
    /// [FormatElement::DynamicText], or [FormatElement::SyntaxTokenTextSlice].
    pub const fn is_text(&self) -> bool {
        match self {
            FormatElement::StaticText { .. }
            | FormatElement::DynamicText { .. }
            | FormatElement::SyntaxTokenTextSlice { .. } => true,
            _ => false,
        }
    }

    /// Returns `true` if self is a [FormatElement::Space]
    pub const fn is_space(&self) -> bool {
        match self {
            FormatElement::Space => true,
            _ => false,
        }
    }
}
impl FormatElements for FormatElement {
    /// A single element breaks if it is an expand-parent marker, a hard or empty line,
    /// a text containing `\n`, the start of a group whose mode isn't flat, or if the
    /// content it wraps (interned content, most flat best fitting variant) breaks.
    fn will_break(&self) -> bool {
        match self {
            FormatElement::ExpandParent => true,

            FormatElement::Line(LineMode::Hard | LineMode::Empty) => true,
            FormatElement::Line(_) => false,

            FormatElement::StaticText { text } => text.contains('\n'),
            FormatElement::DynamicText { text, .. } => text.contains('\n'),
            FormatElement::SyntaxTokenTextSlice { slice, .. } => slice.contains('\n'),

            FormatElement::Tag(Tag::StartGroup(group)) => {
                let mode = group.mode();
                !mode.is_flat()
            }
            FormatElement::Tag(_) => false,

            FormatElement::Interned(interned) => interned.will_break(),

            // Traverse into the most flat version because the content is guaranteed to expand when even
            // the most flat version contains some content that forces a break.
            FormatElement::BestFitting(best_fitting) => {
                let most_flat = best_fitting.most_flat();
                most_flat.will_break()
            }

            FormatElement::Space | FormatElement::LineSuffixBoundary => false,
        }
    }

    /// Only the start tag of a labelled element, or interned content having the label, matches.
    fn has_label(&self, label_id: LabelId) -> bool {
        match self {
            FormatElement::Interned(interned) => interned.deref().has_label(label_id),
            FormatElement::Tag(Tag::StartLabelled(actual)) => *actual == label_id,
            _ => false,
        }
    }

    /// Always `None` for a single element.
    fn start_tag(&self, _: TagKind) -> Option<&Tag> {
        None
    }

    /// Returns the tag if this element is an end tag of the given `kind`.
    fn end_tag(&self, kind: TagKind) -> Option<&Tag> {
        if let FormatElement::Tag(tag) = self {
            if tag.kind() == kind && tag.is_end() {
                return Some(tag);
            }
        }

        None
    }
}
/// Provides the printer with different representations for the same element so that the printer
/// can pick the best fitting variant.
///
/// Best fitting is defined as the variant that takes the most horizontal space but fits on the line.
#[derive(Clone, Eq, PartialEq)]
pub struct BestFitting {
    /// The different variants for this element.
    /// The first element is the one that takes up the most space horizontally (the most flat),
    /// the last element takes up the least space horizontally (but the most vertical space).
    variants: Box<[Box<[FormatElement]>]>,
}

impl BestFitting {
    /// Creates a new best fitting IR with the given variants. The method itself isn't unsafe
    /// but it is marked as such to discourage people from using it because the printer will
    /// panic if the slice doesn't contain at least the least and most expanded variants.
    ///
    /// If you're looking for a way to create a `BestFitting` object, use the
    /// `best_fitting![least_expanded, most_expanded]` macro.
    ///
    /// ## Safety
    /// The slice must contain at least two variants.
    #[doc(hidden)]
    pub unsafe fn from_vec_unchecked(variants: Vec<Box<[FormatElement]>>) -> Self {
        let variant_count = variants.len();
        debug_assert!(
            variant_count >= 2,
            "Requires at least the least expanded and most expanded variants"
        );

        let variants = variants.into_boxed_slice();
        Self { variants }
    }

    /// Returns the most expanded variant, which is the last one.
    pub fn most_expanded(&self) -> &[FormatElement] {
        let last = self.variants.last();
        last.expect("Most contain at least two elements, as guaranteed by the best fitting builder.")
    }

    /// Returns all variants, ordered from the most flat to the most expanded.
    pub fn variants(&self) -> &[Box<[FormatElement]>] {
        let all: &[Box<[FormatElement]>] = &self.variants;
        all
    }

    /// Returns the least expanded variant, which is the first one.
    pub fn most_flat(&self) -> &[FormatElement] {
        let first = self.variants.first();
        first.expect("Most contain at least two elements, as guaranteed by the best fitting builder.")
    }
}

impl std::fmt::Debug for BestFitting {
    /// Prints the variants as a list.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let mut list = f.debug_list();
        list.entries(self.variants.iter());
        list.finish()
    }
}
/// Queries shared by a single [FormatElement] and by slices of format elements.
pub trait FormatElements {
/// Returns true if this [FormatElement] is guaranteed to break across multiple lines by the printer.
/// This is the case if this format element recursively contains a:
/// * [crate::builders::empty_line] or [crate::builders::hard_line_break]
/// * A token containing '\n'
///
/// Use this with caution, this is only a heuristic: the printer may still print the element over
/// multiple lines if this element is part of a group and the group doesn't fit on a single line.
fn will_break(&self) -> bool;
/// Returns true if the element has the given label.
fn has_label(&self, label: LabelId) -> bool;
/// Returns the start tag of `kind` if:
/// * the last element is an end tag of `kind`.
/// * there's a matching start tag in this document (may not be true if this slice is an interned element and the `start` is in the document storing the interned element).
fn start_tag(&self, kind: TagKind) -> Option<&Tag>;
/// Returns the end tag if:
/// * the last element is an end tag of `kind`
fn end_tag(&self, kind: TagKind) -> Option<&Tag>;
}
#[cfg(test)]
mod tests {
    use crate::format_element::{normalize_newlines, LINE_TERMINATORS};

    /// Every `(input, expected)` pair is normalized with the default line terminators.
    #[test]
    fn test_normalize_newlines() {
        let cases = [
            // `\n` is kept as is
            ("a\nb", "a\nb"),
            ("a\n\n\nb", "a\n\n\nb"),
            // `\r` and `\r\n` both become a single `\n`
            ("a\rb", "a\nb"),
            ("a\r\nb", "a\nb"),
            ("a\r\n\r\n\r\nb", "a\n\n\nb"),
            // unicode line and paragraph separators
            ("a\u{2028}b", "a\nb"),
            ("a\u{2029}b", "a\nb"),
        ];

        for (input, expected) in cases {
            assert_eq!(normalize_newlines(input, LINE_TERMINATORS), expected);
        }
    }
}
// Compile-time checks of the sizes of the IR types. They are only evaluated on targets
// with 64-bit pointers.
#[cfg(target_pointer_width = "64")]
static_assert!(std::mem::size_of::<ruff_rowan::TextRange>() == 8usize);
#[cfg(target_pointer_width = "64")]
static_assert!(std::mem::size_of::<crate::format_element::tag::VerbatimKind>() == 8usize);
// The two checks below are additionally restricted to builds without debug assertions
// NOTE(review): presumably because the types store extra data in debug builds; confirm in `tag.rs`.
#[cfg(not(debug_assertions))]
#[cfg(target_pointer_width = "64")]
static_assert!(std::mem::size_of::<crate::format_element::Tag>() == 16usize);
// Increasing the size of FormatElement has serious consequences on runtime performance and memory footprint.
// Is there a more efficient way to encode the data to avoid increasing its size? Can the information
// be recomputed at a later point in time?
// You reduced the size of a format element? Excellent work!
#[cfg(not(debug_assertions))]
#[cfg(target_pointer_width = "64")]
static_assert!(std::mem::size_of::<crate::FormatElement>() == 24usize);

View file

@ -0,0 +1,714 @@
use super::tag::Tag;
use crate::format_element::tag::DedentMode;
use crate::prelude::tag::GroupMode;
use crate::prelude::*;
use crate::printer::LineEnding;
use crate::{format, write};
use crate::{
BufferExtensions, Format, FormatContext, FormatElement, FormatOptions, FormatResult, Formatter,
IndentStyle, LineWidth, PrinterOptions, TransformSourceMap,
};
use ruff_rowan::TextSize;
use rustc_hash::FxHashMap;
use std::collections::HashMap;
use std::ops::Deref;
/// A formatted document: the flat list of [FormatElement]s produced by formatting.
#[derive(Debug, Clone, Eq, PartialEq, Default)]
pub struct Document {
elements: Vec<FormatElement>,
}
impl Document {
/// Sets [`expand`](tag::Group::expand) to [`GroupMode::Propagated`] if the group contains any of:
/// * a group with [`expand`](tag::Group::expand) set to [GroupMode::Propagated] or [GroupMode::Expand].
/// * a non-soft [line break](FormatElement::Line) with mode [LineMode::Hard] or [LineMode::Empty].
/// * a text element ([FormatElement::StaticText], [FormatElement::DynamicText], or
///   [FormatElement::SyntaxTokenTextSlice]) that contains a `\n`.
/// * a [FormatElement::ExpandParent]
///
/// [`BestFitting`] elements act as expand boundaries, meaning that the fact that a
/// [`BestFitting`]'s content expands is not propagated past the [`BestFitting`] element.
///
/// [`BestFitting`]: FormatElement::BestFitting
pub(crate) fn propagate_expand(&mut self) {
// What encloses the element that is currently visited.
#[derive(Debug)]
enum Enclosing<'a> {
Group(&'a tag::Group),
BestFitting,
}
// Marks the innermost enclosing group as expanded. Does nothing if the innermost
// enclosing element is a best fitting element or if there's no enclosing element.
fn expand_parent(enclosing: &[Enclosing]) {
if let Some(Enclosing::Group(group)) = enclosing.last() {
group.propagate_expand();
}
}
// Visits `elements` in order, tracking the open groups / best fitting elements in
// `enclosing`. Returns whether the visited elements expand. `checked_interned` caches
// that result per interned element so that its content is only traversed once.
fn propagate_expands<'a>(
elements: &'a [FormatElement],
enclosing: &mut Vec<Enclosing<'a>>,
checked_interned: &mut FxHashMap<&'a Interned, bool>,
) -> bool {
let mut expands = false;
for element in elements {
let element_expands = match element {
FormatElement::Tag(Tag::StartGroup(group)) => {
enclosing.push(Enclosing::Group(group));
false
}
// A group that isn't flat when it is closed expands its own parent too.
FormatElement::Tag(Tag::EndGroup) => match enclosing.pop() {
Some(Enclosing::Group(group)) => !group.mode().is_flat(),
_ => false,
},
FormatElement::Interned(interned) => match checked_interned.get(interned) {
Some(interned_expands) => *interned_expands,
None => {
let interned_expands =
propagate_expands(interned, enclosing, checked_interned);
checked_interned.insert(interned, interned_expands);
interned_expands
}
},
FormatElement::BestFitting(best_fitting) => {
enclosing.push(Enclosing::BestFitting);
for variant in best_fitting.variants() {
propagate_expands(variant, enclosing, checked_interned);
}
// Best fitting acts as a boundary
// NOTE(review): this also resets `expands` for elements visited before the
// best fitting element in the same slice; confirm that this is intended.
expands = false;
enclosing.pop();
continue;
}
FormatElement::StaticText { text } => text.contains('\n'),
FormatElement::DynamicText { text, .. } => text.contains('\n'),
FormatElement::SyntaxTokenTextSlice { slice, .. } => slice.contains('\n'),
FormatElement::ExpandParent
| FormatElement::Line(LineMode::Hard | LineMode::Empty) => true,
_ => false,
};
if element_expands {
expands = true;
expand_parent(enclosing)
}
}
expands
}
let mut enclosing: Vec<Enclosing> = Vec::new();
let mut interned: FxHashMap<&Interned, bool> = FxHashMap::default();
propagate_expands(self, &mut enclosing, &mut interned);
}
}
impl From<Vec<FormatElement>> for Document {
fn from(elements: Vec<FormatElement>) -> Self {
Self { elements }
}
}
impl Deref for Document {
type Target = [FormatElement];
fn deref(&self) -> &Self::Target {
self.elements.as_slice()
}
}
impl std::fmt::Display for Document {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let formatted = format!(IrFormatContext::default(), [self.elements.as_slice()])
.expect("Formatting not to throw any FormatErrors");
f.write_str(
formatted
.print()
.expect("Expected a valid document")
.as_code(),
)
}
}
/// Context used when formatting the IR of a document for display.
#[derive(Clone, Default, Debug)]
struct IrFormatContext {
    /// The interned elements that have been printed to this point
    printed_interned_elements: HashMap<Interned, usize>,
}

impl FormatContext for IrFormatContext {
    type Options = IrFormatOptions;

    /// The options are a stateless unit struct, so a constant is handed out.
    fn options(&self) -> &Self::Options {
        const OPTIONS: &IrFormatOptions = &IrFormatOptions;
        OPTIONS
    }

    /// The IR formatter has no source map.
    fn source_map(&self) -> Option<&TransformSourceMap> {
        Option::None
    }
}
/// Fixed options used for formatting the IR: two spaces of indentation, a line width
/// of 80 and line feed line endings.
#[derive(Debug, Clone, Default)]
struct IrFormatOptions;

impl FormatOptions for IrFormatOptions {
    fn indent_style(&self) -> IndentStyle {
        let spaces = 2;
        IndentStyle::Space(spaces)
    }

    fn line_width(&self) -> LineWidth {
        let width = 80;
        LineWidth(width)
    }

    fn as_print_options(&self) -> PrinterOptions {
        let line_width = self.line_width();

        PrinterOptions {
            indent_style: IndentStyle::Space(2),
            tab_width: 2,
            line_ending: LineEnding::LineFeed,
            print_width: line_width.into(),
        }
    }
}
/// Formats a slice of format elements as a readable, source-like representation of the IR
/// (used by the `Display` implementation of `Document`).
///
/// The elements are written as a comma separated list enclosed in `[` and `]`. Start tags open
/// a call-like construct (e.g. `group(` or `indent(`) with a nested list, and end tags close it.
/// Mismatching, superfluous or missing end tags are rendered as inline error markers instead
/// of failing.
impl Format<IrFormatContext> for &[FormatElement] {
fn fmt(&self, f: &mut Formatter<IrFormatContext>) -> FormatResult<()> {
use Tag::*;
write!(f, [ContentArrayStart])?;
// Kinds of the start tags that haven't been closed yet.
let mut tag_stack = Vec::new();
// `true` if no separator must be written before the next element because it is
// the first element of a list.
let mut first_element = true;
// `true` while inside of an opened string literal. Consecutive text and space
// elements are written into the same literal.
let mut in_text = false;
// Peekable because the text handling needs to look at the next element.
let mut iter = self.iter().peekable();
while let Some(element) = iter.next() {
if !first_element && !in_text && !element.is_end_tag() {
// Write a separator between every two elements
write!(f, [text(","), soft_line_break_or_space()])?;
}
first_element = false;
match element {
element @ FormatElement::Space
| element @ FormatElement::StaticText { .. }
| element @ FormatElement::DynamicText { .. }
| element @ FormatElement::SyntaxTokenTextSlice { .. } => {
// Open the string literal if this is the first text element of a run.
if !in_text {
write!(f, [text("\"")])?;
}
in_text = true;
match element {
FormatElement::Space => {
write!(f, [text(" ")])?;
}
element if element.is_text() => f.write_element(element.clone())?,
_ => unreachable!(),
}
// Close the string literal if the next element isn't a text or a space.
let is_next_text = iter.peek().map_or(false, |e| e.is_text() || e.is_space());
if !is_next_text {
write!(f, [text("\"")])?;
in_text = false;
}
}
FormatElement::Line(mode) => match mode {
LineMode::SoftOrSpace => {
write!(f, [text("soft_line_break_or_space")])?;
}
LineMode::Soft => {
write!(f, [text("soft_line_break")])?;
}
LineMode::Hard => {
write!(f, [text("hard_line_break")])?;
}
LineMode::Empty => {
write!(f, [text("empty_line")])?;
}
},
FormatElement::ExpandParent => {
write!(f, [text("expand_parent")])?;
}
FormatElement::LineSuffixBoundary => {
write!(f, [text("line_suffix_boundary")])?;
}
FormatElement::BestFitting(best_fitting) => {
// Every variant is written on its own line, indented inside of `best_fitting([...])`.
write!(f, [text("best_fitting([")])?;
f.write_elements([
FormatElement::Tag(StartIndent),
FormatElement::Line(LineMode::Hard),
])?;
for variant in best_fitting.variants() {
write!(f, [variant.deref(), hard_line_break()])?;
}
f.write_elements([
FormatElement::Tag(EndIndent),
FormatElement::Line(LineMode::Hard),
])?;
write!(f, [text("])")])?;
}
FormatElement::Interned(interned) => {
let interned_elements = &mut f.context_mut().printed_interned_elements;
match interned_elements.get(interned).copied() {
// First occurrence: assign the next index and write the content.
None => {
let index = interned_elements.len();
interned_elements.insert(interned.clone(), index);
write!(
f,
[
dynamic_text(
&std::format!("<interned {index}>"),
TextSize::default()
),
space(),
&interned.deref(),
]
)?;
}
// Already written: only write a reference to the index assigned before.
Some(reference) => {
write!(
f,
[dynamic_text(
&std::format!("<ref interned *{reference}>"),
TextSize::default()
)]
)?;
}
}
}
FormatElement::Tag(tag) => {
if tag.is_start() {
// The next element is the first one of the list opened by this tag.
first_element = true;
tag_stack.push(tag.kind());
}
// Handle documents with mismatching start/end or superfluous end tags
else {
match tag_stack.pop() {
None => {
// Only write the end tag without any indent to ensure the output document is valid.
write!(
f,
[
text("<END_TAG_WITHOUT_START<"),
dynamic_text(
&std::format!("{:?}", tag.kind()),
TextSize::default()
),
text(">>"),
]
)?;
first_element = false;
continue;
}
Some(start_kind) if start_kind != tag.kind() => {
// Close the list opened by the (mismatching) start tag and report both kinds.
write!(
f,
[
ContentArrayEnd,
text(")"),
soft_line_break_or_space(),
text("ERROR<START_END_TAG_MISMATCH<start: "),
dynamic_text(
&std::format!("{start_kind:?}"),
TextSize::default()
),
text(", end: "),
dynamic_text(
&std::format!("{:?}", tag.kind()),
TextSize::default()
),
text(">>")
]
)?;
first_element = false;
continue;
}
_ => {
// all ok
}
}
}
match tag {
StartIndent => {
write!(f, [text("indent(")])?;
}
StartDedent(mode) => {
let label = match mode {
DedentMode::Level => "dedent",
DedentMode::Root => "dedentRoot",
};
write!(f, [text(label), text("(")])?;
}
StartAlign(tag::Align(count)) => {
write!(
f,
[
text("align("),
dynamic_text(&count.to_string(), TextSize::default()),
text(","),
space(),
]
)?;
}
StartLineSuffix => {
write!(f, [text("line_suffix(")])?;
}
StartVerbatim(_) => {
write!(f, [text("verbatim(")])?;
}
StartGroup(group) => {
write!(f, [text("group(")])?;
if let Some(group_id) = group.id() {
write!(
f,
[
dynamic_text(
&std::format!("\"{group_id:?}\""),
TextSize::default()
),
text(","),
space(),
]
)?;
}
match group.mode() {
GroupMode::Flat => {}
GroupMode::Expand => {
write!(f, [text("expand: true,"), space()])?;
}
GroupMode::Propagated => {
write!(f, [text("expand: propagated,"), space()])?;
}
}
}
StartIndentIfGroupBreaks(id) => {
write!(
f,
[
text("indent_if_group_breaks("),
dynamic_text(&std::format!("\"{id:?}\""), TextSize::default()),
text(","),
space(),
]
)?;
}
StartConditionalContent(condition) => {
match condition.mode {
PrintMode::Flat => {
write!(f, [text("if_group_fits_on_line(")])?;
}
PrintMode::Expanded => {
write!(f, [text("if_group_breaks(")])?;
}
}
if let Some(group_id) = condition.group_id {
write!(
f,
[
dynamic_text(
&std::format!("\"{group_id:?}\""),
TextSize::default()
),
text(","),
space(),
]
)?;
}
}
StartLabelled(label_id) => {
write!(
f,
[
text("label("),
dynamic_text(
&std::format!("\"{label_id:?}\""),
TextSize::default()
),
text(","),
space(),
]
)?;
}
StartFill => {
write!(f, [text("fill(")])?;
}
StartEntry => {
// handled after the match for all start tags
}
// An entry is written as a plain list, without a surrounding `name(...)`.
EndEntry => write!(f, [ContentArrayEnd])?,
EndFill
| EndLabelled
| EndConditionalContent
| EndIndentIfGroupBreaks
| EndAlign
| EndIndent
| EndGroup
| EndLineSuffix
| EndDedent
| EndVerbatim => {
write!(f, [ContentArrayEnd, text(")")])?;
}
};
// Every start tag opens a nested list for its content.
if tag.is_start() {
write!(f, [ContentArrayStart])?;
}
}
}
}
// Close the start tags that are still open at the end of the document and report them.
while let Some(top) = tag_stack.pop() {
write!(
f,
[
ContentArrayEnd,
text(")"),
soft_line_break_or_space(),
dynamic_text(
&std::format!("<START_WITHOUT_END<{top:?}>>"),
TextSize::default()
),
]
)?;
}
write!(f, [ContentArrayEnd])
}
}
struct ContentArrayStart;
impl Format<IrFormatContext> for ContentArrayStart {
fn fmt(&self, f: &mut Formatter<IrFormatContext>) -> FormatResult<()> {
use Tag::*;
write!(f, [text("[")])?;
f.write_elements([
FormatElement::Tag(StartGroup(tag::Group::new())),
FormatElement::Tag(StartIndent),
FormatElement::Line(LineMode::Soft),
])
}
}
struct ContentArrayEnd;
impl Format<IrFormatContext> for ContentArrayEnd {
fn fmt(&self, f: &mut Formatter<IrFormatContext>) -> FormatResult<()> {
use Tag::*;
f.write_elements([
FormatElement::Tag(EndIndent),
FormatElement::Line(LineMode::Soft),
FormatElement::Tag(EndGroup),
])?;
write!(f, [text("]")])
}
}
impl FormatElements for [FormatElement] {
    /// Returns `true` if any element outside of a line suffix breaks.
    ///
    /// Content between a `StartLineSuffix` and its `EndLineSuffix` tag is ignored.
    fn will_break(&self) -> bool {
        use Tag::*;
        // Number of line suffixes that are currently open.
        let mut ignore_depth = 0usize;
        for element in self {
            match element {
                // Line suffix
                // Ignore if any of its content breaks
                FormatElement::Tag(StartLineSuffix) => {
                    ignore_depth += 1;
                }
                FormatElement::Tag(EndLineSuffix) => {
                    // An end tag without a start tag in this slice must not underflow the
                    // counter: that panics in debug builds and, by wrapping around in release
                    // builds, would make all following elements be ignored.
                    ignore_depth = ignore_depth.saturating_sub(1);
                }
                FormatElement::Interned(interned) if ignore_depth == 0 => {
                    if interned.will_break() {
                        return true;
                    }
                }
                element if ignore_depth == 0 && element.will_break() => {
                    return true;
                }
                _ => continue,
            }
        }
        debug_assert_eq!(ignore_depth, 0, "Unclosed start container");
        false
    }

    /// Returns `true` if the first element of the slice has the `expected` label.
    /// An empty slice has no label.
    fn has_label(&self, expected: LabelId) -> bool {
        self.first()
            .map_or(false, |element| element.has_label(expected))
    }

    /// Returns the start tag matching the end tag of `kind` at the end of this slice.
    ///
    /// Returns `None` if the slice doesn't end with an end tag of `kind` or if no
    /// matching start tag is found in this slice (including its interned content).
    fn start_tag(&self, kind: TagKind) -> Option<&Tag> {
        // Assert that the document ends at a tag with the specified kind;
        let _ = self.end_tag(kind)?;

        // Walks `slice` backwards. `depth` counts the end tags of `kind` that haven't been
        // matched by a start tag yet; the start tag that matches the outermost end tag
        // (`depth == 1`) is the one searched for.
        fn traverse_slice<'a>(
            slice: &'a [FormatElement],
            kind: TagKind,
            depth: &mut usize,
        ) -> Option<&'a Tag> {
            for element in slice.iter().rev() {
                match element {
                    FormatElement::Tag(tag) if tag.kind() == kind => {
                        if tag.is_start() {
                            if *depth == 0 {
                                // Invalid document
                                return None;
                            } else if *depth == 1 {
                                return Some(tag);
                            } else {
                                *depth -= 1;
                            }
                        } else {
                            *depth += 1;
                        }
                    }
                    FormatElement::Interned(interned) => {
                        match traverse_slice(interned, kind, depth) {
                            Some(start) => {
                                return Some(start);
                            }
                            // Reached end or invalid document
                            None if *depth == 0 => {
                                return None;
                            }
                            _ => {
                                // continue with other elements
                            }
                        }
                    }
                    _ => {}
                }
            }
            None
        }

        let mut depth = 0usize;
        traverse_slice(self, kind, &mut depth)
    }

    /// Returns the end tag if the last element of the slice is an end tag of `kind`.
    fn end_tag(&self, kind: TagKind) -> Option<&Tag> {
        self.last().and_then(|element| element.end_tag(kind))
    }
}
// Tests for the `Display` implementation of `Document`, which renders the IR as text.
#[cfg(test)]
mod tests {
use crate::prelude::*;
use crate::SimpleFormatContext;
use crate::{format, format_args, write};
// A valid document: a group containing a text and a soft block indent.
#[test]
fn display_elements() {
let formatted = format!(
SimpleFormatContext::default(),
[format_with(|f| {
write!(
f,
[group(&format_args![
text("("),
soft_block_indent(&format_args![
text("Some longer content"),
space(),
text("That should ultimately break"),
])
])]
)
})]
)
.unwrap();
let document = formatted.into_document();
// The consecutive text and space elements are expected to be merged into one string literal.
assert_eq!(
&std::format!("{document}"),
r#"[
group([
"(",
indent([
soft_line_break,
"Some longer content That should ultimately break"
]),
soft_line_break
])
]"#
);
}
// An invalid document: mismatching start/end tags, an end tag without a start tag and
// a start tag without an end tag. Displaying it must not fail but render error markers.
#[test]
fn display_invalid_document() {
use Tag::*;
let document = Document::from(vec![
FormatElement::StaticText { text: "[" },
FormatElement::Tag(StartGroup(tag::Group::new())),
FormatElement::Tag(StartIndent),
FormatElement::Line(LineMode::Soft),
FormatElement::StaticText { text: "a" },
// Close group instead of indent
FormatElement::Tag(EndGroup),
FormatElement::Line(LineMode::Soft),
FormatElement::Tag(EndIndent),
FormatElement::StaticText { text: "]" },
// End tag without start
FormatElement::Tag(EndIndent),
// Start tag without an end
FormatElement::Tag(StartIndent),
]);
assert_eq!(
&std::format!("{document}"),
r#"[
"[",
group([
indent([soft_line_break, "a"])
ERROR<START_END_TAG_MISMATCH<start: Indent, end: Group>>,
soft_line_break
])
ERROR<START_END_TAG_MISMATCH<start: Group, end: Indent>>,
"]"<END_TAG_WITHOUT_START<Indent>>,
indent([])
<START_WITHOUT_END<Indent>>
]"#
);
}
}

View file

@ -0,0 +1,287 @@
use crate::format_element::PrintMode;
use crate::{GroupId, TextSize};
#[cfg(debug_assertions)]
use std::any::type_name;
use std::any::TypeId;
use std::cell::Cell;
use std::num::NonZeroU8;
/// A Tag marking the start and end of some content to which some special formatting should be applied.
///
/// Tags always come in pairs of a start and an end tag and the styling defined by this tag
/// will be applied to all elements in between the start/end tags.
///
/// Every `Start*` variant has a matching `End*` variant; [Tag::kind] maps both to the same [TagKind].
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum Tag {
/// Indents the content one level deeper, see [crate::builders::indent] for documentation and examples.
StartIndent,
EndIndent,
/// Variant of [TagKind::Indent] that indents content by a number of spaces. For example, `Align(2)`
/// indents any content following a line break by an additional two spaces.
///
/// Nesting [Aligns](TagKind::Align) has the effect that all except the innermost align are handled as [Indent](TagKind::Indent).
StartAlign(Align),
EndAlign,
/// Reduces the indentation of the specified content either by one level or to the root, depending on the mode.
/// Reverse operation of `Indent` and can be used to *undo* an `Align` for nested content.
StartDedent(DedentMode),
EndDedent,
/// Creates a logical group where its content is either consistently printed:
/// * on a single line: Omitting `LineMode::Soft` line breaks and printing spaces for `LineMode::SoftOrSpace`
/// * on multiple lines: Printing all line breaks
///
/// See [crate::builders::group] for documentation and examples.
StartGroup(Group),
EndGroup,
/// Allows specifying content that gets printed depending on whether the enclosing group
/// is printed on a single line or multiple lines. See [crate::builders::if_group_breaks] for examples.
StartConditionalContent(Condition),
EndConditionalContent,
/// Optimized version of [Tag::StartConditionalContent] for the case where some content
/// should be indented if the specified group breaks.
StartIndentIfGroupBreaks(GroupId),
EndIndentIfGroupBreaks,
/// Concatenates multiple elements together with a given separator printed in either
/// flat or expanded mode to fill the print width. Expects that the content is a list of alternating
/// [element, separator]. See [crate::Formatter::fill].
StartFill,
EndFill,
/// Entry inside of a [Tag::StartFill]
StartEntry,
EndEntry,
/// Delay the printing of its content until the next line break
StartLineSuffix,
EndLineSuffix,
/// A token that tracks tokens/nodes that are printed as verbatim.
StartVerbatim(VerbatimKind),
EndVerbatim,
/// Special semantic element marking the content with a label.
/// This does not directly influence how the content will be printed.
///
/// See [crate::builders::labelled] for documentation.
StartLabelled(LabelId),
EndLabelled,
}
impl Tag {
/// Returns `true` if `self` is any start tag.
pub const fn is_start(&self) -> bool {
matches!(
self,
Tag::StartIndent
| Tag::StartAlign(_)
| Tag::StartDedent(_)
| Tag::StartGroup { .. }
| Tag::StartConditionalContent(_)
| Tag::StartIndentIfGroupBreaks(_)
| Tag::StartFill
| Tag::StartEntry
| Tag::StartLineSuffix
| Tag::StartVerbatim(_)
| Tag::StartLabelled(_)
)
}
/// Returns `true` if `self` is any end tag.
pub const fn is_end(&self) -> bool {
!self.is_start()
}
pub const fn kind(&self) -> TagKind {
use Tag::*;
match self {
StartIndent | EndIndent => TagKind::Indent,
StartAlign(_) | EndAlign => TagKind::Align,
StartDedent(_) | EndDedent => TagKind::Dedent,
StartGroup(_) | EndGroup => TagKind::Group,
StartConditionalContent(_) | EndConditionalContent => TagKind::ConditionalContent,
StartIndentIfGroupBreaks(_) | EndIndentIfGroupBreaks => TagKind::IndentIfGroupBreaks,
StartFill | EndFill => TagKind::Fill,
StartEntry | EndEntry => TagKind::Entry,
StartLineSuffix | EndLineSuffix => TagKind::LineSuffix,
StartVerbatim(_) | EndVerbatim => TagKind::Verbatim,
StartLabelled(_) | EndLabelled => TagKind::Labelled,
}
}
}
/// The kind of a [Tag].
///
/// Each start end tag pair has its own [tag kind](TagKind).
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub enum TagKind {
/// Kind of [Tag::StartIndent] and [Tag::EndIndent].
Indent,
/// Kind of [Tag::StartAlign] and [Tag::EndAlign].
Align,
/// Kind of [Tag::StartDedent] and [Tag::EndDedent].
Dedent,
/// Kind of [Tag::StartGroup] and [Tag::EndGroup].
Group,
/// Kind of [Tag::StartConditionalContent] and [Tag::EndConditionalContent].
ConditionalContent,
/// Kind of [Tag::StartIndentIfGroupBreaks] and [Tag::EndIndentIfGroupBreaks].
IndentIfGroupBreaks,
/// Kind of [Tag::StartFill] and [Tag::EndFill].
Fill,
/// Kind of [Tag::StartEntry] and [Tag::EndEntry].
Entry,
/// Kind of [Tag::StartLineSuffix] and [Tag::EndLineSuffix].
LineSuffix,
/// Kind of [Tag::StartVerbatim] and [Tag::EndVerbatim].
Verbatim,
/// Kind of [Tag::StartLabelled] and [Tag::EndLabelled].
Labelled,
}
/// The mode in which a group is printed.
#[derive(Debug, Copy, Default, Clone, Eq, PartialEq)]
pub enum GroupMode {
    /// The group is printed in flat mode. This is the default mode.
    #[default]
    Flat,
    /// The group is printed in expanded mode.
    Expand,
    /// The expanded mode was propagated to this group from an enclosing group.
    Propagated,
}

impl GroupMode {
    /// Returns `true` only for [GroupMode::Flat].
    pub const fn is_flat(&self) -> bool {
        match self {
            Self::Flat => true,
            Self::Expand | Self::Propagated => false,
        }
    }
}
/// Payload of [Tag::StartGroup]: an optional [GroupId] and the [GroupMode] of the group.
#[derive(Debug, Clone, Eq, PartialEq, Default)]
pub struct Group {
/// Id of the group; `None` unless one was set with `with_id`.
id: Option<GroupId>,
/// Kept in a [Cell] so that `propagate_expand` can change the mode through `&self`.
mode: Cell<GroupMode>,
}
impl Group {
    /// Creates a group without an id that starts out in [GroupMode::Flat].
    pub fn new() -> Self {
        Self {
            mode: Cell::new(GroupMode::Flat),
            id: None,
        }
    }

    /// Builder-style setter that replaces the id of the group.
    pub fn with_id(self, id: Option<GroupId>) -> Self {
        Self { id, ..self }
    }

    /// Builder-style setter that replaces the mode of the group.
    pub fn with_mode(self, mode: GroupMode) -> Self {
        Self {
            mode: Cell::new(mode),
            ..self
        }
    }

    /// Returns the current mode of the group.
    pub fn mode(&self) -> GroupMode {
        self.mode.get()
    }

    /// Switches a group that is still [GroupMode::Flat] to [GroupMode::Propagated].
    /// Groups in any other mode are left untouched.
    pub fn propagate_expand(&self) {
        if let GroupMode::Flat = self.mode.get() {
            self.mode.set(GroupMode::Propagated);
        }
    }

    /// Returns the id of the group, if it has one.
    pub fn id(&self) -> Option<GroupId> {
        self.id
    }
}
/// Payload of [Tag::StartDedent] selecting how far the indentation is reduced.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub enum DedentMode {
/// Reduces the indent by a level (if the current indent is > 0)
Level,
/// Reduces the indent to the root
Root,
}
/// Payload of [Tag::StartConditionalContent]: the print mode the content is tied to and,
/// optionally, the group whose state is checked.
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct Condition {
/// * Flat -> Omitted if the enclosing group is a multiline group, printed for groups fitting on a single line
/// * Multiline -> Omitted if the enclosing group fits on a single line, printed if the group breaks over multiple lines.
pub(crate) mode: PrintMode,
/// The id of the group for which it should check if it breaks or not. The group must appear in the document
/// before the conditional group (but doesn't have to be in the ancestor chain).
pub(crate) group_id: Option<GroupId>,
}
impl Condition {
    /// Creates a condition for the given print mode that is not bound to a group id.
    pub fn new(mode: PrintMode) -> Self {
        Self {
            group_id: None,
            mode,
        }
    }

    /// Builder-style setter that replaces the id of the group to check.
    pub fn with_group_id(self, id: Option<GroupId>) -> Self {
        Self {
            group_id: id,
            ..self
        }
    }

    /// Returns the print mode this condition is tied to.
    pub fn mode(&self) -> PrintMode {
        let Self { mode, .. } = self;
        *mode
    }
}
/// Payload of [Tag::StartAlign] wrapping the non-zero count of the alignment.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Align(pub(crate) NonZeroU8);

impl Align {
    /// Returns the wrapped count.
    pub fn count(&self) -> NonZeroU8 {
        let Self(count) = self;
        *count
    }
}
/// Identifies a label by the [TypeId] of the type passed to [LabelId::of].
#[derive(Eq, PartialEq, Copy, Clone)]
pub struct LabelId {
    id: TypeId,
    /// Name of the labelling type; only stored in builds with debug assertions.
    #[cfg(debug_assertions)]
    label: &'static str,
}

impl std::fmt::Debug for LabelId {
    /// Builds with debug assertions print the name of the labelling type.
    #[cfg(debug_assertions)]
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self { label, .. } = self;
        f.write_str(label)
    }

    /// Builds without debug assertions only store the [TypeId] and print it prefixed with `#`.
    #[cfg(not(debug_assertions))]
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self { id } = self;
        std::write!(f, "#{id:?}")
    }
}

impl LabelId {
    /// Creates the id for the label type `T`.
    pub fn of<T: ?Sized + 'static>() -> Self {
        let id = TypeId::of::<T>();
        Self {
            id,
            #[cfg(debug_assertions)]
            label: type_name::<T>(),
        }
    }
}
/// Payload of [Tag::StartVerbatim] describing the kind of verbatim content.
#[derive(Clone, Copy, Eq, PartialEq, Debug)]
pub enum VerbatimKind {
Bogus,
Suppressed,
Verbatim {
/// the length of the formatted node
length: TextSize,
},
}
impl VerbatimKind {
pub const fn is_bogus(&self) -> bool {
matches!(self, VerbatimKind::Bogus)
}
}

View file

@ -0,0 +1,178 @@
use crate::prelude::*;
use std::cell::RefCell;
use std::marker::PhantomData;
use std::ops::Deref;
use crate::Buffer;
/// Utility trait that allows memoizing the output of a [Format].
/// Useful to avoid re-formatting the same object twice.
pub trait MemoizeFormat<Context> {
/// Returns a formattable object that memoizes the result of `Format` by cloning.
/// Mainly useful if the same sub-tree can appear twice in the formatted output because it's
/// used inside of `if_group_breaks` or `if_group_fits_single_line`.
///
/// ```
/// use std::cell::Cell;
/// use ruff_formatter::{format, write};
/// use ruff_formatter::prelude::*;
/// use ruff_rowan::TextSize;
///
/// struct MyFormat {
/// value: Cell<u64>
/// }
///
/// impl MyFormat {
/// pub fn new() -> Self {
/// Self { value: Cell::new(1) }
/// }
/// }
///
/// impl Format<SimpleFormatContext> for MyFormat {
/// fn fmt(&self, f: &mut Formatter<SimpleFormatContext>) -> FormatResult<()> {
/// let value = self.value.get();
/// self.value.set(value + 1);
///
/// write!(f, [dynamic_text(&std::format!("Formatted {value} times."), TextSize::from(0))])
/// }
/// }
///
/// # fn main() -> FormatResult<()> {
/// let normal = MyFormat::new();
///
/// // Calls `format` every time the object gets formatted
/// assert_eq!(
/// "Formatted 1 times. Formatted 2 times.",
/// format!(SimpleFormatContext::default(), [normal, space(), normal])?.print()?.as_code()
/// );
///
/// // Memoized memoizes the result and calls `format` only once.
/// let memoized = normal.memoized();
/// assert_eq!(
/// "Formatted 3 times. Formatted 3 times.",
/// format![SimpleFormatContext::default(), [memoized, space(), memoized]]?.print()?.as_code()
/// );
/// # Ok(())
/// # }
/// ```
///
fn memoized(self) -> Memoized<Self, Context>
where
Self: Sized + Format<Context>,
{
Memoized::new(self)
}
}
// Blanket implementation: every type implementing `Format<Context>` gets `memoized`.
impl<T, Context> MemoizeFormat<Context> for T where T: Format<Context> {}
/// Memoizes the output of its inner [Format] to avoid re-formatting a potentially expensive object.
#[derive(Debug)]
pub struct Memoized<F, Context> {
/// The wrapped object whose output gets memoized.
inner: F,
/// `None` until the inner object has been formatted for the first time; afterwards it holds
/// the result returned by `Formatter::intern` for the inner object.
memory: RefCell<Option<FormatResult<Option<FormatElement>>>>,
/// Ties the type to its `Context` without storing a value of that type.
options: PhantomData<Context>,
}
impl<F, Context> Memoized<F, Context>
where
    F: Format<Context>,
{
    /// Wraps `inner` with an empty cache; nothing is formatted until the first use.
    fn new(inner: F) -> Self {
        Self {
            memory: RefCell::new(None),
            options: PhantomData,
            inner,
        }
    }

    /// Returns the memoized elements so that they can be inspected before they are written.
    ///
    /// The inner content is formatted on the first call (or the first `fmt`); later calls
    /// reuse the stored result, including a stored error.
    ///
    /// # Example
    ///
    /// Inspect if some memoized content breaks.
    ///
    /// ```rust
    /// use std::cell::Cell;
    /// use ruff_formatter::{format, write};
    /// use ruff_formatter::prelude::*;
    /// use ruff_rowan::TextSize;
    ///
    /// #[derive(Default)]
    /// struct Counter {
    /// value: Cell<u64>
    /// }
    ///
    /// impl Format<SimpleFormatContext> for Counter {
    /// fn fmt(&self, f: &mut Formatter<SimpleFormatContext>) -> FormatResult<()> {
    /// let current = self.value.get();
    ///
    /// write!(f, [
    /// text("Count:"),
    /// space(),
    /// dynamic_text(&std::format!("{current}"), TextSize::default()),
    /// hard_line_break()
    /// ])?;
    ///
    /// self.value.set(current + 1);
    /// Ok(())
    /// }
    /// }
    ///
    /// # fn main() -> FormatResult<()> {
    /// let content = format_with(|f| {
    /// let mut counter = Counter::default().memoized();
    /// let counter_content = counter.inspect(f)?;
    ///
    /// if counter_content.will_break() {
    /// write!(f, [text("Counter:"), block_indent(&counter)])
    /// } else {
    /// write!(f, [text("Counter:"), counter])
    /// }?;
    ///
    /// write!(f, [counter])
    /// });
    ///
    ///
    /// let formatted = format!(SimpleFormatContext::default(), [content])?;
    /// assert_eq!("Counter:\n\tCount: 0\nCount: 0\n", formatted.print()?.as_code());
    /// # Ok(())
    /// # }
    ///
    /// ```
    pub fn inspect(&mut self, f: &mut Formatter<Context>) -> FormatResult<&[FormatElement]> {
        let cached = self
            .memory
            .get_mut()
            .get_or_insert_with(|| f.intern(&self.inner));

        // A stored error is handed out again on every call.
        let element = match cached.as_ref() {
            Ok(element) => element,
            Err(error) => return Err(*error),
        };

        // Interned content exposes its elements; a single element becomes a one-element slice.
        let elements: &[FormatElement] = match element {
            Some(FormatElement::Interned(interned)) => interned.deref(),
            Some(single) => std::slice::from_ref(single),
            None => &[],
        };
        Ok(elements)
    }
}
impl<F, Context> Format<Context> for Memoized<F, Context>
where
    F: Format<Context>,
{
    /// Writes the memoized content, formatting the inner object first if no result is stored yet.
    fn fmt(&self, f: &mut Formatter<Context>) -> FormatResult<()> {
        let mut memory = self.memory.borrow_mut();
        let cached = memory.get_or_insert_with(|| f.intern(&self.inner));

        match cached {
            // The inner object produced no elements: nothing to write.
            Ok(None) => Ok(()),
            // Every use writes a clone of the stored element.
            Ok(Some(element)) => f.write_element(element.clone()),
            Err(error) => Err(*error),
        }
    }
}

View file

@ -0,0 +1,288 @@
use crate::buffer::BufferSnapshot;
use crate::builders::{FillBuilder, JoinBuilder, JoinNodesBuilder, Line};
use crate::prelude::*;
use crate::{
Arguments, Buffer, Comments, CstFormatContext, FormatContext, FormatState, FormatStateSnapshot,
GroupId, VecBuffer,
};
/// Handles the formatting of a CST and stores the context how the CST should be formatted (user preferences).
/// The formatter is passed to the [Format] implementation of every node in the CST so that they
/// can use it to format their children.
pub struct Formatter<'buf, Context> {
/// The buffer that receives the written elements and gives access to the format state.
pub(super) buffer: &'buf mut dyn Buffer<Context = Context>,
}
impl<'buf, Context> Formatter<'buf, Context> {
/// Creates a new formatter that writes to the given buffer.
pub fn new(buffer: &'buf mut (dyn Buffer<Context = Context> + 'buf)) -> Self {
Self { buffer }
}
/// Returns the format options
pub fn options(&self) -> &Context::Options
where
Context: FormatContext,
{
self.context().options()
}
/// Returns the Context specifying how to format the current CST
pub fn context(&self) -> &Context {
self.state().context()
}
/// Returns a mutable reference to the context.
pub fn context_mut(&mut self) -> &mut Context {
self.state_mut().context_mut()
}
/// Creates a new group id that is unique to this document. The passed debug name is used in the
/// [std::fmt::Debug] of the document if this is a debug build.
/// The name is unused for production builds and has no meaning on the equality of two group ids.
pub fn group_id(&self, debug_name: &'static str) -> GroupId {
self.state().group_id(debug_name)
}
/// Joins multiple [Format] together without any separator
///
/// ## Examples
///
/// ```rust
/// use ruff_formatter::format;
/// use ruff_formatter::prelude::*;
///
/// # fn main() -> FormatResult<()> {
/// let formatted = format!(SimpleFormatContext::default(), [format_with(|f| {
/// f.join()
/// .entry(&text("a"))
/// .entry(&space())
/// .entry(&text("+"))
/// .entry(&space())
/// .entry(&text("b"))
/// .finish()
/// })])?;
///
/// assert_eq!(
/// "a + b",
/// formatted.print()?.as_code()
/// );
/// # Ok(())
/// # }
/// ```
pub fn join<'a>(&'a mut self) -> JoinBuilder<'a, 'buf, (), Context> {
JoinBuilder::new(self)
}
/// Joins the objects by placing the specified separator between every two items.
///
/// ## Examples
///
/// Joining different tokens by separating them with a comma and a space.
///
/// ```
/// use ruff_formatter::{format, format_args};
/// use ruff_formatter::prelude::*;
///
/// # fn main() -> FormatResult<()> {
/// let formatted = format!(SimpleFormatContext::default(), [format_with(|f| {
/// f.join_with(&format_args!(text(","), space()))
/// .entry(&text("1"))
/// .entry(&text("2"))
/// .entry(&text("3"))
/// .entry(&text("4"))
/// .finish()
/// })])?;
///
/// assert_eq!(
/// "1, 2, 3, 4",
/// formatted.print()?.as_code()
/// );
/// # Ok(())
/// # }
/// ```
pub fn join_with<'a, Joiner>(
&'a mut self,
joiner: Joiner,
) -> JoinBuilder<'a, 'buf, Joiner, Context>
where
Joiner: Format<Context>,
{
JoinBuilder::with_separator(self, joiner)
}
/// Specialized version of [crate::Formatter::join_with] for joining SyntaxNodes separated by a space, soft
/// line break or empty line depending on the input file.
///
/// This function inspects the input source and separates consecutive elements with either
/// a [crate::builders::soft_line_break_or_space] or [crate::builders::empty_line] depending on how many line breaks were
/// separating the elements in the original file.
pub fn join_nodes_with_soft_line<'a>(
&'a mut self,
) -> JoinNodesBuilder<'a, 'buf, Line, Context> {
JoinNodesBuilder::new(soft_line_break_or_space(), self)
}
/// Specialized version of [crate::Formatter::join_with] for joining SyntaxNodes separated by one or more
/// line breaks depending on the input file.
///
/// This function inspects the input source and separates consecutive elements with either
/// a [crate::builders::hard_line_break] or [crate::builders::empty_line] depending on how many line breaks were separating the
/// elements in the original file.
pub fn join_nodes_with_hardline<'a>(&'a mut self) -> JoinNodesBuilder<'a, 'buf, Line, Context> {
JoinNodesBuilder::new(hard_line_break(), self)
}
/// Concatenates a list of [crate::Format] objects with spaces and line breaks to fit
/// them on as few lines as possible. Each element introduces a conceptual group. The printer
/// first tries to print the item in flat mode but then prints it in expanded mode if it doesn't fit.
///
/// ## Examples
///
/// ```rust
/// use ruff_formatter::prelude::*;
/// use ruff_formatter::{format, format_args};
///
/// # fn main() -> FormatResult<()> {
/// let formatted = format!(SimpleFormatContext::default(), [format_with(|f| {
/// f.fill()
/// .entry(&soft_line_break_or_space(), &text("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"))
/// .entry(&soft_line_break_or_space(), &text("bbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"))
/// .entry(&soft_line_break_or_space(), &text("cccccccccccccccccccccccccccccc"))
/// .entry(&soft_line_break_or_space(), &text("dddddddddddddddddddddddddddddd"))
/// .finish()
/// })])?;
///
/// assert_eq!(
/// "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa bbbbbbbbbbbbbbbbbbbbbbbbbbbbbb\ncccccccccccccccccccccccccccccc dddddddddddddddddddddddddddddd",
/// formatted.print()?.as_code()
/// );
/// # Ok(())
/// # }
/// ```
///
/// ```rust
/// use ruff_formatter::prelude::*;
/// use ruff_formatter::{format, format_args};
///
/// # fn main() -> FormatResult<()> {
/// let entries = vec![
/// text("<b>Important: </b>"),
/// text("Please do not commit memory bugs such as segfaults, buffer overflows, etc. otherwise you "),
/// text("<em>will</em>"),
/// text(" be reprimanded")
/// ];
///
/// let formatted = format!(SimpleFormatContext::default(), [format_with(|f| {
/// f.fill().entries(&soft_line_break(), entries.iter()).finish()
/// })])?;
///
/// assert_eq!(
/// &std::format!("<b>Important: </b>\nPlease do not commit memory bugs such as segfaults, buffer overflows, etc. otherwise you \n<em>will</em> be reprimanded"),
/// formatted.print()?.as_code()
/// );
/// # Ok(())
/// # }
/// ```
pub fn fill<'a>(&'a mut self) -> FillBuilder<'a, 'buf, Context> {
FillBuilder::new(self)
}
/// Formats `content` into an interned element without writing it to the formatter's buffer.
///
/// Returns `Ok(None)` if formatting `content` produced no elements.
pub fn intern(&mut self, content: &dyn Format<Context>) -> FormatResult<Option<FormatElement>> {
// Write into a separate buffer that shares this formatter's state.
let mut buffer = VecBuffer::new(self.state_mut());
crate::write!(&mut buffer, [content])?;
let elements = buffer.into_vec();
Ok(self.intern_vec(elements))
}
/// Turns the given elements into a single element: `None` for an empty vector, the element
/// itself for a vector with one element, and a [FormatElement::Interned] otherwise.
pub fn intern_vec(&mut self, mut elements: Vec<FormatElement>) -> Option<FormatElement> {
match elements.len() {
0 => None,
// Doesn't get cheaper than calling clone, use the element directly
// SAFETY: Safe because of the `len == 1` check in the match arm.
1 => Some(elements.pop().unwrap()),
_ => Some(FormatElement::Interned(Interned::new(elements))),
}
}
}
impl<Context> Formatter<'_, Context>
where
    Context: FormatContext,
{
    /// Captures the current state of both the buffer and the format state so that it can be
    /// restored later with [Formatter::restore_state_snapshot].
    #[inline]
    pub fn state_snapshot(&self) -> FormatterSnapshot {
        let buffer = self.buffer.snapshot();
        let state = self.state().snapshot();

        FormatterSnapshot { buffer, state }
    }

    /// Rolls the formatter back to a snapshot: the format state is restored first,
    /// then the buffer.
    #[inline]
    pub fn restore_state_snapshot(&mut self, snapshot: FormatterSnapshot) {
        let FormatterSnapshot { buffer, state } = snapshot;

        self.state_mut().restore_snapshot(state);
        self.buffer.restore_snapshot(buffer);
    }
}
// Accessors that are only available when the context is a `CstFormatContext`.
impl<Context> Formatter<'_, Context>
where
Context: CstFormatContext,
{
/// Returns the comments from the context.
pub fn comments(&self) -> &Comments<Context::Language> {
self.context().comments()
}
}
// The formatter is itself a `Buffer`: apart from `write_fmt`, every method forwards to the
// wrapped buffer.
impl<Context> Buffer for Formatter<'_, Context> {
type Context = Context;
/// Forwards the element to the wrapped buffer.
#[inline(always)]
fn write_element(&mut self, element: FormatElement) -> FormatResult<()> {
self.buffer.write_element(element)
}
/// Returns the elements of the wrapped buffer.
fn elements(&self) -> &[FormatElement] {
self.buffer.elements()
}
/// Formats the arguments one after the other with this formatter and stops at the first error.
#[inline(always)]
fn write_fmt(&mut self, arguments: Arguments<Self::Context>) -> FormatResult<()> {
for argument in arguments.items() {
argument.format(self)?;
}
Ok(())
}
/// Returns the format state of the wrapped buffer.
fn state(&self) -> &FormatState<Self::Context> {
self.buffer.state()
}
/// Returns the mutable format state of the wrapped buffer.
fn state_mut(&mut self) -> &mut FormatState<Self::Context> {
self.buffer.state_mut()
}
/// Takes a snapshot of the wrapped buffer.
fn snapshot(&self) -> BufferSnapshot {
self.buffer.snapshot()
}
/// Restores the wrapped buffer to the given snapshot.
fn restore_snapshot(&mut self, snapshot: BufferSnapshot) {
self.buffer.restore_snapshot(snapshot)
}
}
/// Snapshot of the formatter state used to handle backtracking if
/// errors are encountered in the formatting process and the formatter
/// has to fallback to printing raw tokens
///
/// In practice this only saves the set of printed tokens in debug
/// mode and compiled to nothing in release mode
pub struct FormatterSnapshot {
/// Snapshot of the formatter's buffer.
buffer: BufferSnapshot,
/// Snapshot of the format state.
state: FormatStateSnapshot,
}

View file

@ -0,0 +1,82 @@
use std::num::NonZeroU32;
use std::sync::atomic::{AtomicU32, Ordering};
/// Group id used in builds with debug assertions. It carries a debug name in addition to
/// the numeric value.
///
/// Equality and hashing only consider the numeric `value`: the `name` exists purely for the
/// debug output and must not influence whether two ids are equal. This keeps comparisons
/// consistent with [ReleaseGroupId], which stores no name at all.
#[derive(Clone, Copy)]
pub struct DebugGroupId {
    value: NonZeroU32,
    name: &'static str,
}

impl DebugGroupId {
    /// Creates an id from its numeric value and the name shown in the debug output.
    // Only reached through the `GroupId` alias, which points to `ReleaseGroupId` when debug
    // assertions are disabled; the attribute silences the resulting dead-code warning.
    #[allow(unused)]
    fn new(value: NonZeroU32, debug_name: &'static str) -> Self {
        Self {
            value,
            name: debug_name,
        }
    }
}

impl PartialEq for DebugGroupId {
    /// Two ids are equal if their numeric values are equal; the debug name is ignored.
    fn eq(&self, other: &Self) -> bool {
        self.value == other.value
    }
}

impl Eq for DebugGroupId {}

impl std::hash::Hash for DebugGroupId {
    /// Hashes the numeric value only so that the hash agrees with `eq`.
    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
        std::hash::Hash::hash(&self.value, state);
    }
}

impl std::fmt::Debug for DebugGroupId {
    /// Prints the id as `#<name>-<value>`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "#{}-{}", self.name, self.value)
    }
}
/// Identifies a group by a non-zero number. This variant stores no debug name.
///
/// Use [crate::Formatter::group_id] to obtain a unique id.
#[repr(transparent)]
#[derive(Clone, Copy, Eq, PartialEq, Hash)]
pub struct ReleaseGroupId {
    value: NonZeroU32,
}

impl ReleaseGroupId {
    /// Creates an id from its numeric value. The debug name is accepted so that the signature
    /// matches the one of `DebugGroupId::new`, but it is not stored.
    #[allow(unused)]
    fn new(value: NonZeroU32, _debug_name: &'static str) -> Self {
        ReleaseGroupId { value }
    }
}
impl From<GroupId> for u32 {
fn from(id: GroupId) -> Self {
id.value.get()
}
}
impl std::fmt::Debug for ReleaseGroupId {
    /// Prints the id as `#<value>`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self { value } = self;
        write!(f, "#{value}")
    }
}
/// Without debug assertions a group id is the plain [ReleaseGroupId].
#[cfg(not(debug_assertions))]
pub type GroupId = ReleaseGroupId;
/// With debug assertions a group id is the [DebugGroupId], which also carries a debug name.
#[cfg(debug_assertions)]
pub type GroupId = DebugGroupId;
/// Builder for group ids. Ids created with the same builder are unique.
pub(super) struct UniqueGroupIdBuilder {
/// The numeric value handed out by the next call to `group_id`.
next_id: AtomicU32,
}
impl UniqueGroupIdBuilder {
    /// Hands out the next group id and attaches the given debug name to it.
    ///
    /// # Panics
    ///
    /// Panics if the counter yields `0`, which only happens after it wrapped around.
    pub fn group_id(&self, debug_name: &'static str) -> GroupId {
        let raw = self.next_id.fetch_add(1, Ordering::Relaxed);

        match NonZeroU32::new(raw) {
            Some(value) => GroupId::new(value, debug_name),
            None => panic!("Group ID counter overflowed"),
        }
    }
}
impl Default for UniqueGroupIdBuilder {
fn default() -> Self {
UniqueGroupIdBuilder {
// Start with 1 because `GroupId` wraps a `NonZeroU32` to reduce memory usage.
next_id: AtomicU32::new(1),
}
}
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,502 @@
/// Constructs the parameters for other formatting macros.
///
/// This macro functions by taking a list of objects implementing [crate::Format]. It canonicalizes the
/// arguments into a single type.
///
/// This macro produces a value of type [crate::Arguments]. This value can be passed to
/// the macros within [crate]. All other formatting macros ([`format!`](crate::format!),
/// [`write!`](crate::write!)) are proxied through this one. This macro avoids heap allocations.
///
/// You can use the [`Arguments`] value that `format_args!` returns in `Format` contexts
/// as seen below.
///
/// ```rust
/// use ruff_formatter::{SimpleFormatContext, format, format_args};
/// use ruff_formatter::prelude::*;
///
/// # fn main() -> FormatResult<()> {
/// let formatted = format!(SimpleFormatContext::default(), [
/// format_args!(text("Hello World"))
/// ])?;
///
/// assert_eq!("Hello World", formatted.print()?.as_code());
/// # Ok(())
/// # }
/// ```
///
/// [`Format`]: crate::Format
/// [`Arguments`]: crate::Arguments
#[macro_export]
macro_rules! format_args {
// Accepts one or more comma separated expressions with an optional trailing comma.
($($value:expr),+ $(,)?) => {
// Every expression is borrowed and wrapped into an `Argument`; the wrapped values are
// passed to `Arguments::new` as a borrowed array.
$crate::Arguments::new(&[
$(
$crate::Argument::new(&$value)
),+
])
}
}
/// Writes formatted data into a buffer.
///
/// This macro accepts a 'buffer' and a list of format arguments. Each argument will be formatted
/// and the result will be passed to the buffer. The writer may be any value with a `write_fmt` method;
/// generally this comes from an implementation of the [crate::Buffer] trait.
///
/// # Examples
///
/// ```rust
/// use ruff_formatter::prelude::*;
/// use ruff_formatter::{Buffer, FormatState, SimpleFormatContext, VecBuffer, write};
///
/// # fn main() -> FormatResult<()> {
/// let mut state = FormatState::new(SimpleFormatContext::default());
/// let mut buffer = VecBuffer::new(&mut state);
/// write!(&mut buffer, [text("Hello"), space()])?;
/// write!(&mut buffer, [text("World")])?;
///
/// assert_eq!(
/// buffer.into_vec(),
/// vec![
/// FormatElement::StaticText { text: "Hello" },
/// FormatElement::Space,
/// FormatElement::StaticText { text: "World" },
/// ]
/// );
/// # Ok(())
/// # }
/// ```
#[macro_export]
macro_rules! write {
// Takes the destination followed by a bracketed, non-empty list of format arguments.
($dst:expr, [$($arg:expr),+ $(,)?]) => {{
// NOTE(review): the result is bound to a local before it is returned; presumably this ends
// the lifetime of the temporaries created by `format_args!` with the `let` statement.
// Confirm before simplifying this to a plain tail expression.
let result = $dst.write_fmt($crate::format_args!($($arg),+));
result
}}
}
/// Writes formatted data into the given buffer and prints all written elements for quick and dirty debugging.
///
/// An example:
///
/// ```rust
/// use ruff_formatter::prelude::*;
/// use ruff_formatter::{FormatState, VecBuffer};
///
/// # fn main() -> FormatResult<()> {
/// let mut state = FormatState::new(SimpleFormatContext::default());
/// let mut buffer = VecBuffer::new(&mut state);
///
/// dbg_write!(buffer, [text("Hello")])?;
/// // ^-- prints: [src/main.rs:7][0] = StaticToken("Hello")
///
/// assert_eq!(buffer.into_vec(), vec![FormatElement::StaticText { text: "Hello" }]);
/// # Ok(())
/// # }
/// ```
///
/// Note that the macro is intended as a debugging tool and therefore you should avoid having
/// uses of it in version control for long periods (other than in tests and similar). Format output
/// from production code is better done with [`write!`](crate::write!).
#[macro_export]
macro_rules! dbg_write {
// Takes the destination followed by a bracketed, non-empty list of format arguments.
($dst:expr, [$($arg:expr),+ $(,)?]) => {{
use $crate::BufferExtensions;
// Index of the element that gets printed next; incremented once per inspected element.
let mut count = 0;
// NOTE(review): `FormatElement` is referenced without a `$crate::` prefix, so the name has
// to be in scope wherever the macro is invoked.
let mut inspect = $dst.inspect(|element: &FormatElement| {
std::eprintln!(
"[{}:{}][{}] = {element:#?}",
std::file!(), std::line!(), count
);
count += 1;
});
// Same binding-then-return shape as in `write!`.
let result = inspect.write_fmt($crate::format_args!($($arg),+));
result
}}
}
/// Creates the Format IR for a value.
///
/// The first argument `format!` receives is the [crate::FormatContext] that specifies how elements must be formatted.
/// Additional parameters passed get formatted by using their [crate::Format] implementation.
///
///
/// ## Examples
///
/// ```
/// use ruff_formatter::prelude::*;
/// use ruff_formatter::format;
///
/// let formatted = format!(SimpleFormatContext::default(), [text("("), text("a"), text(")")]).unwrap();
///
/// assert_eq!(
/// formatted.into_document(),
/// Document::from(vec![
/// FormatElement::StaticText { text: "(" },
/// FormatElement::StaticText { text: "a" },
/// FormatElement::StaticText { text: ")" },
/// ])
/// );
/// ```
#[macro_export]
macro_rules! format {
// Takes the context followed by a bracketed, non-empty list of format arguments.
($context:expr, [$($arg:expr),+ $(,)?]) => {{
// Bundles the arguments with `format_args!` and hands them to `$crate::format`.
($crate::format($context, $crate::format_args!($($arg),+)))
}}
}
/// Provides multiple different alternatives and the printer picks the first one that fits.
/// Use this as last resort because it requires that the printer must try all variants in the worst case.
/// The passed variants must be in the following order:
/// * First: The variant that takes up most space horizontally
/// * Last: The variant that takes up the least space horizontally by splitting the content over multiple lines.
///
/// ## Examples
///
/// ```
/// use ruff_formatter::{Formatted, LineWidth, format, format_args, SimpleFormatOptions};
/// use ruff_formatter::prelude::*;
///
/// # fn main() -> FormatResult<()> {
/// let formatted = format!(
/// SimpleFormatContext::default(),
/// [
/// text("aVeryLongIdentifier"),
/// best_fitting!(
/// // Everything fits on a single line
/// format_args!(
/// text("("),
/// group(&format_args![
/// text("["),
/// soft_block_indent(&format_args![
/// text("1,"),
/// soft_line_break_or_space(),
/// text("2,"),
/// soft_line_break_or_space(),
/// text("3"),
/// ]),
/// text("]")
/// ]),
/// text(")")
/// ),
///
/// // Breaks after `[`, but prints all elements on a single line
/// format_args!(
/// text("("),
/// text("["),
/// block_indent(&text("1, 2, 3")),
/// text("]"),
/// text(")"),
/// ),
///
/// // Breaks after `[` and prints each element on a single line
/// format_args!(
/// text("("),
/// block_indent(&format_args![
/// text("["),
/// block_indent(&format_args![
/// text("1,"),
/// hard_line_break(),
/// text("2,"),
/// hard_line_break(),
/// text("3"),
/// ]),
/// text("]"),
/// ]),
/// text(")")
/// )
/// )
/// ]
/// )?;
///
/// let document = formatted.into_document();
///
/// // Takes the first variant if everything fits on a single line
/// assert_eq!(
/// "aVeryLongIdentifier([1, 2, 3])",
/// Formatted::new(document.clone(), SimpleFormatContext::default())
/// .print()?
/// .as_code()
/// );
///
/// // It takes the second if the first variant doesn't fit on a single line. The second variant
/// // has some additional line breaks to make sure inner groups don't break
/// assert_eq!(
/// "aVeryLongIdentifier([\n\t1, 2, 3\n])",
/// Formatted::new(document.clone(), SimpleFormatContext::new(SimpleFormatOptions { line_width: 21.try_into().unwrap(), ..SimpleFormatOptions::default() }))
/// .print()?
/// .as_code()
/// );
///
/// // Prints the last option as last resort
/// assert_eq!(
/// "aVeryLongIdentifier(\n\t[\n\t\t1,\n\t\t2,\n\t\t3\n\t]\n)",
/// Formatted::new(document.clone(), SimpleFormatContext::new(SimpleFormatOptions { line_width: 20.try_into().unwrap(), ..SimpleFormatOptions::default() }))
/// .print()?
/// .as_code()
/// );
/// # Ok(())
/// # }
/// ```
///
/// ### Enclosing group with `should_expand: true`
///
/// ```
/// use ruff_formatter::{Formatted, LineWidth, format, format_args, SimpleFormatOptions};
/// use ruff_formatter::prelude::*;
///
/// # fn main() -> FormatResult<()> {
/// let formatted = format!(
/// SimpleFormatContext::default(),
/// [
/// best_fitting!(
/// // Prints the method call on the line but breaks the array.
/// format_args!(
/// text("expect(a).toMatch("),
/// group(&format_args![
/// text("["),
/// soft_block_indent(&format_args![
/// text("1,"),
/// soft_line_break_or_space(),
/// text("2,"),
/// soft_line_break_or_space(),
/// text("3"),
/// ]),
/// text("]")
/// ]).should_expand(true),
/// text(")")
/// ),
///
/// // Breaks after `(`
/// format_args!(
/// text("expect(a).toMatch("),
/// group(&soft_block_indent(
/// &group(&format_args![
/// text("["),
/// soft_block_indent(&format_args![
/// text("1,"),
/// soft_line_break_or_space(),
/// text("2,"),
/// soft_line_break_or_space(),
/// text("3"),
/// ]),
/// text("]")
/// ]).should_expand(true),
/// )).should_expand(true),
/// text(")")
/// ),
/// )
/// ]
/// )?;
///
/// let document = formatted.into_document();
///
/// assert_eq!(
/// "expect(a).toMatch([\n\t1,\n\t2,\n\t3\n])",
/// Formatted::new(document.clone(), SimpleFormatContext::default())
/// .print()?
/// .as_code()
/// );
///
/// # Ok(())
/// # }
/// ```
///
/// The first variant fits because all its content up to the first line break fit on the line without exceeding
/// the configured print width.
///
/// ## Complexity
/// Be mindful of using this IR element as it has a considerable performance penalty:
/// * There are multiple representations for the same content. This results in increased memory usage
/// and traversal time in the printer.
/// * The worst case complexity is that the printer tries each variant. This can result in quadratic
/// complexity if used in nested structures.
///
/// ## Behavior
/// This IR is similar to Prettier's `conditionalGroup`. The printer measures each variant, except the [`MostExpanded`], in [`Flat`] mode
/// to find the first variant that fits and prints this variant in [`Flat`] mode. If no variant fits, then
/// the printer falls back to printing the [`MostExpanded`] variant in [`Expanded`] mode.
///
/// The definition of *fits* differs to groups in that the printer only tests if it is possible to print
/// the content up to the first non-soft line break without exceeding the configured print width.
/// This definition differs from groups as that non-soft line breaks make group expand.
///
/// [crate::BestFitting] acts as a "break" boundary, meaning that it is considered to fit
///
///
/// [`Flat`]: crate::format_element::PrintMode::Flat
/// [`Expanded`]: crate::format_element::PrintMode::Expanded
/// [`MostExpanded`]: crate::format_element::BestFitting::most_expanded
#[macro_export]
macro_rules! best_fitting {
// Requires at least two variants: `$least_expanded` plus one or more `$tail` expressions.
($least_expanded:expr, $($tail:expr),+ $(,)?) => {{
// SAFETY: NOTE(review) - presumably `from_arguments_unchecked` requires at least two
// variants, which the macro pattern above enforces; confirm against the `# Safety`
// documentation of that function.
unsafe {
$crate::BestFitting::from_arguments_unchecked($crate::format_args!($least_expanded, $($tail),+))
}
}}
}
// Tests for the `write!` and `best_fitting!` macros.
#[cfg(test)]
mod tests {
use crate::prelude::*;
use crate::{write, FormatState, SimpleFormatOptions, VecBuffer};
/// Test helper whose `fmt` writes the static text `"test"`.
struct TestFormat;
impl Format<()> for TestFormat {
fn fmt(&self, f: &mut Formatter<()>) -> FormatResult<()> {
write!(f, [text("test")])
}
}
// Writing a single `Format` object yields exactly the element that object emits.
#[test]
fn test_single_element() {
let mut state = FormatState::new(());
let mut buffer = VecBuffer::new(&mut state);
write![&mut buffer, [TestFormat]].unwrap();
assert_eq!(
buffer.into_vec(),
vec![FormatElement::StaticText { text: "test" }]
);
}
// Builder elements and a custom `Format` object end up in the buffer in argument order.
#[test]
fn test_multiple_elements() {
let mut state = FormatState::new(());
let mut buffer = VecBuffer::new(&mut state);
write![
&mut buffer,
[text("a"), space(), text("simple"), space(), TestFormat]
]
.unwrap();
assert_eq!(
buffer.into_vec(),
vec![
FormatElement::StaticText { text: "a" },
FormatElement::Space,
FormatElement::StaticText { text: "simple" },
FormatElement::Space,
FormatElement::StaticText { text: "test" }
]
);
}
// Prints the same content once through `best_fitting!` and once as a plain list, both at a
// line width of 30, and expects identical output.
#[test]
fn best_fitting_variants_print_as_lists() {
use crate::prelude::*;
use crate::{format, format_args, Formatted};
// The second variant below should be selected when printing at a width of 30
let formatted_best_fitting = format!(
SimpleFormatContext::default(),
[
text("aVeryLongIdentifier"),
soft_line_break_or_space(),
best_fitting![
format_args![text(
"Something that will not fit on a line with 30 character print width."
)],
format_args![group(&format_args![
text("Start"),
soft_line_break(),
group(&soft_block_indent(&format_args![
text("1,"),
soft_line_break_or_space(),
text("2,"),
soft_line_break_or_space(),
text("3"),
])),
soft_line_break_or_space(),
soft_block_indent(&format_args![
text("1,"),
soft_line_break_or_space(),
text("2,"),
soft_line_break_or_space(),
group(&format_args!(
text("A,"),
soft_line_break_or_space(),
text("B")
)),
soft_line_break_or_space(),
text("3")
]),
soft_line_break_or_space(),
text("End")
])
.should_expand(true)],
format_args!(text("Most"), hard_line_break(), text("Expanded"))
]
]
)
.unwrap();
// This matches the IR above except that the `best_fitting` was replaced with
// the contents of its second variant.
let formatted_normal_list = format!(
SimpleFormatContext::default(),
[
text("aVeryLongIdentifier"),
soft_line_break_or_space(),
format_args![
text("Start"),
soft_line_break(),
&group(&soft_block_indent(&format_args![
text("1,"),
soft_line_break_or_space(),
text("2,"),
soft_line_break_or_space(),
text("3"),
])),
soft_line_break_or_space(),
&soft_block_indent(&format_args![
text("1,"),
soft_line_break_or_space(),
text("2,"),
soft_line_break_or_space(),
group(&format_args!(
text("A,"),
soft_line_break_or_space(),
text("B")
)),
soft_line_break_or_space(),
text("3")
]),
soft_line_break_or_space(),
text("End")
],
]
)
.unwrap();
// Both documents are printed with the same options: line width 30, everything else default.
let best_fitting_code = Formatted::new(
formatted_best_fitting.into_document(),
SimpleFormatContext::new(SimpleFormatOptions {
line_width: 30.try_into().unwrap(),
..SimpleFormatOptions::default()
}),
)
.print()
.expect("Document to be valid")
.as_code()
.to_string();
let normal_list_code = Formatted::new(
formatted_normal_list.into_document(),
SimpleFormatContext::new(SimpleFormatOptions {
line_width: 30.try_into().unwrap(),
..SimpleFormatOptions::default()
}),
)
.print()
.expect("Document to be valid")
.as_code()
.to_string();
// The variant that "fits" will print its contents as if it were a normal list
// outside of a BestFitting element.
assert_eq!(best_fitting_code, normal_list_code);
}
}

View file

@ -0,0 +1,21 @@
pub use crate::builders::*;
pub use crate::format_element::*;
pub use crate::format_extensions::{MemoizeFormat, Memoized};
pub use crate::formatter::Formatter;
pub use crate::printer::PrinterOptions;
pub use crate::trivia::{
format_dangling_comments, format_leading_comments, format_only_if_breaks, format_removed,
format_replaced, format_trailing_comments, format_trimmed_token,
};
pub use crate::diagnostics::FormatError;
pub use crate::format_element::document::Document;
pub use crate::format_element::tag::{LabelId, Tag, TagKind};
pub use crate::verbatim::{
format_bogus_node, format_or_verbatim, format_suppressed_node, format_verbatim_node,
};
pub use crate::{
best_fitting, dbg_write, format, format_args, write, Buffer as _, BufferExtensions, Format,
Format as _, FormatResult, FormatRule, FormatWithRule as _, SimpleFormatContext,
};

View file

@ -0,0 +1,81 @@
use indexmap::IndexSet;
use ruff_rowan::{Direction, Language, SyntaxNode, SyntaxToken, TextSize};
/// Tracks the ranges of the formatted (including replaced or tokens formatted as verbatim) tokens.
///
/// This implementation uses the fact that no two tokens can have an overlapping range to avoid the need for an interval tree.
/// Thus, testing if a token has already been formatted only requires testing if a token starting at the same offset has been formatted.
#[derive(Debug, Clone, Default)]
pub struct PrintedTokens {
/// Key: Start of a token's range
offsets: IndexSet<TextSize>,
/// While `true`, `track_token` returns without recording anything.
disabled: bool,
}
/// State captured by `PrintedTokens::snapshot` and reapplied by `PrintedTokens::restore`.
#[derive(Copy, Clone)]
pub struct PrintedTokensSnapshot {
/// Number of tracked offsets at the time the snapshot was taken.
len: usize,
/// Value of the `disabled` flag at the time the snapshot was taken.
disabled: bool,
}
impl PrintedTokens {
    /// Records `token` as formatted, keyed by the start of its trimmed range.
    ///
    /// Does nothing while tracking is disabled.
    ///
    /// ## Panics
    /// If a token starting at the same offset has been tracked before.
    pub fn track_token<L: Language>(&mut self, token: &SyntaxToken<L>) {
        if !self.disabled {
            let start = token.text_trimmed_range().start();
            let newly_tracked = self.offsets.insert(start);

            if !newly_tracked {
                panic!("You tried to print the token '{token:?}' twice, and this is not valid.");
            }
        }
    }

    /// Turns the token tracking off (`true`) or on (`false`).
    pub(crate) fn set_disabled(&mut self, disabled: bool) {
        self.disabled = disabled;
    }

    /// Returns `true` if token tracking is currently turned off.
    pub(crate) fn is_disabled(&self) -> bool {
        self.disabled
    }

    /// Captures the number of tracked offsets and the `disabled` flag.
    pub(crate) fn snapshot(&self) -> PrintedTokensSnapshot {
        let len = self.offsets.len();
        let disabled = self.disabled;

        PrintedTokensSnapshot { len, disabled }
    }

    /// Drops every offset tracked after `snapshot` was taken and restores its `disabled` flag.
    pub(crate) fn restore(&mut self, snapshot: PrintedTokensSnapshot) {
        self.offsets.truncate(snapshot.len);
        self.disabled = snapshot.disabled;
    }

    /// Checks that the tracked offsets and the tokens below `root` match one to one.
    ///
    /// ## Panics
    /// * If any descendant token of `root` hasn't been tracked.
    /// * If a tracked offset doesn't belong to any descendant token of `root`.
    pub fn assert_all_tracked<L: Language>(&self, root: &SyntaxNode<L>) {
        // Work on a copy so that the check doesn't alter the tracked state.
        let mut remaining = self.offsets.clone();

        for token in root.descendants_tokens(Direction::Next) {
            let start = token.text_trimmed_range().start();

            if remaining.remove(&start) {
                continue;
            }

            panic!("token has not been seen by the formatter: {token:#?}.\
                \nUse `format_replaced` if you want to replace a token from the formatted output.\
                \nUse `format_removed` if you want to remove a token from the formatted output.\n\
                parent: {:#?}", token.parent())
        }

        // Anything left over was tracked but isn't part of `root`; report the first such offset.
        if let Some(offset) = remaining.into_iter().next() {
            panic!("tracked offset {offset:?} doesn't match any token of {root:#?}. Have you passed a token from another tree?");
        }
    }
}

View file

@ -0,0 +1,235 @@
use crate::format_element::tag::TagKind;
use crate::format_element::PrintMode;
use crate::printer::stack::{Stack, StackedStack};
use crate::printer::Indention;
use crate::{IndentStyle, InvalidDocumentError, PrintError, PrintResult};
use std::fmt::Debug;
use std::num::NonZeroU8;
/// Identifies what a [`StackFrame`] was created for.
#[derive(Copy, Clone, Eq, PartialEq, Debug)]
pub(super) enum StackFrameKind {
/// The outermost frame. `CallStack::pop` puts it back instead of removing it.
Root,
/// A frame pushed by `CallStack::push` for a tag of the given kind.
Tag(TagKind),
}
/// A single entry of a [`CallStack`]: what the frame was created for plus the arguments stored with it.
#[derive(Copy, Clone, Eq, PartialEq, Debug)]
pub(super) struct StackFrame {
/// Whether this is the root frame or a frame belonging to a tag.
kind: StackFrameKind,
/// The arguments returned by `CallStack::top` while this frame is the top-most frame.
args: PrintElementArgs,
}
/// Stores arguments passed to `print_element` call, holding the state specific to printing an element.
/// E.g. the `indent` depends on the token the Printer's currently processing. That's why
/// it must be stored outside of the [PrinterState] that stores the state common to all elements.
///
/// The state is passed by value, which is why it's important that it isn't storing any heavy
/// data structures. Such structures should be stored on the [PrinterState] instead.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub(super) struct PrintElementArgs {
/// The indention stored with the element; read through `indention()`.
indent: Indention,
/// The print mode stored with the element; read through `mode()`.
mode: PrintMode,
}
impl PrintElementArgs {
    /// Creates arguments with the passed `indent`; every other field takes its default value.
    pub fn new(indent: Indention) -> Self {
        Self {
            indent,
            ..Default::default()
        }
    }

    /// Returns the stored print mode.
    pub(super) fn mode(&self) -> PrintMode {
        self.mode
    }

    /// Returns the stored indention.
    pub(super) fn indention(&self) -> Indention {
        self.indent
    }

    /// Returns a copy whose indention is the result of `Indention::increment_level`.
    pub fn increment_indent_level(self, indent_style: IndentStyle) -> Self {
        Self {
            indent: self.indent.increment_level(indent_style),
            ..self
        }
    }

    /// Returns a copy whose indention is the result of `Indention::decrement`.
    pub fn decrement_indent(self) -> Self {
        Self {
            indent: self.indent.decrement(),
            ..self
        }
    }

    /// Returns a copy whose indention is `Indention::default()`.
    pub fn reset_indent(self) -> Self {
        Self {
            indent: Indention::default(),
            ..self
        }
    }

    /// Returns a copy whose indention is the result of `Indention::set_align` with `count`.
    pub fn set_indent_align(self, count: NonZeroU8) -> Self {
        Self {
            indent: self.indent.set_align(count),
            ..self
        }
    }

    /// Returns a copy that uses `mode` as its print mode.
    pub fn with_print_mode(self, mode: PrintMode) -> Self {
        Self { mode, ..self }
    }
}

impl Default for PrintElementArgs {
    /// Indention level `0` in expanded print mode.
    fn default() -> Self {
        Self {
            mode: PrintMode::Expanded,
            indent: Indention::Level(0),
        }
    }
}
/// Call stack that stores the [PrintElementArgs].
///
/// New [PrintElementArgs] are pushed onto the stack for every [`start`](Tag::is_start) [`Tag`](FormatElement::Tag)
/// and popped when reaching the corresponding [`end`](Tag::is_end) [`Tag`](FormatElement::Tag).
pub(super) trait CallStack {
    type Stack: Stack<StackFrame> + Debug;

    fn stack(&self) -> &Self::Stack;

    fn stack_mut(&mut self) -> &mut Self::Stack;

    /// Removes the top stack frame and checks that it was pushed for a start tag of `kind`.
    ///
    /// Returns `Ok` with the frame's arguments if the kinds match. Returns `Err` if the frame
    /// belongs to a tag of another kind, if the top frame is the root frame (which is put back),
    /// or if the stack is empty.
    fn pop(&mut self, kind: TagKind) -> PrintResult<PrintElementArgs> {
        let frame = match self.stack_mut().pop() {
            Some(frame) => frame,
            // This should be unreachable but having it for completeness. Happens if the stack is empty.
            None => {
                return Err(PrintError::InvalidDocument(Self::invalid_document_error(
                    kind, None,
                )))
            }
        };

        match frame.kind {
            StackFrameKind::Tag(start_kind) if start_kind == kind => Ok(frame.args),

            // Start / End kind don't match
            StackFrameKind::Tag(start_kind) => Err(PrintError::InvalidDocument(
                Self::invalid_document_error(kind, Some(start_kind)),
            )),

            // Tried to pop the outer most stack frame, which is not valid
            StackFrameKind::Root => {
                // Put it back in to guarantee that the stack is never empty
                self.stack_mut().push(frame);

                Err(PrintError::InvalidDocument(Self::invalid_document_error(
                    kind, None,
                )))
            }
        }
    }

    /// Builds the error for an end tag of `end_kind` that met the start tag `start_kind`,
    /// or no start tag at all if `start_kind` is `None`.
    #[cold]
    fn invalid_document_error(
        end_kind: TagKind,
        start_kind: Option<TagKind>,
    ) -> InvalidDocumentError {
        if let Some(start_kind) = start_kind {
            InvalidDocumentError::StartEndTagMismatch {
                start_kind,
                end_kind,
            }
        } else {
            InvalidDocumentError::StartTagMissing { kind: end_kind }
        }
    }

    /// Returns the [PrintElementArgs] for the current stack frame.
    ///
    /// ## Panics
    /// If the stack is empty.
    fn top(&self) -> PrintElementArgs {
        let frame = self
            .stack()
            .top()
            .expect("Expected `stack` to never be empty.");

        frame.args
    }

    /// Returns the [TagKind] of the current stack frame or [None] if this is the root stack frame.
    ///
    /// ## Panics
    /// If the stack is empty.
    fn top_kind(&self) -> Option<TagKind> {
        let frame = self
            .stack()
            .top()
            .expect("Expected `stack` to never be empty.");

        match frame.kind {
            StackFrameKind::Tag(kind) => Some(kind),
            StackFrameKind::Root => None,
        }
    }

    /// Creates a new stack frame for a [FormatElement::Tag] of `kind` with `args` as the call arguments.
    fn push(&mut self, kind: TagKind, args: PrintElementArgs) {
        let frame = StackFrame {
            kind: StackFrameKind::Tag(kind),
            args,
        };

        self.stack_mut().push(frame)
    }
}
/// Call stack used for printing the [FormatElement]s
#[derive(Debug, Clone)]
pub(super) struct PrintCallStack(Vec<StackFrame>);

impl PrintCallStack {
    /// Creates a call stack whose only frame is the root frame carrying `args`.
    pub(super) fn new(args: PrintElementArgs) -> Self {
        let root = StackFrame {
            kind: StackFrameKind::Root,
            args,
        };

        Self(vec![root])
    }
}

impl CallStack for PrintCallStack {
    type Stack = Vec<StackFrame>;

    fn stack(&self) -> &Self::Stack {
        let Self(frames) = self;
        frames
    }

    fn stack_mut(&mut self) -> &mut Self::Stack {
        let Self(frames) = self;
        frames
    }
}
/// Call stack used for measuring if some content fits on the line.
///
/// The stack is a view on top of the [PrintCallStack] because the stack frames are still necessary for printing.
#[must_use]
pub(super) struct FitsCallStack<'print> {
    stack: StackedStack<'print, StackFrame>,
}

impl<'print> FitsCallStack<'print> {
    /// Layers a new stack over the frames of `print`, using `saved` as the storage for
    /// frames pushed while measuring.
    pub(super) fn new(print: &'print PrintCallStack, saved: Vec<StackFrame>) -> Self {
        Self {
            stack: StackedStack::with_vec(&print.0, saved),
        }
    }

    /// Consumes the call stack and hands back the vector holding the frames pushed onto it.
    pub(super) fn finish(self) -> Vec<StackFrame> {
        let Self { stack } = self;
        stack.into_vec()
    }
}

impl<'a> CallStack for FitsCallStack<'a> {
    type Stack = StackedStack<'a, StackFrame>;

    fn stack(&self) -> &Self::Stack {
        &self.stack
    }

    fn stack_mut(&mut self) -> &mut Self::Stack {
        &mut self.stack
    }
}

View file

@ -0,0 +1,42 @@
use crate::printer::call_stack::PrintElementArgs;
use crate::FormatElement;
/// Stores the queued line suffixes.
#[derive(Debug, Default)]
pub(super) struct LineSuffixes<'a> {
    suffixes: Vec<LineSuffixEntry<'a>>,
}

impl<'a> LineSuffixes<'a> {
    /// Queues `elements` as line suffixes, followed by one entry storing their call `args`.
    pub(super) fn extend<I>(&mut self, args: PrintElementArgs, elements: I)
    where
        I: IntoIterator<Item = &'a FormatElement>,
    {
        let entries = elements.into_iter().map(LineSuffixEntry::Suffix);
        self.suffixes.extend(entries);

        // The arguments entry is appended after the suffix elements it belongs to.
        self.suffixes.push(LineSuffixEntry::Args(args));
    }

    /// Removes and yields all queued entries, leaving the queue empty.
    pub(super) fn take_pending<'l>(
        &'l mut self,
    ) -> impl Iterator<Item = LineSuffixEntry<'a>> + DoubleEndedIterator + 'l + ExactSizeIterator
    {
        let pending = &mut self.suffixes;
        pending.drain(..)
    }

    /// Returns `true` if at least one entry is queued and `false` otherwise.
    pub(super) fn has_pending(&self) -> bool {
        let queue_is_empty = self.suffixes.is_empty();
        !queue_is_empty
    }
}
/// A single entry of the [`LineSuffixes`] queue.
#[derive(Debug, Copy, Clone)]
pub(super) enum LineSuffixEntry<'a> {
/// A line suffix to print
Suffix(&'a FormatElement),
/// Potentially changed call arguments that should be used to format any following items.
Args(PrintElementArgs),
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,115 @@
use crate::{FormatOptions, IndentStyle, LineWidth};
/// Options that affect how the [crate::Printer] prints the format tokens
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct PrinterOptions {
/// Width of a single tab character (does it equal 2, 4, ... spaces?)
///
/// `PrinterOptions::default()` sets this to `2`. It is also the value `indent_width` returns
/// when `indent_style` is `IndentStyle::Tab`.
pub tab_width: u8,
/// What's the max width of a line. Defaults to 80
pub print_width: PrintWidth,
/// The type of line ending to apply to the printed input
pub line_ending: LineEnding,
/// Whether the printer should use tabs or spaces to indent code and if spaces, by how many.
pub indent_style: IndentStyle,
}
/// A line width for the printer, stored as a `u32`.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub struct PrintWidth(u32);

impl PrintWidth {
    /// Wraps `width` without any validation.
    pub fn new(width: u32) -> Self {
        PrintWidth(width)
    }
}

impl Default for PrintWidth {
    /// The print width that corresponds to `LineWidth::default()`.
    fn default() -> Self {
        PrintWidth::from(LineWidth::default())
    }
}

impl From<LineWidth> for PrintWidth {
    fn from(width: LineWidth) -> Self {
        let columns = u16::from(width);
        // Widening `u16` to `u32` never loses information.
        PrintWidth(u32::from(columns))
    }
}

impl From<PrintWidth> for usize {
    fn from(width: PrintWidth) -> Self {
        let PrintWidth(columns) = width;
        columns as usize
    }
}
impl<'a, O> From<&'a O> for PrinterOptions
where
O: FormatOptions,
{
fn from(options: &'a O) -> Self {
PrinterOptions::default()
.with_indent(options.indent_style())
.with_print_width(options.line_width().into())
}
}
impl PrinterOptions {
    /// Returns the options with `print_width` replaced by `width`.
    pub fn with_print_width(self, width: PrintWidth) -> Self {
        Self {
            print_width: width,
            ..self
        }
    }

    /// Returns the options with `indent_style` replaced by `style`.
    pub fn with_indent(self, style: IndentStyle) -> Self {
        Self {
            indent_style: style,
            ..self
        }
    }

    /// Returns the configured indent style.
    pub(crate) fn indent_style(&self) -> IndentStyle {
        self.indent_style
    }

    /// Width of an indent in characters.
    ///
    /// This is the space count for `IndentStyle::Space` and `tab_width` for `IndentStyle::Tab`.
    pub(super) const fn indent_width(&self) -> u8 {
        match self.indent_style {
            IndentStyle::Space(count) => count,
            IndentStyle::Tab => self.tab_width,
        }
    }
}
/// The line terminator written by the printer.
#[allow(dead_code)]
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum LineEnding {
    /// Line Feed only (\n), common on Linux and macOS as well as inside git repos
    LineFeed,

    /// Carriage Return + Line Feed characters (\r\n), common on Windows
    CarriageReturnLineFeed,

    /// Carriage Return character only (\r), used very rarely
    CarriageReturn,
}

impl LineEnding {
    /// Returns the character sequence of this line ending.
    #[inline]
    pub const fn as_str(&self) -> &'static str {
        match *self {
            Self::LineFeed => "\n",
            Self::CarriageReturnLineFeed => "\r\n",
            Self::CarriageReturn => "\r",
        }
    }
}
impl Default for PrinterOptions {
fn default() -> Self {
PrinterOptions {
tab_width: 2,
print_width: PrintWidth::default(),
indent_style: Default::default(),
line_ending: LineEnding::LineFeed,
}
}
}

View file

@ -0,0 +1,404 @@
use crate::format_element::tag::TagKind;
use crate::prelude::Tag;
use crate::printer::stack::{Stack, StackedStack};
use crate::printer::{invalid_end_tag, invalid_start_tag};
use crate::{FormatElement, PrintResult};
use std::fmt::Debug;
use std::iter::FusedIterator;
use std::marker::PhantomData;
/// Queue of [FormatElement]s.
///
/// The queue is a stack of non-empty slices plus the index of the next element inside the
/// top-most slice.
pub(super) trait Queue<'a> {
    type Stack: Stack<&'a [FormatElement]>;

    fn stack(&self) -> &Self::Stack;

    fn stack_mut(&mut self) -> &mut Self::Stack;

    fn next_index(&self) -> usize;

    fn set_next_index(&mut self, index: usize);

    /// Removes and returns the next element, or `None` if the queue is empty.
    fn pop(&mut self) -> Option<&'a FormatElement> {
        let slice: &'a [FormatElement] = *self.stack().top()?;

        // Indexing can't go out of bounds as long as the queue never stores an empty slice
        // and the index is reset whenever the top slice changes.
        let index = self.next_index();
        let element = &slice[index];
        let following = index + 1;

        if following == slice.len() {
            // That was the last element of the slice: drop the slice and start over at the
            // beginning of the slice below it.
            self.stack_mut().pop().unwrap();
            self.set_next_index(0);
        } else {
            self.set_next_index(following);
        }

        Some(element)
    }

    /// Returns the next element, not traversing into [FormatElement::Interned].
    fn top_with_interned(&self) -> Option<&'a FormatElement> {
        let slice: &'a [FormatElement] = *self.stack().top()?;
        Some(&slice[self.next_index()])
    }

    /// Returns the next element, recursively resolving the first element of [FormatElement::Interned].
    fn top(&self) -> Option<&'a FormatElement> {
        let mut current = self.top_with_interned();

        loop {
            match current {
                Some(FormatElement::Interned(interned)) => current = interned.first(),
                resolved => return resolved,
            }
        }
    }

    /// Queues a single element to process before the other elements in this queue.
    fn push(&mut self, element: &'a FormatElement) {
        let single = std::slice::from_ref(element);
        self.extend_back(single)
    }

    /// Queues a slice of elements to process before the other elements in this queue.
    fn extend_back(&mut self, elements: &'a [FormatElement]) {
        // Don't push empty slices
        if elements.is_empty() {
            return;
        }

        let consumed = self.next_index();
        let stack = self.stack_mut();

        // Replace the current top slice with its not yet consumed remainder so that
        // processing continues there once `elements` have been handled.
        if let Some(current) = stack.pop() {
            stack.push(&current[consumed..])
        }

        stack.push(elements);
        self.set_next_index(0);
    }

    /// Removes top slice.
    fn pop_slice(&mut self) -> Option<&'a [FormatElement]> {
        self.set_next_index(0);
        self.stack_mut().pop()
    }

    /// Skips all content until it finds the corresponding end tag with the given kind.
    fn skip_content(&mut self, kind: TagKind)
    where
        Self: Sized,
    {
        // Exhausting the iterator removes the content from the queue.
        self.iter_content(kind).for_each(drop);
    }

    /// Iterates over all elements until it finds the matching end tag of the specified kind.
    fn iter_content<'q>(&'q mut self, kind: TagKind) -> QueueContentIterator<'a, 'q, Self>
    where
        Self: Sized,
    {
        QueueContentIterator::new(self, kind)
    }
}
/// Queue with the elements to print.
#[derive(Debug, Default, Clone)]
pub(super) struct PrintQueue<'a> {
    slices: Vec<&'a [FormatElement]>,
    next_index: usize,
}

impl<'a> PrintQueue<'a> {
    /// Creates a queue over `slice`. An empty slice results in an empty queue.
    pub(super) fn new(slice: &'a [FormatElement]) -> Self {
        // The queue must never store an empty slice.
        let slices = if slice.is_empty() {
            Vec::default()
        } else {
            vec![slice]
        };

        Self {
            next_index: 0,
            slices,
        }
    }

    /// Returns `true` if no slice is left in the queue.
    pub(super) fn is_empty(&self) -> bool {
        self.slices.is_empty()
    }
}

impl<'a> Queue<'a> for PrintQueue<'a> {
    type Stack = Vec<&'a [FormatElement]>;

    fn stack(&self) -> &Self::Stack {
        &self.slices
    }

    fn stack_mut(&mut self) -> &mut Self::Stack {
        &mut self.slices
    }

    fn next_index(&self) -> usize {
        self.next_index
    }

    fn set_next_index(&mut self, index: usize) {
        self.next_index = index;
    }
}
/// Queue for measuring if an element fits on the line.
///
/// The queue is a view on top of the [PrintQueue] because no elements should be removed
/// from the [PrintQueue] while measuring.
#[must_use]
#[derive(Debug)]
pub(super) struct FitsQueue<'a, 'print> {
    stack: StackedStack<'print, &'a [FormatElement]>,
    next_index: usize,
}

impl<'a, 'print> FitsQueue<'a, 'print> {
    /// Layers a queue over the slices of `print_queue`, continuing at its current index.
    /// `saved` is used as the storage for slices queued while measuring.
    pub(super) fn new(
        print_queue: &'print PrintQueue<'a>,
        saved: Vec<&'a [FormatElement]>,
    ) -> Self {
        Self {
            stack: StackedStack::with_vec(&print_queue.slices, saved),
            next_index: print_queue.next_index,
        }
    }

    /// Consumes the queue and hands back the vector holding the slices queued onto it.
    pub(super) fn finish(self) -> Vec<&'a [FormatElement]> {
        let Self { stack, .. } = self;
        stack.into_vec()
    }
}

impl<'a, 'print> Queue<'a> for FitsQueue<'a, 'print> {
    type Stack = StackedStack<'print, &'a [FormatElement]>;

    fn stack(&self) -> &Self::Stack {
        &self.stack
    }

    fn stack_mut(&mut self) -> &mut Self::Stack {
        &mut self.stack
    }

    fn next_index(&self) -> usize {
        self.next_index
    }

    fn set_next_index(&mut self, index: usize) {
        self.next_index = index;
    }
}
/// Iterator that calls [Queue::pop] until it reaches the end of the document.
///
/// The iterator traverses into the content of any [FormatElement::Interned].
// NOTE(review): `next` below only forwards to `Queue::pop`, which returns interned elements
// as they are. The traversal described above is not visible in this block — confirm.
pub(super) struct QueueIterator<'a, 'q, Q: Queue<'a>> {
/// The queue the elements are popped from.
queue: &'q mut Q,
/// Marker that makes the struct use the `'a` lifetime of the queued elements.
lifetime: PhantomData<&'a ()>,
}
impl<'a, Q> Iterator for QueueIterator<'a, '_, Q>
where
Q: Queue<'a>,
{
type Item = &'a FormatElement;
/// Pops the next element off the underlying queue; `None` once the queue is empty.
fn next(&mut self) -> Option<Self::Item> {
self.queue.pop()
}
}
impl<'a, Q> FusedIterator for QueueIterator<'a, '_, Q> where Q: Queue<'a> {}
/// Iterator over the elements of a queue up to the end tag that closes an already consumed
/// start tag of `kind`.
///
/// Nested start/end tags of the same kind are yielded and tracked with a depth counter.
/// The closing end tag is consumed from the queue but not yielded. The content of
/// [FormatElement::Interned] elements is queued and iterated instead of the interned
/// element itself.
pub(super) struct QueueContentIterator<'a, 'q, Q: Queue<'a>> {
    queue: &'q mut Q,
    kind: TagKind,
    depth: usize,
    lifetime: PhantomData<&'a ()>,
}

impl<'a, 'q, Q> QueueContentIterator<'a, 'q, Q>
where
    Q: Queue<'a>,
{
    /// Starts at depth `1`: one start tag of `kind` is expected to have been consumed already.
    fn new(queue: &'q mut Q, kind: TagKind) -> Self {
        Self {
            lifetime: PhantomData,
            depth: 1,
            kind,
            queue,
        }
    }
}

impl<'a, Q> Iterator for QueueContentIterator<'a, '_, Q>
where
    Q: Queue<'a>,
{
    type Item = &'a FormatElement;

    /// ## Panics
    /// If the queue runs empty before the closing end tag has been found.
    fn next(&mut self) -> Option<Self::Item> {
        // The closing tag has been seen already.
        if self.depth == 0 {
            return None;
        }

        // Unwrap interned elements until reaching an element that isn't interned.
        let element = loop {
            match self.queue.pop() {
                Some(FormatElement::Interned(interned)) => self.queue.extend_back(interned),
                popped => break popped.expect("Missing end signal."),
            }
        };

        if let FormatElement::Tag(tag) = element {
            if tag.kind() == self.kind {
                if tag.is_start() {
                    self.depth += 1;
                } else {
                    self.depth -= 1;

                    // This is the end tag closing the content, it isn't part of the content.
                    if self.depth == 0 {
                        return None;
                    }
                }
            }
        }

        Some(element)
    }
}

impl<'a, Q> FusedIterator for QueueContentIterator<'a, '_, Q> where Q: Queue<'a> {}
/// A predicate determining when to end measuring if some content fits on the line.
///
/// Called for every [`element`](FormatElement) in the [FitsQueue] when measuring if a content
/// fits on the line. The measuring of the content ends after the first element [`element`](FormatElement) for which this
/// predicate returns `true` (similar to a take while iterator except that it takes while the predicate returns `false`).
pub(super) trait FitsEndPredicate {
/// Returns `Ok(true)` if `element` is the last element to measure and `Ok(false)` otherwise.
/// Takes `&mut self` so that implementations can keep state between calls.
fn is_end(&mut self, element: &FormatElement) -> PrintResult<bool>;
}
/// Filter that includes all elements until it reaches the end of the document.
pub(super) struct AllPredicate;
impl FitsEndPredicate for AllPredicate {
/// Never ends the measuring: returns `Ok(false)` for every element.
fn is_end(&mut self, _element: &FormatElement) -> PrintResult<bool> {
Ok(false)
}
}
/// Filter that takes all elements between two matching [Tag::StartEntry] and [Tag::EndEntry] tags.
#[derive(Debug)]
pub(super) enum SingleEntryPredicate {
    /// Still inside (or before) the entry; `depth` counts the currently open entry start tags.
    Entry { depth: usize },
    /// The end tag matching the first start tag has been seen.
    Done,
}

impl SingleEntryPredicate {
    /// Returns `true` once the end of the entry has been reached.
    pub(super) const fn is_done(&self) -> bool {
        match self {
            SingleEntryPredicate::Done => true,
            SingleEntryPredicate::Entry { .. } => false,
        }
    }
}

impl Default for SingleEntryPredicate {
    /// Starts outside of any entry (depth `0`).
    fn default() -> Self {
        Self::Entry { depth: 0 }
    }
}

impl FitsEndPredicate for SingleEntryPredicate {
    /// Returns `Ok(true)` for the end tag that closes the outermost entry and for every element
    /// after it. Returns an error for an end tag without a start tag and for any element other
    /// than an entry tag or an interned element that appears outside of an entry.
    fn is_end(&mut self, element: &FormatElement) -> PrintResult<bool> {
        let depth = match self {
            SingleEntryPredicate::Done => return Ok(true),
            SingleEntryPredicate::Entry { depth } => depth,
        };

        match element {
            FormatElement::Tag(Tag::StartEntry) => {
                *depth += 1;
                Ok(false)
            }
            FormatElement::Tag(Tag::EndEntry) => {
                if *depth == 0 {
                    return invalid_end_tag(TagKind::Entry, None);
                }

                *depth -= 1;

                if *depth == 0 {
                    *self = SingleEntryPredicate::Done;
                    Ok(true)
                } else {
                    Ok(false)
                }
            }
            FormatElement::Interned(_) => Ok(false),
            outside if *depth == 0 => {
                return invalid_start_tag(TagKind::Entry, Some(outside));
            }
            _ => Ok(false),
        }
    }
}
// Tests for `Queue::extend_back` in combination with `Queue::pop` on a `PrintQueue`.
#[cfg(test)]
mod tests {
use crate::format_element::LineMode;
use crate::prelude::Tag;
use crate::printer::queue::{PrintQueue, Queue};
use crate::FormatElement;
// Elements queued with `extend_back` on a partially consumed queue are popped first,
// followed by the not yet consumed rest of the original slice.
#[test]
fn extend_back_pop_last() {
let mut queue =
PrintQueue::new(&[FormatElement::Tag(Tag::StartEntry), FormatElement::Space]);
assert_eq!(queue.pop(), Some(&FormatElement::Tag(Tag::StartEntry)));
queue.extend_back(&[FormatElement::Line(LineMode::SoftOrSpace)]);
assert_eq!(
queue.pop(),
Some(&FormatElement::Line(LineMode::SoftOrSpace))
);
assert_eq!(queue.pop(), Some(&FormatElement::Space));
assert_eq!(queue.pop(), None);
}
// `extend_back` also works after the queue has been drained completely.
#[test]
fn extend_back_empty_queue() {
let mut queue =
PrintQueue::new(&[FormatElement::Tag(Tag::StartEntry), FormatElement::Space]);
assert_eq!(queue.pop(), Some(&FormatElement::Tag(Tag::StartEntry)));
assert_eq!(queue.pop(), Some(&FormatElement::Space));
queue.extend_back(&[FormatElement::Line(LineMode::SoftOrSpace)]);
assert_eq!(
queue.pop(),
Some(&FormatElement::Line(LineMode::SoftOrSpace))
);
assert_eq!(queue.pop(), None);
}
}

View file

@ -0,0 +1,141 @@
/// A school book stack. Allows adding, removing, and inspecting elements at the back.
///
/// Implemented in this file for `Vec<T>` and for `StackedStack`.
pub(super) trait Stack<T> {
/// Removes the last element if any and returns it
fn pop(&mut self) -> Option<T>;
/// Pushes a new element at the back
fn push(&mut self, value: T);
/// Returns the last element if any
fn top(&self) -> Option<&T>;
/// Returns `true` if the stack is empty
fn is_empty(&self) -> bool;
}
/// A `Vec` is a stack whose top is its last element.
///
/// Every method forwards to the inherent `Vec`/slice method, spelled out with a path so
/// that it can't be mistaken for a recursive call.
impl<T> Stack<T> for Vec<T> {
    fn pop(&mut self) -> Option<T> {
        Vec::pop(self)
    }

    fn push(&mut self, value: T) {
        Vec::push(self, value)
    }

    fn top(&self) -> Option<&T> {
        self.as_slice().last()
    }

    fn is_empty(&self) -> bool {
        Vec::is_empty(self)
    }
}
/// A Stack that is stacked on top of another stack. Guarantees that the underlying stack remains unchanged.
#[derive(Debug, Clone)]
pub(super) struct StackedStack<'a, T> {
    /// The content of the original stack.
    original: &'a [T],

    /// Items that have been pushed since the creation of this stack and aren't part of the `original` stack.
    stack: Vec<T>,
}

impl<'a, T> StackedStack<'a, T> {
    /// Creates a new stack on top of `original` without any elements of its own.
    #[cfg(test)]
    pub(super) fn new(original: &'a [T]) -> Self {
        Self::with_vec(original, Vec::new())
    }

    /// Creates a new stack that uses `stack` for storing its elements.
    pub(super) fn with_vec(original: &'a [T], stack: Vec<T>) -> Self {
        Self { stack, original }
    }

    /// Returns the underlying `stack` vector.
    pub(super) fn into_vec(self) -> Vec<T> {
        let Self { stack, .. } = self;
        stack
    }
}

impl<T> Stack<T> for StackedStack<'_, T>
where
    T: Copy,
{
    /// Pops from the own elements first. Once those are used up, returns a copy of the last
    /// element of the original stack and shrinks the view onto the original by one.
    fn pop(&mut self) -> Option<T> {
        if let Some(value) = self.stack.pop() {
            return Some(value);
        }

        let original = self.original;
        let (last, rest) = original.split_last()?;
        self.original = rest;
        Some(*last)
    }

    fn push(&mut self, value: T) {
        self.stack.push(value);
    }

    /// Returns the last own element or, if there is none, the last element of the original view.
    fn top(&self) -> Option<&T> {
        match self.stack.last() {
            Some(value) => Some(value),
            None => self.original.last(),
        }
    }

    fn is_empty(&self) -> bool {
        self.stack.is_empty() && self.original.is_empty()
    }
}
// Tests that a `StackedStack` never modifies the vector it is layered over.
#[cfg(test)]
mod tests {
use crate::printer::stack::{Stack, StackedStack};
// Popping through the pushed element and all original elements leaves `original` untouched.
#[test]
fn restore_consumed_stack() {
let original = vec![1, 2, 3];
let mut restorable = StackedStack::new(&original);
restorable.push(4);
assert_eq!(restorable.pop(), Some(4));
assert_eq!(restorable.pop(), Some(3));
assert_eq!(restorable.pop(), Some(2));
assert_eq!(restorable.pop(), Some(1));
assert_eq!(restorable.pop(), None);
assert_eq!(original, vec![1, 2, 3]);
}
// Pushing after some of the original elements were popped leaves `original` untouched.
#[test]
fn restore_partially_consumed_stack() {
let original = vec![1, 2, 3];
let mut restorable = StackedStack::new(&original);
restorable.push(4);
assert_eq!(restorable.pop(), Some(4));
assert_eq!(restorable.pop(), Some(3));
assert_eq!(restorable.pop(), Some(2));
restorable.push(5);
restorable.push(6);
restorable.push(7);
assert_eq!(original, vec![1, 2, 3]);
}
// Pushed elements are popped in reverse order; `original` stays untouched.
#[test]
fn restore_stack() {
let original = vec![1, 2, 3];
let mut restorable = StackedStack::new(&original);
restorable.push(4);
restorable.push(5);
restorable.push(6);
restorable.push(7);
assert_eq!(restorable.pop(), Some(7));
assert_eq!(restorable.pop(), Some(6));
assert_eq!(restorable.pop(), Some(5));
assert_eq!(original, vec![1, 2, 3]);
}
}

View file

@ -0,0 +1,229 @@
use crate::prelude::*;
use crate::{write, CstFormatContext, GroupId};
use ruff_rowan::{AstNode, AstSeparatedElement, SyntaxResult, SyntaxToken};
/// Rule that creates the formatters for the nodes and the separator tokens of a separated list.
///
/// [`FormatSeparatedElement`] calls `format_node` for the node of an element and
/// `format_separator` for a trailing separator token that exists in the source.
pub trait FormatSeparatedElementRule<N>
where
N: AstNode,
{
/// The format context the returned formatters implement [`Format`] for.
type Context;
/// The formatter returned by `format_node`.
type FormatNode<'a>: Format<Self::Context>
where
N: 'a;
/// The formatter returned by `format_separator`.
type FormatSeparator<'a>: Format<Self::Context>
where
N: 'a;
/// Creates the formatter for `node`.
fn format_node<'a>(&self, node: &'a N) -> Self::FormatNode<'a>;
/// Creates the formatter for the `separator` token.
fn format_separator<'a>(
&self,
separator: &'a SyntaxToken<N::Language>,
) -> Self::FormatSeparator<'a>;
}
/// Formats a single element inside a separated list.
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct FormatSeparatedElement<N, R>
where
N: AstNode,
R: FormatSeparatedElementRule<N>,
{
/// The list element: a node and its optional trailing separator.
element: AstSeparatedElement<N::Language, N>,
/// The rule creating the formatters for the node and the separator.
rule: R,
/// `true` if the iterator that created this element had no further element.
is_last: bool,
/// The separator to write if the element has no separator yet.
separator: &'static str,
/// Options controlling the trailing separator, the group id, and the grouping of nodes.
options: FormatSeparatedOptions,
}
impl<N, R> FormatSeparatedElement<N, R>
where
N: AstNode,
R: FormatSeparatedElementRule<N>,
{
/// Returns the node belonging to the element.
///
/// Forwards the result of the underlying element's `node()`.
pub fn node(&self) -> SyntaxResult<&N> {
self.element.node()
}
}
impl<N, R, C> Format<C> for FormatSeparatedElement<N, R>
where
    N: AstNode,
    N::Language: 'static,
    R: FormatSeparatedElementRule<N, Context = C>,
    C: CstFormatContext<Language = N::Language>,
{
    /// Writes the node followed by its separator.
    ///
    /// How the separator of the last element is handled depends on the configured
    /// [`TrailingSeparator`]. Returns `FormatError::SyntaxError` if the last element has a
    /// trailing separator although it is [`TrailingSeparator::Disallowed`].
    ///
    /// ## Panics
    /// If an element other than the last one has no separator.
    fn fmt(&self, f: &mut Formatter<C>) -> FormatResult<()> {
        let node = self.element.node()?;
        let separator = self.element.trailing_separator()?;

        let format_node = self.rule.format_node(node);

        if self.options.nodes_grouped {
            group(&format_node).fmt(f)?;
        } else {
            format_node.fmt(f)?;
        }

        // Reuse the existing trailing separator or create it if it wasn't in the
        // input source. Only print the last trailing token if the outer group breaks
        match (separator, self.is_last) {
            (Some(separator), true) => {
                let format_separator = self.rule.format_separator(separator);

                match self.options.trailing_separator {
                    TrailingSeparator::Allowed => {
                        // Use format_only_if_breaks instead of wrapping the result of
                        // format_separator in order to remove only the token itself when the
                        // group doesn't break but still print its associated trivia unconditionally
                        format_only_if_breaks(separator, &format_separator)
                            .with_group_id(self.options.group_id)
                            .fmt(f)?;
                    }
                    TrailingSeparator::Mandatory => {
                        write!(f, [format_separator])?;
                    }
                    TrailingSeparator::Disallowed => {
                        // A trailing separator was present where it wasn't allowed, opt out of formatting
                        return Err(FormatError::SyntaxError);
                    }
                    TrailingSeparator::Omit => {
                        write!(f, [format_removed(separator)])?;
                    }
                }
            }
            (Some(separator), false) => {
                let format_separator = self.rule.format_separator(separator);
                write!(f, [format_separator])?;
            }
            (None, true) => match self.options.trailing_separator {
                TrailingSeparator::Allowed => {
                    write!(
                        f,
                        [if_group_breaks(&text(self.separator))
                            .with_group_id(self.options.group_id)]
                    )?;
                }
                TrailingSeparator::Mandatory => {
                    text(self.separator).fmt(f)?;
                }
                TrailingSeparator::Omit | TrailingSeparator::Disallowed => { /* no op */ }
            },
            (None, false) => {
                unreachable!(
                    "This is a syntax error, separator must be present between every two elements"
                );
            }
        }

        Ok(())
    }
}
/// Iterator for formatting separated elements. Prints the separator between each element and
/// inserts a trailing separator if necessary
pub struct FormatSeparatedIter<I, Node, Rule>
where
Node: AstNode,
{
next: Option<AstSeparatedElement<Node::Language, Node>>,
rule: Rule,
inner: I,
separator: &'static str,
options: FormatSeparatedOptions,
}
impl<I, Node, Rule> FormatSeparatedIter<I, Node, Rule>
where
Node: AstNode,
{
pub fn new(inner: I, separator: &'static str, rule: Rule) -> Self {
Self {
inner,
rule,
separator,
next: None,
options: FormatSeparatedOptions::default(),
}
}
/// Wraps every node inside of a group
pub fn nodes_grouped(mut self) -> Self {
self.options.nodes_grouped = true;
self
}
pub fn with_trailing_separator(mut self, separator: TrailingSeparator) -> Self {
self.options.trailing_separator = separator;
self
}
#[allow(unused)]
pub fn with_group_id(mut self, group_id: Option<GroupId>) -> Self {
self.options.group_id = group_id;
self
}
}
impl<I, Node, Rule> Iterator for FormatSeparatedIter<I, Node, Rule>
where
    Node: AstNode,
    I: Iterator<Item = AstSeparatedElement<Node::Language, Node>>,
    Rule: FormatSeparatedElementRule<Node> + Clone,
{
    type Item = FormatSeparatedElement<Node, Rule>;

    /// Yields the next element together with the information whether it is the last one.
    ///
    /// The iterator always reads one element ahead of what it returns: the look-ahead element
    /// is stored in `self.next` and an element is the last one if no look-ahead element exists.
    fn next(&mut self) -> Option<Self::Item> {
        // Prefer the element buffered by the previous call, fall back to the inner iterator
        // on the very first call.
        let element = self.next.take().or_else(|| self.inner.next())?;

        self.next = self.inner.next();
        let is_last = self.next.is_none();

        Some(FormatSeparatedElement {
            element,
            rule: self.rule.clone(),
            is_last,
            separator: self.separator,
            options: self.options,
        })
    }

    /// Returns the bounds of the inner iterator plus the buffered look-ahead element (if any).
    ///
    /// Without this override the default `(0, None)` hint is used, which makes
    /// `ExactSizeIterator::len` panic because it requires the lower and upper bound to agree.
    fn size_hint(&self) -> (usize, Option<usize>) {
        let buffered = usize::from(self.next.is_some());
        let (lower, upper) = self.inner.size_hint();

        (
            lower.saturating_add(buffered),
            upper.and_then(|upper| upper.checked_add(buffered)),
        )
    }
}
// Marker impl, only available when the wrapped iterator is fused itself: once the look-ahead
// slot is empty, `next()` asks `inner` again, so it can only keep returning `None` if `inner` does.
impl<I, Node, Rule> std::iter::FusedIterator for FormatSeparatedIter<I, Node, Rule>
where
Node: AstNode,
I: Iterator<Item = AstSeparatedElement<Node::Language, Node>> + std::iter::FusedIterator,
Rule: FormatSeparatedElementRule<Node> + Clone,
{
}
// Marker impl, only available when the wrapped iterator knows its exact length: this iterator
// yields exactly one element for every element of `inner`.
impl<I, Node, Rule> std::iter::ExactSizeIterator for FormatSeparatedIter<I, Node, Rule>
where
Node: AstNode,
I: Iterator<Item = AstSeparatedElement<Node::Language, Node>> + ExactSizeIterator,
Rule: FormatSeparatedElementRule<Node> + Clone,
{
}
/// Determines how the separator after the last element of a separated list is handled.
#[derive(Debug, Copy, Clone, Eq, PartialEq, Default)]
pub enum TrailingSeparator {
/// A trailing separator is allowed and preferred
///
/// This is the default.
#[default]
Allowed,
/// A trailing separator is not allowed
Disallowed,
/// A trailing separator is mandatory for the syntax to be correct
Mandatory,
/// A trailing separator might be present, but the consumer
/// decides to remove it
Omit,
}
/// Options shared by all elements yielded by a [FormatSeparatedIter].
#[derive(Debug, Default, Copy, Clone, Eq, PartialEq)]
pub struct FormatSeparatedOptions {
/// How the separator after the last element is handled.
trailing_separator: TrailingSeparator,
/// Optional id of the group that the trailing separator is associated with.
group_id: Option<GroupId>,
/// Whether every node gets wrapped inside of a group.
nodes_grouped: bool,
}

View file

@ -0,0 +1,770 @@
use crate::{Printed, SourceMarker, TextRange};
use ruff_rowan::TextLen;
use ruff_rowan::{Language, SyntaxNode, TextSize};
use rustc_hash::FxHashMap;
use std::cmp::Ordering;
use std::iter::FusedIterator;
/// A source map for mapping positions of a pre-processed tree back to the locations in the source tree.
///
/// This is not a generic purpose source map but instead focused on supporting the case where
/// a language removes or re-orders nodes that would otherwise complicate the formatting logic.
/// A common use case for pre-processing is the removal of all parenthesized nodes.
/// Removing parenthesized nodes simplifies the formatting logic when it has different behaviour
/// depending if a child or parent is of a specific node kind. Performing such a test with parenthesized
/// nodes present in the source code means that the formatting logic has to skip over all parenthesized nodes
/// until it finds the first non-parenthesized node and then test if that node is of the expected kind.
///
/// This source map implementation supports removing tokens or re-structuring nodes
/// without changing the order of the tokens in the tree (requires no source map).
///
/// The following section uses parentheses as a concrete example to explain the functionality of the source map.
/// However, the source map implementation isn't restricted to removing parentheses only, it supports mapping
/// transformed to source position for any use case where a transform deletes text from the source tree.
///
/// ## Position Mapping
///
/// The source map internally tracks all the ranges that have been deleted from the source code sorted by the start of the deleted range.
/// It further stores the absolute count of deleted bytes preceding a range. The deleted range together
/// with the absolute count allows to re-compute the source location for every transformed location
/// and has the benefit that it requires significantly less memory
/// than source maps that use a source to destination position marker for every token.
///
/// ## Map Node Ranges
///
/// Only having the deleted ranges to resolve the original text of a node isn't sufficient.
/// Resolving the original text of a node is needed when formatting a node as verbatim, either because
/// formatting the node failed because of a syntax error, or formatting is suppressed with a `rome-ignore format:` comment.
///
/// ```text
/// // Source // Transformed
/// (a+b) + (c + d) a + b + c + d;
/// ```
///
/// Using the above example, the following source ranges should be returned when querying with the transformed ranges:
///
/// * `a` -> `a`: Should not include the leading `(`
/// * `b` -> `b`: Should not include the trailing `)`
/// * `a + b` -> `(a + b)`: Should include the leading `(` and trailing `)`.
/// * `a + b + c + d` -> `(a + b) + (c + d)`: Should include the first `(` token and the last `)` token because the expression statement
/// fully encloses the `a + b` and `c + d` nodes.
///
/// This is why the source map also tracks the mapped trimmed ranges for every node.
#[derive(Debug, Clone)]
pub struct TransformSourceMap {
/// The text of the source document, i.e. the text before the transformation was applied.
source_text: String,
/// The mappings stored in increasing order
deleted_ranges: Vec<DeletedRange>,
/// Key: Start or end position of node for which the trimmed range should be extended
/// Value: The trimmed range.
mapped_node_ranges: FxHashMap<TextSize, TrimmedNodeRangeMapping>,
}
impl TransformSourceMap {
/// Returns the text of the untransformed source document.
pub fn text(&self) -> &str {
    self.source_text.as_str()
}
/// Translates `transformed_range`, a range in the transformed document, into the
/// corresponding range of the source document.
///
/// Complexity: `O(log(n))`
pub fn source_range(&self, transformed_range: TextRange) -> TextRange {
    let start = self.source_offset(transformed_range.start(), RangePosition::Start);
    let end = self.source_offset(transformed_range.end(), RangePosition::End);
    let range = TextRange::new(start, end);

    debug_assert!(range.end() <= self.source_text.text_len(), "Mapped range {:?} exceeds the length of the source document {:?}. Please check if the passed `transformed_range` is a range of the transformed tree and not of the source tree, and that it belongs to the tree for which the source map was created for.", range, self.source_text.len());

    range
}
/// Resolves the trimmed range of the transformed `node` to the matching trimmed range
/// in the source document.
///
/// Average Complexity: `O(log(n))`
pub fn trimmed_source_range<L: Language>(&self, node: &SyntaxNode<L>) -> TextRange {
    let transformed_range = node.text_trimmed_range();
    self.trimmed_source_range_from_transformed_range(transformed_range)
}
/// Performs a single widening step for `source_range`.
///
/// Looks up the node range mappings registered for the start and for the end of the range and
/// widens the corresponding side to the mapping's extended range, but only if the range fully
/// encloses the original range of the mapped node.
fn resolve_trimmed_range(&self, mut source_range: TextRange) -> TextRange {
    // Widen the start if a mapped node begins here and is fully covered.
    match self.mapped_node_ranges.get(&source_range.start()) {
        Some(mapping) if source_range.contains_range(mapping.original_range) => {
            source_range = TextRange::new(mapping.extended_range.start(), source_range.end());
        }
        _ => {}
    }

    // Widen the end if a mapped node ends here and is fully covered by the
    // (potentially already widened) range.
    match self.mapped_node_ranges.get(&source_range.end()) {
        Some(mapping) if source_range.contains_range(mapping.original_range) => {
            source_range = TextRange::new(source_range.start(), mapping.extended_range.end());
        }
        _ => {}
    }

    source_range
}
/// Maps `transformed_range` to the source document and then keeps widening the result
/// until another widening step no longer changes the range.
fn trimmed_source_range_from_transformed_range(
    &self,
    transformed_range: TextRange,
) -> TextRange {
    let mut current = self.source_range(transformed_range);

    loop {
        let widened = self.resolve_trimmed_range(current);

        // Fixed point reached: nothing left to extend.
        if widened == current {
            return widened;
        }

        current = widened;
    }
}
/// Returns the slice of the source text covered by the trimmed source range of `node`.
pub fn trimmed_source_text<L: Language>(&self, node: &SyntaxNode<L>) -> &str {
    &self.source_text[self.trimmed_source_range(node)]
}
/// Iterates over all deleted ranges, ordered by increasing start position.
pub fn deleted_ranges(&self) -> DeletedRanges {
    let source_text = self.source_text.as_str();
    let deleted_ranges = self.deleted_ranges.iter();

    DeletedRanges {
        source_text,
        deleted_ranges,
    }
}
/// Test helper: returns the source text of the trimmed source range that `range`
/// (a range in the transformed document) resolves to.
#[cfg(test)]
fn trimmed_source_text_from_transformed_range(&self, range: TextRange) -> &str {
    &self.source_text[self.trimmed_source_range_from_transformed_range(range)]
}
/// Maps a single offset of the transformed document to the offset in the source document.
///
/// Binary searches for the last deleted range whose transformed start is less than or equal
/// to `transformed_offset` and delegates the actual computation to
/// `source_offset_with_range`.
fn source_offset(&self, transformed_offset: TextSize, position: RangePosition) -> TextSize {
    let search = self
        .deleted_ranges
        .binary_search_by_key(&transformed_offset, |deleted| deleted.transformed_start());

    let preceding = match search {
        // A deleted range starts exactly at the queried offset.
        Ok(exact) => self.deleted_ranges.get(exact),
        // Use the range right before the insertion point; there is none if the offset
        // comes before the first deleted range.
        Err(insertion_point) => insertion_point
            .checked_sub(1)
            .and_then(|index| self.deleted_ranges.get(index)),
    };

    self.source_offset_with_range(transformed_offset, position, preceding)
}
/// Computes the source offset for `transformed_offset` given the closest deleted range that
/// starts at or before that offset (`None` if no range precedes the offset).
fn source_offset_with_range(
    &self,
    transformed_offset: TextSize,
    position: RangePosition,
    deleted_range: Option<&DeletedRange>,
) -> TextSize {
    // Nothing has been deleted before this offset: both documents agree.
    let range = match deleted_range {
        Some(range) => range,
        None => return transformed_offset,
    };

    debug_assert!(
        range.transformed_start() <= transformed_offset,
        "Transformed start {:?} must be less than or equal to transformed offset {:?}.",
        range.transformed_start(),
        transformed_offset
    );

    // The position falls outside of a position that has a leading/trailing deleted range.
    // For example, if you get the position of `+` in `(a + b)`.
    // The source offset is the end of the deleted range plus the distance to it.
    if range.transformed_start() != transformed_offset {
        let transformed_delta = transformed_offset - range.transformed_start();
        return range.source_start() + range.len() + transformed_delta;
    }

    // The transformed position falls exactly onto the position where the deleted range was.
    // For example when querying: `a` in `(a)` or `(a + b)`, or `b`
    match position {
        // Start of a range: skip over the deleted text.
        RangePosition::Start => range.source_end(),
        // `a)`, deleted range is right after the token. That's why `source_start` is the offset
        // that truncates the `)` and `source_end` includes it
        RangePosition::End => range.source_start(),
    }
}
/// Rewrites the source positions of the source map markers in `printed` so that they refer
/// to the original, untransformed source code instead of the transformed tree.
///
/// The printer creates a source map that allows mapping positions from the newly formatted document
/// back to the locations of the tree. However, the source positions stored in [crate::FormatElement::DynamicText]
/// and [crate::FormatElement::SyntaxTokenTextSlice] are relative to the transformed tree
/// and not the original tree passed to [crate::format_node].
///
/// Returns `printed` with the re-mapped markers.
pub fn map_printed(&self, mut printed: Printed) -> Printed {
    let markers = &mut printed.sourcemap;
    self.map_markers(markers);

    printed
}
/// Maps the printers source map marker to the source positions.
fn map_markers(&self, markers: &mut [SourceMarker]) {
if self.deleted_ranges.is_empty() {
return;
}
let mut previous_marker: Option<SourceMarker> = None;
let mut next_range_index = 0;
for marker in markers {
// It's not guaranteed that markers are sorted by source location (line suffix comments).
// It can, therefore, be necessary to navigate backwards again.
// In this case, do a binary search for the index of the next deleted range (`O(log(n)`).
let out_of_order_marker =
previous_marker.map_or(false, |previous| previous.source > marker.source);
if out_of_order_marker {
let index = self
.deleted_ranges
.binary_search_by_key(&marker.source, |range| range.transformed_start());
match index {
// Direct match
Ok(index) => {
next_range_index = index + 1;
}
Err(index) => next_range_index = index,
}
} else {
// Find the range for this mapping. In most cases this is a no-op or only involves a single step
// because markers are most of the time in increasing source order.
while next_range_index < self.deleted_ranges.len() {
let next_range = &self.deleted_ranges[next_range_index];
if next_range.transformed_start() > marker.source {
break;
}
next_range_index += 1;
}
}
previous_marker = Some(*marker);
let current_range = if next_range_index == 0 {
None
} else {
self.deleted_ranges.get(next_range_index - 1)
};
let source =
self.source_offset_with_range(marker.source, RangePosition::Start, current_range);
marker.source = source;
}
}
}
/// Describes how the trimmed range of a single node should be widened. The source map stores
/// the same mapping under both the start and the end position of `original_range`.
#[derive(Debug, Copy, Clone)]
struct TrimmedNodeRangeMapping {
/// The original trimmed range of the node.
///
/// ```javascript
/// (a + b)
/// ```
///
/// `1..6` `a + b`
original_range: TextRange,
/// The range to which the trimmed range of the node should be extended
/// ```javascript
/// (a + b)
/// ```
///
/// `0..7` for `a + b` if its range should also include the parenthesized range.
extended_range: TextRange,
}
/// Tells whether an offset that is mapped back to the source document is the start or the end
/// of a range. The distinction matters for offsets that fall exactly onto a deleted range.
#[derive(Copy, Clone, Debug)]
enum RangePosition {
/// The offset is the start of a range: a deleted range at this offset is skipped.
Start,
/// The offset is the end of a range: a deleted range at this offset is not included.
End,
}
/// Stores the information about a range in the source document that isn't present in the transformed document
/// and provides means to map the transformed position back to the source position.
///
/// # Examples
///
/// ```javascript
/// (a + b)
/// ```
///
/// A transform that removes the parentheses from the above expression removes the ranges `0..1` (`(` token)
/// and `6..7` (`)` token) and the source map creates one [DeletedRange] for each:
///
/// ```text
/// DeletedRange {
/// source_range: 0..1,
/// total_length_preceding_deleted_ranges: 0,
/// },
/// DeletedRange {
/// source_range: 6..7,
/// total_length_preceding_deleted_ranges: 1,
/// }
/// ```
///
/// The first range indicates that the range `0..1` for the `(` token has been removed. The second range
/// indicates that the range `6..7` for the `)` token has been removed and it stores that 1 byte
/// has already been removed before (but not including) this range.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
struct DeletedRange {
/// The range in the source document of the bytes that have been omitted from the transformed document.
source_range: TextRange,
/// The accumulated count of all removed bytes up to (but not including) the start of this range.
total_length_preceding_deleted_ranges: TextSize,
}
impl DeletedRange {
    /// Creates a deleted range for `source_range` that is preceded by
    /// `total_length_preceding_deleted_ranges` already deleted bytes.
    fn new(source_range: TextRange, total_length_preceding_deleted_ranges: TextSize) -> Self {
        let source_start = source_range.start();
        debug_assert!(total_length_preceding_deleted_ranges <= source_start, "The total number of deleted bytes ({:?}) can not exceed the offset from the start in the source document ({:?}). This is a bug in the source map implementation.", total_length_preceding_deleted_ranges, source_start);

        Self {
            total_length_preceding_deleted_ranges,
            source_range,
        }
    }

    /// The length of the deleted text, counted from the [source start](DeletedRange::source_start).
    fn len(&self) -> TextSize {
        self.source_range.len()
    }

    /// The offset in the source document at which the deleted text starts.
    fn source_start(&self) -> TextSize {
        self.source_range.start()
    }

    /// The offset in the source document at which the deleted text ends.
    fn source_end(&self) -> TextSize {
        self.source_range.end()
    }

    /// Returns the position of [DeletedRange::source_start] in the transformed document, which is
    /// the source start reduced by the length of all preceding deleted ranges.
    fn transformed_start(&self) -> TextSize {
        self.source_start() - self.total_length_preceding_deleted_ranges
    }
}
/// Builder for creating a source map.
///
/// Collects the source text, the deleted ranges, and the node range mappings.
/// `finish` turns the collected data into a [TransformSourceMap].
#[derive(Debug, Default)]
pub struct TransformSourceMapBuilder {
/// The original source text of the tree before it was transformed.
source_text: String,
/// The deleted source ranges in the order in which they were added. They are only sorted
/// (and adjacent ranges merged) when the source map gets built in `finish`.
deleted_ranges: Vec<TextRange>,
/// The keys are a position in the source document where a trimmed node starts or ends.
/// The values are the metadata about a trimmed node range
mapped_node_ranges: FxHashMap<TextSize, TrimmedNodeRangeMapping>,
}
impl TransformSourceMapBuilder {
    /// Creates an empty builder.
    pub fn new() -> Self {
        Self::default()
    }

    /// Creates a builder whose source text is initialized with `source`.
    pub fn with_source(source: String) -> Self {
        Self {
            source_text: source,
            ..Default::default()
        }
    }

    /// Appends `text` to the end of the original document's source text.
    pub fn push_source_text(&mut self, text: &str) {
        self.source_text.push_str(text);
    }

    /// Registers `source_range` as a range that has been deleted from the source document.
    pub fn add_deleted_range(&mut self, source_range: TextRange) {
        self.deleted_ranges.push(source_range);
    }

    /// Adds a mapping to widen a node's trimmed range.
    ///
    /// The formatter uses the trimmed range when formatting a node in verbatim either because the node
    /// failed to format because of a syntax error or because its formatting is suppressed with a `rome-ignore format:` comment.
    ///
    /// This method adds a mapping to widen a node's trimmed range to enclose another range instead. This is
    /// e.g. useful when removing parentheses around expressions where `(/* comment */ a /* comment */)` because
    /// the trimmed range of `a` should now enclose the full range including the `(` and `)` tokens to ensure
    /// that the parentheses are retained when printing that node in verbatim style.
    pub fn extend_trimmed_node_range(
        &mut self,
        original_range: TextRange,
        extended_range: TextRange,
    ) {
        let mapping = TrimmedNodeRangeMapping {
            original_range,
            extended_range,
        };

        // The mapping must be discoverable from either end of the original range.
        for position in [original_range.start(), original_range.end()] {
            self.mapped_node_ranges.insert(position, mapping);
        }
    }

    /// Creates a source map that performs single position lookups in `O(log(n))`.
    ///
    /// Sorts the deleted ranges by start (then end) position and merges ranges that directly
    /// follow each other into a single mapping.
    pub fn finish(mut self) -> TransformSourceMap {
        let mut merged_mappings = Vec::with_capacity(self.deleted_ranges.len());

        self.deleted_ranges.sort_by(|a, b| {
            let by_start = a.start().cmp(&b.start());
            if by_start == Ordering::Equal {
                a.end().cmp(&b.end())
            } else {
                by_start
            }
        });

        let mut ranges = self.deleted_ranges.into_iter();

        if let Some(first) = ranges.next() {
            let mut last_mapping = DeletedRange::new(first, TextSize::default());
            // Total length of all deleted ranges seen so far.
            let mut deleted_length = last_mapping.len();

            for range in ranges {
                // Merge adjacent ranges to ensure there's only ever a single mapping starting at the same transformed offset.
                if last_mapping.source_range.end() == range.start() {
                    last_mapping.source_range = last_mapping.source_range.cover(range);
                } else {
                    merged_mappings.push(last_mapping);
                    last_mapping = DeletedRange::new(range, deleted_length);
                }

                deleted_length += range.len();
            }

            merged_mappings.push(last_mapping);
        }

        TransformSourceMap {
            source_text: self.source_text,
            deleted_ranges: merged_mappings,
            mapped_node_ranges: self.mapped_node_ranges,
        }
    }
}
/// A single deleted range as yielded by the [DeletedRanges] iterator.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub struct DeletedRangeEntry<'a> {
/// The start position of the removed range in the source document
pub source: TextSize,
/// The position in the transformed document where the removed range would have been (but is not, because it was removed)
pub transformed: TextSize,
/// The text of the removed range
pub text: &'a str,
}
/// Iterator over all removed ranges in a document.
///
/// Returns the ranges in increasing order by their start position.
pub struct DeletedRanges<'a> {
/// The text of the source document; used to look up the text of each removed range.
source_text: &'a str,
/// The mappings stored in increasing order
deleted_ranges: std::slice::Iter<'a, DeletedRange>,
}
impl<'a> Iterator for DeletedRanges<'a> {
    type Item = DeletedRangeEntry<'a>;

    /// Yields the entry for the next deleted range.
    fn next(&mut self) -> Option<Self::Item> {
        let source_text = self.source_text;

        self.deleted_ranges.next().map(|range| DeletedRangeEntry {
            source: range.source_range.start(),
            transformed: range.transformed_start(),
            text: &source_text[range.source_range],
        })
    }

    /// Forwards the size hint of the underlying slice iterator.
    fn size_hint(&self) -> (usize, Option<usize>) {
        self.deleted_ranges.size_hint()
    }

    /// Returns the entry for the last deleted range by asking the underlying slice iterator
    /// for its last element.
    fn last(self) -> Option<Self::Item>
    where
        Self: Sized,
    {
        let source_text = self.source_text;

        self.deleted_ranges.last().map(|range| DeletedRangeEntry {
            source: range.source_range.start(),
            transformed: range.transformed_start(),
            text: &source_text[range.source_range],
        })
    }
}
impl DoubleEndedIterator for DeletedRanges<'_> {
    /// Yields the entry for the next deleted range, taken from the back.
    fn next_back(&mut self) -> Option<Self::Item> {
        let source_text = self.source_text;

        self.deleted_ranges
            .next_back()
            .map(|range| DeletedRangeEntry {
                source: range.source_range.start(),
                transformed: range.transformed_start(),
                text: &source_text[range.source_range],
            })
    }
}
// Marker impl: every element comes straight from the wrapped slice iterator.
impl FusedIterator for DeletedRanges<'_> {}
// Marker impl: `size_hint` is forwarded from the wrapped slice iterator.
impl ExactSizeIterator for DeletedRanges<'_> {}
#[cfg(test)]
mod tests {
    use crate::source_map::DeletedRangeEntry;
    use crate::{TextRange, TextSize, TransformSourceMapBuilder};
    use ruff_rowan::raw_language::{RawLanguageKind, RawSyntaxTreeBuilder};

    /// Shorthand for creating a [TextRange] from two raw offsets.
    fn span(start: u32, end: u32) -> TextRange {
        TextRange::new(TextSize::from(start), TextSize::from(end))
    }

    /// Shorthand for creating the expected [DeletedRangeEntry].
    fn entry(source: u32, transformed: u32, text: &str) -> DeletedRangeEntry<'_> {
        DeletedRangeEntry {
            source: TextSize::from(source),
            transformed: TextSize::from(transformed),
            text,
        }
    }

    /// Creates a builder for the source `(a + (((b + c)) + d)) + e` in which every
    /// parenthesis is registered as deleted.
    fn builder_with_removed_parentheses() -> TransformSourceMapBuilder {
        let mut cst_builder = RawSyntaxTreeBuilder::new();
        cst_builder.start_node(RawLanguageKind::ROOT);
        // The shape of the tree doesn't matter for the test case
        cst_builder.token(RawLanguageKind::STRING_TOKEN, "(a + (((b + c)) + d)) + e");
        cst_builder.finish_node();
        let root = cst_builder.finish();

        let mut builder = TransformSourceMapBuilder::new();
        builder.push_source_text(&root.text().to_string());

        // Add mappings for all removed parentheses.
        let removed = [
            // `(`
            (0, 1),
            // `(((`, ranges can be added out of order
            (5, 6),
            (7, 8),
            (6, 7),
            // `))`
            (13, 14),
            (14, 15),
            // `))`
            (19, 20),
            (20, 21),
        ];

        for (start, end) in removed {
            builder.add_deleted_range(span(start, end));
        }

        builder
    }

    #[test]
    fn range_mapping() {
        let source_map = builder_with_removed_parentheses().finish();

        // The following mapping assume the transformed string to be (including whitespace):
        // "a + b + c + d + e";
        let expectations = [
            // `a`
            (span(0, 1), span(1, 2)),
            // `b`
            (span(4, 5), span(8, 9)),
            // `c`
            (span(8, 9), span(12, 13)),
            // `d`
            (span(12, 13), span(18, 19)),
            // `e`
            (span(16, 17), span(24, 25)),
        ];

        for (transformed, expected_source) in expectations {
            assert_eq!(source_map.source_range(transformed), expected_source);
        }
    }

    #[test]
    fn trimmed_range() {
        // Build up a tree for `((a))`
        // Don't mind the unknown nodes, it doesn't really matter what the nodes are.
        let mut cst_builder = RawSyntaxTreeBuilder::new();
        cst_builder.start_node(RawLanguageKind::ROOT);

        cst_builder.start_node(RawLanguageKind::BOGUS);
        cst_builder.token(RawLanguageKind::STRING_TOKEN, "(");

        cst_builder.start_node(RawLanguageKind::BOGUS);
        cst_builder.token(RawLanguageKind::BOGUS, "(");

        cst_builder.start_node(RawLanguageKind::LITERAL_EXPRESSION);
        cst_builder.token(RawLanguageKind::STRING_TOKEN, "a");
        cst_builder.finish_node();

        cst_builder.token(RawLanguageKind::BOGUS, ")");
        cst_builder.finish_node();

        cst_builder.token(RawLanguageKind::BOGUS, ")");
        cst_builder.finish_node();

        cst_builder.token(RawLanguageKind::BOGUS, ";");
        cst_builder.finish_node();

        let root = cst_builder.finish();
        assert_eq!(&root.text(), "((a));");

        let mut bogus = root
            .descendants()
            .filter(|node| node.kind() == RawLanguageKind::BOGUS);

        // `((a))`
        let outer = bogus.next().unwrap();
        // `(a)`
        let inner = bogus.next().unwrap();
        // `a`
        let expression = root
            .descendants()
            .find(|node| node.kind() == RawLanguageKind::LITERAL_EXPRESSION)
            .unwrap();

        let mut builder = TransformSourceMapBuilder::new();
        builder.push_source_text(&root.text().to_string());

        // Add mappings for all removed parentheses.
        builder.add_deleted_range(span(0, 2));
        builder.add_deleted_range(span(3, 5));

        // Extend `a` to the range of `(a)`
        builder
            .extend_trimmed_node_range(expression.text_trimmed_range(), inner.text_trimmed_range());
        // Extend `(a)` to the range of `((a))`
        builder.extend_trimmed_node_range(inner.text_trimmed_range(), outer.text_trimmed_range());

        let source_map = builder.finish();

        // Query `a`
        assert_eq!(
            source_map.trimmed_source_text_from_transformed_range(span(0, 1)),
            "((a))"
        );

        // Query `a;` expression
        assert_eq!(
            source_map.trimmed_source_text_from_transformed_range(span(0, 2)),
            "((a));"
        );
    }

    #[test]
    fn deleted_ranges() {
        let source_map = builder_with_removed_parentheses().finish();

        let deleted_ranges = source_map.deleted_ranges().collect::<Vec<_>>();

        assert_eq!(
            deleted_ranges,
            vec![
                entry(0, 0, "("),
                entry(5, 4, "((("),
                entry(13, 9, "))"),
                entry(19, 13, "))"),
            ]
        );

        assert_eq!(
            source_map.deleted_ranges().last(),
            Some(entry(19, 13, "))"))
        );
    }
}

View file

@ -0,0 +1,2 @@
pub mod number;
pub mod string;

View file

@ -0,0 +1,296 @@
use crate::token::string::ToAsciiLowercaseCow;
use ruff_rowan::{Language, SyntaxToken};
use std::borrow::Cow;
use std::num::NonZeroUsize;
use crate::prelude::*;
use crate::{CstFormatContext, Format};
/// Creates a formattable object that prints `token` with its number literal text cleaned up.
pub fn format_number_token<L: Language>(token: &SyntaxToken<L>) -> CleanedNumberLiteralText<L> {
    CleanedNumberLiteralText { token }
}
/// Formats a number token by replacing it with the cleaned version of its trimmed text.
/// Created by [format_number_token].
pub struct CleanedNumberLiteralText<'token, L>
where
L: Language,
{
/// The number token to format.
token: &'token SyntaxToken<L>,
}
impl<L, C> Format<C> for CleanedNumberLiteralText<'_, L>
where
    L: Language + 'static,
    C: CstFormatContext<Language = L>,
{
    /// Writes the token with its trimmed text replaced by the cleaned number text.
    fn fmt(&self, f: &mut Formatter<C>) -> FormatResult<()> {
        let token = self.token;

        let cleaned_text = format_trimmed_number(token.text_trimmed());
        let trimmed_start = token.text_trimmed_range().start();
        let content = syntax_token_cow_slice(cleaned_text, token, trimmed_start);

        format_replaced(token, &content).fmt(f)
    }
}
/// The part of the number literal that `format_trimmed_number` is currently scanning.
enum FormatNumberLiteralState {
/// Initial state; remains active until a `.` or an `e` is seen.
IntegerPart,
/// Entered when a `.` is seen.
DecimalPart(FormatNumberLiteralDecimalPart),
/// Entered when an `e` is seen.
Exponent(FormatNumberLiteralExponent),
}
/// Bookkeeping for the decimal part of a number literal.
struct FormatNumberLiteralDecimalPart {
/// Index of the `.` that started the decimal part.
dot_index: usize,
/// Index of the last `1`-`9` digit seen in the decimal part so far, `None` if there is none (yet).
last_non_zero_index: Option<NonZeroUsize>,
}
/// Bookkeeping for the exponent of a number literal.
struct FormatNumberLiteralExponent {
/// Index of the `e` that started the exponent.
e_index: usize,
/// Set once a `-` is seen inside the exponent.
is_negative: bool,
/// Index of the first digit of the exponent, `None` if no digit has been seen yet.
first_digit_index: Option<NonZeroUsize>,
/// Index of the first `1`-`9` digit of the exponent, `None` if there is none (yet).
first_non_zero_index: Option<NonZeroUsize>,
}
// Regex-free version of https://github.com/prettier/prettier/blob/ca246afacee8e6d5db508dae01730c9523bbff1d/src/common/util.js#L341-L356
/// Cleans up the (trimmed) text of a number literal.
///
/// The text is lowercased and then scanned once, applying the following rules:
///
/// * a number that starts with a `.` (optionally preceded by a sign) gets a leading `0`,
/// * trailing zeros of the decimal part are removed; a decimal part consisting only of zeros
///   is printed as `.0` and a lone trailing `.` is removed,
/// * an exponent that equals zero is removed,
/// * a `+` sign and leading zeros are removed from the exponent,
/// * scanning stops at the first `x` (hex literals).
///
/// Returns the lowercased text unchanged (without building a new string) if no rule applies.
fn format_trimmed_number(text: &str) -> Cow<str> {
    use FormatNumberLiteralState::*;

    let text = text.to_ascii_lowercase_cow();
    let mut copied_or_ignored_chars = 0usize;
    // Use byte offsets (`char_indices`) rather than char counts because all indices are used to
    // slice `text`. Both are identical for ASCII text, but only byte offsets are valid slice
    // positions for arbitrary input.
    let mut iter = text.char_indices();
    let mut curr = iter.next();
    let mut state = IntegerPart;

    // Will be filled only if and when the first place that needs reformatting is detected.
    let mut cleaned_text = String::new();

    // Look at only the start of the text, ignore any sign, and make sure numbers always start with a digit. Add 0 if missing.
    if let Some((_, '+' | '-')) = curr {
        curr = iter.next();
    }
    if let Some((curr_index, '.')) = curr {
        cleaned_text.push_str(&text[copied_or_ignored_chars..curr_index]);
        copied_or_ignored_chars = curr_index;
        cleaned_text.push('0');
    }

    // Loop over the rest of the text, applying the remaining rules.
    loop {
        // We use a None pseudo-char at the end of the string to simplify the match cases that follow
        let curr_or_none_terminator_char = match curr {
            Some((curr_index, curr_char)) => (curr_index, Some(curr_char)),
            None => (text.len(), None),
        };

        // Look for termination of the decimal part or exponent and see if we need to print it differently.
        match (&state, curr_or_none_terminator_char) {
            (
                DecimalPart(FormatNumberLiteralDecimalPart {
                    dot_index,
                    last_non_zero_index: None,
                }),
                (curr_index, Some('e') | None),
            ) => {
                // The decimal part equals zero, ignore it completely.
                // Caveat: Prettier still prints a single `.0` unless there was *only* a trailing dot.
                if curr_index > dot_index + 1 {
                    cleaned_text.push_str(&text[copied_or_ignored_chars..=*dot_index]);
                    cleaned_text.push('0');
                } else {
                    cleaned_text.push_str(&text[copied_or_ignored_chars..*dot_index]);
                }
                copied_or_ignored_chars = curr_index;
            }
            (
                DecimalPart(FormatNumberLiteralDecimalPart {
                    last_non_zero_index: Some(last_non_zero_index),
                    ..
                }),
                (curr_index, Some('e') | None),
            ) if last_non_zero_index.get() < curr_index - 1 => {
                // The decimal part ends with at least one zero, ignore them but copy the part from the dot until the last non-zero.
                cleaned_text.push_str(&text[copied_or_ignored_chars..=last_non_zero_index.get()]);
                copied_or_ignored_chars = curr_index;
            }
            (
                Exponent(FormatNumberLiteralExponent {
                    e_index,
                    first_non_zero_index: None,
                    ..
                }),
                (curr_index, None),
            ) => {
                // The exponent equals zero, ignore it completely.
                cleaned_text.push_str(&text[copied_or_ignored_chars..*e_index]);
                copied_or_ignored_chars = curr_index;
            }
            (
                Exponent(FormatNumberLiteralExponent {
                    e_index,
                    is_negative,
                    first_digit_index: Some(first_digit_index),
                    first_non_zero_index: Some(first_non_zero_index),
                }),
                (curr_index, None),
            ) if (first_digit_index.get() > e_index + 1 && !is_negative)
                || (first_non_zero_index.get() > first_digit_index.get()) =>
            {
                // The exponent begins with a plus or at least one zero, ignore them but copy the part from the first non-zero until the end.
                cleaned_text.push_str(&text[copied_or_ignored_chars..=*e_index]);
                if *is_negative {
                    cleaned_text.push('-');
                }
                cleaned_text.push_str(&text[first_non_zero_index.get()..curr_index]);
                copied_or_ignored_chars = curr_index;
            }
            _ => {}
        }

        // Update state after the current char
        match (&state, curr) {
            // Cases entering or remaining in decimal part
            (_, Some((curr_index, '.'))) => {
                state = DecimalPart(FormatNumberLiteralDecimalPart {
                    dot_index: curr_index,
                    last_non_zero_index: None,
                });
            }
            (DecimalPart(decimal_part), Some((curr_index, '1'..='9'))) => {
                state = DecimalPart(FormatNumberLiteralDecimalPart {
                    // The decimal part starts with a `.`, so a digit inside of it is never at
                    // index 0 and `NonZeroUsize::new` always returns `Some` here.
                    last_non_zero_index: NonZeroUsize::new(curr_index),
                    ..*decimal_part
                });
            }
            // Cases entering or remaining in exponent
            (_, Some((curr_index, 'e'))) => {
                state = Exponent(FormatNumberLiteralExponent {
                    e_index: curr_index,
                    is_negative: false,
                    first_digit_index: None,
                    first_non_zero_index: None,
                });
            }
            (Exponent(exponent), Some((_, '-'))) => {
                state = Exponent(FormatNumberLiteralExponent {
                    is_negative: true,
                    ..*exponent
                });
            }
            (
                Exponent(
                    exponent @ FormatNumberLiteralExponent {
                        first_digit_index: None,
                        ..
                    },
                ),
                Some((curr_index, curr_char @ '0'..='9')),
            ) => {
                // The exponent starts with an `e`, so a digit inside of it is never at index 0
                // and `NonZeroUsize::new` always returns `Some` here.
                state = Exponent(FormatNumberLiteralExponent {
                    first_digit_index: NonZeroUsize::new(curr_index),
                    first_non_zero_index: if curr_char != '0' {
                        NonZeroUsize::new(curr_index)
                    } else {
                        None
                    },
                    ..*exponent
                });
            }
            (
                Exponent(
                    exponent @ FormatNumberLiteralExponent {
                        first_non_zero_index: None,
                        ..
                    },
                ),
                Some((curr_index, '1'..='9')),
            ) => {
                state = Exponent(FormatNumberLiteralExponent {
                    // Same reasoning as above: the index is never 0 inside of the exponent.
                    first_non_zero_index: NonZeroUsize::new(curr_index),
                    ..*exponent
                });
            }
            _ => {}
        }

        // Repeat or exit
        match curr {
            None | Some((_, 'x') /* hex bailout */) => break,
            Some(_) => curr = iter.next(),
        }
    }

    if cleaned_text.is_empty() {
        text
    } else {
        // Append any unconsidered text
        cleaned_text.push_str(&text[copied_or_ignored_chars..]);
        Cow::Owned(cleaned_text)
    }
}
#[cfg(test)]
mod tests {
    use super::format_trimmed_number;
    use std::borrow::Cow;

    /// A leading `+` or leading zeros in the exponent are dropped.
    #[test]
    fn removes_unnecessary_plus_and_zeros_from_scientific_notation() {
        assert_eq!(format_trimmed_number("1e02"), "1e2");
        assert_eq!(format_trimmed_number("1e+2"), "1e2");
    }

    /// An exponent of zero is removed entirely.
    #[test]
    fn removes_unnecessary_scientific_notation() {
        assert_eq!(format_trimmed_number("1e0"), "1");
        assert_eq!(format_trimmed_number("1e-0"), "1");
    }

    /// The `e` inside a hexadecimal literal is a digit, not an exponent marker.
    #[test]
    fn does_not_get_bamboozled_by_hex() {
        assert_eq!(format_trimmed_number("0xe0"), "0xe0");
        assert_eq!(format_trimmed_number("0x10e0"), "0x10e0");
    }

    /// A literal starting with `.` gets a leading zero.
    #[test]
    fn makes_sure_numbers_always_start_with_a_digit() {
        assert_eq!(format_trimmed_number(".2"), "0.2");
    }

    /// Zeros after the last significant decimal digit are dropped.
    #[test]
    fn removes_extraneous_trailing_decimal_zeroes() {
        assert_eq!(format_trimmed_number("0.10"), "0.1");
    }

    /// An all-zero decimal part keeps exactly one zero.
    #[test]
    fn keeps_one_trailing_decimal_zero() {
        assert_eq!(format_trimmed_number("0.00"), "0.0");
    }

    /// A dot without any decimal digits is dropped.
    #[test]
    fn removes_trailing_dot() {
        assert_eq!(format_trimmed_number("1."), "1");
    }

    /// All the clean-ups compose on a single literal.
    #[test]
    fn cleans_all_at_once() {
        assert_eq!(format_trimmed_number(".00e-0"), "0.0");
    }

    /// Input that is already clean is handed back borrowed, not re-allocated.
    #[test]
    fn keeps_the_input_string_if_no_change_needed() {
        let trimmed = format_trimmed_number("0.1e2");
        assert!(matches!(trimmed, Cow::Borrowed("0.1e2")));
    }
}

View file

@ -0,0 +1,259 @@
use std::borrow::Cow;
/// Lowercases ASCII text, allocating only when the text actually has to change.
pub trait ToAsciiLowercaseCow {
    /// Returns the same text as [`str::to_ascii_lowercase`]: the ASCII letters `A`-`Z` are
    /// mapped to `a`-`z`, every other byte is left untouched.
    ///
    /// The difference is that this function returns a [`Cow`] and does not allocate
    /// if the string contains no ASCII uppercase letter.
    fn to_ascii_lowercase_cow(&self) -> Cow<'_, str>;
}

impl ToAsciiLowercaseCow for str {
    /// # Panics
    ///
    /// In debug builds, panics if `self` contains non-ASCII characters.
    fn to_ascii_lowercase_cow(&self) -> Cow<'_, str> {
        debug_assert!(self.is_ascii());

        match self.bytes().position(|byte| byte.is_ascii_uppercase()) {
            // Nothing to change: hand the input back without allocating.
            None => Cow::Borrowed(self),
            Some(first_upper) => {
                let mut lowered = self.to_owned();
                // `first_upper` is the offset of an ASCII byte, so it is always a char
                // boundary. Everything before it is already lowercase.
                lowered[first_upper..].make_ascii_lowercase();
                Cow::Owned(lowered)
            }
        }
    }
}

impl ToAsciiLowercaseCow for String {
    #[inline]
    fn to_ascii_lowercase_cow(&self) -> Cow<'_, str> {
        self.as_str().to_ascii_lowercase_cow()
    }
}
/// Signal handed from one iteration of a character-by-character scan to the next one,
/// telling the next iteration how to treat the character it is about to process.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub enum CharSignal {
    /// There hasn't been any signal
    None,
    /// The previous iteration decided that the upcoming character must be kept as it is
    Keep,
    /// The previous iteration has already printed the upcoming character. Saves the character
    /// that was already written so that the next iteration can skip it.
    AlreadyPrinted(char),
}
/// The two kinds of quotes that can enclose a string literal.
#[derive(Debug, Eq, PartialEq, Clone, Copy)]
pub enum Quote {
    /// The double quote, `"`
    Double,
    /// The single quote, `'`
    Single,
}

impl Quote {
    /// Returns the quote character.
    pub const fn as_char(&self) -> char {
        match self {
            Quote::Double => '"',
            Quote::Single => '\'',
        }
    }

    /// Returns the quote as a string slice.
    ///
    /// The slice is a literal, so it is not tied to the lifetime of `self`.
    pub const fn as_string(&self) -> &'static str {
        match self {
            Quote::Double => "\"",
            Quote::Single => "'",
        }
    }

    /// Returns the quote, prepended with a backslash (escaped)
    ///
    /// The slice is a literal, so it is not tied to the lifetime of `self`.
    pub const fn as_escaped(&self) -> &'static str {
        match self {
            Quote::Double => "\\\"",
            Quote::Single => "\\'",
        }
    }

    /// Returns the quote as a single byte (both quotes are ASCII, so the cast is lossless).
    ///
    /// Despite the plural name, this returns one `u8`, not a byte slice.
    pub const fn as_bytes(&self) -> u8 {
        self.as_char() as u8
    }

    /// Given the current quote, it returns the other one
    pub const fn other(&self) -> Self {
        match self {
            Quote::Double => Quote::Single,
            Quote::Single => Quote::Double,
        }
    }
}
/// This function is responsible of:
///
/// - reducing the number of escapes
/// - normalising the new lines
///
/// # Escaping
///
/// The way it works is the following: we split the content by analyzing all the
/// characters that could keep the escape.
///
/// Each time we retrieve one of this character, we push inside a new string all the content
/// found **before** the current character.
///
/// After that the function checks if the current character should be also be printed or not.
/// These characters (like quotes) can have an escape that might be removed. If that happens,
/// we use [CharSignal] to tell to the next iteration what it should do with that character.
///
/// For example, let's take this example:
/// ```js
/// ("hello! \'")
/// ```
///
/// Here, we want to remove the backslash (\) from the content. So when we encounter `\`,
/// the algorithm checks if after `\` there's a `'`, and if so, then we push inside the final string
/// only `'` and we ignore the backlash. Then we signal the next iteration with [CharSignal::AlreadyPrinted],
/// so when we process the next `'`, we decide to ignore it and reset the signal.
///
/// Another example is the following:
///
/// ```js
/// (" \\' ")
/// ```
///
/// Here, we need to keep all the backslash. We check the first one and we look ahead. We find another
/// `\`, so we keep it the first and we signal the next iteration with [CharSignal::Keep].
/// Then the next iteration comes along. We have the second `\`, we look ahead we find a `'`. Although,
/// as opposed to the previous example, we have a signal that says that we should keep the current
/// character. Then we do so. The third iteration comes along and we find `'`. We still have the
/// [CharSignal::Keep]. We do so and then we set the signal to [CharSignal::None]
///
/// # Newlines
///
/// By default the formatter uses `\n` as a newline. The function replaces
/// `\r\n` with `\n`,
pub fn normalize_string(raw_content: &str, preferred_quote: Quote) -> Cow<str> {
let alternate_quote = preferred_quote.other();
// A string should be manipulated only if its raw content contains backslash or quotes
if !raw_content.contains(['\\', preferred_quote.as_char(), alternate_quote.as_char()]) {
return Cow::Borrowed(raw_content);
}
let mut reduced_string = String::new();
let mut signal = CharSignal::None;
let mut chars = raw_content.char_indices().peekable();
while let Some((_, current_char)) = chars.next() {
let next_character = chars.peek();
if let CharSignal::AlreadyPrinted(char) = signal {
if char == current_char {
continue;
}
}
match current_char {
'\\' => {
let bytes = raw_content.as_bytes();
if let Some((next_index, next_character)) = next_character {
// If we encounter an alternate quote that is escaped, we have to
// remove the escape from it.
// This is done because of how the enclosed strings can change.
// Check `computed_preferred_quote` for more details.
if *next_character as u8 == alternate_quote.as_bytes()
// This check is a safety net for cases where the backslash is at the end
// of the raw content:
// ("\\")
// The second backslash is at the end.
&& *next_index < bytes.len()
{
match signal {
CharSignal::Keep => {
reduced_string.push(current_char);
}
_ => {
reduced_string.push(alternate_quote.as_char());
signal = CharSignal::AlreadyPrinted(alternate_quote.as_char());
}
}
} else if signal == CharSignal::Keep {
reduced_string.push(current_char);
signal = CharSignal::None;
}
// The next character is another backslash, or
// a character that should be kept in the next iteration
else if "^\n\r\"'01234567\\bfnrtuvx\u{2028}\u{2029}".contains(*next_character)
{
signal = CharSignal::Keep;
// fallback, keep the backslash
reduced_string.push(current_char);
} else {
// these, usually characters that can have their
// escape removed: "\a" => "a"
// So we ignore the current slash and we continue
// to the next iteration
continue;
}
} else {
// fallback, keep the backslash
reduced_string.push(current_char);
}
}
'\n' | '\t' => {
if let CharSignal::AlreadyPrinted(the_char) = signal {
if matches!(the_char, '\n' | '\t') {
signal = CharSignal::None
}
} else {
reduced_string.push(current_char);
}
}
// If the current character is \r and the
// next is \n, skip over the entire sequence
'\r' if next_character.map_or(false, |(_, c)| *c == '\n') => {
reduced_string.push('\n');
signal = CharSignal::AlreadyPrinted('\n');
}
_ => {
// If we encounter a preferred quote and it's not escaped, we have to replace it with
// an escaped version.
// This is done because of how the enclosed strings can change.
// Check `computed_preferred_quote` for more details.
if current_char == preferred_quote.as_char() {
let last_char = &reduced_string.chars().last();
if let Some('\\') = last_char {
reduced_string.push(preferred_quote.as_char());
} else {
reduced_string.push_str(preferred_quote.as_escaped());
}
} else if current_char == alternate_quote.as_char() {
match signal {
CharSignal::None | CharSignal::Keep => {
reduced_string.push(alternate_quote.as_char());
}
CharSignal::AlreadyPrinted(_) => (),
}
} else {
reduced_string.push(current_char);
}
signal = CharSignal::None;
}
}
}
// Don't allocate a new string of this is empty
if reduced_string.is_empty() {
Cow::Borrowed(raw_content)
} else {
// don't allocate a new string if the new string is still equals to the input string
if reduced_string == raw_content {
Cow::Borrowed(raw_content)
} else {
Cow::Owned(reduced_string)
}
}
}

View file

@ -0,0 +1,597 @@
//! Provides builders for comments and skipped token trivia.
use crate::format_element::tag::VerbatimKind;
use crate::prelude::*;
use crate::{
comments::{CommentKind, CommentStyle},
write, Argument, Arguments, CstFormatContext, FormatRefWithRule, GroupId, SourceComment,
TextRange,
};
use ruff_rowan::{Language, SyntaxNode, SyntaxToken};
#[cfg(debug_assertions)]
use std::cell::Cell;
/// Creates a formattable that writes the leading comments of `node`.
pub const fn format_leading_comments<L: Language>(
    node: &SyntaxNode<L>,
) -> FormatLeadingComments<L> {
    FormatLeadingComments::Node(node)
}

/// Formats leading comments: either the ones the context has recorded for a node,
/// or an explicit slice of comments.
#[derive(Debug, Copy, Clone)]
pub enum FormatLeadingComments<'a, L: Language> {
    Node(&'a SyntaxNode<L>),
    Comments(&'a [SourceComment<L>]),
}

impl<Context> Format<Context> for FormatLeadingComments<'_, Context::Language>
where
    Context: CstFormatContext,
{
    /// Writes every comment followed by a separator chosen from the comment's kind and the
    /// number of lines around it, then marks the comment as formatted.
    fn fmt(&self, f: &mut Formatter<Context>) -> FormatResult<()> {
        let comments = f.context().comments().clone();

        let leading_comments = match self {
            FormatLeadingComments::Node(node) => comments.leading_comments(node),
            FormatLeadingComments::Comments(comments) => comments,
        };

        for comment in leading_comments {
            let format_comment = FormatRefWithRule::new(comment, Context::CommentRule::default());
            write!(f, [format_comment])?;

            match (comment.kind(), comment.lines_after()) {
                // Block comments may stay on the same line as what follows them
                (CommentKind::Block | CommentKind::InlineBlock, 0) => write!(f, [space()])?,
                (CommentKind::Block | CommentKind::InlineBlock, 1) => {
                    if comment.lines_before() == 0 {
                        write!(f, [soft_line_break_or_space()])?;
                    } else {
                        write!(f, [hard_line_break()])?;
                    }
                }
                (CommentKind::Block | CommentKind::InlineBlock, _) => write!(f, [empty_line()])?,
                // Line comments are always followed by a line break
                (CommentKind::Line, 0 | 1) => write!(f, [hard_line_break()])?,
                (CommentKind::Line, _) => write!(f, [empty_line()])?,
            }

            comment.mark_formatted();
        }

        Ok(())
    }
}
/// Creates a formattable that writes the trailing comments of `node`.
pub const fn format_trailing_comments<L: Language>(
    node: &SyntaxNode<L>,
) -> FormatTrailingComments<L> {
    FormatTrailingComments::Node(node)
}

/// Formats trailing comments: either the ones the context has recorded for a node,
/// or an explicit slice of comments.
#[derive(Debug, Clone, Copy)]
pub enum FormatTrailingComments<'a, L: Language> {
    Node(&'a SyntaxNode<L>),
    Comments(&'a [SourceComment<L>]),
}

impl<Context> Format<Context> for FormatTrailingComments<'_, Context::Language>
where
    Context: CstFormatContext,
{
    /// Writes every trailing comment and marks it as formatted.
    ///
    /// Comments stay on the line of the preceding content until the first comment that has a
    /// line break in front of it; from then on every comment is written on its own line.
    fn fmt(&self, f: &mut Formatter<Context>) -> FormatResult<()> {
        let comments = f.context().comments().clone();

        let trailing_comments = match self {
            FormatTrailingComments::Node(node) => comments.trailing_comments(node),
            FormatTrailingComments::Comments(comments) => comments,
        };

        // Number of line breaks seen in front of the comments formatted so far
        let mut total_lines_before = 0;

        for comment in trailing_comments {
            total_lines_before += comment.lines_before();

            let format_comment = FormatRefWithRule::new(comment, Context::CommentRule::default());

            if total_lines_before == 0 {
                // A regular trailing comment at the end of another expression
                let content = format_with(|f| write!(f, [space(), format_comment]));

                if comment.kind().is_line() {
                    write!(f, [line_suffix(&content), expand_parent()])?;
                } else {
                    write!(f, [content])?;
                }
            } else {
                // This allows comments at the end of nested structures:
                // {
                //   x: 1,
                //   y: 2
                //   // A comment
                // }
                // Those kinds of comments are almost always leading comments, but
                // here it doesn't go "outside" the block and turns it into a
                // trailing comment for `2`. The layout above is reproduced by writing
                // comments that sit on their own line after a line break.
                let own_line_comment = format_with(|f| {
                    match comment.lines_before() {
                        0 | 1 => write!(f, [hard_line_break()])?,
                        _ => write!(f, [empty_line()])?,
                    };

                    write!(f, [format_comment])
                });

                write!(f, [line_suffix(&own_line_comment), expand_parent()])?;
            }

            comment.mark_formatted();
        }

        Ok(())
    }
}
/// Formats the dangling comments of `node`.
///
/// The comments are written without indentation ([DanglingIndentMode::None]) unless a
/// different mode is selected on the returned value.
pub const fn format_dangling_comments<L: Language>(
    node: &SyntaxNode<L>,
) -> FormatDanglingComments<L> {
    FormatDanglingComments::Node {
        node,
        indent: DanglingIndentMode::None,
    }
}

/// Formats dangling comments: either the ones the context has recorded for a node,
/// or an explicit slice of comments.
#[derive(Debug, Copy, Clone)]
pub enum FormatDanglingComments<'a, L: Language> {
    /// Formats the dangling comments of `node`
    Node {
        node: &'a SyntaxNode<L>,
        indent: DanglingIndentMode,
    },
    /// Formats exactly the passed `comments`
    Comments {
        comments: &'a [SourceComment<L>],
        indent: DanglingIndentMode,
    },
}
/// Selects how [FormatDanglingComments] indents the comments it writes.
#[derive(Copy, Clone, Debug)]
pub enum DanglingIndentMode {
/// Writes every comment on its own line and indents them with a block indent.
///
/// # Examples
/// ```ignore
/// [
/// /* comment */
/// ]
///
/// [
/// /* comment */
/// /* multiple */
/// ]
/// ```
Block,
/// Writes every comment on its own line and indents them with a soft line indent.
/// Guarantees to write a line break if the last formatted comment is a [line](CommentKind::Line) comment.
///
/// # Examples
///
/// ```ignore
/// [/* comment */]
///
/// [
/// /* comment */
/// /* other */
/// ]
///
/// [
/// // line
/// ]
/// ```
Soft,
/// Writes every comment on its own line without adding any indentation.
None,
}
impl<L: Language> FormatDanglingComments<'_, L> {
    /// Selects the [block](DanglingIndentMode::Block) indent for the comments.
    pub fn with_block_indent(self) -> Self {
        self.with_indent_mode(DanglingIndentMode::Block)
    }

    /// Selects the [soft block](DanglingIndentMode::Soft) indent for the comments.
    pub fn with_soft_block_indent(self) -> Self {
        self.with_indent_mode(DanglingIndentMode::Soft)
    }

    /// Replaces the indent mode, whichever variant `self` is.
    fn with_indent_mode(mut self, mode: DanglingIndentMode) -> Self {
        match &mut self {
            FormatDanglingComments::Node { indent, .. }
            | FormatDanglingComments::Comments { indent, .. } => *indent = mode,
        }
        self
    }

    /// Returns the indent mode, whichever variant `self` is.
    const fn indent(&self) -> DanglingIndentMode {
        match self {
            FormatDanglingComments::Node { indent, .. }
            | FormatDanglingComments::Comments { indent, .. } => *indent,
        }
    }
}
impl<Context> Format<Context> for FormatDanglingComments<'_, Context::Language>
where
    Context: CstFormatContext,
{
    /// Writes the comments separated by hard line breaks, wrapped in the indent selected by
    /// the indent mode, and marks each of them as formatted. Writes nothing if there are no
    /// comments.
    fn fmt(&self, f: &mut Formatter<Context>) -> FormatResult<()> {
        let comments = f.context().comments().clone();

        let dangling_comments = match self {
            FormatDanglingComments::Node { node, .. } => comments.dangling_comments(node),
            FormatDanglingComments::Comments { comments, .. } => *comments,
        };

        if dangling_comments.is_empty() {
            return Ok(());
        }

        let indent_mode = self.indent();

        let format_dangling_comments = format_with(|f| {
            // Every comment goes on its own line
            let mut join = f.join_with(hard_line_break());

            for comment in dangling_comments {
                let format_comment =
                    FormatRefWithRule::new(comment, Context::CommentRule::default());
                join.entry(&format_comment);

                comment.mark_formatted();
            }

            join.finish()?;

            // In soft mode, a trailing line comment must be followed by a line break
            let needs_trailing_break = matches!(indent_mode, DanglingIndentMode::Soft)
                && dangling_comments
                    .last()
                    .map_or(false, |comment| comment.kind().is_line());

            if needs_trailing_break {
                write!(f, [hard_line_break()])?;
            }

            Ok(())
        });

        match indent_mode {
            DanglingIndentMode::Block => {
                write!(f, [block_indent(&format_dangling_comments)])
            }
            DanglingIndentMode::Soft => {
                write!(f, [group(&soft_block_indent(&format_dangling_comments))])
            }
            DanglingIndentMode::None => {
                write!(f, [format_dangling_comments])
            }
        }
    }
}
/// Creates a formattable that prints the trimmed text of `token`, leaving out its
/// skipped token trivia.
///
/// ## Warning
/// Formatting the skipped trivia is left to the caller.
pub const fn format_trimmed_token<L: Language>(token: &SyntaxToken<L>) -> FormatTrimmedToken<L> {
    FormatTrimmedToken { token }
}

/// Formats the text covered by the trimmed range of a token.
#[derive(Debug, Eq, PartialEq, Copy, Clone)]
pub struct FormatTrimmedToken<'a, L: Language> {
    token: &'a SyntaxToken<L>,
}

impl<L: Language + 'static, C> Format<C> for FormatTrimmedToken<'_, L>
where
    C: CstFormatContext<Language = L>,
{
    /// Writes the slice of the token's text that its trimmed range covers.
    fn fmt(&self, f: &mut Formatter<C>) -> FormatResult<()> {
        let token = self.token;
        syntax_token_text_slice(token, token.text_trimmed_range()).fmt(f)
    }
}
/// Creates a formattable for a token that is removed from the output: the token is marked
/// as tracked and only its skipped token trivia is formatted.
pub const fn format_removed<L>(token: &SyntaxToken<L>) -> FormatRemoved<L>
where
    L: Language,
{
    FormatRemoved { token }
}

/// Formats the trivia of a token that exists in the source text but must not appear in the
/// formatted output.
pub struct FormatRemoved<'a, L>
where
    L: Language,
{
    token: &'a SyntaxToken<L>,
}

impl<C, L> Format<C> for FormatRemoved<'_, L>
where
    L: Language + 'static,
    C: CstFormatContext<Language = L>,
{
    /// Tracks the token, then writes its skipped token trivia.
    fn fmt(&self, f: &mut Formatter<C>) -> FormatResult<()> {
        let token = self.token;

        f.state_mut().track_token(token);

        write!(f, [format_skipped_token_trivia(token)])
    }
}
/// Prints `content` in place of a `token` from the original source.
///
/// The skipped token trivia that belongs to `token` is printed in front of `content`, and
/// `token` is marked as tracked by the formatter.
pub fn format_replaced<'a, 'content, L, Context>(
    token: &'a SyntaxToken<L>,
    content: &'content impl Format<Context>,
) -> FormatReplaced<'a, 'content, L, Context>
where
    L: Language,
{
    let content = Argument::new(content);

    FormatReplaced { token, content }
}

/// Formats the skipped token trivia of a token, followed by the provided content instead of
/// the token itself.
#[derive(Copy, Clone)]
pub struct FormatReplaced<'a, 'content, L, C>
where
    L: Language,
{
    token: &'a SyntaxToken<L>,
    content: Argument<'content, C>,
}

impl<L, C> Format<C> for FormatReplaced<'_, '_, L, C>
where
    L: Language + 'static,
    C: CstFormatContext<Language = L>,
{
    /// Tracks the token, writes its skipped token trivia and then the replacement content.
    fn fmt(&self, f: &mut Formatter<C>) -> FormatResult<()> {
        let token = self.token;

        f.state_mut().track_token(token);
        write!(f, [format_skipped_token_trivia(token)])?;

        f.write_fmt(Arguments::from(&self.content))
    }
}
/// Formats the given token only if the group does break and otherwise retains the token's skipped token trivia.
pub fn format_only_if_breaks<'a, 'content, L, Content, Context>(
token: &'a SyntaxToken<L>,
content: &'content Content,
) -> FormatOnlyIfBreaks<'a, 'content, L, Context>
where
L: Language,
Content: Format<Context>,
{
FormatOnlyIfBreaks {
token,
content: Argument::new(content),
group_id: None,
}
}
/// Formats a token with its skipped token trivia that only gets printed if its enclosing
/// group does break but otherwise gets omitted from the formatted output.
pub struct FormatOnlyIfBreaks<'a, 'content, L, C>
where
L: Language,
{
token: &'a SyntaxToken<L>,
content: Argument<'content, C>,
group_id: Option<GroupId>,
}
impl<'a, 'content, L, C> FormatOnlyIfBreaks<'a, 'content, L, C>
where
L: Language,
{
pub fn with_group_id(mut self, group_id: Option<GroupId>) -> Self {
self.group_id = group_id;
self
}
}
impl<L, C> Format<C> for FormatOnlyIfBreaks<'_, '_, L, C>
where
L: Language + 'static,
C: CstFormatContext<Language = L>,
{
fn fmt(&self, f: &mut Formatter<C>) -> FormatResult<()> {
write!(
f,
[if_group_breaks(&Arguments::from(&self.content)).with_group_id(self.group_id),]
)?;
if f.comments().has_skipped(self.token) {
// Print the trivia otherwise
write!(
f,
[
if_group_fits_on_line(&format_skipped_token_trivia(self.token))
.with_group_id(self.group_id)
]
)?;
}
Ok(())
}
}
/// Formats the skipped token trivia of `token`.
///
/// Only wraps `token`; all the work happens in the [Format] implementation of the returned
/// value, which writes nothing unless the comments report skipped trivia for the token.
pub const fn format_skipped_token_trivia<L: Language>(
token: &SyntaxToken<L>,
) -> FormatSkippedTokenTrivia<L> {
FormatSkippedTokenTrivia { token }
}
/// Formats the skipped token trivia of `token`.
pub struct FormatSkippedTokenTrivia<'a, L: Language> {
// The token whose leading trivia is inspected for skipped pieces
token: &'a SyntaxToken<L>,
}
impl<L: Language> FormatSkippedTokenTrivia<'_, L> {
/// Writes the skipped token trivia found in the leading trivia of the token as verbatim
/// text, surrounded by the whitespace separators derived from the source, and followed by
/// the comments that come after the last skipped piece.
///
/// Marked `#[cold]`: the [Format] implementation only calls it when the comments report
/// skipped trivia for the token.
#[cold]
fn fmt_skipped<Context>(&self, f: &mut Formatter<Context>) -> FormatResult<()>
where
Context: CstFormatContext<Language = L>,
{
// Lines/spaces before the next token/comment
// Seeded from the end of the previous token's trailing trivia: walks the pieces backwards
// and stops at the first piece that is neither whitespace nor a newline.
let (mut lines, mut spaces) = match self.token.prev_token() {
Some(token) => {
let mut lines = 0u32;
let mut spaces = 0u32;
for piece in token.trailing_trivia().pieces().rev() {
if piece.is_whitespace() {
spaces += 1;
} else if piece.is_newline() {
spaces = 0;
lines += 1;
} else {
break;
}
}
(lines, spaces)
}
None => (0, 0),
};
// The comments between the last skipped token trivia and the token
let mut dangling_comments = Vec::new();
// Covers everything from the first to the last skipped piece; `None` until one is found
let mut skipped_range: Option<TextRange> = None;
// Iterate over the remaining pieces to find the full range from the first to the last skipped token trivia.
// Extract the comments between the last skipped token trivia and the token.
for piece in self.token.leading_trivia().pieces() {
if piece.is_whitespace() {
spaces += 1;
continue;
}
if piece.is_newline() {
lines += 1;
spaces = 0;
} else if let Some(comment) = piece.as_comments() {
// Remember the comment together with the number of line breaks in front of it
let source_comment = SourceComment {
kind: Context::Style::get_comment_kind(&comment),
lines_before: lines,
lines_after: 0,
piece: comment,
#[cfg(debug_assertions)]
formatted: Cell::new(true),
};
dangling_comments.push(source_comment);
lines = 0;
spaces = 0;
} else if piece.is_skipped() {
skipped_range = Some(match skipped_range {
// Not the first skipped piece: only extend the range
Some(range) => range.cover(piece.text_range()),
None => {
// First skipped piece: write the separator in front of the verbatim text
if dangling_comments.is_empty() {
match lines {
0 if spaces == 0 => {
// Token had no space to previous token nor any preceding comment. Keep it that way
}
0 => write!(f, [space()])?,
_ => write!(f, [hard_line_break()])?,
};
} else {
match lines {
0 => write!(f, [space()])?,
1 => write!(f, [hard_line_break()])?,
_ => write!(f, [empty_line()])?,
};
}
piece.text_range()
}
});
lines = 0;
spaces = 0;
// Comments collected so far precede a skipped piece; only the comments after the
// last skipped piece are kept for the code below
dangling_comments.clear();
}
}
// Falls back to an empty range at the start of the token if no skipped piece was found
let skipped_range =
skipped_range.unwrap_or_else(|| TextRange::empty(self.token.text_range().start()));
f.write_element(FormatElement::Tag(Tag::StartVerbatim(
VerbatimKind::Verbatim {
length: skipped_range.len(),
},
)))?;
write!(f, [syntax_token_text_slice(self.token, skipped_range)])?;
f.write_element(FormatElement::Tag(Tag::EndVerbatim))?;
// Write whitespace separator between skipped/last comment and token
if dangling_comments.is_empty() {
match lines {
0 if spaces == 0 => {
// Don't write a space if there was none in the source document
Ok(())
}
0 => write!(f, [space()]),
_ => write!(f, [hard_line_break()]),
}
} else {
// `dangling_comments` is not empty in this branch, so `first()` always returns a comment
match dangling_comments.first().unwrap().lines_before {
0 => write!(f, [space()])?,
1 => write!(f, [hard_line_break()])?,
_ => write!(f, [empty_line()])?,
}
write!(
f,
[FormatDanglingComments::Comments {
comments: &dangling_comments,
indent: DanglingIndentMode::None
}]
)?;
// Separator between the last comment and the token
match lines {
0 => write!(f, [space()]),
_ => write!(f, [hard_line_break()]),
}
}
}
}
impl<Context> Format<Context> for FormatSkippedTokenTrivia<'_, Context::Language>
where
    Context: CstFormatContext,
{
    /// Writes the skipped token trivia of the token, or nothing if the comments report that
    /// the token has none.
    fn fmt(&self, f: &mut Formatter<Context>) -> FormatResult<()> {
        if !f.comments().has_skipped(self.token) {
            return Ok(());
        }

        self.fmt_skipped(f)
    }
}

View file

@ -0,0 +1,205 @@
use crate::format_element::tag::VerbatimKind;
use crate::prelude::*;
use crate::trivia::{FormatLeadingComments, FormatTrailingComments};
use crate::{write, CstFormatContext, FormatWithRule};
use ruff_rowan::{AstNode, Direction, Language, SyntaxElement, SyntaxNode, TextRange};
/// "Formats" a node by printing it the way it is written in the source text. Printing a node
/// "as is" is useful if the node contains syntax errors: formatting such a node risks that the
/// formatter misinterprets the structure of the code and "messes up" the developer's, still
/// incomplete, work or accidentally introduces new syntax errors.
///
/// You may be inclined to call `node.text` directly. However, using `text` doesn't track the node's
/// nor its children's source mapping information, resulting in incorrect source maps for this subtree.
///
/// The node and its tokens get tracked as [VerbatimKind::Verbatim], which is useful to find
/// the nodes that still need their own implementation.
pub fn format_verbatim_node<L: Language>(node: &SyntaxNode<L>) -> FormatVerbatimNode<L> {
    let length = node.text_range().len();

    FormatVerbatimNode {
        node,
        kind: VerbatimKind::Verbatim { length },
        format_comments: true,
    }
}

/// Prints a node the way it is written in the source text.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub struct FormatVerbatimNode<'node, L: Language> {
    /// The node to print
    node: &'node SyntaxNode<L>,
    /// The kind recorded in the verbatim start tag
    kind: VerbatimKind,
    /// Whether the comments outside of the node's trimmed range get formatted
    format_comments: bool,
}
impl<Context> Format<Context> for FormatVerbatimNode<'_, Context::Language>
where
Context: CstFormatContext,
{
/// Writes the node's source text between a verbatim start and end tag.
///
/// Every token of the subtree is tracked and the comments of every descendant node are
/// marked as formatted. If `format_comments` is set, the leading and trailing comments
/// outside of the node's trimmed source range are formatted in front of and after the text.
fn fmt(&self, f: &mut Formatter<Context>) -> FormatResult<()> {
// Nothing inside the subtree gets formatted on its own: track all tokens and flag the
// comments of all nested nodes so that they count as handled.
for element in self.node.descendants_with_tokens(Direction::Next) {
match element {
SyntaxElement::Token(token) => f.state_mut().track_token(&token),
SyntaxElement::Node(node) => {
let comments = f.context().comments();
comments.mark_suppression_checked(&node);
for comment in comments.leading_dangling_trailing_comments(&node) {
comment.mark_formatted();
}
}
}
}
// The trimmed range of a node is its range without any of its leading or trailing trivia.
// Except for nodes that used to be parenthesized, the range that covers the source from the
// `(` to the `)` (the trimmed range of the parenthesized expression, not the inner expression)
let trimmed_source_range = f.context().source_map().map_or_else(
|| self.node.text_trimmed_range(),
|source_map| source_map.trimmed_source_range(self.node),
);
f.write_element(FormatElement::Tag(Tag::StartVerbatim(self.kind)))?;
// Maps `range` through the source map, or returns it unchanged if there is no source map
fn source_range<Context>(f: &Formatter<Context>, range: TextRange) -> TextRange
where
Context: CstFormatContext,
{
f.context()
.source_map()
.map_or_else(|| range, |source_map| source_map.source_range(range))
}
// Format all leading comments that are outside of the node's source range.
if self.format_comments {
let comments = f.context().comments().clone();
let leading_comments = comments.leading_comments(self.node);
// Index of the first comment that does not end before the trimmed range starts
let outside_trimmed_range = leading_comments.partition_point(|comment| {
comment.piece().text_range().end() <= trimmed_source_range.start()
});
let (outside_trimmed_range, in_trimmed_range) =
leading_comments.split_at(outside_trimmed_range);
write!(f, [FormatLeadingComments::Comments(outside_trimmed_range)])?;
// The remaining comments are not formatted separately, only flagged as handled
for comment in in_trimmed_range {
comment.mark_formatted();
}
}
// Find the first skipped token trivia, if any, and include it in the verbatim range because
// the comments only format **up to** but not including skipped token trivia.
let start_source = self
.node
.first_leading_trivia()
.into_iter()
.flat_map(|trivia| trivia.pieces())
.filter(|trivia| trivia.is_skipped())
.map(|trivia| source_range(f, trivia.text_range()).start())
.take_while(|start| *start < trimmed_source_range.start())
.next()
.unwrap_or_else(|| trimmed_source_range.start());
// Without a source map the node's own trimmed text is used; with one, the text is sliced
// from the source map's text, starting at `start_source`.
let original_source = f.context().source_map().map_or_else(
|| self.node.text_trimmed().to_string(),
|source_map| {
source_map.text()[trimmed_source_range.cover_offset(start_source)].to_string()
},
);
dynamic_text(
&normalize_newlines(&original_source, LINE_TERMINATORS),
self.node.text_trimmed_range().start(),
)
.fmt(f)?;
// The dangling comments of the node are flagged as handled without being formatted separately
for comment in f.context().comments().dangling_comments(self.node) {
comment.mark_formatted();
}
// Format all trailing comments that are outside of the trimmed range.
if self.format_comments {
let comments = f.context().comments().clone();
let trailing_comments = comments.trailing_comments(self.node);
// Index of the first comment that ends after the trimmed range
let outside_trimmed_range_start = trailing_comments.partition_point(|comment| {
source_range(f, comment.piece().text_range()).end() <= trimmed_source_range.end()
});
let (in_trimmed_range, outside_trimmed_range) =
trailing_comments.split_at(outside_trimmed_range_start);
for comment in in_trimmed_range {
comment.mark_formatted();
}
write!(f, [FormatTrailingComments::Comments(outside_trimmed_range)])?;
}
f.write_element(FormatElement::Tag(Tag::EndVerbatim))
}
}
impl<L: Language> FormatVerbatimNode<'_, L> {
    /// Turns off the formatting of the comments outside of the node's trimmed range.
    pub fn skip_comments(self) -> Self {
        Self {
            format_comments: false,
            ..self
        }
    }
}
/// Formats a bogus node by printing it as it is written in the source. Unlike
/// [format_verbatim_node], the node is tagged as [VerbatimKind::Bogus] rather than
/// [VerbatimKind::Verbatim].
pub fn format_bogus_node<L: Language>(node: &SyntaxNode<L>) -> FormatVerbatimNode<L> {
    let kind = VerbatimKind::Bogus;

    FormatVerbatimNode {
        node,
        kind,
        format_comments: true,
    }
}
/// Formats a node that has a formatter suppression comment applied to it by printing it as it
/// is written in the source, tagged as [VerbatimKind::Suppressed].
pub fn format_suppressed_node<L: Language>(node: &SyntaxNode<L>) -> FormatVerbatimNode<L> {
    let kind = VerbatimKind::Suppressed;

    FormatVerbatimNode {
        node,
        kind,
        format_comments: true,
    }
}
/// Wraps `inner` so that it is formatted with its own [`Format`] implementation, falling back
/// to printing it as it is written in the source document if that returns
/// [`FormatError::SyntaxError`].
pub const fn format_or_verbatim<F>(inner: F) -> FormatNodeOrVerbatim<F> {
    FormatNodeOrVerbatim { inner }
}

/// Formats a node, or prints it verbatim if formatting the node fails with a syntax error.
#[derive(Copy, Clone)]
pub struct FormatNodeOrVerbatim<F> {
    inner: F,
}

impl<F, Context, Item> Format<Context> for FormatNodeOrVerbatim<F>
where
    F: FormatWithRule<Context, Item = Item>,
    Item: AstNode,
    Context: CstFormatContext<Language = Item::Language>,
{
    /// Formats the inner object. On a syntax error the formatter state is rolled back to the
    /// snapshot taken before the attempt and the node is printed like a suppressed node; any
    /// other result is returned unchanged.
    fn fmt(&self, f: &mut Formatter<Context>) -> FormatResult<()> {
        let snapshot = Formatter::state_snapshot(f);

        let result = self.inner.fmt(f);

        if !matches!(result, Err(FormatError::SyntaxError)) {
            return result;
        }

        f.restore_state_snapshot(snapshot);

        // Lists that yield errors are formatted as if they were suppressed nodes.
        // Doing so, the formatter prints the nodes/tokens as is.
        format_suppressed_node(self.inner.item().syntax()).fmt(f)
    }
}