ruff/crates/ruff_formatter/src/lib.rs
Micha Reiser 86ced3516b
Introduce SourceCodeSlice to reduce the size of FormatElement (#4622)
Introduce `SourceCodeSlice` to reduce the size of `FormatElement`
2023-05-24 15:04:52 +00:00

826 lines
23 KiB
Rust

//! Infrastructure for code formatting
//!
//! This module defines [FormatElement], an IR to format code documents, and provides a means to print
//! such a document to a string. Objects that know how to format themselves implement the [Format] trait.
//!
//! ## Formatting Traits
//!
//! * [Format]: Implemented by objects that can be formatted.
//! * [FormatRule]: Rule that knows how to format an object of another type. Required when
//! [Format] has to be implemented for an object from another crate. This module defines the
//! [FormatRefWithRule] and [FormatOwnedWithRule] structs to pass an item with its corresponding rule.
//! * [FormatWithRule]: Implemented by objects that know how to format another type. Useful for implementing
//! some reusable formatting logic inside of this module if the type itself doesn't implement [Format].
//!
//! ## Formatting Macros
//!
//! This crate defines three macros to construct the IR. These are inspired by Rust's `fmt` macros
//! * [`format!`]: Formats a formattable object
//! * [`format_args!`]: Concatenates a sequence of Format objects.
//! * [`write!`]: Writes a sequence of formattable objects into an output buffer.
#![allow(clippy::pedantic, unsafe_code)]
#![deny(rustdoc::broken_intra_doc_links)]
mod arguments;
mod buffer;
mod builders;
pub mod diagnostics;
pub mod format_element;
mod format_extensions;
pub mod formatter;
pub mod group_id;
pub mod macros;
pub mod prelude;
pub mod printer;
mod source_code;
use crate::formatter::Formatter;
use crate::group_id::UniqueGroupIdBuilder;
use crate::prelude::TagKind;
use std::fmt::Debug;
use crate::format_element::document::Document;
use crate::printer::{Printer, PrinterOptions};
pub use arguments::{Argument, Arguments};
pub use buffer::{
Buffer, BufferExtensions, BufferSnapshot, Inspect, PreambleBuffer, RemoveSoftLinesBuffer,
VecBuffer,
};
pub use builders::BestFitting;
pub use source_code::{SourceCode, SourceCodeSlice};
pub use crate::diagnostics::{ActualStart, FormatError, InvalidDocumentError, PrintError};
pub use format_element::{normalize_newlines, FormatElement, LINE_TERMINATORS};
pub use group_id::GroupId;
use ruff_text_size::{TextRange, TextSize};
use std::num::ParseIntError;
use std::str::FromStr;
/// Indentation flavour used for formatted code: either a tab or a number of spaces.
///
/// [IndentStyle::Tab] is the default.
#[derive(Debug, Eq, PartialEq, Clone, Copy, Hash, Default)]
#[cfg_attr(
    feature = "serde",
    derive(serde::Serialize, serde::Deserialize, schemars::JsonSchema)
)]
pub enum IndentStyle {
    /// Tab
    #[default]
    Tab,
    /// Space, with its quantity
    Space(u8),
}

impl IndentStyle {
    /// Space count used when [IndentStyle] is parsed from `"space"` / `"Spaces"`.
    pub const DEFAULT_SPACES: u8 = 2;

    /// Returns `true` if this is an [IndentStyle::Tab].
    pub const fn is_tab(&self) -> bool {
        match self {
            Self::Tab => true,
            Self::Space(_) => false,
        }
    }

    /// Returns `true` if this is an [IndentStyle::Space].
    pub const fn is_space(&self) -> bool {
        match self {
            Self::Space(_) => true,
            Self::Tab => false,
        }
    }
}

/// Parses the exact spellings `"tab"`, `"Tabs"`, `"space"` and `"Spaces"`; anything else is an error.
impl FromStr for IndentStyle {
    type Err = &'static str;

    fn from_str(s: &str) -> Result<Self, Self::Err> {
        if matches!(s, "tab" | "Tabs") {
            Ok(Self::Tab)
        } else if matches!(s, "space" | "Spaces") {
            Ok(Self::Space(Self::DEFAULT_SPACES))
        } else {
            // TODO: replace this error with a diagnostic
            Err("Value not supported for IndentStyle")
        }
    }
}

/// Human readable rendering: `Tab`, or `Spaces, size: N`.
impl std::fmt::Display for IndentStyle {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match *self {
            Self::Tab => f.write_str("Tab"),
            Self::Space(width) => std::write!(f, "Spaces, size: {width}"),
        }
    }
}
/// Validated value for the `line_width` formatter options
///
/// The allowed range of values is 1..=320
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
#[cfg_attr(
    feature = "serde",
    derive(serde::Serialize, serde::Deserialize, schemars::JsonSchema)
)]
pub struct LineWidth(u16);

impl LineWidth {
    /// Maximum allowed value for a valid [LineWidth]
    pub const MAX: u16 = 320;

    /// Return the numeric value for this [LineWidth]
    pub fn value(&self) -> u16 {
        self.0
    }
}

/// The default line width is 80.
impl Default for LineWidth {
    fn default() -> Self {
        Self(80)
    }
}

/// Error type returned when parsing a [LineWidth] from a string fails
pub enum ParseLineWidthError {
    /// The string could not be parsed as a valid [u16]
    ParseError(ParseIntError),
    /// The [u16] value of the string is not a valid [LineWidth]
    TryFromIntError(LineWidthFromIntError),
}

/// `Debug` forwards to `Display`, so both render the same human readable message.
impl Debug for ParseLineWidthError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        std::fmt::Display::fmt(self, f)
    }
}

/// Displays the message of the wrapped error.
impl std::fmt::Display for ParseLineWidthError {
    fn fmt(&self, fmt: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::ParseError(err) => std::fmt::Display::fmt(err, fmt),
            Self::TryFromIntError(err) => std::fmt::Display::fmt(err, fmt),
        }
    }
}

/// Parses a decimal [u16] and validates it against the allowed [LineWidth] range.
impl FromStr for LineWidth {
    type Err = ParseLineWidthError;

    fn from_str(s: &str) -> Result<Self, Self::Err> {
        let value = s.parse::<u16>().map_err(ParseLineWidthError::ParseError)?;
        Self::try_from(value).map_err(ParseLineWidthError::TryFromIntError)
    }
}

/// Error type returned when converting a u16 to a [LineWidth] fails
///
/// Carries the rejected value.
#[derive(Clone, Copy, Debug)]
pub struct LineWidthFromIntError(pub u16);

/// Accepts values in `1..=LineWidth::MAX`; everything else is rejected.
impl TryFrom<u16> for LineWidth {
    type Error = LineWidthFromIntError;

    fn try_from(value: u16) -> Result<Self, Self::Error> {
        if (1..=Self::MAX).contains(&value) {
            Ok(Self(value))
        } else {
            Err(LineWidthFromIntError(value))
        }
    }
}

impl std::fmt::Display for LineWidthFromIntError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // The conversion fails for two different reasons (value of 0, or value above `MAX`);
        // report the one that actually applies. The message has no trailing newline so that
        // callers stay in control of line breaks. `std::write!` is spelled out because this
        // crate exports its own `write!` macro.
        if self.0 == 0 {
            std::write!(f, "The line width must be at least 1")
        } else {
            std::write!(
                f,
                "The line width exceeds the maximum value ({})",
                LineWidth::MAX
            )
        }
    }
}

/// Unwraps the validated numeric value.
impl From<LineWidth> for u16 {
    fn from(value: LineWidth) -> Self {
        value.0
    }
}
/// Context object storing data relevant when formatting an object.
///
/// A context exposes the formatting options and the source code of the document being formatted.
pub trait FormatContext {
/// Type of the options returned by [FormatContext::options].
type Options: FormatOptions;
/// Returns the formatting options
fn options(&self) -> &Self::Options;
/// Returns the source code from the document that gets formatted.
fn source_code(&self) -> SourceCode;
}
/// Options customizing how the source code should be formatted.
pub trait FormatOptions {
/// The indent style.
fn indent_style(&self) -> IndentStyle;
/// What's the max width of a line. Defaults to 80.
fn line_width(&self) -> LineWidth;
/// Derives the print options from these format options
fn as_print_options(&self) -> PrinterOptions;
}
/// Basic [FormatContext] that stores [SimpleFormatOptions] together with an owned copy of the
/// source text.
#[derive(Debug, Default, Eq, PartialEq)]
pub struct SimpleFormatContext {
    options: SimpleFormatOptions,
    source_code: String,
}

impl SimpleFormatContext {
    /// Creates a context that uses `options` and starts out with an empty source text.
    pub fn new(options: SimpleFormatOptions) -> Self {
        Self {
            source_code: String::new(),
            options,
        }
    }

    /// Returns the context with its source text replaced by a copy of `code`.
    pub fn with_source_code(self, code: &str) -> Self {
        Self {
            source_code: code.to_owned(),
            ..self
        }
    }
}

impl FormatContext for SimpleFormatContext {
    type Options = SimpleFormatOptions;

    /// Borrows the stored options.
    fn options(&self) -> &Self::Options {
        &self.options
    }

    /// Wraps the stored source text in a [SourceCode].
    fn source_code(&self) -> SourceCode {
        SourceCode::new(self.source_code.as_str())
    }
}
/// Plain [FormatOptions] implementation consisting of an indent style and a line width.
#[derive(Debug, Default, Eq, PartialEq, Clone)]
pub struct SimpleFormatOptions {
    pub indent_style: IndentStyle,
    pub line_width: LineWidth,
}

impl FormatOptions for SimpleFormatOptions {
    /// Returns the configured indent style.
    fn indent_style(&self) -> IndentStyle {
        self.indent_style
    }

    /// Returns the configured line width.
    fn line_width(&self) -> LineWidth {
        self.line_width
    }

    /// Builds default [PrinterOptions] and overrides their indent and print width with the
    /// values stored in these options.
    fn as_print_options(&self) -> PrinterOptions {
        let Self {
            indent_style,
            line_width,
        } = *self;

        PrinterOptions::default()
            .with_indent(indent_style)
            .with_print_width(line_width.into())
    }
}
/// Lightweight sourcemap marker between source and output tokens
///
/// Each marker pairs a position in the original source with the matching position in the
/// printed output.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
#[cfg_attr(
feature = "serde",
derive(serde::Serialize, serde::Deserialize, schemars::JsonSchema)
)]
pub struct SourceMarker {
/// Position of the marker in the original source
pub source: TextSize,
/// Position of the marker in the output code
pub dest: TextSize,
}
/// A formatted [Document] together with the context that was used to create it.
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct Formatted<Context> {
    document: Document,
    context: Context,
}

impl<Context> Formatted<Context> {
    /// Bundles `document` with the `context` it was formatted with.
    pub fn new(document: Document, context: Context) -> Self {
        Self { context, document }
    }

    /// Returns the context used during formatting.
    pub fn context(&self) -> &Context {
        &self.context
    }

    /// Returns the formatted document.
    pub fn document(&self) -> &Document {
        &self.document
    }

    /// Consumes `self` and returns the formatted document.
    pub fn into_document(self) -> Document {
        let Self { document, .. } = self;
        document
    }
}

impl<Context> Formatted<Context>
where
    Context: FormatContext,
{
    /// Prints the document with a [Printer] configured from the context's source code and options.
    pub fn print(&self) -> PrintResult<Printed> {
        let source = self.context.source_code();
        let options = self.context.options().as_print_options();

        Ok(Printer::new(source, options).print(&self.document)?)
    }

    /// Same as [Formatted::print], but forwards `indent` to the printer's `print_with_indent`.
    pub fn print_with_indent(&self, indent: u16) -> PrintResult<Printed> {
        let source = self.context.source_code();
        let options = self.context.options().as_print_options();

        Ok(Printer::new(source, options).print_with_indent(&self.document, indent)?)
    }
}
/// Result type of printing operations; the error variant is a [PrintError].
pub type PrintResult<T> = Result<T, PrintError>;
/// Outcome of printing a document: the produced code plus an optional source range,
/// the source map markers and the ranges that were emitted verbatim.
#[derive(Debug, Clone, Eq, PartialEq)]
#[cfg_attr(
    feature = "serde",
    derive(serde::Serialize, serde::Deserialize, schemars::JsonSchema)
)]
pub struct Printed {
    code: String,
    range: Option<TextRange>,
    sourcemap: Vec<SourceMarker>,
    verbatim_ranges: Vec<TextRange>,
}

impl Printed {
    /// Assembles a result from its parts; `verbatim_source` is stored as the verbatim ranges.
    pub fn new(
        code: String,
        range: Option<TextRange>,
        sourcemap: Vec<SourceMarker>,
        verbatim_source: Vec<TextRange>,
    ) -> Self {
        let verbatim_ranges = verbatim_source;
        Self {
            code,
            range,
            sourcemap,
            verbatim_ranges,
        }
    }

    /// Construct an empty formatter result
    pub fn new_empty() -> Self {
        Self::new(String::new(), None, Vec::new(), Vec::new())
    }

    /// Range of the input source file covered by this formatted code,
    /// or None if the entire file is covered in this instance
    pub fn range(&self) -> Option<TextRange> {
        self.range
    }

    /// Returns a list of [SourceMarker] mapping byte positions
    /// in the output string to the input source code.
    /// It's not guaranteed that the markers are sorted by source position.
    pub fn sourcemap(&self) -> &[SourceMarker] {
        self.sourcemap.as_slice()
    }

    /// Returns a list of [SourceMarker] mapping byte positions
    /// in the output string to the input source code, consuming the result
    pub fn into_sourcemap(self) -> Vec<SourceMarker> {
        let Self { sourcemap, .. } = self;
        sourcemap
    }

    /// Takes the list of [SourceMarker] mapping byte positions in the output string
    /// to the input source code, leaving an empty list behind.
    pub fn take_sourcemap(&mut self) -> Vec<SourceMarker> {
        let markers = &mut self.sourcemap;
        std::mem::take(markers)
    }

    /// Access the resulting code, borrowing the result
    pub fn as_code(&self) -> &str {
        self.code.as_str()
    }

    /// Access the resulting code, consuming the result
    pub fn into_code(self) -> String {
        let Self { code, .. } = self;
        code
    }

    /// The text in the formatted code that has been formatted as verbatim.
    ///
    /// Yields each verbatim range paired with the slice of the printed code it covers.
    pub fn verbatim(&self) -> impl Iterator<Item = (TextRange, &str)> {
        self.verbatim_ranges
            .iter()
            .map(|&range| (range, &self.code[range]))
    }

    /// Ranges of the formatted code that have been formatted as verbatim.
    pub fn verbatim_ranges(&self) -> &[TextRange] {
        self.verbatim_ranges.as_slice()
    }

    /// Takes the ranges of nodes that have been formatted as verbatim, replacing them with an empty list.
    pub fn take_verbatim_ranges(&mut self) -> Vec<TextRange> {
        let ranges = &mut self.verbatim_ranges;
        std::mem::take(ranges)
    }
}
/// Public return type of the formatter
///
/// Either the successfully produced value `F` or a [FormatError].
pub type FormatResult<F> = Result<F, FormatError>;
/// Formatting trait for types that can create a formatted representation. The `ruff_formatter` equivalent
/// to [std::fmt::Display].
///
/// ## Example
/// Implementing `Format` for a custom struct
///
/// ```
/// use ruff_formatter::{format, write, IndentStyle, LineWidth};
/// use ruff_formatter::prelude::*;
/// use ruff_text_size::TextSize;
///
/// struct Paragraph(String);
///
/// impl Format<SimpleFormatContext> for Paragraph {
///     fn fmt(&self, f: &mut Formatter<SimpleFormatContext>) -> FormatResult<()> {
///         write!(f, [
///             hard_line_break(),
///             dynamic_text(&self.0, None),
///             hard_line_break(),
///         ])
///     }
/// }
///
/// # fn main() -> FormatResult<()> {
/// let paragraph = Paragraph(String::from("test"));
/// let formatted = format!(SimpleFormatContext::default(), [paragraph])?;
///
/// assert_eq!("test\n", formatted.print()?.as_code());
/// # Ok(())
/// # }
/// ```
pub trait Format<Context> {
/// Formats the object using the given formatter.
///
/// Returns `Ok(())` on success and a [FormatError] if formatting fails.
fn fmt(&self, f: &mut Formatter<Context>) -> FormatResult<()>;
}
/// A shared reference formats exactly like the value it refers to.
impl<T, Context> Format<Context> for &T
where
    T: ?Sized + Format<Context>,
{
    #[inline(always)]
    fn fmt(&self, f: &mut Formatter<Context>) -> FormatResult<()> {
        <T as Format<Context>>::fmt(&**self, f)
    }
}

/// A mutable reference formats exactly like the value it refers to.
impl<T, Context> Format<Context> for &mut T
where
    T: ?Sized + Format<Context>,
{
    #[inline(always)]
    fn fmt(&self, f: &mut Formatter<Context>) -> FormatResult<()> {
        <T as Format<Context>>::fmt(&**self, f)
    }
}

/// `Some(value)` formats the contained value, `None` formats to nothing.
impl<T, Context> Format<Context> for Option<T>
where
    T: Format<Context>,
{
    fn fmt(&self, f: &mut Formatter<Context>) -> FormatResult<()> {
        if let Some(value) = self {
            value.fmt(f)
        } else {
            Ok(())
        }
    }
}

/// The unit type formats to nothing.
impl<Context> Format<Context> for () {
    #[inline]
    fn fmt(&self, _f: &mut Formatter<Context>) -> FormatResult<()> {
        // Intentionally left empty
        Ok(())
    }
}
/// Rule that knows how to format an object of type `T`.
///
/// Implementing [Format] on the object itself is preferred over implementing [FormatRule] but
/// this isn't possible inside of a dependent crate for external types.
///
/// For example, the `ruff_js_formatter` crate isn't able to implement [Format] on `JsIfStatement`
/// because both the [Format] trait and `JsIfStatement` are external types (Rust's orphan rule).
///
/// That's why the `ruff_js_formatter` crate must define a new-type that implements the formatting
/// of `JsIfStatement`.
pub trait FormatRule<T> {
/// Context type of the [Formatter] this rule writes to.
type Context;
/// Formats `item` into the formatter `f` according to this rule.
fn fmt(&self, item: &T, f: &mut Formatter<Self::Context>) -> FormatResult<()>;
}
/// Rule that supports customizing how it formats an object of type `T`.
pub trait FormatRuleWithOptions<T>: FormatRule<T> {
/// Type of the options accepted by [FormatRuleWithOptions::with_options].
type Options;
/// Returns a new rule that uses the given options to format an object.
fn with_options(self, options: Self::Options) -> Self;
}
/// Trait for an object that formats an object with a specified rule.
///
/// Gives access to the underlying item.
///
/// Useful in situations where a type itself doesn't implement [Format] (e.g. because of Rust's orphan rule)
/// but you want to implement some common formatting logic.
///
/// ## Examples
///
/// This can be useful if you want to format a `SyntaxNode` inside ruff_formatter. `SyntaxNode` doesn't implement [Format]
/// itself but the language specific crate implements `AsFormat` and `IntoFormat` for it and the returned [Format]
/// implements [FormatWithRule].
///
/// ```ignore
/// use ruff_formatter::prelude::*;
/// use ruff_formatter::{format, Formatted, FormatWithRule};
/// use ruff_rowan::{Language, SyntaxNode};
/// fn format_node<L: Language, F: FormatWithRule<SimpleFormatContext, Item=SyntaxNode<L>>>(node: F) -> FormatResult<Formatted<SimpleFormatContext>> {
///     let formatted = format!(SimpleFormatContext::default(), [node]);
///     let syntax = node.item();
///     // Do something with syntax
///     formatted;
/// }
/// ```
pub trait FormatWithRule<Context>: Format<Context> {
/// Type of the item that gets formatted.
type Item;
/// Returns the associated item
fn item(&self) -> &Self::Item;
}
/// Pairs a borrowed `item` with the rule that is used to format it.
#[derive(Debug, Copy, Clone)]
pub struct FormatRefWithRule<'a, T, R>
where
    R: FormatRule<T>,
{
    item: &'a T,
    rule: R,
}

impl<'a, T, R> FormatRefWithRule<'a, T, R>
where
    R: FormatRule<T>,
{
    /// Creates a formattable value that formats `item` with `rule`.
    pub fn new(item: &'a T, rule: R) -> Self {
        Self { rule, item }
    }
}

impl<T, R, O> FormatRefWithRule<'_, T, R>
where
    R: FormatRuleWithOptions<T, Options = O>,
{
    /// Replaces the rule with the one returned by the rule's own `with_options(options)`.
    pub fn with_options(self, options: O) -> Self {
        let Self { item, rule } = self;
        Self {
            item,
            rule: rule.with_options(options),
        }
    }
}

impl<T, R> FormatWithRule<R::Context> for FormatRefWithRule<'_, T, R>
where
    R: FormatRule<T>,
{
    type Item = T;

    /// Returns the borrowed item.
    fn item(&self) -> &Self::Item {
        self.item
    }
}

impl<T, R> Format<R::Context> for FormatRefWithRule<'_, T, R>
where
    R: FormatRule<T>,
{
    /// Delegates to the rule, passing it the borrowed item.
    #[inline(always)]
    fn fmt(&self, f: &mut Formatter<R::Context>) -> FormatResult<()> {
        <R as FormatRule<T>>::fmt(&self.rule, self.item, f)
    }
}
/// Pairs an owned `item` with the rule that is used to format it.
#[derive(Debug, Clone)]
pub struct FormatOwnedWithRule<T, R>
where
    R: FormatRule<T>,
{
    item: T,
    rule: R,
}

impl<T, R> FormatOwnedWithRule<T, R>
where
    R: FormatRule<T>,
{
    /// Creates a formattable value that formats `item` with `rule`.
    pub fn new(item: T, rule: R) -> Self {
        Self { rule, item }
    }

    /// Swaps the stored item for `item` and keeps the rule.
    pub fn with_item(self, item: T) -> Self {
        Self {
            item,
            rule: self.rule,
        }
    }

    /// Consumes `self` and hands back the stored item.
    pub fn into_item(self) -> T {
        let Self { item, .. } = self;
        item
    }
}

impl<T, R> Format<R::Context> for FormatOwnedWithRule<T, R>
where
    R: FormatRule<T>,
{
    /// Delegates to the rule, passing it a reference to the owned item.
    #[inline(always)]
    fn fmt(&self, f: &mut Formatter<R::Context>) -> FormatResult<()> {
        <R as FormatRule<T>>::fmt(&self.rule, &self.item, f)
    }
}

impl<T, R, O> FormatOwnedWithRule<T, R>
where
    R: FormatRuleWithOptions<T, Options = O>,
{
    /// Replaces the rule with the one returned by the rule's own `with_options(options)`.
    pub fn with_options(self, options: O) -> Self {
        let Self { item, rule } = self;
        Self {
            item,
            rule: rule.with_options(options),
        }
    }
}

impl<T, R> FormatWithRule<R::Context> for FormatOwnedWithRule<T, R>
where
    R: FormatRule<T>,
{
    type Item = T;

    /// Borrows the owned item.
    fn item(&self) -> &Self::Item {
        &self.item
    }
}
/// Writes pre-built `Arguments` (see the `format_args!` macro) into the target buffer `output`.
///
/// A [Formatter] is created on top of `output` and the arguments are written through it,
/// in order.
///
/// # Examples
///
/// ```
/// use ruff_formatter::prelude::*;
/// use ruff_formatter::{VecBuffer, format_args, FormatState, write, Formatted};
///
/// # fn main() -> FormatResult<()> {
/// let mut state = FormatState::new(SimpleFormatContext::default());
/// let mut buffer = VecBuffer::new(&mut state);
///
/// write!(&mut buffer, [format_args!(text("Hello World"))])?;
///
/// let formatted = Formatted::new(Document::from(buffer.into_vec()), SimpleFormatContext::default());
///
/// assert_eq!("Hello World", formatted.print()?.as_code());
/// # Ok(())
/// # }
/// ```
///
/// Please note that using [`write!`] might be preferable. Example:
///
/// ```
/// use ruff_formatter::prelude::*;
/// use ruff_formatter::{VecBuffer, format_args, FormatState, write, Formatted};
///
/// # fn main() -> FormatResult<()> {
/// let mut state = FormatState::new(SimpleFormatContext::default());
/// let mut buffer = VecBuffer::new(&mut state);
///
/// write!(&mut buffer, [text("Hello World")])?;
///
/// let formatted = Formatted::new(Document::from(buffer.into_vec()), SimpleFormatContext::default());
///
/// assert_eq!("Hello World", formatted.print()?.as_code());
/// # Ok(())
/// # }
/// ```
///
#[inline(always)]
pub fn write<Context>(
    output: &mut dyn Buffer<Context = Context>,
    args: Arguments<Context>,
) -> FormatResult<()> {
    Formatter::new(output).write_fmt(args)
}
/// Formats the given [`Arguments`] with `context` and returns the resulting formatting IR.
///
/// The [`Arguments`] instance can be created with the [`format_args!`].
///
/// The arguments are written into a fresh buffer, the collected elements are turned into a
/// [Document], `propagate_expand` is run on it, and the document is returned together with
/// the context.
///
/// # Examples
///
/// Basic usage:
///
/// ```
/// use ruff_formatter::prelude::*;
/// use ruff_formatter::{format, format_args};
///
/// # fn main() -> FormatResult<()> {
/// let formatted = format!(SimpleFormatContext::default(), [&format_args!(text("test"))])?;
/// assert_eq!("test", formatted.print()?.as_code());
/// # Ok(())
/// # }
/// ```
///
/// Please note that using [`format!`] might be preferable. Example:
///
/// ```
/// use ruff_formatter::prelude::*;
/// use ruff_formatter::{format};
///
/// # fn main() -> FormatResult<()> {
/// let formatted = format!(SimpleFormatContext::default(), [text("test")])?;
/// assert_eq!("test", formatted.print()?.as_code());
/// # Ok(())
/// # }
/// ```
pub fn format<Context>(
    context: Context,
    arguments: Arguments<Context>,
) -> FormatResult<Formatted<Context>>
where
    Context: FormatContext,
{
    let mut state = FormatState::new(context);

    // The buffer borrows `state` mutably; scope it so the state can be consumed afterwards.
    let elements = {
        let mut buffer = VecBuffer::with_capacity(arguments.items().len(), &mut state);
        buffer.write_fmt(arguments)?;
        buffer.into_vec()
    };

    let mut document = Document::from(elements);
    document.propagate_expand();

    Ok(Formatted::new(document, state.into_context()))
}
/// This structure stores the state that is relevant for the formatting of the whole document.
///
/// This structure is different from [crate::Formatter] in that the formatting infrastructure
/// creates a new [crate::Formatter] for every [crate::write!] call, whereas this structure stays alive
/// for the whole process of formatting a root with [crate::format!].
pub struct FormatState<Context> {
context: Context,
group_id_builder: UniqueGroupIdBuilder,
}
impl<Context> std::fmt::Debug for FormatState<Context>
where
Context: std::fmt::Debug,
{
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
f.debug_struct("FormatState")
.field("context", &self.context)
.finish()
}
}
impl<Context> FormatState<Context> {
/// Creates a new state with the given language specific context
pub fn new(context: Context) -> Self {
Self {
context,
group_id_builder: Default::default(),
}
}
pub fn into_context(self) -> Context {
self.context
}
/// Returns the context specifying how to format the current CST
pub fn context(&self) -> &Context {
&self.context
}
/// Returns a mutable reference to the context
pub fn context_mut(&mut self) -> &mut Context {
&mut self.context
}
/// Creates a new group id that is unique to this document. The passed debug name is used in the
/// [std::fmt::Debug] of the document if this is a debug build.
/// The name is unused for production builds and has no meaning on the equality of two group ids.
pub fn group_id(&self, debug_name: &'static str) -> GroupId {
self.group_id_builder.group_id(debug_name)
}
}