mirror of
https://github.com/astral-sh/ruff.git
synced 2025-08-19 01:51:30 +00:00
Use memchr
to speed up newline search on x86 (#3985)
This commit is contained in:
parent
f3e6ddda62
commit
e04ef42334
10 changed files with 147 additions and 114 deletions
1
Cargo.lock
generated
1
Cargo.lock
generated
|
@ -2204,6 +2204,7 @@ dependencies = [
|
||||||
"is-macro",
|
"is-macro",
|
||||||
"itertools",
|
"itertools",
|
||||||
"log",
|
"log",
|
||||||
|
"memchr",
|
||||||
"num-bigint",
|
"num-bigint",
|
||||||
"num-traits",
|
"num-traits",
|
||||||
"once_cell",
|
"once_cell",
|
||||||
|
|
|
@ -234,11 +234,12 @@ fn top_of_file_insertion(body: &[Stmt], locator: &Locator, stylist: &Stylist) ->
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use anyhow::Result;
|
use anyhow::Result;
|
||||||
|
use ruff_python_ast::newlines::LineEnding;
|
||||||
use ruff_text_size::TextSize;
|
use ruff_text_size::TextSize;
|
||||||
use rustpython_parser as parser;
|
use rustpython_parser as parser;
|
||||||
use rustpython_parser::lexer::LexResult;
|
use rustpython_parser::lexer::LexResult;
|
||||||
|
|
||||||
use ruff_python_ast::source_code::{LineEnding, Locator, Stylist};
|
use ruff_python_ast::source_code::{Locator, Stylist};
|
||||||
|
|
||||||
use crate::importer::{top_of_file_insertion, Insertion};
|
use crate::importer::{top_of_file_insertion, Insertion};
|
||||||
|
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
use crate::fs::{relativize_path, relativize_path_to};
|
use crate::fs::{relativize_path, relativize_path_to};
|
||||||
use crate::message::{Emitter, EmitterContext, Message};
|
use crate::message::{Emitter, EmitterContext, Message};
|
||||||
use crate::registry::AsRule;
|
use crate::registry::AsRule;
|
||||||
|
use ruff_python_ast::source_code::SourceLocation;
|
||||||
use serde::ser::SerializeSeq;
|
use serde::ser::SerializeSeq;
|
||||||
use serde::{Serialize, Serializer};
|
use serde::{Serialize, Serializer};
|
||||||
use serde_json::json;
|
use serde_json::json;
|
||||||
|
@ -56,6 +57,9 @@ impl Serialize for SerializedMessages<'_> {
|
||||||
let mut s = serializer.serialize_seq(Some(self.messages.len()))?;
|
let mut s = serializer.serialize_seq(Some(self.messages.len()))?;
|
||||||
|
|
||||||
for message in self.messages {
|
for message in self.messages {
|
||||||
|
let start_location = message.compute_start_location();
|
||||||
|
let end_location = message.compute_end_location();
|
||||||
|
|
||||||
let lines = if self.context.is_jupyter_notebook(message.filename()) {
|
let lines = if self.context.is_jupyter_notebook(message.filename()) {
|
||||||
// We can't give a reasonable location for the structured formats,
|
// We can't give a reasonable location for the structured formats,
|
||||||
// so we show one that's clearly a fallback
|
// so we show one that's clearly a fallback
|
||||||
|
@ -64,8 +68,6 @@ impl Serialize for SerializedMessages<'_> {
|
||||||
"end": 1
|
"end": 1
|
||||||
})
|
})
|
||||||
} else {
|
} else {
|
||||||
let start_location = message.compute_start_location();
|
|
||||||
let end_location = message.compute_end_location();
|
|
||||||
json!({
|
json!({
|
||||||
"begin": start_location.row,
|
"begin": start_location.row,
|
||||||
"end": end_location.row
|
"end": end_location.row
|
||||||
|
@ -80,7 +82,7 @@ impl Serialize for SerializedMessages<'_> {
|
||||||
let value = json!({
|
let value = json!({
|
||||||
"description": format!("({}) {}", message.kind.rule().noqa_code(), message.kind.body),
|
"description": format!("({}) {}", message.kind.rule().noqa_code(), message.kind.body),
|
||||||
"severity": "major",
|
"severity": "major",
|
||||||
"fingerprint": fingerprint(message),
|
"fingerprint": fingerprint(message, &start_location, &end_location),
|
||||||
"location": {
|
"location": {
|
||||||
"path": path,
|
"path": path,
|
||||||
"lines": lines
|
"lines": lines
|
||||||
|
@ -95,10 +97,14 @@ impl Serialize for SerializedMessages<'_> {
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Generate a unique fingerprint to identify a violation.
|
/// Generate a unique fingerprint to identify a violation.
|
||||||
fn fingerprint(message: &Message) -> String {
|
fn fingerprint(
|
||||||
|
message: &Message,
|
||||||
|
start_location: &SourceLocation,
|
||||||
|
end_location: &SourceLocation,
|
||||||
|
) -> String {
|
||||||
let Message {
|
let Message {
|
||||||
kind,
|
kind,
|
||||||
range,
|
range: _,
|
||||||
fix: _fix,
|
fix: _fix,
|
||||||
file,
|
file,
|
||||||
noqa_offset: _,
|
noqa_offset: _,
|
||||||
|
@ -107,7 +113,8 @@ fn fingerprint(message: &Message) -> String {
|
||||||
let mut hasher = DefaultHasher::new();
|
let mut hasher = DefaultHasher::new();
|
||||||
|
|
||||||
kind.rule().hash(&mut hasher);
|
kind.rule().hash(&mut hasher);
|
||||||
range.hash(&mut hasher);
|
start_location.hash(&mut hasher);
|
||||||
|
end_location.hash(&mut hasher);
|
||||||
file.name().hash(&mut hasher);
|
file.name().hash(&mut hasher);
|
||||||
|
|
||||||
format!("{:x}", hasher.finish())
|
format!("{:x}", hasher.finish())
|
||||||
|
|
|
@ -11,7 +11,8 @@ use regex::Regex;
|
||||||
use ruff_text_size::{TextLen, TextRange, TextSize};
|
use ruff_text_size::{TextLen, TextRange, TextSize};
|
||||||
|
|
||||||
use ruff_diagnostics::Diagnostic;
|
use ruff_diagnostics::Diagnostic;
|
||||||
use ruff_python_ast::source_code::{LineEnding, Locator};
|
use ruff_python_ast::newlines::LineEnding;
|
||||||
|
use ruff_python_ast::source_code::Locator;
|
||||||
|
|
||||||
use crate::codes::NoqaCode;
|
use crate::codes::NoqaCode;
|
||||||
use crate::registry::{AsRule, Rule, RuleSet};
|
use crate::registry::{AsRule, Rule, RuleSet};
|
||||||
|
@ -511,7 +512,8 @@ mod tests {
|
||||||
use ruff_text_size::{TextRange, TextSize};
|
use ruff_text_size::{TextRange, TextSize};
|
||||||
|
|
||||||
use ruff_diagnostics::Diagnostic;
|
use ruff_diagnostics::Diagnostic;
|
||||||
use ruff_python_ast::source_code::{LineEnding, Locator};
|
use ruff_python_ast::newlines::LineEnding;
|
||||||
|
use ruff_python_ast::source_code::Locator;
|
||||||
|
|
||||||
use crate::noqa::{add_noqa_inner, NoqaMapping, NOQA_LINE_REGEX};
|
use crate::noqa::{add_noqa_inner, NoqaMapping, NOQA_LINE_REGEX};
|
||||||
use crate::rules::pycodestyle::rules::AmbiguousVariableName;
|
use crate::rules::pycodestyle::rules::AmbiguousVariableName;
|
||||||
|
|
|
@ -16,6 +16,7 @@ bitflags = { workspace = true }
|
||||||
is-macro = { workspace = true }
|
is-macro = { workspace = true }
|
||||||
itertools = { workspace = true }
|
itertools = { workspace = true }
|
||||||
log = { workspace = true }
|
log = { workspace = true }
|
||||||
|
memchr = "2.5.0"
|
||||||
num-bigint = { version = "0.4.3" }
|
num-bigint = { version = "0.4.3" }
|
||||||
num-traits = { version = "0.2.15" }
|
num-traits = { version = "0.2.15" }
|
||||||
once_cell = { workspace = true }
|
once_cell = { workspace = true }
|
||||||
|
|
|
@ -1,3 +1,4 @@
|
||||||
|
use memchr::{memchr2, memrchr2};
|
||||||
use ruff_text_size::{TextLen, TextRange, TextSize};
|
use ruff_text_size::{TextLen, TextRange, TextSize};
|
||||||
use std::iter::FusedIterator;
|
use std::iter::FusedIterator;
|
||||||
use std::ops::Deref;
|
use std::ops::Deref;
|
||||||
|
@ -50,6 +51,30 @@ impl<'a> UniversalNewlineIterator<'a> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Finds the next newline character. Returns its position and the [`LineEnding`].
|
||||||
|
#[inline]
|
||||||
|
pub fn find_newline(text: &str) -> Option<(usize, LineEnding)> {
|
||||||
|
let bytes = text.as_bytes();
|
||||||
|
if let Some(position) = memchr2(b'\n', b'\r', bytes) {
|
||||||
|
// SAFETY: memchr guarantees to return valid positions
|
||||||
|
#[allow(unsafe_code)]
|
||||||
|
let newline_character = unsafe { *bytes.get_unchecked(position) };
|
||||||
|
|
||||||
|
let line_ending = match newline_character {
|
||||||
|
// Explicit branch for `\n` as this is the most likely path
|
||||||
|
b'\n' => LineEnding::Lf,
|
||||||
|
// '\r\n'
|
||||||
|
b'\r' if bytes.get(position.saturating_add(1)) == Some(&b'\n') => LineEnding::CrLf,
|
||||||
|
// '\r'
|
||||||
|
_ => LineEnding::Cr,
|
||||||
|
};
|
||||||
|
|
||||||
|
Some((position, line_ending))
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
impl<'a> Iterator for UniversalNewlineIterator<'a> {
|
impl<'a> Iterator for UniversalNewlineIterator<'a> {
|
||||||
type Item = Line<'a>;
|
type Item = Line<'a>;
|
||||||
|
|
||||||
|
@ -59,35 +84,25 @@ impl<'a> Iterator for UniversalNewlineIterator<'a> {
|
||||||
return None;
|
return None;
|
||||||
}
|
}
|
||||||
|
|
||||||
let line = match self.text.find(['\n', '\r']) {
|
let line = if let Some((newline_position, line_ending)) = find_newline(self.text) {
|
||||||
// Non-last line
|
let (text, remainder) = self.text.split_at(newline_position + line_ending.len());
|
||||||
Some(line_end) => {
|
|
||||||
let offset: usize = match self.text.as_bytes()[line_end] {
|
|
||||||
// Explicit branch for `\n` as this is the most likely path
|
|
||||||
b'\n' => 1,
|
|
||||||
// '\r\n'
|
|
||||||
b'\r' if self.text.as_bytes().get(line_end + 1) == Some(&b'\n') => 2,
|
|
||||||
// '\r'
|
|
||||||
_ => 1,
|
|
||||||
};
|
|
||||||
|
|
||||||
let (text, remainder) = self.text.split_at(line_end + offset);
|
let line = Line {
|
||||||
|
offset: self.offset,
|
||||||
|
text,
|
||||||
|
};
|
||||||
|
|
||||||
let line = Line {
|
self.text = remainder;
|
||||||
offset: self.offset,
|
self.offset += text.text_len();
|
||||||
text,
|
|
||||||
};
|
|
||||||
|
|
||||||
self.text = remainder;
|
line
|
||||||
self.offset += text.text_len();
|
}
|
||||||
|
// Last line
|
||||||
line
|
else {
|
||||||
}
|
Line {
|
||||||
// Last line
|
|
||||||
None => Line {
|
|
||||||
offset: self.offset,
|
offset: self.offset,
|
||||||
text: std::mem::take(&mut self.text),
|
text: std::mem::take(&mut self.text),
|
||||||
},
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
Some(line)
|
Some(line)
|
||||||
|
@ -116,7 +131,7 @@ impl DoubleEndedIterator for UniversalNewlineIterator<'_> {
|
||||||
|
|
||||||
// Find the end of the previous line. The previous line is the text up to, but not including
|
// Find the end of the previous line. The previous line is the text up to, but not including
|
||||||
// the newline character.
|
// the newline character.
|
||||||
let line = if let Some(line_end) = haystack.rfind(['\n', '\r']) {
|
let line = if let Some(line_end) = memrchr2(b'\n', b'\r', haystack.as_bytes()) {
|
||||||
// '\n' or '\r' or '\r\n'
|
// '\n' or '\r' or '\r\n'
|
||||||
let (remainder, line) = self.text.split_at(line_end + 1);
|
let (remainder, line) = self.text.split_at(line_end + 1);
|
||||||
self.text = remainder;
|
self.text = remainder;
|
||||||
|
@ -268,6 +283,58 @@ impl PartialEq<Line<'_>> for &str {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// The line ending style used in Python source code.
|
||||||
|
/// See <https://docs.python.org/3/reference/lexical_analysis.html#physical-lines>
|
||||||
|
#[derive(Debug, PartialEq, Eq, Copy, Clone)]
|
||||||
|
pub enum LineEnding {
|
||||||
|
Lf,
|
||||||
|
Cr,
|
||||||
|
CrLf,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Default for LineEnding {
|
||||||
|
fn default() -> Self {
|
||||||
|
if cfg!(windows) {
|
||||||
|
LineEnding::CrLf
|
||||||
|
} else {
|
||||||
|
LineEnding::Lf
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl LineEnding {
|
||||||
|
pub const fn as_str(&self) -> &'static str {
|
||||||
|
match self {
|
||||||
|
LineEnding::Lf => "\n",
|
||||||
|
LineEnding::CrLf => "\r\n",
|
||||||
|
LineEnding::Cr => "\r",
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[allow(clippy::len_without_is_empty)]
|
||||||
|
pub const fn len(&self) -> usize {
|
||||||
|
match self {
|
||||||
|
LineEnding::Lf | LineEnding::Cr => 1,
|
||||||
|
LineEnding::CrLf => 2,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub const fn text_len(&self) -> TextSize {
|
||||||
|
match self {
|
||||||
|
LineEnding::Lf | LineEnding::Cr => TextSize::new(1),
|
||||||
|
LineEnding::CrLf => TextSize::new(2),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Deref for LineEnding {
|
||||||
|
type Target = str;
|
||||||
|
|
||||||
|
fn deref(&self) -> &Self::Target {
|
||||||
|
self.as_str()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use super::UniversalNewlineIterator;
|
use super::UniversalNewlineIterator;
|
||||||
|
|
|
@ -9,9 +9,10 @@ use rustpython_parser::ast::{
|
||||||
};
|
};
|
||||||
use rustpython_parser::ConversionFlag;
|
use rustpython_parser::ConversionFlag;
|
||||||
|
|
||||||
|
use crate::newlines::LineEnding;
|
||||||
use ruff_rustpython::vendor::{bytes, str};
|
use ruff_rustpython::vendor::{bytes, str};
|
||||||
|
|
||||||
use crate::source_code::stylist::{Indentation, LineEnding, Quote, Stylist};
|
use crate::source_code::stylist::{Indentation, Quote, Stylist};
|
||||||
|
|
||||||
mod precedence {
|
mod precedence {
|
||||||
pub const ASSIGN: u8 = 3;
|
pub const ASSIGN: u8 = 3;
|
||||||
|
@ -1256,9 +1257,10 @@ impl<'a> Generator<'a> {
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
|
use crate::newlines::LineEnding;
|
||||||
use rustpython_parser as parser;
|
use rustpython_parser as parser;
|
||||||
|
|
||||||
use crate::source_code::stylist::{Indentation, LineEnding, Quote};
|
use crate::source_code::stylist::{Indentation, Quote};
|
||||||
use crate::source_code::Generator;
|
use crate::source_code::Generator;
|
||||||
|
|
||||||
fn round_trip(contents: &str) -> String {
|
fn round_trip(contents: &str) -> String {
|
||||||
|
|
|
@ -1,6 +1,8 @@
|
||||||
//! Struct used to efficiently slice source code at (row, column) Locations.
|
//! Struct used to efficiently slice source code at (row, column) Locations.
|
||||||
|
|
||||||
|
use crate::newlines::find_newline;
|
||||||
use crate::source_code::{LineIndex, OneIndexed, SourceCode, SourceLocation};
|
use crate::source_code::{LineIndex, OneIndexed, SourceCode, SourceLocation};
|
||||||
|
use memchr::{memchr2, memrchr2};
|
||||||
use once_cell::unsync::OnceCell;
|
use once_cell::unsync::OnceCell;
|
||||||
use ruff_text_size::{TextLen, TextRange, TextSize};
|
use ruff_text_size::{TextLen, TextRange, TextSize};
|
||||||
use std::ops::Add;
|
use std::ops::Add;
|
||||||
|
@ -68,7 +70,8 @@ impl<'a> Locator<'a> {
|
||||||
/// ## Panics
|
/// ## Panics
|
||||||
/// If `offset` is out of bounds.
|
/// If `offset` is out of bounds.
|
||||||
pub fn line_start(&self, offset: TextSize) -> TextSize {
|
pub fn line_start(&self, offset: TextSize) -> TextSize {
|
||||||
if let Some(index) = self.contents[TextRange::up_to(offset)].rfind(['\n', '\r']) {
|
let bytes = self.contents[TextRange::up_to(offset)].as_bytes();
|
||||||
|
if let Some(index) = memrchr2(b'\n', b'\r', bytes) {
|
||||||
// SAFETY: Safe because `index < offset`
|
// SAFETY: Safe because `index < offset`
|
||||||
TextSize::try_from(index).unwrap().add(TextSize::from(1))
|
TextSize::try_from(index).unwrap().add(TextSize::from(1))
|
||||||
} else {
|
} else {
|
||||||
|
@ -101,19 +104,8 @@ impl<'a> Locator<'a> {
|
||||||
/// If `offset` is past the end of the content.
|
/// If `offset` is past the end of the content.
|
||||||
pub fn full_line_end(&self, offset: TextSize) -> TextSize {
|
pub fn full_line_end(&self, offset: TextSize) -> TextSize {
|
||||||
let slice = &self.contents[usize::from(offset)..];
|
let slice = &self.contents[usize::from(offset)..];
|
||||||
if let Some(index) = slice.find(['\n', '\r']) {
|
if let Some((index, line_ending)) = find_newline(slice) {
|
||||||
let bytes = slice.as_bytes();
|
offset + TextSize::try_from(index).unwrap() + line_ending.text_len()
|
||||||
|
|
||||||
// `\r\n`
|
|
||||||
let relative_offset = if bytes[index] == b'\r' && bytes.get(index + 1) == Some(&b'\n') {
|
|
||||||
TextSize::try_from(index + 2).unwrap()
|
|
||||||
}
|
|
||||||
// `\r` or `\n`
|
|
||||||
else {
|
|
||||||
TextSize::try_from(index + 1).unwrap()
|
|
||||||
};
|
|
||||||
|
|
||||||
offset.add(relative_offset)
|
|
||||||
} else {
|
} else {
|
||||||
self.contents.text_len()
|
self.contents.text_len()
|
||||||
}
|
}
|
||||||
|
@ -139,7 +131,7 @@ impl<'a> Locator<'a> {
|
||||||
/// If `offset` is past the end of the content.
|
/// If `offset` is past the end of the content.
|
||||||
pub fn line_end(&self, offset: TextSize) -> TextSize {
|
pub fn line_end(&self, offset: TextSize) -> TextSize {
|
||||||
let slice = &self.contents[usize::from(offset)..];
|
let slice = &self.contents[usize::from(offset)..];
|
||||||
if let Some(index) = slice.find(['\n', '\r']) {
|
if let Some(index) = memchr2(b'\n', b'\r', slice.as_bytes()) {
|
||||||
offset + TextSize::try_from(index).unwrap()
|
offset + TextSize::try_from(index).unwrap()
|
||||||
} else {
|
} else {
|
||||||
self.contents.text_len()
|
self.contents.text_len()
|
||||||
|
|
|
@ -15,8 +15,7 @@ use rustpython_parser::{lexer, Mode, ParseError};
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
use std::fmt::{Debug, Formatter};
|
use std::fmt::{Debug, Formatter};
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
pub use stylist::Stylist;
|
||||||
pub use stylist::{LineEnding, Stylist};
|
|
||||||
|
|
||||||
/// Run round-trip source code generation on a given Python code.
|
/// Run round-trip source code generation on a given Python code.
|
||||||
pub fn round_trip(code: &str, source_path: &str) -> Result<String, ParseError> {
|
pub fn round_trip(code: &str, source_path: &str) -> Result<String, ParseError> {
|
||||||
|
|
|
@ -7,6 +7,7 @@ use once_cell::unsync::OnceCell;
|
||||||
use rustpython_parser::lexer::LexResult;
|
use rustpython_parser::lexer::LexResult;
|
||||||
use rustpython_parser::Tok;
|
use rustpython_parser::Tok;
|
||||||
|
|
||||||
|
use crate::newlines::{find_newline, LineEnding};
|
||||||
use ruff_rustpython::vendor;
|
use ruff_rustpython::vendor;
|
||||||
|
|
||||||
use crate::source_code::Locator;
|
use crate::source_code::Locator;
|
||||||
|
@ -29,9 +30,12 @@ impl<'a> Stylist<'a> {
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn line_ending(&'a self) -> LineEnding {
|
pub fn line_ending(&'a self) -> LineEnding {
|
||||||
*self
|
*self.line_ending.get_or_init(|| {
|
||||||
.line_ending
|
let contents = self.locator.contents();
|
||||||
.get_or_init(|| detect_line_ending(self.locator.contents()).unwrap_or_default())
|
find_newline(contents)
|
||||||
|
.map(|(_, ending)| ending)
|
||||||
|
.unwrap_or_default()
|
||||||
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn from_tokens(tokens: &[LexResult], locator: &'a Locator<'a>) -> Self {
|
pub fn from_tokens(tokens: &[LexResult], locator: &'a Locator<'a>) -> Self {
|
||||||
|
@ -158,65 +162,13 @@ impl Deref for Indentation {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// The line ending style used in Python source code.
|
|
||||||
/// See <https://docs.python.org/3/reference/lexical_analysis.html#physical-lines>
|
|
||||||
#[derive(Debug, PartialEq, Eq, Copy, Clone)]
|
|
||||||
pub enum LineEnding {
|
|
||||||
Lf,
|
|
||||||
Cr,
|
|
||||||
CrLf,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Default for LineEnding {
|
|
||||||
fn default() -> Self {
|
|
||||||
if cfg!(windows) {
|
|
||||||
LineEnding::CrLf
|
|
||||||
} else {
|
|
||||||
LineEnding::Lf
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl LineEnding {
|
|
||||||
pub const fn as_str(&self) -> &'static str {
|
|
||||||
match self {
|
|
||||||
LineEnding::CrLf => "\r\n",
|
|
||||||
LineEnding::Lf => "\n",
|
|
||||||
LineEnding::Cr => "\r",
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Deref for LineEnding {
|
|
||||||
type Target = str;
|
|
||||||
|
|
||||||
fn deref(&self) -> &Self::Target {
|
|
||||||
self.as_str()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Detect the line ending style of the given contents.
|
|
||||||
fn detect_line_ending(contents: &str) -> Option<LineEnding> {
|
|
||||||
if let Some(position) = contents.find(['\n', '\r']) {
|
|
||||||
let bytes = contents.as_bytes();
|
|
||||||
if bytes[position] == b'\n' {
|
|
||||||
Some(LineEnding::Lf)
|
|
||||||
} else if bytes.get(position.saturating_add(1)) == Some(&b'\n') {
|
|
||||||
Some(LineEnding::CrLf)
|
|
||||||
} else {
|
|
||||||
Some(LineEnding::Cr)
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
None
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
|
use crate::newlines::{find_newline, LineEnding};
|
||||||
use rustpython_parser::lexer::lex;
|
use rustpython_parser::lexer::lex;
|
||||||
use rustpython_parser::Mode;
|
use rustpython_parser::Mode;
|
||||||
|
|
||||||
use crate::source_code::stylist::{detect_line_ending, Indentation, LineEnding, Quote};
|
use crate::source_code::stylist::{Indentation, Quote};
|
||||||
use crate::source_code::{Locator, Stylist};
|
use crate::source_code::{Locator, Stylist};
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
@ -354,15 +306,24 @@ a = "v"
|
||||||
#[test]
|
#[test]
|
||||||
fn line_ending() {
|
fn line_ending() {
|
||||||
let contents = "x = 1";
|
let contents = "x = 1";
|
||||||
assert_eq!(detect_line_ending(contents), None);
|
assert_eq!(find_newline(contents).map(|(_, ending)| ending), None);
|
||||||
|
|
||||||
let contents = "x = 1\n";
|
let contents = "x = 1\n";
|
||||||
assert_eq!(detect_line_ending(contents), Some(LineEnding::Lf));
|
assert_eq!(
|
||||||
|
find_newline(contents).map(|(_, ending)| ending),
|
||||||
|
Some(LineEnding::Lf)
|
||||||
|
);
|
||||||
|
|
||||||
let contents = "x = 1\r";
|
let contents = "x = 1\r";
|
||||||
assert_eq!(detect_line_ending(contents), Some(LineEnding::Cr));
|
assert_eq!(
|
||||||
|
find_newline(contents).map(|(_, ending)| ending),
|
||||||
|
Some(LineEnding::Cr)
|
||||||
|
);
|
||||||
|
|
||||||
let contents = "x = 1\r\n";
|
let contents = "x = 1\r\n";
|
||||||
assert_eq!(detect_line_ending(contents), Some(LineEnding::CrLf));
|
assert_eq!(
|
||||||
|
find_newline(contents).map(|(_, ending)| ending),
|
||||||
|
Some(LineEnding::CrLf)
|
||||||
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue