mirror of
https://github.com/astral-sh/ruff.git
synced 2025-07-23 13:05:06 +00:00
Move RustPython vendored and helper code into its own crate (#3171)
This commit is contained in:
parent
0f04aa2a5f
commit
095f005bf4
19 changed files with 58 additions and 57 deletions
14
crates/ruff_rustpython/Cargo.toml
Normal file
14
crates/ruff_rustpython/Cargo.toml
Normal file
|
@ -0,0 +1,14 @@
|
|||
[package]
|
||||
name = "ruff_rustpython"
|
||||
version = "0.0.0"
|
||||
publish = false
|
||||
edition = { workspace = true }
|
||||
rust-version = { workspace = true }
|
||||
|
||||
[lib]
|
||||
|
||||
[dependencies]
|
||||
anyhow = { workspace = true }
|
||||
once_cell = { workspace = true }
|
||||
rustpython-common = { workspace = true }
|
||||
rustpython-parser = { workspace = true }
|
30
crates/ruff_rustpython/src/lib.rs
Normal file
30
crates/ruff_rustpython/src/lib.rs
Normal file
|
@ -0,0 +1,30 @@
|
|||
use rustpython_parser as parser;
|
||||
use rustpython_parser::ast::{Mod, Suite};
|
||||
use rustpython_parser::lexer::LexResult;
|
||||
use rustpython_parser::{lexer, Mode, ParseError};
|
||||
|
||||
pub mod vendor;
|
||||
|
||||
/// Collect tokens up to and including the first error.
|
||||
pub fn tokenize(contents: &str) -> Vec<LexResult> {
|
||||
let mut tokens: Vec<LexResult> = vec![];
|
||||
for tok in lexer::lex(contents, Mode::Module) {
|
||||
let is_err = tok.is_err();
|
||||
tokens.push(tok);
|
||||
if is_err {
|
||||
break;
|
||||
}
|
||||
}
|
||||
tokens
|
||||
}
|
||||
|
||||
/// Parse a full Python program from its tokens.
|
||||
pub fn parse_program_tokens(
|
||||
lxr: Vec<LexResult>,
|
||||
source_path: &str,
|
||||
) -> anyhow::Result<Suite, ParseError> {
|
||||
parser::parse_tokens(lxr, Mode::Module, source_path).map(|top| match top {
|
||||
Mod::Module { body, .. } => body,
|
||||
_ => unreachable!(),
|
||||
})
|
||||
}
|
68
crates/ruff_rustpython/src/vendor/bytes.rs
vendored
Normal file
68
crates/ruff_rustpython/src/vendor/bytes.rs
vendored
Normal file
|
@ -0,0 +1,68 @@
|
|||
//! Vendored from [bytes.rs in rustpython-common](https://github.com/RustPython/RustPython/blob/1d8269fb729c91fc56064e975172d3a11bd62d07/common/src/bytes.rs).
|
||||
//! The only changes we make are to remove dead code and make the default quote
|
||||
//! type configurable.
|
||||
|
||||
use crate::vendor;
|
||||
use crate::vendor::str::Quote;
|
||||
|
||||
pub fn repr(b: &[u8], quote: Quote) -> String {
|
||||
repr_with(b, &[], "", quote)
|
||||
}
|
||||
|
||||
pub fn repr_with(b: &[u8], prefixes: &[&str], suffix: &str, quote: Quote) -> String {
|
||||
use std::fmt::Write;
|
||||
|
||||
let mut out_len = 0usize;
|
||||
let mut squote = 0;
|
||||
let mut dquote = 0;
|
||||
|
||||
for &ch in b {
|
||||
let incr = match ch {
|
||||
b'\'' => {
|
||||
squote += 1;
|
||||
1
|
||||
}
|
||||
b'"' => {
|
||||
dquote += 1;
|
||||
1
|
||||
}
|
||||
b'\\' | b'\t' | b'\r' | b'\n' => 2,
|
||||
0x20..=0x7e => 1,
|
||||
_ => 4, // \xHH
|
||||
};
|
||||
// TODO: OverflowError
|
||||
out_len = out_len.checked_add(incr).unwrap();
|
||||
}
|
||||
|
||||
let (quote, num_escaped_quotes) = vendor::str::choose_quotes_for_repr(squote, dquote, quote);
|
||||
// we'll be adding backslashes in front of the existing inner quotes
|
||||
out_len += num_escaped_quotes;
|
||||
|
||||
// 3 is for b prefix + outer quotes
|
||||
out_len += 3 + prefixes.iter().map(|s| s.len()).sum::<usize>() + suffix.len();
|
||||
|
||||
let mut res = String::with_capacity(out_len);
|
||||
res.extend(prefixes.iter().copied());
|
||||
res.push('b');
|
||||
res.push(quote);
|
||||
for &ch in b {
|
||||
match ch {
|
||||
b'\t' => res.push_str("\\t"),
|
||||
b'\n' => res.push_str("\\n"),
|
||||
b'\r' => res.push_str("\\r"),
|
||||
// printable ascii range
|
||||
0x20..=0x7e => {
|
||||
let ch = ch as char;
|
||||
if ch == quote || ch == '\\' {
|
||||
res.push('\\');
|
||||
}
|
||||
res.push(ch);
|
||||
}
|
||||
_ => write!(res, "\\x{ch:02x}").unwrap(),
|
||||
}
|
||||
}
|
||||
res.push(quote);
|
||||
res.push_str(suffix);
|
||||
|
||||
res
|
||||
}
|
2
crates/ruff_rustpython/src/vendor/mod.rs
vendored
Normal file
2
crates/ruff_rustpython/src/vendor/mod.rs
vendored
Normal file
|
@ -0,0 +1,2 @@
|
|||
pub mod bytes;
|
||||
pub mod str;
|
182
crates/ruff_rustpython/src/vendor/str.rs
vendored
Normal file
182
crates/ruff_rustpython/src/vendor/str.rs
vendored
Normal file
|
@ -0,0 +1,182 @@
|
|||
//! Vendored from [str.rs in rustpython-common](https://github.com/RustPython/RustPython/blob/1d8269fb729c91fc56064e975172d3a11bd62d07/common/src/str.rs).
|
||||
//! The only changes we make are to remove dead code and make the default quote
|
||||
//! type configurable.
|
||||
|
||||
use std::fmt;
|
||||
|
||||
use once_cell::unsync::OnceCell;
|
||||
|
||||
/// Preferred outer quote style to use when producing a `repr()`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Quote {
    /// Prefer single quotes (`'`).
    Single,
    /// Prefer double quotes (`"`).
    Double,
}
|
||||
|
||||
/// Get a Display-able type that formats to the python `repr()` of the string
|
||||
/// value.
|
||||
#[inline]
|
||||
pub fn repr(s: &str, quote: Quote) -> Repr<'_> {
|
||||
Repr {
|
||||
s,
|
||||
quote,
|
||||
info: OnceCell::new(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Error signalling that the `repr()` of a string would be too long to build.
#[derive(Debug, Copy, Clone)]
#[non_exhaustive]
pub struct ReprOverflowError;

impl fmt::Display for ReprOverflowError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str("string is too long to generate repr")
    }
}

// Implementing `Error` lets callers box this as `dyn Error` or convert it
// with `?` into generic error types.
impl std::error::Error for ReprOverflowError {}
|
||||
|
||||
#[derive(Copy, Clone)]
|
||||
struct ReprInfo {
|
||||
dquoted: bool,
|
||||
out_len: usize,
|
||||
}
|
||||
|
||||
impl ReprInfo {
|
||||
fn get(s: &str, quote: Quote) -> Result<Self, ReprOverflowError> {
|
||||
let mut out_len = 0usize;
|
||||
let mut squote = 0;
|
||||
let mut dquote = 0;
|
||||
|
||||
for ch in s.chars() {
|
||||
let incr = match ch {
|
||||
'\'' => {
|
||||
squote += 1;
|
||||
1
|
||||
}
|
||||
'"' => {
|
||||
dquote += 1;
|
||||
1
|
||||
}
|
||||
'\\' | '\t' | '\r' | '\n' => 2,
|
||||
ch if ch < ' ' || ch as u32 == 0x7f => 4, // \xHH
|
||||
ch if ch.is_ascii() => 1,
|
||||
ch if rustpython_common::char::is_printable(ch) => {
|
||||
// max = std::cmp::max(ch, max);
|
||||
ch.len_utf8()
|
||||
}
|
||||
ch if (ch as u32) < 0x100 => 4, // \xHH
|
||||
ch if (ch as u32) < 0x10000 => 6, // \uHHHH
|
||||
_ => 10, // \uHHHHHHHH
|
||||
};
|
||||
out_len += incr;
|
||||
if out_len > std::isize::MAX as usize {
|
||||
return Err(ReprOverflowError);
|
||||
}
|
||||
}
|
||||
|
||||
let (quote, num_escaped_quotes) = choose_quotes_for_repr(squote, dquote, quote);
|
||||
// we'll be adding backslashes in front of the existing inner quotes
|
||||
out_len += num_escaped_quotes;
|
||||
|
||||
// start and ending quotes
|
||||
out_len += 2;
|
||||
|
||||
let dquoted = quote == '"';
|
||||
|
||||
Ok(ReprInfo { dquoted, out_len })
|
||||
}
|
||||
}
|
||||
|
||||
pub struct Repr<'a> {
|
||||
s: &'a str,
|
||||
// the quote type we prefer to use
|
||||
quote: Quote,
|
||||
// the tuple is dquouted, out_len
|
||||
info: OnceCell<Result<ReprInfo, ReprOverflowError>>,
|
||||
}
|
||||
|
||||
impl Repr<'_> {
|
||||
fn get_info(&self) -> Result<ReprInfo, ReprOverflowError> {
|
||||
*self.info.get_or_init(|| ReprInfo::get(self.s, self.quote))
|
||||
}
|
||||
|
||||
fn _fmt<W: fmt::Write>(&self, repr: &mut W, info: ReprInfo) -> fmt::Result {
|
||||
let s = self.s;
|
||||
let in_len = s.len();
|
||||
let ReprInfo { dquoted, out_len } = info;
|
||||
|
||||
let quote = if dquoted { '"' } else { '\'' };
|
||||
// if we don't need to escape anything we can just copy
|
||||
let unchanged = out_len == in_len;
|
||||
|
||||
repr.write_char(quote)?;
|
||||
if unchanged {
|
||||
repr.write_str(s)?;
|
||||
} else {
|
||||
for ch in s.chars() {
|
||||
match ch {
|
||||
'\n' => repr.write_str("\\n"),
|
||||
'\t' => repr.write_str("\\t"),
|
||||
'\r' => repr.write_str("\\r"),
|
||||
// these 2 branches *would* be handled below, but we shouldn't have to do a
|
||||
// unicodedata lookup just for ascii characters
|
||||
'\x20'..='\x7e' => {
|
||||
// printable ascii range
|
||||
if ch == quote || ch == '\\' {
|
||||
repr.write_char('\\')?;
|
||||
}
|
||||
repr.write_char(ch)
|
||||
}
|
||||
ch if ch.is_ascii() => {
|
||||
write!(repr, "\\x{:02x}", ch as u8)
|
||||
}
|
||||
ch if rustpython_common::char::is_printable(ch) => repr.write_char(ch),
|
||||
'\0'..='\u{ff}' => {
|
||||
write!(repr, "\\x{:02x}", ch as u32)
|
||||
}
|
||||
'\0'..='\u{ffff}' => {
|
||||
write!(repr, "\\u{:04x}", ch as u32)
|
||||
}
|
||||
_ => {
|
||||
write!(repr, "\\U{:08x}", ch as u32)
|
||||
}
|
||||
}?;
|
||||
}
|
||||
}
|
||||
repr.write_char(quote)
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for Repr<'_> {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
let info = self.get_info().unwrap();
|
||||
self._fmt(f, info)
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the outer quotes to use and the number of quotes that need to be
|
||||
/// escaped.
|
||||
pub(crate) const fn choose_quotes_for_repr(
|
||||
num_squotes: usize,
|
||||
num_dquotes: usize,
|
||||
quote: Quote,
|
||||
) -> (char, usize) {
|
||||
match quote {
|
||||
Quote::Single => {
|
||||
// always use squote unless we have squotes but no dquotes
|
||||
let use_dquote = num_squotes > 0 && num_dquotes == 0;
|
||||
if use_dquote {
|
||||
('"', num_dquotes)
|
||||
} else {
|
||||
('\'', num_squotes)
|
||||
}
|
||||
}
|
||||
Quote::Double => {
|
||||
// always use dquote unless we have dquotes but no squotes
|
||||
let use_squote = num_dquotes > 0 && num_squotes == 0;
|
||||
if use_squote {
|
||||
('\'', num_squotes)
|
||||
} else {
|
||||
('"', num_dquotes)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue