Move RustPython vendored and helper code into its own crate (#3171)

This commit is contained in:
Charlie Marsh 2023-02-23 09:14:16 -05:00 committed by GitHub
parent 0f04aa2a5f
commit 095f005bf4
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
19 changed files with 58 additions and 57 deletions

View file

@ -0,0 +1,14 @@
[package]
name = "ruff_rustpython"
version = "0.0.0"
publish = false
edition = { workspace = true }
rust-version = { workspace = true }
[lib]
[dependencies]
anyhow = { workspace = true }
once_cell = { workspace = true }
rustpython-common = { workspace = true }
rustpython-parser = { workspace = true }

View file

@ -0,0 +1,30 @@
use rustpython_parser as parser;
use rustpython_parser::ast::{Mod, Suite};
use rustpython_parser::lexer::LexResult;
use rustpython_parser::{lexer, Mode, ParseError};
pub mod vendor;
/// Lex `contents` as a module and gather the resulting tokens.
///
/// Lexing stops at the first error: that error is kept as the final element
/// of the returned vector and nothing after it is requested from the lexer.
pub fn tokenize(contents: &str) -> Vec<LexResult> {
    let mut collected = Vec::new();
    for result in lexer::lex(contents, Mode::Module) {
        let keep_going = result.is_ok();
        collected.push(result);
        if !keep_going {
            // The failing token has been recorded; do not poll the lexer again.
            break;
        }
    }
    collected
}
/// Parse an already-lexed Python module into its top-level statement list.
///
/// `lxr` is the token stream to parse and `source_path` is forwarded to the
/// parser unchanged.
///
/// # Errors
///
/// Returns the parser's [`ParseError`] if the tokens do not form a valid module.
pub fn parse_program_tokens(
    lxr: Vec<LexResult>,
    source_path: &str,
) -> anyhow::Result<Suite, ParseError> {
    let parsed = parser::parse_tokens(lxr, Mode::Module, source_path)?;
    match parsed {
        Mod::Module { body, .. } => Ok(body),
        // Parsing was requested in `Mode::Module`, so no other variant is expected.
        _ => unreachable!(),
    }
}

View file

@ -0,0 +1,68 @@
//! Vendored from [bytes.rs in rustpython-common](https://github.com/RustPython/RustPython/blob/1d8269fb729c91fc56064e975172d3a11bd62d07/common/src/bytes.rs).
//! The only changes we make are to remove dead code and make the default quote
//! type configurable.
use crate::vendor;
use crate::vendor::str::Quote;
/// Render `b` as a Python bytes literal, preferring `quote` as the delimiter.
///
/// Shorthand for [`repr_with`] with no prefixes and an empty suffix.
pub fn repr(b: &[u8], quote: Quote) -> String {
    let no_prefixes: &[&str] = &[];
    repr_with(b, no_prefixes, "", quote)
}
/// Render `b` as a Python bytes literal wrapped in extra text.
///
/// The result is every entry of `prefixes` concatenated, then `b`, the chosen
/// quote, the escaped bytes, the closing quote, and finally `suffix`. `quote`
/// is the preferred delimiter; the delimiter actually used is decided by
/// `choose_quotes_for_repr` from the quotes found in `b`.
///
/// # Panics
///
/// Panics if the escaped length of `b` overflows `usize`.
pub fn repr_with(b: &[u8], prefixes: &[&str], suffix: &str, quote: Quote) -> String {
    use std::fmt::Write;

    // Sizing pass: measure the escaped body and count both kinds of quote.
    let mut body_len = 0usize;
    let mut single_quotes = 0;
    let mut double_quotes = 0;
    for &byte in b {
        let width = match byte {
            b'\'' => {
                single_quotes += 1;
                1
            }
            b'"' => {
                double_quotes += 1;
                1
            }
            b'\\' | b'\t' | b'\r' | b'\n' => 2,
            0x20..=0x7e => 1,
            // Everything else is written as `\xHH`.
            _ => 4,
        };
        // TODO: OverflowError
        body_len = body_len.checked_add(width).unwrap();
    }

    let (delimiter, escaped_quotes) =
        vendor::str::choose_quotes_for_repr(single_quotes, double_quotes, quote);

    // Each inner quote matching the delimiter gains a backslash; the extra 3
    // covers the `b` marker plus the opening and closing quotes.
    let affix_len = prefixes.iter().map(|prefix| prefix.len()).sum::<usize>() + suffix.len();
    let capacity = body_len + escaped_quotes + 3 + affix_len;

    // Emit pass.
    let mut out = String::with_capacity(capacity);
    for prefix in prefixes {
        out.push_str(prefix);
    }
    out.push('b');
    out.push(delimiter);
    for &byte in b {
        match byte {
            b'\t' => out.push_str("\\t"),
            b'\n' => out.push_str("\\n"),
            b'\r' => out.push_str("\\r"),
            // Printable ASCII: copied through, with the delimiter and the
            // backslash themselves escaped.
            0x20..=0x7e => {
                let printable = char::from(byte);
                if printable == delimiter || printable == '\\' {
                    out.push('\\');
                }
                out.push(printable);
            }
            _ => write!(out, "\\x{byte:02x}").unwrap(),
        }
    }
    out.push(delimiter);
    out.push_str(suffix);
    out
}

View file

@ -0,0 +1,2 @@
pub mod bytes;
pub mod str;

182
crates/ruff_rustpython/src/vendor/str.rs vendored Normal file
View file

@ -0,0 +1,182 @@
//! Vendored from [str.rs in rustpython-common](https://github.com/RustPython/RustPython/blob/1d8269fb729c91fc56064e975172d3a11bd62d07/common/src/str.rs).
//! The only changes we make are to remove dead code and make the default quote
//! type configurable.
use std::fmt;
use once_cell::unsync::OnceCell;
/// The quote character a caller would prefer a generated `repr` to be
/// delimited with. `choose_quotes_for_repr` may still pick the other one
/// when that avoids escaping.
#[derive(Debug, Clone, Copy)]
pub enum Quote {
/// Prefer single quotes (`'`).
Single,
/// Prefer double quotes (`"`).
Double,
}
/// Wrap `s` in a [`Repr`], a `Display`-able value that renders as the Python
/// `repr()` of the string.
///
/// `quote` is the preferred delimiter. The layout of the output is not
/// computed here; the cache starts out empty.
#[inline]
pub fn repr(s: &str, quote: Quote) -> Repr<'_> {
    let info = OnceCell::new();
    Repr { s, quote, info }
}
/// Error reported when the `repr` of a string would be too long to generate.
#[derive(Debug, Copy, Clone)]
#[non_exhaustive]
pub struct ReprOverflowError;

impl fmt::Display for ReprOverflowError {
    /// Writes the fixed, human-readable description of the error.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "string is too long to generate repr")
    }
}
/// Layout of a string's `repr`, computed once before any output is written.
#[derive(Copy, Clone)]
struct ReprInfo {
    /// `true` when the repr is delimited by double quotes, `false` for single.
    dquoted: bool,
    /// Length in bytes of the full repr, including both delimiting quotes.
    out_len: usize,
}

impl ReprInfo {
    /// Scan `s` and work out which delimiter its repr uses and how long the
    /// repr will be, given `quote` as the preferred delimiter.
    ///
    /// # Errors
    ///
    /// Returns [`ReprOverflowError`] as soon as the running length exceeds
    /// `isize::MAX`.
    fn get(s: &str, quote: Quote) -> Result<Self, ReprOverflowError> {
        let mut body_len = 0usize;
        let mut single_quotes = 0;
        let mut double_quotes = 0;

        for ch in s.chars() {
            let width = match ch {
                '\'' => {
                    single_quotes += 1;
                    1
                }
                '"' => {
                    double_quotes += 1;
                    1
                }
                // Two-character escapes: `\\`, `\t`, `\r`, `\n`.
                '\\' | '\t' | '\r' | '\n' => 2,
                // Remaining ASCII control characters and DEL: `\xHH`.
                '\0'..='\x1f' | '\x7f' => 4,
                // Remaining (printable) ASCII is copied as-is.
                '\x20'..='\x7e' => 1,
                // Printable non-ASCII is copied as-is, at its UTF-8 width.
                ch if rustpython_common::char::is_printable(ch) => ch.len_utf8(),
                // Unprintable non-ASCII: `\xHH`, `\uHHHH` or `\UHHHHHHHH`.
                '\u{80}'..='\u{ff}' => 4,
                '\u{100}'..='\u{ffff}' => 6,
                _ => 10,
            };
            body_len += width;
            if body_len > isize::MAX as usize {
                return Err(ReprOverflowError);
            }
        }

        let (delimiter, escaped_quotes) =
            choose_quotes_for_repr(single_quotes, double_quotes, quote);

        Ok(Self {
            dquoted: delimiter == '"',
            // One backslash per escaped inner quote, plus the two outer quotes.
            out_len: body_len + escaped_quotes + 2,
        })
    }
}
/// A `Display`-able view of a string that renders as its Python `repr()`.
pub struct Repr<'a> {
    /// The string being rendered.
    s: &'a str,
    /// The quote type we prefer to use.
    quote: Quote,
    /// Lazily computed layout (chosen delimiter and output length), or the
    /// overflow error hit while computing it.
    info: OnceCell<Result<ReprInfo, ReprOverflowError>>,
}

impl Repr<'_> {
    /// Return the layout for this repr, computing and caching it on first use.
    fn get_info(&self) -> Result<ReprInfo, ReprOverflowError> {
        *self.info.get_or_init(|| ReprInfo::get(self.s, self.quote))
    }

    /// Write the quoted, escaped repr of the string to `repr`, using the
    /// delimiter and length recorded in `info`.
    fn _fmt<W: fmt::Write>(&self, repr: &mut W, info: ReprInfo) -> fmt::Result {
        let s = self.s;
        let in_len = s.len();
        let ReprInfo { dquoted, out_len } = info;

        let quote = if dquoted { '"' } else { '\'' };
        // If we don't need to escape anything we can just copy. `out_len`
        // includes the two delimiting quotes, so "nothing escaped" means the
        // output is exactly two bytes longer than the input. (Comparing
        // against `in_len` alone can never match, which disabled this path.)
        let unchanged = out_len == in_len + 2;

        repr.write_char(quote)?;
        if unchanged {
            repr.write_str(s)?;
        } else {
            for ch in s.chars() {
                match ch {
                    '\n' => repr.write_str("\\n"),
                    '\t' => repr.write_str("\\t"),
                    '\r' => repr.write_str("\\r"),
                    // These two ASCII branches *would* be handled below, but we
                    // shouldn't have to do a unicodedata lookup just for ASCII
                    // characters.
                    '\x20'..='\x7e' => {
                        // Printable ASCII range: escape only the delimiter and
                        // the backslash itself.
                        if ch == quote || ch == '\\' {
                            repr.write_char('\\')?;
                        }
                        repr.write_char(ch)
                    }
                    ch if ch.is_ascii() => {
                        write!(repr, "\\x{:02x}", ch as u8)
                    }
                    ch if rustpython_common::char::is_printable(ch) => repr.write_char(ch),
                    '\0'..='\u{ff}' => {
                        write!(repr, "\\x{:02x}", ch as u32)
                    }
                    '\0'..='\u{ffff}' => {
                        write!(repr, "\\u{:04x}", ch as u32)
                    }
                    _ => {
                        write!(repr, "\\U{:08x}", ch as u32)
                    }
                }?;
            }
        }
        repr.write_char(quote)
    }
}
impl fmt::Display for Repr<'_> {
    /// Write the Python `repr()` of the wrapped string to `f`.
    ///
    /// # Panics
    ///
    /// Panics if computing the layout reported a `ReprOverflowError`.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        self._fmt(f, self.get_info().unwrap())
    }
}
/// Pick the delimiter for a repr and report how many inner quotes it forces
/// us to escape.
///
/// `num_squotes` and `num_dquotes` are the counts of `'` and `"` in the
/// content, and `quote` is the preferred delimiter. The preferred delimiter
/// is used unless the content contains it and does not contain the other
/// quote; in that case the other quote is used and nothing needs escaping.
///
/// Returns the delimiter character and the number of occurrences of that
/// same character in the content.
pub(crate) const fn choose_quotes_for_repr(
    num_squotes: usize,
    num_dquotes: usize,
    quote: Quote,
) -> (char, usize) {
    let delimit_with_double = match quote {
        // Leave the single-quote preference only for content with single
        // quotes and no double quotes.
        Quote::Single => num_squotes > 0 && num_dquotes == 0,
        // Keep the double-quote preference unless the content has double
        // quotes and no single quotes.
        Quote::Double => !(num_dquotes > 0 && num_squotes == 0),
    };

    if delimit_with_double {
        ('"', num_dquotes)
    } else {
        ('\'', num_squotes)
    }
}