mirror of
https://github.com/astral-sh/ruff.git
synced 2025-10-03 15:15:33 +00:00
Extend shrinking script to also remove tokens and characters (#5898)
This shrinks a good bit more than previously, which was helpful for all the formatter bugs. FWIW, I treat this as a very ad-hoc script, since it's mainly my companion for processing ecosystem bugs.
This commit is contained in:
parent
6fd8574a0b
commit
c2b7b46717
1 changed files with 64 additions and 20 deletions
|
@ -37,6 +37,7 @@ use ruff_python_ast::statement_visitor::{walk_body, walk_stmt, StatementVisitor}
|
||||||
use ruff_python_ast::visitor::{walk_expr, Visitor};
|
use ruff_python_ast::visitor::{walk_expr, Visitor};
|
||||||
use rustpython_ast::text_size::TextRange;
|
use rustpython_ast::text_size::TextRange;
|
||||||
use rustpython_ast::{Expr, Ranged, Stmt, Suite};
|
use rustpython_ast::{Expr, Ranged, Stmt, Suite};
|
||||||
|
use std::collections::HashMap;
|
||||||
use std::path::{Path, PathBuf};
|
use std::path::{Path, PathBuf};
|
||||||
use std::process::{Command, ExitCode};
|
use std::process::{Command, ExitCode};
|
||||||
use std::str;
|
use std::str;
|
||||||
|
@ -47,9 +48,10 @@ const STRATEGIES: &[&dyn Strategy] = &[
|
||||||
(&StrategyRemoveModuleMember),
|
(&StrategyRemoveModuleMember),
|
||||||
(&StrategyRemoveStatement),
|
(&StrategyRemoveStatement),
|
||||||
(&StrategyRemoveExpression),
|
(&StrategyRemoveExpression),
|
||||||
(&StrategyReplaceStatementWithPass),
|
|
||||||
(&StrategyRemoveLine),
|
(&StrategyRemoveLine),
|
||||||
(&StrategyRemoveNewline),
|
(&StrategyRemoveNewline),
|
||||||
|
(&StrategyRemoveToken),
|
||||||
|
(&StrategyRemoveChar),
|
||||||
];
|
];
|
||||||
|
|
||||||
/// Each strategy is a way of producing possible minimizations
|
/// Each strategy is a way of producing possible minimizations
|
||||||
|
@ -160,22 +162,6 @@ impl Strategy for StrategyRemoveStatement {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// A body is invalid without any statements, but maybe replacing it with a pass works.
///
/// All candidate generation is delegated to `strategy_statement`, called with `true` as its
/// third argument (presumably the "replace with `pass`" switch -- confirm against that helper).
|
|
||||||
struct StrategyReplaceStatementWithPass;
|
|
||||||
|
|
||||||
impl Strategy for StrategyReplaceStatementWithPass {
|
|
||||||
fn name(&self) -> &'static str {
|
|
||||||
"replace statement with pass"
|
|
||||||
}
|
|
||||||
fn candidates<'a>(
|
|
||||||
&self,
|
|
||||||
input: &'a str,
|
|
||||||
ast: &'a Suite,
|
|
||||||
) -> Result<Box<dyn ExactSizeStringIter + 'a>> {
|
|
||||||
Ok(strategy_statement(input, ast, true))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Finds the ranges of all expressions.
|
/// Finds the ranges of all expressions.
|
||||||
#[derive(Default)]
|
#[derive(Default)]
|
||||||
struct ExpressionCollector {
|
struct ExpressionCollector {
|
||||||
|
@ -269,12 +255,62 @@ impl Strategy for StrategyRemoveNewline {
|
||||||
.collect();
|
.collect();
|
||||||
let iter = newline_positions.into_iter().map(move |newline_position| {
|
let iter = newline_positions.into_iter().map(move |newline_position| {
|
||||||
// trim to remove the indentation
|
// trim to remove the indentation
|
||||||
input[..newline_position].to_string() + input[newline_position + 1..].trim_start()
|
input[..newline_position].to_string()
|
||||||
|
+ input[newline_position + '\n'.len_utf8()..].trim_start()
|
||||||
});
|
});
|
||||||
Ok(Box::new(iter))
|
Ok(Box::new(iter))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Try removing each python token. This is really slow and runs at the end
|
||||||
|
struct StrategyRemoveToken;
|
||||||
|
|
||||||
|
impl Strategy for StrategyRemoveToken {
|
||||||
|
fn name(&self) -> &'static str {
|
||||||
|
"remove token"
|
||||||
|
}
|
||||||
|
|
||||||
|
fn candidates<'a>(
|
||||||
|
&self,
|
||||||
|
input: &'a str,
|
||||||
|
_ast: &'a Suite,
|
||||||
|
) -> Result<Box<dyn ExactSizeStringIter + 'a>> {
|
||||||
|
let token_ranges: Vec<_> = ruff_rustpython::tokenize(input)
|
||||||
|
.into_iter()
|
||||||
|
// At this point we know we have valid python code
|
||||||
|
.map(Result::unwrap)
|
||||||
|
.filter(|token| token.1.len().to_usize() > 0)
|
||||||
|
.map(|token| token.1)
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
let iter = token_ranges.into_iter().map(move |range| {
|
||||||
|
input[..range.start().to_usize()].to_string() + &input[range.end().to_usize()..]
|
||||||
|
});
|
||||||
|
Ok(Box::new(iter))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Try removing each individual character in the file. This is really slow and runs at the end
|
||||||
|
struct StrategyRemoveChar;
|
||||||
|
|
||||||
|
impl Strategy for StrategyRemoveChar {
|
||||||
|
fn name(&self) -> &'static str {
|
||||||
|
"remove character"
|
||||||
|
}
|
||||||
|
|
||||||
|
fn candidates<'a>(
|
||||||
|
&self,
|
||||||
|
input: &'a str,
|
||||||
|
_ast: &'a Suite,
|
||||||
|
) -> Result<Box<dyn ExactSizeStringIter + 'a>> {
|
||||||
|
let char_indices: Vec<_> = input.char_indices().collect();
|
||||||
|
let iter = char_indices
|
||||||
|
.into_iter()
|
||||||
|
.map(move |(pos, char)| input[..pos].to_string() + &input[pos + char.len_utf8()..]);
|
||||||
|
Ok(Box::new(iter))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// Returns strategy, position in the iteration (so they can be skipped in the next attempt) and
|
/// Returns strategy, position in the iteration (so they can be skipped in the next attempt) and
|
||||||
/// minimized code.
|
/// minimized code.
|
||||||
fn minimization_step(
|
fn minimization_step(
|
||||||
|
@ -384,6 +420,7 @@ fn run() -> Result<()> {
|
||||||
let command_args = shlex::split(&args.command).context("Couldn't split command input")?;
|
let command_args = shlex::split(&args.command).context("Couldn't split command input")?;
|
||||||
|
|
||||||
let loop_start = Instant::now();
|
let loop_start = Instant::now();
|
||||||
|
let mut stats = HashMap::new();
|
||||||
|
|
||||||
let mut num_iterations = 0;
|
let mut num_iterations = 0;
|
||||||
// normalize line endings for the remove newline dependent rules
|
// normalize line endings for the remove newline dependent rules
|
||||||
|
@ -402,22 +439,29 @@ fn run() -> Result<()> {
|
||||||
let duration = start.elapsed();
|
let duration = start.elapsed();
|
||||||
if let Some((strategy, idx, smaller_failure)) = smaller_failure {
|
if let Some((strategy, idx, smaller_failure)) = smaller_failure {
|
||||||
println!(
|
println!(
|
||||||
"Match found with {} {idx} in {:.1}s, {} bytes remaining",
|
"Match found with {} {idx} in {:.2}s, {} bytes remaining",
|
||||||
strategy.name(),
|
strategy.name(),
|
||||||
duration.as_secs_f32(),
|
duration.as_secs_f32(),
|
||||||
smaller_failure.len()
|
smaller_failure.len()
|
||||||
);
|
);
|
||||||
|
*stats.entry(strategy.name()).or_insert(0) += 1;
|
||||||
input = smaller_failure;
|
input = smaller_failure;
|
||||||
last_strategy_and_idx = Some((strategy, idx));
|
last_strategy_and_idx = Some((strategy, idx));
|
||||||
} else {
|
} else {
|
||||||
// The last minimization failed, write back the original content
|
// The last minimization failed, write back the original content
|
||||||
fs::write(&args.output_file, input.as_bytes())?;
|
fs::write(&args.output_file, input.as_bytes())?;
|
||||||
|
println!(
|
||||||
|
"Last iteration in {:.2}s, {} bytes remaining",
|
||||||
|
duration.as_secs_f32(),
|
||||||
|
input.as_bytes().len()
|
||||||
|
);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
println!("Strategies taken: {stats:?}");
|
||||||
println!(
|
println!(
|
||||||
"Done with {num_iterations} iterations in {:.1}s. Find your minimized example in {}",
|
"Done with {num_iterations} iterations in {:.2}s. Find your minimized example in {}",
|
||||||
loop_start.elapsed().as_secs_f32(),
|
loop_start.elapsed().as_secs_f32(),
|
||||||
args.output_file.display()
|
args.output_file.display()
|
||||||
);
|
);
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue