[numpy] numpy-legacy-random (#2960)

The new `Generator` in NumPy uses bits provided by [PCG64](https://numpy.org/doc/stable/reference/random/bit_generators/pcg64.html#numpy.random.PCG64) which has better statistical properties than the legacy [MT19937](https://numpy.org/doc/stable/reference/random/bit_generators/mt19937.html#numpy.random.MT19937) used in [RandomState](https://numpy.org/doc/stable/reference/random/legacy.html#numpy.random.RandomState). Global random functions can also be problematic with parallel processing.

This rule is probably quite useful for data scientists (perhaps in combination with `nbqa`, which runs linters on Jupyter notebooks).

References:
- [Legacy Random Generation](https://numpy.org/doc/stable/reference/random/legacy.html#legacy)
- [Random Sampling](https://numpy.org/doc/stable/reference/random/index.html#random-quick-start)
- [Using PyTorch + NumPy? You're making a mistake.](https://tanelp.github.io/posts/a-bug-that-plagues-thousands-of-open-source-ml-projects/)
This commit is contained in:
Simon Brugman 2023-02-17 03:06:30 +01:00 committed by GitHub
parent e081455b06
commit 34664a0ca0
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
10 changed files with 757 additions and 0 deletions

View file

@ -1506,6 +1506,7 @@ For more, see [tryceratops](https://pypi.org/project/tryceratops/1.1.0/) on PyPI
| Code | Name | Message | Fix |
| ---- | ---- | ------- | --- |
| NPY001 | [numpy-deprecated-type-alias](https://beta.ruff.rs/docs/rules/numpy-deprecated-type-alias/) | Type alias `np.{type_name}` is deprecated, replace with builtin type | 🛠 |
| NPY002 | [numpy-legacy-random](https://beta.ruff.rs/docs/rules/numpy-legacy-random/) | Replace legacy `np.random.{method_name}` call with `np.random.Generator` | |
### Ruff-specific rules (RUF)

View file

@ -0,0 +1,62 @@
# Do this (new version): draw samples from an explicit generator created by `default_rng()`
from numpy.random import default_rng
rng = default_rng()
vals = rng.standard_normal(10)
more_vals = rng.standard_normal(10)
numbers = rng.integers(high, size=5)
# instead of this (legacy version): call the functions exposed directly on the `numpy.random` module
from numpy import random
vals = random.standard_normal(10)
more_vals = random.standard_normal(10)
numbers = random.integers(high, size=5)
import numpy  # the calls below spell out the full `numpy.random.<name>` path
numpy.random.seed()
numpy.random.get_state()
numpy.random.set_state()
numpy.random.rand()
numpy.random.randn()
numpy.random.randint()
numpy.random.random_integers()
numpy.random.random_sample()
numpy.random.choice()
numpy.random.bytes()
numpy.random.shuffle()
numpy.random.permutation()
numpy.random.beta()
numpy.random.binomial()
numpy.random.chisquare()
numpy.random.dirichlet()
numpy.random.exponential()
numpy.random.f()
numpy.random.gamma()
numpy.random.geometric()
numpy.random.get_state()  # repeats the get_state() call made earlier in this list
numpy.random.gumbel()
numpy.random.hypergeometric()
numpy.random.laplace()
numpy.random.logistic()
numpy.random.lognormal()
numpy.random.logseries()
numpy.random.multinomial()
numpy.random.multivariate_normal()
numpy.random.negative_binomial()
numpy.random.noncentral_chisquare()
numpy.random.noncentral_f()
numpy.random.normal()
numpy.random.pareto()
numpy.random.poisson()
numpy.random.power()
numpy.random.rayleigh()
numpy.random.standard_cauchy()
numpy.random.standard_exponential()
numpy.random.standard_gamma()
numpy.random.standard_normal()
numpy.random.standard_t()
numpy.random.triangular()
numpy.random.uniform()
numpy.random.vonmises()
numpy.random.wald()
numpy.random.weibull()
numpy.random.zipf()

View file

@ -2841,6 +2841,11 @@ where
flake8_use_pathlib::helpers::replaceable_by_pathlib(self, func);
}
// numpy
if self.settings.rules.enabled(&Rule::NumpyLegacyRandom) {
numpy::rules::numpy_legacy_random(self, func);
}
// flake8-logging-format
if self.settings.rules.enabled(&Rule::LoggingStringFormat)
|| self.settings.rules.enabled(&Rule::LoggingPercentFormat)

View file

@ -590,6 +590,7 @@ pub fn code_to_rule(linter: Linter, code: &str) -> Option<Rule> {
// numpy
(Numpy, "001") => Rule::NumpyDeprecatedTypeAlias,
(Numpy, "002") => Rule::NumpyLegacyRandom,
// ruff
(Ruff, "001") => Rule::AmbiguousUnicodeCharacterString,

View file

@ -552,6 +552,7 @@ ruff_macros::register_rules!(
rules::flake8_self::rules::PrivateMemberAccess,
// numpy
rules::numpy::rules::NumpyDeprecatedTypeAlias,
rules::numpy::rules::NumpyLegacyRandom,
// ruff
rules::ruff::rules::AmbiguousUnicodeCharacterString,
rules::ruff::rules::AmbiguousUnicodeCharacterDocstring,

View file

@ -14,6 +14,7 @@ mod tests {
use crate::{assert_yaml_snapshot, settings};
#[test_case(Rule::NumpyDeprecatedTypeAlias, Path::new("NPY001.py"); "NPY001")]
#[test_case(Rule::NumpyLegacyRandom, Path::new("NPY002.py"); "NPY002")]
fn rules(rule_code: Rule, path: &Path) -> Result<()> {
let snapshot = format!("{}_{}", rule_code.as_ref(), path.to_string_lossy());
let diagnostics = test_path(

View file

@ -1,3 +1,5 @@
pub use deprecated_type_alias::{deprecated_type_alias, NumpyDeprecatedTypeAlias};
pub use numpy_legacy_random::{numpy_legacy_random, NumpyLegacyRandom};
mod deprecated_type_alias;
mod numpy_legacy_random;

View file

@ -0,0 +1,128 @@
use ruff_macros::{define_violation, derive_message_formats};
use rustpython_parser::ast::Expr;
use crate::ast::types::Range;
use crate::checkers::ast::Checker;
use crate::registry::Diagnostic;
use crate::violation::Violation;
define_violation!(
/// ## What it does
/// Checks for the use of legacy `np.random` function calls.
///
/// ## Why is this bad?
/// According to the NumPy documentation's [Legacy Random Generation]:
///
/// > The `RandomState` provides access to legacy generators... This class
/// > should only be used if it is essential to have randoms that are
/// > identical to what would have been produced by previous versions of
/// > NumPy.
///
/// The members exposed directly on the `random` module are convenience
/// functions that alias to methods on a global singleton `RandomState`
/// instance. NumPy recommends using a dedicated `Generator` instance
/// rather than the random variate generation methods exposed directly on
/// the `random` module, as the new `Generator` is both faster and has
/// better statistical properties.
///
/// See the documentation on [Random Sampling] and [NEP 19] for further
/// details.
///
/// ## Examples
/// ```python
/// import numpy as np
///
/// np.random.seed(1337)
/// np.random.normal()
/// ```
///
/// Use instead:
/// ```python
/// import numpy as np
///
/// rng = np.random.default_rng(1337)
/// rng.normal()
/// ```
///
/// [Legacy Random Generation]: https://numpy.org/doc/stable/reference/random/legacy.html#legacy
/// [Random Sampling]: https://numpy.org/doc/stable/reference/random/index.html#random-quick-start
/// [NEP 19]: https://numpy.org/neps/nep-0019-rng-policy.html
pub struct NumpyLegacyRandom {
/// Name of the legacy `numpy.random` function that was called (for
/// example `seed`); interpolated into the diagnostic message.
pub method_name: String,
}
);
// Supplies the user-facing message for NPY002.
impl Violation for NumpyLegacyRandom {
// Builds the diagnostic text, naming the legacy function that was called.
#[derive_message_formats]
fn message(&self) -> String {
// Destructure so the field can be used as an inline `format!` argument.
let NumpyLegacyRandom { method_name } = self;
format!("Replace legacy `np.random.{method_name}` call with `np.random.Generator`")
}
}
/// Returns `true` if `name` is one of the legacy functions exposed directly
/// on the `numpy.random` module.
fn is_legacy_random_function(name: &str) -> bool {
    matches!(
        name,
        // seeding state
        "seed"
            | "get_state"
            | "set_state"
            // simple random data (`random`, `ranf` and `sample` are aliases of
            // `random_sample`)
            | "rand"
            | "randn"
            | "randint"
            | "random"
            | "random_integers"
            | "random_sample"
            | "ranf"
            | "sample"
            | "choice"
            | "bytes"
            // permutations
            | "shuffle"
            | "permutation"
            // distributions
            | "beta"
            | "binomial"
            | "chisquare"
            | "dirichlet"
            | "exponential"
            | "f"
            | "gamma"
            | "geometric"
            | "gumbel"
            | "hypergeometric"
            | "laplace"
            | "logistic"
            | "lognormal"
            | "logseries"
            | "multinomial"
            | "multivariate_normal"
            | "negative_binomial"
            | "noncentral_chisquare"
            | "noncentral_f"
            | "normal"
            | "pareto"
            | "poisson"
            | "power"
            | "rayleigh"
            | "standard_cauchy"
            | "standard_exponential"
            | "standard_gamma"
            | "standard_normal"
            | "standard_t"
            | "triangular"
            | "uniform"
            | "vonmises"
            | "wald"
            | "weibull"
            | "zipf"
    )
}

/// NPY002
///
/// Pushes a [`NumpyLegacyRandom`] diagnostic when `expr` resolves to a legacy
/// `numpy.random.<name>` function.
///
/// `expr` is the expression whose call path is resolved; the diagnostic range
/// covers exactly that expression. Paths that do not resolve, or that resolve
/// to anything other than a three-segment `numpy.random.<legacy name>` path,
/// are ignored.
pub fn numpy_legacy_random(checker: &mut Checker, expr: &Expr) {
    if let Some(method_name) = checker.resolve_call_path(expr).and_then(|call_path| {
        match call_path.as_slice() {
            // The slice pattern fixes both the module prefix and the path
            // length, so `numpy.random.<name>.<attr>` never matches.
            ["numpy", "random", name] if is_legacy_random_function(name) => Some(*name),
            _ => None,
        }
    }) {
        checker.diagnostics.push(Diagnostic::new(
            NumpyLegacyRandom {
                method_name: method_name.to_string(),
            },
            Range::from_located(expr),
        ));
    }
}

View file

@ -0,0 +1,555 @@
---
source: crates/ruff/src/rules/numpy/mod.rs
expression: diagnostics
---
- kind:
NumpyLegacyRandom:
method_name: standard_normal
location:
row: 10
column: 7
end_location:
row: 10
column: 29
fix: ~
parent: ~
- kind:
NumpyLegacyRandom:
method_name: standard_normal
location:
row: 11
column: 12
end_location:
row: 11
column: 34
fix: ~
parent: ~
- kind:
NumpyLegacyRandom:
method_name: seed
location:
row: 15
column: 0
end_location:
row: 15
column: 17
fix: ~
parent: ~
- kind:
NumpyLegacyRandom:
method_name: get_state
location:
row: 16
column: 0
end_location:
row: 16
column: 22
fix: ~
parent: ~
- kind:
NumpyLegacyRandom:
method_name: set_state
location:
row: 17
column: 0
end_location:
row: 17
column: 22
fix: ~
parent: ~
- kind:
NumpyLegacyRandom:
method_name: rand
location:
row: 18
column: 0
end_location:
row: 18
column: 17
fix: ~
parent: ~
- kind:
NumpyLegacyRandom:
method_name: randn
location:
row: 19
column: 0
end_location:
row: 19
column: 18
fix: ~
parent: ~
- kind:
NumpyLegacyRandom:
method_name: randint
location:
row: 20
column: 0
end_location:
row: 20
column: 20
fix: ~
parent: ~
- kind:
NumpyLegacyRandom:
method_name: random_integers
location:
row: 21
column: 0
end_location:
row: 21
column: 28
fix: ~
parent: ~
- kind:
NumpyLegacyRandom:
method_name: random_sample
location:
row: 22
column: 0
end_location:
row: 22
column: 26
fix: ~
parent: ~
- kind:
NumpyLegacyRandom:
method_name: choice
location:
row: 23
column: 0
end_location:
row: 23
column: 19
fix: ~
parent: ~
- kind:
NumpyLegacyRandom:
method_name: bytes
location:
row: 24
column: 0
end_location:
row: 24
column: 18
fix: ~
parent: ~
- kind:
NumpyLegacyRandom:
method_name: shuffle
location:
row: 25
column: 0
end_location:
row: 25
column: 20
fix: ~
parent: ~
- kind:
NumpyLegacyRandom:
method_name: permutation
location:
row: 26
column: 0
end_location:
row: 26
column: 24
fix: ~
parent: ~
- kind:
NumpyLegacyRandom:
method_name: beta
location:
row: 27
column: 0
end_location:
row: 27
column: 17
fix: ~
parent: ~
- kind:
NumpyLegacyRandom:
method_name: binomial
location:
row: 28
column: 0
end_location:
row: 28
column: 21
fix: ~
parent: ~
- kind:
NumpyLegacyRandom:
method_name: chisquare
location:
row: 29
column: 0
end_location:
row: 29
column: 22
fix: ~
parent: ~
- kind:
NumpyLegacyRandom:
method_name: dirichlet
location:
row: 30
column: 0
end_location:
row: 30
column: 22
fix: ~
parent: ~
- kind:
NumpyLegacyRandom:
method_name: exponential
location:
row: 31
column: 0
end_location:
row: 31
column: 24
fix: ~
parent: ~
- kind:
NumpyLegacyRandom:
method_name: f
location:
row: 32
column: 0
end_location:
row: 32
column: 14
fix: ~
parent: ~
- kind:
NumpyLegacyRandom:
method_name: gamma
location:
row: 33
column: 0
end_location:
row: 33
column: 18
fix: ~
parent: ~
- kind:
NumpyLegacyRandom:
method_name: geometric
location:
row: 34
column: 0
end_location:
row: 34
column: 22
fix: ~
parent: ~
- kind:
NumpyLegacyRandom:
method_name: get_state
location:
row: 35
column: 0
end_location:
row: 35
column: 22
fix: ~
parent: ~
- kind:
NumpyLegacyRandom:
method_name: gumbel
location:
row: 36
column: 0
end_location:
row: 36
column: 19
fix: ~
parent: ~
- kind:
NumpyLegacyRandom:
method_name: hypergeometric
location:
row: 37
column: 0
end_location:
row: 37
column: 27
fix: ~
parent: ~
- kind:
NumpyLegacyRandom:
method_name: laplace
location:
row: 38
column: 0
end_location:
row: 38
column: 20
fix: ~
parent: ~
- kind:
NumpyLegacyRandom:
method_name: logistic
location:
row: 39
column: 0
end_location:
row: 39
column: 21
fix: ~
parent: ~
- kind:
NumpyLegacyRandom:
method_name: lognormal
location:
row: 40
column: 0
end_location:
row: 40
column: 22
fix: ~
parent: ~
- kind:
NumpyLegacyRandom:
method_name: logseries
location:
row: 41
column: 0
end_location:
row: 41
column: 22
fix: ~
parent: ~
- kind:
NumpyLegacyRandom:
method_name: multinomial
location:
row: 42
column: 0
end_location:
row: 42
column: 24
fix: ~
parent: ~
- kind:
NumpyLegacyRandom:
method_name: multivariate_normal
location:
row: 43
column: 0
end_location:
row: 43
column: 32
fix: ~
parent: ~
- kind:
NumpyLegacyRandom:
method_name: negative_binomial
location:
row: 44
column: 0
end_location:
row: 44
column: 30
fix: ~
parent: ~
- kind:
NumpyLegacyRandom:
method_name: noncentral_chisquare
location:
row: 45
column: 0
end_location:
row: 45
column: 33
fix: ~
parent: ~
- kind:
NumpyLegacyRandom:
method_name: noncentral_f
location:
row: 46
column: 0
end_location:
row: 46
column: 25
fix: ~
parent: ~
- kind:
NumpyLegacyRandom:
method_name: normal
location:
row: 47
column: 0
end_location:
row: 47
column: 19
fix: ~
parent: ~
- kind:
NumpyLegacyRandom:
method_name: pareto
location:
row: 48
column: 0
end_location:
row: 48
column: 19
fix: ~
parent: ~
- kind:
NumpyLegacyRandom:
method_name: poisson
location:
row: 49
column: 0
end_location:
row: 49
column: 20
fix: ~
parent: ~
- kind:
NumpyLegacyRandom:
method_name: power
location:
row: 50
column: 0
end_location:
row: 50
column: 18
fix: ~
parent: ~
- kind:
NumpyLegacyRandom:
method_name: rayleigh
location:
row: 51
column: 0
end_location:
row: 51
column: 21
fix: ~
parent: ~
- kind:
NumpyLegacyRandom:
method_name: standard_cauchy
location:
row: 52
column: 0
end_location:
row: 52
column: 28
fix: ~
parent: ~
- kind:
NumpyLegacyRandom:
method_name: standard_exponential
location:
row: 53
column: 0
end_location:
row: 53
column: 33
fix: ~
parent: ~
- kind:
NumpyLegacyRandom:
method_name: standard_gamma
location:
row: 54
column: 0
end_location:
row: 54
column: 27
fix: ~
parent: ~
- kind:
NumpyLegacyRandom:
method_name: standard_normal
location:
row: 55
column: 0
end_location:
row: 55
column: 28
fix: ~
parent: ~
- kind:
NumpyLegacyRandom:
method_name: standard_t
location:
row: 56
column: 0
end_location:
row: 56
column: 23
fix: ~
parent: ~
- kind:
NumpyLegacyRandom:
method_name: triangular
location:
row: 57
column: 0
end_location:
row: 57
column: 23
fix: ~
parent: ~
- kind:
NumpyLegacyRandom:
method_name: uniform
location:
row: 58
column: 0
end_location:
row: 58
column: 20
fix: ~
parent: ~
- kind:
NumpyLegacyRandom:
method_name: vonmises
location:
row: 59
column: 0
end_location:
row: 59
column: 21
fix: ~
parent: ~
- kind:
NumpyLegacyRandom:
method_name: wald
location:
row: 60
column: 0
end_location:
row: 60
column: 17
fix: ~
parent: ~
- kind:
NumpyLegacyRandom:
method_name: weibull
location:
row: 61
column: 0
end_location:
row: 61
column: 20
fix: ~
parent: ~
- kind:
NumpyLegacyRandom:
method_name: zipf
location:
row: 62
column: 0
end_location:
row: 62
column: 17
fix: ~
parent: ~

View file

@ -1673,6 +1673,7 @@
"NPY0",
"NPY00",
"NPY001",
"NPY002",
"PD",
"PD0",
"PD00",