Check lower bounds for numeric literals, and permit 128-bit literals

This commit is contained in:
ayazhafiz 2022-02-02 23:35:57 -05:00
parent a8b02831e5
commit 097c5afc73
16 changed files with 883 additions and 220 deletions

View file

@ -1,5 +1,5 @@
use crate::env::Env;
use crate::expr::Expr;
use crate::expr::{Expr, IntValue};
use roc_parse::ast::Base;
use roc_problem::can::Problem;
use roc_problem::can::RuntimeError::*;
@ -9,11 +9,6 @@ use roc_types::subs::VarStore;
use std::i64;
use std::str;
// TODO use rust's integer parsing again
//
// We're waiting for libcore here, see https://github.com/rust-lang/rust/issues/22639
// There is a nightly API for exposing the parse error.
#[inline(always)]
pub fn num_expr_from_result(
var_store: &mut VarStore,
@ -55,7 +50,7 @@ pub fn num_expr_from_result(
#[inline(always)]
pub fn int_expr_from_result(
var_store: &mut VarStore,
result: Result<(&str, i128, NumericBound<IntWidth>), (&str, IntErrorKind)>,
result: Result<(&str, IntValue, NumericBound<IntWidth>), (&str, IntErrorKind)>,
region: Region,
base: Base,
env: &mut Env,
@ -106,9 +101,9 @@ pub fn float_expr_from_result(
}
pub enum ParsedNumResult {
Int(i64, NumericBound<IntWidth>),
Int(IntValue, NumericBound<IntWidth>),
Float(f64, NumericBound<FloatWidth>),
UnknownNum(i64),
UnknownNum(IntValue),
}
#[inline(always)]
@ -116,7 +111,7 @@ pub fn finish_parsing_num(raw: &str) -> Result<ParsedNumResult, (&str, IntErrorK
// Ignore underscores.
let radix = 10;
let (num, bound) =
from_str_radix::<i64>(raw.replace("_", "").as_str(), radix).map_err(|e| (raw, e.kind))?;
from_str_radix(raw.replace("_", "").as_str(), radix).map_err(|e| (raw, e))?;
// Let's try to specialize the number
Ok(match bound {
NumericBound::None => ParsedNumResult::UnknownNum(num),
@ -124,7 +119,11 @@ pub fn finish_parsing_num(raw: &str) -> Result<ParsedNumResult, (&str, IntErrorK
ParsedNumResult::Int(num, NumericBound::Exact(iw))
}
NumericBound::Exact(NumWidth::Float(fw)) => {
ParsedNumResult::Float(num as f64, NumericBound::Exact(fw))
let num = match num {
IntValue::I128(n) => n as f64,
IntValue::U128(n) => n as f64,
};
ParsedNumResult::Float(num, NumericBound::Exact(fw))
}
})
}
@ -134,7 +133,7 @@ pub fn finish_parsing_base(
raw: &str,
base: Base,
is_negative: bool,
) -> Result<(i64, NumericBound<IntWidth>), (&str, IntErrorKind)> {
) -> Result<(IntValue, NumericBound<IntWidth>), (&str, IntErrorKind)> {
let radix = match base {
Base::Hex => 16,
Base::Decimal => 10,
@ -144,11 +143,10 @@ pub fn finish_parsing_base(
// Ignore underscores, insert - when negative to get correct underflow/overflow behavior
(if is_negative {
from_str_radix::<i64>(format!("-{}", raw.replace("_", "")).as_str(), radix)
from_str_radix(format!("-{}", raw.replace("_", "")).as_str(), radix)
} else {
from_str_radix::<i64>(raw.replace("_", "").as_str(), radix)
from_str_radix(raw.replace("_", "").as_str(), radix)
})
.map_err(|e| e.kind)
.and_then(|(n, bound)| {
let bound = match bound {
NumericBound::None => NumericBound::None,
@ -164,15 +162,12 @@ pub fn finish_parsing_base(
pub fn finish_parsing_float(
raw: &str,
) -> Result<(f64, NumericBound<FloatWidth>), (&str, FloatErrorKind)> {
let (bound, raw_without_suffix) = parse_literal_suffix(raw.as_bytes());
// Safety: `raw` is valid UTF8, and `parse_literal_suffix` will only chop UTF8
// characters off the end, if it chops off anything at all.
let raw_without_suffix = unsafe { str::from_utf8_unchecked(raw_without_suffix) };
let (opt_bound, raw_without_suffix) = parse_literal_suffix(raw);
let bound = match bound {
NumericBound::None => NumericBound::None,
NumericBound::Exact(NumWidth::Float(fw)) => NumericBound::Exact(fw),
NumericBound::Exact(NumWidth::Int(_)) => return Err((raw, FloatErrorKind::IntSuffix)),
let bound = match opt_bound {
None => NumericBound::None,
Some(NumWidth::Float(fw)) => NumericBound::Exact(fw),
Some(NumWidth::Int(_)) => return Err((raw, FloatErrorKind::IntSuffix)),
};
// Ignore underscores.
@ -189,33 +184,33 @@ pub fn finish_parsing_float(
}
}
fn parse_literal_suffix(num_str: &[u8]) -> (NumericBound<NumWidth>, &[u8]) {
fn parse_literal_suffix(num_str: &str) -> (Option<NumWidth>, &str) {
macro_rules! parse_num_suffix {
($($suffix:expr, $width:expr)*) => {$(
if num_str.ends_with($suffix) {
return (NumericBound::Exact($width), num_str.get(0..num_str.len() - $suffix.len()).unwrap());
return (Some($width), num_str.get(0..num_str.len() - $suffix.len()).unwrap());
}
)*}
}
parse_num_suffix! {
b"u8", NumWidth::Int(IntWidth::U8)
b"u16", NumWidth::Int(IntWidth::U16)
b"u32", NumWidth::Int(IntWidth::U32)
b"u64", NumWidth::Int(IntWidth::U64)
b"u128", NumWidth::Int(IntWidth::U128)
b"i8", NumWidth::Int(IntWidth::I8)
b"i16", NumWidth::Int(IntWidth::I16)
b"i32", NumWidth::Int(IntWidth::I32)
b"i64", NumWidth::Int(IntWidth::I64)
b"i128", NumWidth::Int(IntWidth::I128)
b"nat", NumWidth::Int(IntWidth::Nat)
b"dec", NumWidth::Float(FloatWidth::Dec)
b"f32", NumWidth::Float(FloatWidth::F32)
b"f64", NumWidth::Float(FloatWidth::F64)
"u8", NumWidth::Int(IntWidth::U8)
"u16", NumWidth::Int(IntWidth::U16)
"u32", NumWidth::Int(IntWidth::U32)
"u64", NumWidth::Int(IntWidth::U64)
"u128", NumWidth::Int(IntWidth::U128)
"i8", NumWidth::Int(IntWidth::I8)
"i16", NumWidth::Int(IntWidth::I16)
"i32", NumWidth::Int(IntWidth::I32)
"i64", NumWidth::Int(IntWidth::I64)
"i128", NumWidth::Int(IntWidth::I128)
"nat", NumWidth::Int(IntWidth::Nat)
"dec", NumWidth::Float(FloatWidth::Dec)
"f32", NumWidth::Float(FloatWidth::F32)
"f64", NumWidth::Float(FloatWidth::F64)
}
(NumericBound::None, num_str)
(None, num_str)
}
/// Integer parsing code taken from the rust libcore,
@ -226,47 +221,11 @@ fn parse_literal_suffix(num_str: &[u8]) -> (NumericBound<NumWidth>, &[u8]) {
/// the LEGAL_DETAILS file in the root directory of this distribution.
///
/// Thanks to the Rust project and its contributors!
trait FromStrRadixHelper: PartialOrd + Copy {
fn min_value() -> Self;
fn max_value() -> Self;
fn from_u32(u: u32) -> Self;
fn checked_mul(&self, other: u32) -> Option<Self>;
fn checked_sub(&self, other: u32) -> Option<Self>;
fn checked_add(&self, other: u32) -> Option<Self>;
}
macro_rules! doit {
($($t:ty)*) => ($(impl FromStrRadixHelper for $t {
#[inline]
fn min_value() -> Self { Self::min_value() }
#[inline]
fn max_value() -> Self { Self::max_value() }
#[inline]
fn from_u32(u: u32) -> Self { u as Self }
#[inline]
fn checked_mul(&self, other: u32) -> Option<Self> {
Self::checked_mul(*self, other as Self)
}
#[inline]
fn checked_sub(&self, other: u32) -> Option<Self> {
Self::checked_sub(*self, other as Self)
}
#[inline]
fn checked_add(&self, other: u32) -> Option<Self> {
Self::checked_add(*self, other as Self)
}
})*)
}
// We only need the i64 implementation, but libcore defines
// doit! { i8 i16 i32 i64 i128 isize u8 u16 u32 u64 u128 usize }
doit! { i64 }
fn from_str_radix<T: FromStrRadixHelper>(
fn from_str_radix(
src: &str,
radix: u32,
) -> Result<(T, NumericBound<NumWidth>), ParseIntError> {
) -> Result<(IntValue, NumericBound<NumWidth>), IntErrorKind> {
use self::IntErrorKind::*;
use self::ParseIntError as PIE;
assert!(
(2..=36).contains(&radix),
@ -274,65 +233,117 @@ fn from_str_radix<T: FromStrRadixHelper>(
radix
);
if src.is_empty() {
return Err(PIE { kind: Empty });
}
let (opt_exact_bound, src) = parse_literal_suffix(src);
let is_signed_ty = T::from_u32(0) > T::min_value();
// all valid digits are ascii, so we will just iterate over the utf8 bytes
// and cast them to chars. .to_digit() will safely return None for anything
// other than a valid ascii digit for the given radix, including the first-byte
// of multi-byte sequences
let src = src.as_bytes();
let (is_positive, digits) = match src[0] {
b'+' => (true, &src[1..]),
b'-' if is_signed_ty => (false, &src[1..]),
_ => (true, src),
use std::num::IntErrorKind as StdIEK;
let result = match i128::from_str_radix(src, radix) {
Ok(result) => IntValue::I128(result),
Err(pie) => match pie.kind() {
StdIEK::Empty => return Err(IntErrorKind::Empty),
StdIEK::InvalidDigit => return Err(IntErrorKind::InvalidDigit),
StdIEK::NegOverflow => return Err(IntErrorKind::Underflow),
StdIEK::PosOverflow => {
// try a u128
match u128::from_str_radix(src, radix) {
Ok(result) => IntValue::U128(result),
Err(pie) => match pie.kind() {
StdIEK::InvalidDigit => return Err(IntErrorKind::InvalidDigit),
StdIEK::PosOverflow => return Err(IntErrorKind::Overflow),
StdIEK::Empty | StdIEK::Zero | StdIEK::NegOverflow => unreachable!(),
_ => unreachable!("I thought all possibilities were exhausted, but std::num added a new one")
},
}
}
StdIEK::Zero => unreachable!("Parsed a i128"),
_ => unreachable!(
"I thought all possibilities were exhausted, but std::num added a new one"
),
},
};
let (bound, digits) = parse_literal_suffix(digits);
let (lower_bound, is_negative) = match result {
IntValue::I128(num) => (lower_bound_of_int(num), num <= 0),
IntValue::U128(_) => (IntWidth::U128, false),
};
if digits.is_empty() {
return Err(PIE { kind: Empty });
match opt_exact_bound {
None => {
// TODO: use the lower bound
Ok((result, NumericBound::None))
}
Some(bound @ NumWidth::Float(_)) => {
// For now, assume floats can represent all integers
// TODO: this is somewhat incorrect, revisit
Ok((result, NumericBound::Exact(bound)))
}
Some(NumWidth::Int(exact_width)) => {
// We need to check if the exact bound >= lower bound.
if exact_width.is_superset(&lower_bound, is_negative) {
// Great! Use the exact bound.
Ok((result, NumericBound::Exact(NumWidth::Int(exact_width))))
} else {
// This is something like 200i8; the lower bound is u8, which holds strictly more
// ints on the positive side than i8 does. Report an error depending on which side
// of the integers we checked.
let err = if is_negative {
UnderflowsSuffix {
suffix_type: exact_width.type_str(),
min_value: exact_width.min_value(),
}
} else {
OverflowsSuffix {
suffix_type: exact_width.type_str(),
max_value: exact_width.max_value(),
}
};
Err(err)
}
}
}
}
let mut result = T::from_u32(0);
if is_positive {
// The number is positive
for &c in digits {
let x = match (c as char).to_digit(radix) {
Some(x) => x,
None => return Err(PIE { kind: InvalidDigit }),
};
result = match result.checked_mul(radix) {
Some(result) => result,
None => return Err(PIE { kind: Overflow }),
};
result = match result.checked_add(x) {
Some(result) => result,
None => return Err(PIE { kind: Overflow }),
};
fn lower_bound_of_int(result: i128) -> IntWidth {
use IntWidth::*;
if result >= 0 {
let result = result as u128;
if result > U64.max_value() {
I128
} else if result > I64.max_value() {
U64
} else if result > U32.max_value() {
I64
} else if result > I32.max_value() {
U32
} else if result > U16.max_value() {
I32
} else if result > I16.max_value() {
U16
} else if result > U8.max_value() {
I16
} else if result > I8.max_value() {
U8
} else {
I8
}
} else {
// The number is negative
for &c in digits {
let x = match (c as char).to_digit(radix) {
Some(x) => x,
None => return Err(PIE { kind: InvalidDigit }),
};
result = match result.checked_mul(radix) {
Some(result) => result,
None => return Err(PIE { kind: Underflow }),
};
result = match result.checked_sub(x) {
Some(result) => result,
None => return Err(PIE { kind: Underflow }),
};
if result < I64.min_value() {
I128
} else if result < I32.min_value() {
I64
} else if result < I16.min_value() {
I32
} else if result < I8.min_value() {
I16
} else {
I8
}
}
Ok((result, bound))
}
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
enum IntSign {
Unsigned,
Signed,
}
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
@ -350,6 +361,109 @@ pub enum IntWidth {
Nat,
}
impl IntWidth {
/// Returns the `IntSign` and bit width of a variant.
fn sign_and_width(&self) -> (IntSign, u32) {
use IntSign::*;
use IntWidth::*;
match self {
U8 => (Unsigned, 8),
U16 => (Unsigned, 16),
U32 => (Unsigned, 32),
U64 => (Unsigned, 64),
U128 => (Unsigned, 128),
I8 => (Signed, 8),
I16 => (Signed, 16),
I32 => (Signed, 32),
I64 => (Signed, 64),
I128 => (Signed, 128),
// TODO: this is platform specific!
Nat => (Unsigned, 64),
}
}
fn type_str(&self) -> &'static str {
use IntWidth::*;
match self {
U8 => "U8",
U16 => "U16",
U32 => "U32",
U64 => "U64",
U128 => "U128",
I8 => "I8",
I16 => "I16",
I32 => "I32",
I64 => "I64",
I128 => "I128",
Nat => "Nat",
}
}
fn max_value(&self) -> u128 {
use IntWidth::*;
match self {
U8 => u8::MAX as u128,
U16 => u16::MAX as u128,
U32 => u32::MAX as u128,
U64 => u64::MAX as u128,
U128 => u128::MAX,
I8 => i8::MAX as u128,
I16 => i16::MAX as u128,
I32 => i32::MAX as u128,
I64 => i64::MAX as u128,
I128 => i128::MAX as u128,
// TODO: this is platform specific!
Nat => u64::MAX as u128,
}
}
fn min_value(&self) -> i128 {
use IntWidth::*;
match self {
U8 | U16 | U32 | U64 | U128 | Nat => 0,
I8 => i8::MIN as i128,
I16 => i16::MIN as i128,
I32 => i32::MIN as i128,
I64 => i64::MIN as i128,
I128 => i128::MIN,
}
}
/// Checks if `self` represents superset of integers that `lower_bound` represents, on a particular
/// side of the integers relative to 0.
///
/// If `is_negative` is true, the negative side is checked; otherwise the positive side is checked.
pub fn is_superset(&self, lower_bound: &Self, is_negative: bool) -> bool {
use IntSign::*;
if is_negative {
match (self.sign_and_width(), lower_bound.sign_and_width()) {
((Signed, us), (Signed, lower_bound)) => us >= lower_bound,
// Unsigned ints can never represent negative numbers; signed (non-zero width)
// ints always can.
((Unsigned, _), (Signed, _)) => false,
((Signed, _), (Unsigned, _)) => true,
// Trivially true; both can only express 0.
((Unsigned, _), (Unsigned, _)) => true,
}
} else {
match (self.sign_and_width(), lower_bound.sign_and_width()) {
((Signed, us), (Signed, lower_bound))
| ((Unsigned, us), (Unsigned, lower_bound)) => us >= lower_bound,
// Unsigned ints with the same bit width as their unsigned counterparts can always
// express 2x more integers on the positive side as unsigned ints.
((Unsigned, us), (Signed, lower_bound)) => us >= lower_bound,
// ...but that means signed int widths can represent less than their unsigned
// counterparts, so the below is true iff the bit width is strictly greater. E.g.
// i16 is a superset of u8, but i16 is not a superset of u16.
((Signed, us), (Unsigned, lower_bound)) => us > lower_bound,
}
}
}
}
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
pub enum FloatWidth {
Dec,
@ -374,28 +488,3 @@ where
/// Must have exactly the width `W`.
Exact(W),
}
/// An error which can be returned when parsing an integer.
///
/// This error is used as the error type for the `from_str_radix()` functions
/// on the primitive integer types, such as [`i8::from_str_radix`].
///
/// # Potential causes
///
/// Among other causes, `ParseIntError` can be thrown because of leading or trailing whitespace
/// in the string e.g., when it is obtained from the standard input.
/// Using the [`str.trim()`] method ensures that no whitespace remains before parsing.
///
/// [`str.trim()`]: ../../std/primitive.str.html#method.trim
/// [`i8::from_str_radix`]: ../../std/primitive.i8.html#method.from_str_radix
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ParseIntError {
kind: IntErrorKind,
}
impl ParseIntError {
/// Outputs the detailed cause of parsing an integer failing.
pub fn kind(&self) -> &IntErrorKind {
&self.kind
}
}