mirror of
https://github.com/tursodatabase/limbo.git
synced 2025-08-04 01:58:16 +00:00
rewrote parsing from text to integer and real
This commit is contained in:
parent
409297cfdd
commit
033d0116d6
2 changed files with 342 additions and 77 deletions
335
core/util.rs
335
core/util.rs
|
@ -1,8 +1,10 @@
|
|||
use core::num::IntErrorKind;
|
||||
use limbo_sqlite3_parser::ast::{self, CreateTableBody, Expr, FunctionTail, Literal};
|
||||
use std::{rc::Rc, sync::Arc};
|
||||
|
||||
use crate::{
|
||||
schema::{self, Column, Schema, Type},
|
||||
types::OwnedValue,
|
||||
Result, Statement, StepResult, IO,
|
||||
};
|
||||
|
||||
|
@ -380,6 +382,147 @@ pub fn columns_from_create_table_body(body: ast::CreateTableBody) -> Result<Vec<
|
|||
.collect::<Vec<_>>())
|
||||
}
|
||||
|
||||
/// Status code returned next to the value by [`text_to_integer`].
///
/// The discriminant values follow the SQLite routine linked below.
///
/// Reference:
/// https://github.com/sqlite/sqlite/blob/master/src/util.c#L798
#[derive(Debug, PartialEq)]
pub enum CastTextToIntResultCode {
    /// The text is empty, or no leading part of it parses as an integer.
    NotInt = -1,
    /// The whole trimmed text was converted.
    Success = 0,
    /// A leading integer was converted, but other characters follow it.
    ExcessSpace = 1,
    /// The leading integer does not fit in an `i64`.
    TooLargeOrMalformed = 2,
    /// Not returned by `text_to_integer`; presumably kept for parity with the reference.
    #[allow(dead_code)]
    SpecialCase = 3,
}
|
||||
|
||||
pub fn text_to_integer(text: &str) -> (OwnedValue, CastTextToIntResultCode) {
|
||||
let text = text.trim();
|
||||
if text.is_empty() {
|
||||
return (OwnedValue::Integer(0), CastTextToIntResultCode::NotInt);
|
||||
}
|
||||
let mut accum = String::new();
|
||||
let mut sign = false;
|
||||
let mut has_digit = false;
|
||||
let mut excess_space = false;
|
||||
|
||||
let chars = text.chars();
|
||||
|
||||
for c in chars {
|
||||
match c {
|
||||
'0'..='9' => {
|
||||
has_digit = true;
|
||||
accum.push(c);
|
||||
}
|
||||
'+' | '-' if !has_digit && !sign => {
|
||||
sign = true;
|
||||
accum.push(c);
|
||||
}
|
||||
_ => {
|
||||
excess_space = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
match accum.parse::<i64>() {
|
||||
Ok(num) => {
|
||||
if excess_space {
|
||||
return (
|
||||
OwnedValue::Integer(num),
|
||||
CastTextToIntResultCode::ExcessSpace,
|
||||
);
|
||||
}
|
||||
|
||||
return (OwnedValue::Integer(num), CastTextToIntResultCode::Success);
|
||||
}
|
||||
Err(e) => match e.kind() {
|
||||
IntErrorKind::NegOverflow | IntErrorKind::PosOverflow => (
|
||||
OwnedValue::Integer(0),
|
||||
CastTextToIntResultCode::TooLargeOrMalformed,
|
||||
),
|
||||
_ => (OwnedValue::Integer(0), CastTextToIntResultCode::NotInt),
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
/// Status code returned next to the value by [`text_to_real`].
///
/// The discriminant values follow the SQLite routine linked below.
///
/// Reference
/// https://github.com/sqlite/sqlite/blob/master/src/util.c#L529
#[derive(Debug, PartialEq)]
pub enum CastTextToRealResultCode {
    /// The parsed number has neither a decimal point nor an exponent.
    PureInt = 1,
    /// The whole trimmed text is a number with a decimal point and/or an exponent.
    HasDecimal = 2,
    /// No number could be parsed from the text.
    NotValid = 0,
    /// A number with a decimal point and/or an exponent was parsed, but other
    /// characters follow it.
    NotValidButPrefix = -1,
}
|
||||
|
||||
pub fn text_to_real(text: &str) -> (OwnedValue, CastTextToRealResultCode) {
|
||||
let text = text.trim();
|
||||
if text.is_empty() {
|
||||
return (OwnedValue::Float(0.0), CastTextToRealResultCode::NotValid);
|
||||
}
|
||||
let mut accum = String::new();
|
||||
let mut has_decimal_separator = false;
|
||||
let mut sign = false;
|
||||
let mut exp_sign = false;
|
||||
let mut has_exponent = false;
|
||||
let mut has_digit = false;
|
||||
let mut has_decimal_digit = false;
|
||||
let mut excess_space = false;
|
||||
|
||||
let chars = text.chars();
|
||||
|
||||
for c in chars {
|
||||
match c {
|
||||
'0'..='9' if !has_decimal_separator => {
|
||||
has_digit = true;
|
||||
accum.push(c);
|
||||
}
|
||||
'0'..='9' => {
|
||||
has_decimal_digit = true;
|
||||
accum.push(c);
|
||||
}
|
||||
'+' | '-' if !has_digit && !sign => {
|
||||
sign = true;
|
||||
accum.push(c);
|
||||
}
|
||||
'+' | '-' if has_exponent && !exp_sign => {
|
||||
exp_sign = true;
|
||||
accum.push(c);
|
||||
}
|
||||
'.' if !has_decimal_separator => {
|
||||
has_decimal_separator = true;
|
||||
accum.push(c);
|
||||
}
|
||||
'E' | 'e' if !has_decimal_separator || has_decimal_digit => {
|
||||
has_exponent = true;
|
||||
accum.push(c);
|
||||
}
|
||||
_ => {
|
||||
excess_space = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if let Ok(num) = accum.parse::<f64>() {
|
||||
if !has_decimal_separator && !exp_sign && !has_exponent {
|
||||
return (OwnedValue::Float(num), CastTextToRealResultCode::PureInt);
|
||||
}
|
||||
|
||||
if excess_space {
|
||||
// TODO see if this branch satisfies: not a valid number, but has a valid prefix which
|
||||
// includes a decimal point and/or an eNNN clause
|
||||
return (
|
||||
OwnedValue::Float(num),
|
||||
CastTextToRealResultCode::NotValidButPrefix,
|
||||
);
|
||||
}
|
||||
|
||||
return (OwnedValue::Float(num), CastTextToRealResultCode::HasDecimal);
|
||||
}
|
||||
|
||||
return (OwnedValue::Float(0.0), CastTextToRealResultCode::NotValid);
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
pub mod tests {
|
||||
use super::*;
|
||||
|
@ -635,4 +778,196 @@ pub mod tests {
|
|||
assert!(!check_ident_equivalency("\"foo\"", "[bar]"));
|
||||
assert!(!check_ident_equivalency("foo", "\"bar\""));
|
||||
}
|
||||
|
||||
#[test]
fn test_text_to_integer() {
    use CastTextToIntResultCode::{ExcessSpace, NotInt, Success, TooLargeOrMalformed};

    // (input text, expected integer, expected result code)
    let cases = [
        ("1", 1, Success),
        ("-1", -1, Success),
        ("10000000", 10000000, Success),
        ("-10000000", -10000000, Success),
        ("xxx", 0, NotInt),
        ("123xxx", 123, ExcessSpace),
        // Boundaries of the i64 range and the first values outside it.
        ("9223372036854775807", i64::MAX, Success),
        ("9223372036854775808", 0, TooLargeOrMalformed),
        ("-9223372036854775808", i64::MIN, Success),
        ("-9223372036854775809", 0, TooLargeOrMalformed),
    ];

    for (input, value, code) in cases {
        assert_eq!(text_to_integer(input), (OwnedValue::Integer(value), code));
    }
}
|
||||
|
||||
#[test]
fn test_text_to_real() {
    use CastTextToRealResultCode::{HasDecimal, NotValid, PureInt};

    // (input text, expected float, expected result code)
    let cases = [
        ("1", 1.0, PureInt),
        ("-1", -1.0, PureInt),
        ("1.0", 1.0, HasDecimal),
        ("-1.0", -1.0, HasDecimal),
        ("1e10", 1e10, HasDecimal),
        ("-1e10", -1e10, HasDecimal),
        ("1e-10", 1e-10, HasDecimal),
        ("-1e-10", -1e-10, HasDecimal),
        ("1.123e10", 1.123e10, HasDecimal),
        ("-1.123e10", -1.123e10, HasDecimal),
        ("1.123e-10", 1.123e-10, HasDecimal),
        ("-1.123e-10", -1.123e-10, HasDecimal),
        // A sign after the digits ends the number; the rest is ignored.
        ("1-282584294928", 1.0, PureInt),
        ("xxx", 0.0, NotValid),
        // Largest finite magnitudes, and the first exponents that overflow to infinity.
        ("1.7976931348623157e308", f64::MAX, HasDecimal),
        ("1.7976931348623157e309", f64::INFINITY, HasDecimal),
        ("-1.7976931348623157e308", f64::MIN, HasDecimal),
        ("-1.7976931348623157e309", f64::NEG_INFINITY, HasDecimal),
    ];

    for (input, value, code) in cases {
        assert_eq!(text_to_real(input), (OwnedValue::Float(value), code));
    }
}
|
||||
}
|
||||
|
|
|
@ -41,7 +41,10 @@ use crate::translate::plan::{ResultSetColumn, TableReference};
|
|||
use crate::types::{
|
||||
AggContext, Cursor, CursorResult, ExternalAggState, OwnedValue, Record, SeekKey, SeekOp,
|
||||
};
|
||||
use crate::util::parse_schema_rows;
|
||||
use crate::util::{
|
||||
parse_schema_rows, text_to_integer, text_to_real, CastTextToIntResultCode,
|
||||
CastTextToRealResultCode,
|
||||
};
|
||||
use crate::vdbe::builder::CursorType;
|
||||
use crate::vdbe::insn::Insn;
|
||||
use crate::vector::{vector32, vector64, vector_distance_cos, vector_extract};
|
||||
|
@ -403,28 +406,6 @@ macro_rules! must_be_btree_cursor {
|
|||
}};
|
||||
}
|
||||
|
||||
/// Result code returned next to the value by `cast_text_to_integer`.
///
/// NOTE(review): an enum with this name is also imported from `crate::util` in this
/// file's `use` block — confirm whether this local definition is still needed.
///
/// Reference:
/// https://github.com/sqlite/sqlite/blob/master/src/util.c#L798
enum CastTextToIntResultCode {
    /// The text is empty, or no prefix of it parses as an integer.
    NotInt = -1,
    /// The text was converted without leftover characters.
    Success = 0,
    /// An integer was converted, but the text is longer than its digits.
    ExcessSpace = 1,
    /// Marked as unused here; presumably reserved for out-of-range input.
    #[allow(dead_code)]
    TooLargeOrMalformed = 2,
    /// Marked as unused here; presumably kept for parity with the reference.
    #[allow(dead_code)]
    SpecialCase = 3,
}
|
||||
|
||||
/// Result code returned next to the value by `cast_text_to_real`.
///
/// NOTE(review): an enum with this name is also imported from `crate::util` in this
/// file's `use` block — confirm whether this local definition is still needed.
///
/// Reference
/// https://github.com/sqlite/sqlite/blob/master/src/util.c#L529
enum CastTextToRealResultCode {
    /// The converted number is integral.
    PureInt = 1,
    /// The converted number has a fractional part.
    HasDecimal = 2,
    /// No number could be converted from the text.
    NotValid = 0,
    /// Marked as unused here; presumably "invalid, but with a valid numeric prefix".
    #[allow(dead_code)]
    NotValidButPrefix = -1,
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct Program {
|
||||
pub max_registers: usize,
|
||||
|
@ -3652,35 +3633,7 @@ fn exec_replace(source: &OwnedValue, pattern: &OwnedValue, replacement: &OwnedVa
|
|||
/// because it is no part of the integer prefix. For example, "CAST('123e+5' AS INTEGER)" results in 123, not in 12300000.
|
||||
/// The CAST operator understands decimal integers only — conversion of hexadecimal integers stops at the "x" in the "0x" prefix of the hexadecimal integer string and thus result of the CAST is always zero.
|
||||
fn cast_text_to_integer(text: &str) -> (OwnedValue, CastTextToIntResultCode) {
|
||||
let text = text.trim();
|
||||
if text.is_empty() {
|
||||
return (OwnedValue::Integer(0), CastTextToIntResultCode::NotInt);
|
||||
}
|
||||
if let Ok(i) = text.parse::<i64>() {
|
||||
// Compare if the text value has more characters that the number of digits + the sign in the parsed int
|
||||
if i.to_string().len() < text.len() {
|
||||
// Means it was probably casted from a real or some malformed number.
|
||||
return (OwnedValue::Integer(i), CastTextToIntResultCode::ExcessSpace);
|
||||
}
|
||||
|
||||
return (OwnedValue::Integer(i), CastTextToIntResultCode::Success);
|
||||
}
|
||||
// Try to find longest valid prefix that parses as an integer
|
||||
// TODO: inefficient
|
||||
let mut end_index = text.len().saturating_sub(1) as isize;
|
||||
while end_index >= 0 {
|
||||
if let Ok(i) = text[..=end_index as usize].parse::<i64>() {
|
||||
// Compare if the text value has more characters that the number of digits + the sign in the parsed int
|
||||
if i.to_string().len() < text.len() {
|
||||
// Means it was probably casted from a real or some malformed number.
|
||||
return (OwnedValue::Integer(i), CastTextToIntResultCode::ExcessSpace);
|
||||
}
|
||||
|
||||
return (OwnedValue::Integer(i), CastTextToIntResultCode::Success);
|
||||
}
|
||||
end_index -= 1;
|
||||
}
|
||||
return (OwnedValue::Integer(0), CastTextToIntResultCode::NotInt);
|
||||
text_to_integer(text)
|
||||
}
|
||||
|
||||
/// When casting a TEXT value to REAL, the longest possible prefix of the value that can be interpreted
|
||||
|
@ -3688,31 +3641,7 @@ fn cast_text_to_integer(text: &str) -> (OwnedValue, CastTextToIntResultCode) {
|
|||
/// the TEXT value are ignored when converting from TEXT to REAL.
|
||||
/// If there is no prefix that can be interpreted as a real number, the result of the conversion is 0.0.
|
||||
fn cast_text_to_real(text: &str) -> (OwnedValue, CastTextToRealResultCode) {
|
||||
let trimmed = text.trim_start();
|
||||
if trimmed.is_empty() {
|
||||
return (OwnedValue::Float(0.0), CastTextToRealResultCode::NotValid);
|
||||
}
|
||||
if let Ok(num) = trimmed.parse::<f64>() {
|
||||
if num.fract() == 0.0 {
|
||||
return (OwnedValue::Float(num), CastTextToRealResultCode::PureInt);
|
||||
}
|
||||
|
||||
return (OwnedValue::Float(num), CastTextToRealResultCode::HasDecimal);
|
||||
}
|
||||
// Try to find longest valid prefix that parses as a float
|
||||
// TODO: inefficient
|
||||
let mut end_index = trimmed.len().saturating_sub(1) as isize;
|
||||
while end_index >= 0 {
|
||||
if let Ok(num) = trimmed[..=end_index as usize].parse::<f64>() {
|
||||
if num.fract() == 0.0 {
|
||||
return (OwnedValue::Float(num), CastTextToRealResultCode::PureInt);
|
||||
}
|
||||
|
||||
return (OwnedValue::Float(num), CastTextToRealResultCode::HasDecimal);
|
||||
}
|
||||
end_index -= 1;
|
||||
}
|
||||
return (OwnedValue::Float(0.0), CastTextToRealResultCode::NotValid);
|
||||
text_to_real(text)
|
||||
}
|
||||
|
||||
/// NUMERIC Casting a TEXT or BLOB value into NUMERIC yields either an INTEGER or a REAL result.
|
||||
|
@ -3747,6 +3676,7 @@ fn checked_cast_text_to_numeric(text: &str) -> std::result::Result<OwnedValue, (
|
|||
fn cast_text_to_numeric(text: &str) -> OwnedValue {
|
||||
let (real_cast, rc_real) = cast_text_to_real(text);
|
||||
let (int_cast, rc_int) = cast_text_to_integer(text);
|
||||
|
||||
match (rc_real, rc_int) {
|
||||
(
|
||||
CastTextToRealResultCode::NotValid,
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue