add unicode scalar function

This commit is contained in:
Ethan Niser 2024-07-24 23:48:58 -07:00
parent b194d5fbdf
commit aa6531f802
6 changed files with 96 additions and 11 deletions

View file

@ -90,11 +90,11 @@ This document describes the SQLite compatibility status of Limbo:
| octet_length(X) | No | |
| printf(FORMAT,...) | No | |
| quote(X) | No | |
| random() | Yes | |
| random() | Yes | |
| randomblob(N) | No | |
| replace(X,Y,Z) | No | |
| round(X) | Yes | |
| round(X,Y) | Yes | |
| round(X) | Yes | |
| round(X,Y) | Yes | |
| rtrim(X) | No | |
| rtrim(X,Y) | No | |
| sign(X) | No | |
@ -109,12 +109,12 @@ This document describes the SQLite compatibility status of Limbo:
| substring(X,Y,Z) | No | |
| substring(X,Y) | No | |
| total_changes() | No | |
| trim(X) | Yes | |
| trim(X,Y) | Yes | |
| trim(X) | Yes | |
| trim(X,Y) | Yes | |
| typeof(X) | No | |
| unhex(X) | No | |
| unhex(X,Y) | No | |
| unicode(X) | No | |
| unicode(X) | Yes | |
| unlikely(X) | No | |
| upper(X) | Yes | |
| zeroblob(N) | No | |

View file

@ -39,6 +39,7 @@ pub enum ScalarFunc {
Min,
Max,
Date,
Unicode,
}
impl ToString for ScalarFunc {
@ -56,6 +57,7 @@ impl ToString for ScalarFunc {
ScalarFunc::Min => "min".to_string(),
ScalarFunc::Max => "max".to_string(),
ScalarFunc::Date => "date".to_string(),
ScalarFunc::Unicode => "unicode".to_string(),
}
}
}
@ -89,6 +91,7 @@ impl Func {
"round" => Ok(Func::Scalar(ScalarFunc::Round)),
"length" => Ok(Func::Scalar(ScalarFunc::Length)),
"date" => Ok(Func::Scalar(ScalarFunc::Date)),
"unicode" => Ok(Func::Scalar(ScalarFunc::Unicode)),
_ => Err(()),
}
}

View file

@ -210,7 +210,8 @@ pub fn translate_expr(
ScalarFunc::Abs
| ScalarFunc::Lower
| ScalarFunc::Upper
| ScalarFunc::Length => {
| ScalarFunc::Length
| ScalarFunc::Unicode => {
let args = if let Some(args) = args {
if args.len() != 1 {
anyhow::bail!(

View file

@ -42,7 +42,7 @@ impl Display for OwnedValue {
OwnedValue::Integer(i) => write!(f, "{}", i),
OwnedValue::Float(fl) => write!(f, "{:?}", fl),
OwnedValue::Text(s) => write!(f, "{}", s),
OwnedValue::Blob(b) => write!(f, "{:?}", b),
OwnedValue::Blob(b) => write!(f, "{}", String::from_utf8_lossy(b)),
OwnedValue::Agg(a) => match a.as_ref() {
AggContext::Avg(acc, _count) => write!(f, "{}", acc),
AggContext::Sum(acc) => write!(f, "{}", acc),

View file

@ -1150,6 +1150,11 @@ impl Program {
}
state.pc += 1;
}
ScalarFunc::Unicode => {
let reg_value = state.registers[*start_reg].borrow_mut();
state.registers[*dest] = exec_unicode(reg_value);
state.pc += 1;
}
},
}
}
@ -1268,6 +1273,23 @@ fn exec_minmax<'a>(
regs.into_iter().reduce(|a, b| op(a, b)).cloned()
}
/// Computes the result of the `unicode(X)` scalar function.
///
/// Text, integer, float and blob values are first rendered to a string with
/// `to_string()`; the Unicode code point of that string's first character is
/// returned as an `OwnedValue::Integer`. An empty rendering, or any other kind
/// of value (such as `OwnedValue::Null`), yields `OwnedValue::Null`.
fn exec_unicode(reg: &OwnedValue) -> OwnedValue {
    let is_supported = matches!(
        reg,
        OwnedValue::Text(_) | OwnedValue::Integer(_) | OwnedValue::Float(_) | OwnedValue::Blob(_)
    );
    if !is_supported {
        return OwnedValue::Null;
    }

    // Only the leading scalar value matters; the rest of the rendering is ignored.
    reg.to_string()
        .chars()
        .next()
        .map_or(OwnedValue::Null, |leading| {
            OwnedValue::Integer(i64::from(u32::from(leading)))
        })
}
fn exec_round(reg: &OwnedValue, precision: Option<OwnedValue>) -> OwnedValue {
let precision = match precision {
Some(OwnedValue::Text(x)) => x.parse().unwrap_or(0.0),
@ -1324,7 +1346,7 @@ fn exec_if(reg: &OwnedValue, null_reg: &OwnedValue, not: bool) -> bool {
mod tests {
use super::{
exec_abs, exec_if, exec_length, exec_like, exec_lower, exec_minmax, exec_random,
exec_round, exec_trim, exec_upper, OwnedValue,
exec_round, exec_trim, exec_unicode, exec_upper, OwnedValue,
};
use std::rc::Rc;
@ -1342,12 +1364,48 @@ mod tests {
let expected_len = OwnedValue::Integer(7);
assert_eq!(exec_length(&input_float), expected_len);
// Expected byte array for "example"
let expected_blob = OwnedValue::Blob(Rc::new(vec![101, 120, 97, 109, 112, 108, 101]));
let expected_blob = OwnedValue::Blob(Rc::new("example".as_bytes().to_vec()));
let expected_len = OwnedValue::Integer(7);
assert_eq!(exec_length(&expected_blob), expected_len);
}
#[test]
fn test_unicode() {
    // Shorthand for building a text value from a string literal.
    let text = |s: &str| OwnedValue::Text(Rc::new(s.to_string()));

    // (input, expected result of `exec_unicode`) pairs, checked in order.
    let cases = [
        (text("a"), OwnedValue::Integer(97)),
        (text("😊"), OwnedValue::Integer(128522)),
        (text(""), OwnedValue::Null),
        // Numbers are rendered as text first, so the first digit's code point is returned.
        (OwnedValue::Integer(23), OwnedValue::Integer(50)),
        (OwnedValue::Integer(0), OwnedValue::Integer(48)),
        (OwnedValue::Float(0.0), OwnedValue::Integer(48)),
        (OwnedValue::Float(23.45), OwnedValue::Integer(50)),
        (OwnedValue::Null, OwnedValue::Null),
        (
            OwnedValue::Blob(Rc::new("example".as_bytes().to_vec())),
            OwnedValue::Integer(101),
        ),
    ];

    for (input, expected) in cases.iter() {
        assert_eq!(exec_unicode(input), *expected);
    }
}
#[test]
fn test_minmax() {
let min_fn = |a, b| if a < b { a } else { b };

View file

@ -243,3 +243,26 @@ do_execsql_test date-with-invalid-timezone {
SELECT date('2023-05-18 15:30:45+25:00');
} {{}}
# unicode(X) tests: each case checks the code point reported for the first
# character of the argument.

# ASCII letter: 'a' is code point 97.
do_execsql_test unicode-a {
SELECT unicode('a');
} {97}
# Character outside the Basic Multilingual Plane: the emoji is code point 128522.
do_execsql_test unicode-emoji {
SELECT unicode('😊');
} {128522}
# Empty string has no first character; the expected output is empty.
do_execsql_test unicode-empty {
SELECT unicode('');
} {}
# Integer argument: expected result is 50, the code point of the leading digit '2'.
do_execsql_test unicode-number {
SELECT unicode(23);
} {50}
# Float argument: expected result is likewise 50 for the leading digit '2'.
do_execsql_test unicode-float {
SELECT unicode(23.45);
} {50}
# NULL argument: the expected output is empty.
do_execsql_test unicode-null {
SELECT unicode(NULL);
} {}