Add jsonb impl to existing json functions where possible

This commit is contained in:
Ihor Andrianov 2025-03-16 02:08:49 +02:00
parent e327707ac6
commit dc6342c0de
No known key found for this signature in database
4 changed files with 234 additions and 170 deletions

View file

@ -197,7 +197,7 @@ mod tests {
}
fn create_json(s: &str) -> OwnedValue {
OwnedValue::Text(Text::json(s))
OwnedValue::Text(Text::json(s.to_string()))
}
#[test]

View file

@ -192,6 +192,25 @@ pub enum ElementType {
RESERVED3 = 15,
}
/// Converts a JSONB element type into its lowercase type-name string
/// (`"array"`, `"object"`, `"null"`, `"true"`, `"false"`, `"real"`,
/// `"integer"` or `"text"`).
///
/// Implemented as `From` rather than `Into`: the standard library's blanket
/// impl still provides `Into<String> for ElementType`, so existing
/// `element_type.into()` call sites keep working unchanged.
///
/// # Panics
///
/// Hits `unreachable!()` for any variant not listed in the match below.
impl From<ElementType> for String {
    fn from(element_type: ElementType) -> Self {
        let name = match element_type {
            ElementType::ARRAY => "array",
            ElementType::OBJECT => "object",
            ElementType::NULL => "null",
            ElementType::TRUE => "true",
            ElementType::FALSE => "false",
            ElementType::FLOAT | ElementType::FLOAT5 => "real",
            ElementType::INT | ElementType::INT5 => "integer",
            ElementType::TEXT | ElementType::TEXT5 | ElementType::TEXTJ | ElementType::TEXTRAW => {
                "text"
            }
            _ => unreachable!(),
        };
        name.to_owned()
    }
}
impl TryFrom<u8> for ElementType {
type Error = LimboError;
@ -223,7 +242,7 @@ type PayloadSize = usize;
#[derive(Debug, Clone, Copy)]
pub struct JsonbHeader(ElementType, PayloadSize);
enum HeaderFormat {
pub(crate) enum HeaderFormat {
Inline([u8; 1]), // Small payloads embedded directly in the header
OneByte([u8; 2]), // Medium payloads with 1-byte size field
TwoBytes([u8; 3]), // Large payloads with 2-byte size field
@ -231,7 +250,7 @@ enum HeaderFormat {
}
impl HeaderFormat {
fn as_bytes(&self) -> &[u8] {
pub fn as_bytes(&self) -> &[u8] {
match self {
Self::Inline(bytes) => bytes,
Self::OneByte(bytes) => bytes,
@ -246,6 +265,10 @@ impl JsonbHeader {
Self(element_type, payload_size)
}
pub fn make_null() -> Self {
Self(ElementType::NULL, 0)
}
fn from_slice(cursor: usize, slice: &[u8]) -> Result<(Self, usize)> {
match slice.get(cursor) {
Some(header_byte) => {
@ -296,7 +319,7 @@ impl JsonbHeader {
}
}
fn into_bytes(self) -> HeaderFormat {
pub fn into_bytes(self) -> HeaderFormat {
let (element_type, payload_size) = (self.0, self.1);
match payload_size {
@ -361,17 +384,36 @@ impl Jsonb {
self.data.len()
}
/// Creates a JSONB value that consists solely of an `ARRAY` header with a
/// payload size of zero.
///
/// `size` is used only as the initial capacity of the backing buffer.
///
/// # Panics
///
/// Panics if writing the initial header fails.
pub fn make_empty_array(size: usize) -> Self {
    let mut array = Self {
        data: Vec::with_capacity(size),
    };
    array
        .write_element_header(0, ElementType::ARRAY, 0)
        .unwrap();
    array
}
/// Appends `data` verbatim to the end of the underlying buffer.
///
/// No validation is performed and the header at offset 0 is left untouched.
/// NOTE(review): presumably `data` must be one complete encoded element and
/// the caller must invoke `finalize_array_unsafe` afterwards — confirm.
pub fn append_to_array_unsafe(&mut self, data: &[u8]) {
    self.data.extend(data.iter().copied());
}
/// Rewrites the header at offset 0 as an `ARRAY` header whose payload size is
/// the current buffer length minus one.
///
/// NOTE(review): the `- 1` presumably accounts for the single header byte
/// written by `make_empty_array` — confirm. The subtraction underflows if the
/// buffer is empty.
///
/// # Errors
///
/// Propagates any error returned while writing the header.
pub fn finalize_array_unsafe(&mut self) -> Result<()> {
    let payload_size = self.len() - 1;
    self.write_element_header(0, ElementType::ARRAY, payload_size)?;
    Ok(())
}
/// Decodes the element header located at `cursor` in this value's buffer.
///
/// Returns exactly what `JsonbHeader::from_slice` produces: the decoded
/// header and a `usize` that callers in this file add to the position to get
/// past the header.
fn read_header(&self, cursor: usize) -> Result<(JsonbHeader, usize)> {
    JsonbHeader::from_slice(cursor, &self.data)
}
pub fn is_valid(&self) -> Result<()> {
pub fn is_valid(&self) -> Result<ElementType> {
match self.read_header(0) {
Ok((header, offset)) => {
if let Some(_) = self.data.get(offset..offset + header.1) {
Ok(())
Ok(header.0)
} else {
bail_parse_error!("malformed JSON")
}
@ -1395,6 +1437,33 @@ impl Jsonb {
Ok((Jsonb::from_raw_data(&self.data[pos..end]), header.0))
}
pub fn get_by_path_raw(&self, path: &JsonPath) -> Result<&[u8]> {
let mut pos = 0;
let mut string_buffer = String::with_capacity(1024);
for segment in path.elements.iter() {
pos = self.navigate_to_segment(segment, pos, &mut string_buffer)?;
}
let (header, skip_header) = self.read_header(pos)?;
let end = pos + skip_header + header.1;
Ok(&self.data[pos..end])
}
pub fn array_len(&self) -> Result<usize> {
let (header, header_skip) = self.read_header(0)?;
if header.0 != ElementType::ARRAY {
return Ok(0);
}
let mut count = 0;
let mut pos = header_skip;
while pos < header_skip + header.1 {
pos = self.skip_element(pos)?;
count += 1;
}
Ok(count)
}
fn navigate_to_segment(
&self,
segment: &PathElement,
@ -1448,7 +1517,8 @@ impl Jsonb {
};
if let Some(id) = idx {
let id = id.to_owned();
if id > 0 {
if id >= 0 {
for _ in 0..id as usize {
if current_pos < pos + parent_size {
current_pos = self.skip_element(current_pos)?;
@ -1530,6 +1600,7 @@ fn compare(key: (&str, ElementType), path_key: (&str, bool)) -> bool {
pub fn unescape_string(input: &str) -> String {
let mut result = String::with_capacity(input.len());
let mut chars = input.chars().peekable();
let mut code_point = String::with_capacity(5);
while let Some(c) = chars.next() {
if c == '\\' {
@ -1542,10 +1613,24 @@ pub fn unescape_string(input: &str) -> String {
Some('"') => result.push('"'),
Some('b') => result.push('\u{0008}'),
Some('f') => result.push('\u{000C}'),
Some('x') => {
code_point.clear();
for _ in 0..2 {
if let Some(hex_char) = chars.next() {
code_point.push(hex_char);
} else {
break;
}
}
if let Ok(code) = u16::from_str_radix(&code_point, 16) {
if let Some(ch) = char::from_u32(code as u32) {
result.push(ch)
}
}
}
// Handle \uXXXX format (JSON style)
Some('u') => {
let mut code_point = String::new();
code_point.clear();
for _ in 0..4 {
if let Some(hex_char) = chars.next() {
code_point.push(hex_char);
@ -1556,18 +1641,18 @@ pub fn unescape_string(input: &str) -> String {
if let Ok(code) = u16::from_str_radix(&code_point, 16) {
// Check if this is a high surrogate
if (0xD800..=0xDBFF).contains(&code) {
if matches!(code, 0xD800..=0xDBFF) {
if chars.next() == Some('\\') && chars.next() == Some('u') {
let mut low_surrogate = String::new();
code_point.clear();
for _ in 0..4 {
if let Some(hex_char) = chars.next() {
low_surrogate.push(hex_char);
code_point.push(hex_char);
} else {
break;
}
}
if let Ok(low_code) = u16::from_str_radix(&low_surrogate, 16) {
if let Ok(low_code) = u16::from_str_radix(&code_point, 16) {
if (0xDC00..=0xDFFF).contains(&low_code) {
let high_ten_bits = (code - 0xD800) as u32;
let low_ten_bits = (low_code - 0xDC00) as u32;

View file

@ -5,6 +5,7 @@ mod json_path;
mod jsonb;
mod ser;
use crate::bail_constraint_error;
pub use crate::json::de::from_str;
use crate::json::error::Error as JsonError;
pub use crate::json::json_operations::{json_patch, json_remove};
@ -13,7 +14,7 @@ pub use crate::json::ser::to_string;
use crate::types::{OwnedValue, Text, TextSubtype};
use crate::{bail_parse_error, json::de::ordered_object};
use indexmap::IndexMap;
use jsonb::Jsonb;
use jsonb::{ElementType, Jsonb, JsonbHeader};
use ser::to_string_pretty;
use serde::{Deserialize, Serialize};
use std::borrow::Cow;
@ -49,7 +50,7 @@ pub fn get_json(json_value: &OwnedValue, indent: Option<&str>) -> crate::Result<
None => to_string(&json_val)?,
};
Ok(OwnedValue::Text(Text::json(&json)))
Ok(OwnedValue::Text(Text::json(json)))
}
OwnedValue::Blob(b) => {
let jsonbin = Jsonb::new(b.len(), Some(b));
@ -67,25 +68,13 @@ pub fn get_json(json_value: &OwnedValue, indent: Option<&str>) -> crate::Result<
None => to_string(&json_val)?,
};
Ok(OwnedValue::Text(Text::json(&json)))
Ok(OwnedValue::Text(Text::json(json)))
}
}
}
pub fn jsonb(json_value: &OwnedValue) -> crate::Result<OwnedValue> {
let jsonbin = match json_value {
OwnedValue::Null | OwnedValue::Integer(_) | OwnedValue::Float(_) | OwnedValue::Text(_) => {
Jsonb::from_str(&json_value.to_text().unwrap())
}
OwnedValue::Blob(blob) => {
let blob = Jsonb::new(blob.len(), Some(&blob));
blob.is_valid()?;
Ok(blob)
}
_ => {
unimplemented!()
}
};
let jsonbin = convert_dbtype_to_jsonb(json_value);
match jsonbin {
Ok(jsonbin) => Ok(OwnedValue::Blob(Rc::new(jsonbin.data()))),
Err(_) => {
@ -94,6 +83,26 @@ pub fn jsonb(json_value: &OwnedValue) -> crate::Result<OwnedValue> {
}
}
/// Converts a database value into its JSONB representation.
///
/// * `Text` is parsed as JSON text.
/// * `Blob` is wrapped as raw JSONB and checked with `is_valid`.
/// * `Null`, `Float` and `Integer` are rendered to text and parsed.
///
/// # Errors
///
/// Returns a constraint error for `Record` and `Agg` values, and propagates
/// any parse or validation error from the conversions above.
fn convert_dbtype_to_jsonb(val: &OwnedValue) -> crate::Result<Jsonb> {
    match val {
        OwnedValue::Text(text) => Jsonb::from_str(text.as_str()),
        OwnedValue::Blob(blob) => {
            let json = Jsonb::from_raw_data(blob);
            json.is_valid()?;
            Ok(json)
        }
        OwnedValue::Record(_) | OwnedValue::Agg(_) => {
            bail_constraint_error!("Wrong number of arguments");
        }
        OwnedValue::Null => Jsonb::from_str("null"),
        OwnedValue::Float(float) => {
            // `ryu` formats the float into a stack buffer, avoiding a heap allocation.
            let mut buff = ryu::Buffer::new();
            Jsonb::from_str(buff.format(*float))
        }
        OwnedValue::Integer(int) => Jsonb::from_str(&int.to_string()),
    }
}
fn get_json_value(json_value: &OwnedValue) -> crate::Result<Val> {
match json_value {
OwnedValue::Text(ref t) => match from_str::<Val>(t.as_str()) {
@ -149,29 +158,31 @@ pub fn json_array(values: &[OwnedValue]) -> crate::Result<OwnedValue> {
}
s.push(']');
Ok(OwnedValue::Text(Text::json(&s)))
Ok(OwnedValue::Text(Text::json(s)))
}
pub fn json_array_length(
json_value: &OwnedValue,
json_path: Option<&OwnedValue>,
) -> crate::Result<OwnedValue> {
let json = get_json_value(json_value)?;
let json = convert_dbtype_to_jsonb(json_value)?;
let arr_val = if let Some(path) = json_path {
match json_extract_single(&json, path, true)? {
Some(val) => val,
None => return Ok(OwnedValue::Null),
}
} else {
&json
};
match arr_val {
Val::Array(val) => Ok(OwnedValue::Integer(val.len() as i64)),
Val::Null => Ok(OwnedValue::Null),
_ => Ok(OwnedValue::Integer(0)),
if json_path.is_none() {
let result = json.array_len()?;
return Ok(OwnedValue::Integer(result as i64));
}
let path = json_path_from_owned_value(json_path.expect("We already checked none"), true)?;
if let Some(path) = path {
if let Ok(len) = json
.get_by_path(&path)
.and_then(|(json, _)| json.array_len())
{
return Ok(OwnedValue::Integer(len as i64));
}
}
Ok(OwnedValue::Null)
}
pub fn json_set(json: &OwnedValue, values: &[OwnedValue]) -> crate::Result<OwnedValue> {
@ -222,13 +233,14 @@ pub fn json_arrow_extract(value: &OwnedValue, path: &OwnedValue) -> crate::Resul
return Ok(OwnedValue::Null);
}
let json = get_json_value(value)?;
let extracted = json_extract_single(&json, path, false)?;
if let Some(val) = extracted {
let json = to_string(val)?;
Ok(OwnedValue::Text(Text::json(&json)))
if let Some(path) = json_path_from_owned_value(path, false)? {
let json = convert_dbtype_to_jsonb(value)?;
let extracted = json.get_by_path(&path);
if let Ok((json, _)) = extracted {
Ok(OwnedValue::Text(Text::json(json.to_string()?)))
} else {
Ok(OwnedValue::Null)
}
} else {
Ok(OwnedValue::Null)
}
@ -243,11 +255,17 @@ pub fn json_arrow_shift_extract(
if let OwnedValue::Null = value {
return Ok(OwnedValue::Null);
}
let json = get_json_value(value)?;
let extracted = json_extract_single(&json, path, false)?.unwrap_or(&Val::Null);
convert_json_to_db_type(extracted, true)
if let Some(path) = json_path_from_owned_value(path, false)? {
let json = convert_dbtype_to_jsonb(value)?;
let extracted = json.get_by_path(&path);
if let Ok((json, element_type)) = extracted {
Ok(json_string_to_db_type(json.to_string()?, element_type))
} else {
Ok(OwnedValue::Null)
}
} else {
Ok(OwnedValue::Null)
}
}
/// Extracts a JSON value from a JSON object or array.
@ -261,37 +279,66 @@ pub fn json_extract(value: &OwnedValue, paths: &[OwnedValue]) -> crate::Result<O
if paths.is_empty() {
return Ok(OwnedValue::Null);
} else if paths.len() == 1 {
let json = get_json_value(value)?;
let extracted = json_extract_single(&json, &paths[0], true)?.unwrap_or(&Val::Null);
if let Some(path) = json_path_from_owned_value(&paths[0], true)? {
let json = convert_dbtype_to_jsonb(value)?;
let (expected_value, value_type) = json.get_by_path(&path)?;
return convert_json_to_db_type(extracted, false);
}
let json = get_json_value(value)?;
let mut result = "[".to_string();
for path in paths {
match path {
OwnedValue::Null => {
return Ok(OwnedValue::Null);
}
_ => {
let extracted = json_extract_single(&json, path, true)?.unwrap_or(&Val::Null);
if paths.len() == 1 && extracted == &Val::Null {
return Ok(OwnedValue::Null);
}
result.push_str(&to_string(&extracted)?);
result.push(',');
}
return Ok(json_string_to_db_type(
expected_value.to_string()?,
value_type,
));
} else {
return Ok(OwnedValue::Null);
}
}
result.pop(); // remove the final comma
result.push(']');
let json = convert_dbtype_to_jsonb(value)?;
let mut result = Jsonb::make_empty_array(json.len());
Ok(OwnedValue::Text(Text::json(&result)))
let paths = paths
.into_iter()
.map(|p| json_path_from_owned_value(p, true));
for path in paths {
if let Some(path) = path? {
let fragment = json.get_by_path_raw(&path);
if let Ok(data) = fragment {
result.append_to_array_unsafe(data);
} else {
result.append_to_array_unsafe(JsonbHeader::make_null().into_bytes().as_bytes());
}
} else {
return Ok(OwnedValue::Null);
}
}
result.finalize_array_unsafe()?;
Ok(json_string_to_db_type(
result.to_string()?,
ElementType::ARRAY,
))
}
/// Turns the textual rendering of a JSON element into a database value,
/// chosen by the element's JSONB type.
///
/// * `NULL` becomes `Null`; `TRUE` / `FALSE` become the integers 1 / 0.
/// * Integer and float types are parsed from `json`.
/// * Arrays and objects are returned as JSON-subtyped text.
/// * Text types have their first and last byte dropped (presumably the
///   surrounding quotes — confirm) and are returned as plain text.
///
/// # Panics
///
/// Panics if a numeric element does not parse, if a text element is too short
/// to strip, or (via `unreachable!()`) for any other element type.
fn json_string_to_db_type(mut json: String, element_type: ElementType) -> OwnedValue {
    match element_type {
        ElementType::NULL => OwnedValue::Null,
        ElementType::TRUE => OwnedValue::Integer(1),
        ElementType::FALSE => OwnedValue::Integer(0),
        ElementType::INT | ElementType::INT5 => {
            OwnedValue::Integer(json.parse().expect("Should be valid i64"))
        }
        ElementType::FLOAT | ElementType::FLOAT5 => {
            OwnedValue::Float(json.parse().expect("Should be valid f64"))
        }
        ElementType::ARRAY | ElementType::OBJECT => OwnedValue::Text(Text::json(json)),
        ElementType::TEXT | ElementType::TEXT5 | ElementType::TEXTJ | ElementType::TEXTRAW => {
            // Drop the trailing byte first, then the leading one.
            let last = json.len() - 1;
            json.remove(last);
            json.remove(0);
            let value = Rc::new(json.into_bytes());
            OwnedValue::Text(Text {
                value,
                subtype: TextSubtype::Text,
            })
        }
        _ => unreachable!(),
    }
}
/// Returns a value with type defined by SQLite documentation:
@ -324,7 +371,7 @@ fn convert_json_to_db_type(extracted: &Val, all_as_db: bool) -> crate::Result<Ow
if all_as_db {
Ok(OwnedValue::build_text(&json))
} else {
Ok(OwnedValue::Text(Text::json(&json)))
Ok(OwnedValue::Text(Text::json(json)))
}
}
}
@ -357,91 +404,23 @@ pub fn json_type(value: &OwnedValue, path: Option<&OwnedValue>) -> crate::Result
if let OwnedValue::Null = value {
return Ok(OwnedValue::Null);
}
if path.is_none() {
let json = convert_dbtype_to_jsonb(value)?;
let element_type = json.is_valid()?;
let json = get_json_value(value)?;
return Ok(OwnedValue::Text(Text::json(element_type.into())));
}
if let Some(path) = json_path_from_owned_value(path.unwrap(), true)? {
let json = convert_dbtype_to_jsonb(value)?;
let json = if let Some(path) = path {
match json_extract_single(&json, path, true)? {
Some(val) => val,
None => return Ok(OwnedValue::Null),
if let Ok((_, element_type)) = json.get_by_path(&path) {
return Ok(OwnedValue::Text(Text::json(element_type.into())));
} else {
return Ok(OwnedValue::Null);
}
} else {
&json
};
let val = match json {
Val::Null => "null",
Val::Bool(v) => {
if *v {
"true"
} else {
"false"
}
}
Val::Integer(_) => "integer",
Val::Float(_) => "real",
Val::String(_) => "text",
Val::Array(_) => "array",
Val::Object(_) => "object",
Val::Removed => unreachable!(),
};
Ok(OwnedValue::Text(Text::json(val)))
}
/// Returns the value at the given JSON path. If the path does not exist, it returns None.
/// If the path is an invalid path, returns an error.
///
/// *strict* - if false, we will try to resolve the path even if it does not start with "$"
/// in a way that's compatible with the `->` and `->>` operators. See examples in the docs:
/// https://sqlite.org/json1.html#the_and_operators
fn json_extract_single<'a>(
json: &'a Val,
path: &OwnedValue,
strict: bool,
) -> crate::Result<Option<&'a Val>> {
let json_path = match json_path_from_owned_value(path, strict)? {
Some(path) => path,
None => return Ok(None),
};
let mut current_element = &Val::Null;
for element in json_path.elements.iter() {
match element {
PathElement::Root() => {
current_element = json;
}
PathElement::Key(key, _) => match current_element {
Val::Object(map) => {
if let Some((_, value)) = map.iter().find(|(k, _)| k == key) {
current_element = value;
} else {
return Ok(None);
}
}
_ => return Ok(None),
},
PathElement::ArrayLocator(idx) => match current_element {
Val::Array(array) => {
if let Some(mut idx) = *idx {
if idx < 0 {
idx += array.len() as i32;
}
if idx < array.len() as i32 {
current_element = &array[idx as usize];
} else {
return Ok(None);
}
}
}
_ => return Ok(None),
},
}
return Ok(OwnedValue::Null);
}
Ok(Some(current_element))
}
fn json_path_from_owned_value(path: &OwnedValue, strict: bool) -> crate::Result<Option<JsonPath>> {
@ -674,7 +653,7 @@ pub fn json_object(values: &[OwnedValue]) -> crate::Result<OwnedValue> {
.collect::<Result<IndexMap<String, Val>, _>>()?;
let result = crate::json::to_string(&value_map)?;
Ok(OwnedValue::Text(Text::json(&result)))
Ok(OwnedValue::Text(Text::json(result)))
}
pub fn is_json_valid(json_value: &OwnedValue) -> crate::Result<OwnedValue> {
@ -866,7 +845,7 @@ mod tests {
#[test]
fn test_json_array_simple() {
let text = OwnedValue::build_text("value1");
let json = OwnedValue::Text(Text::json("\"value2\""));
let json = OwnedValue::Text(Text::json("\"value2\"".to_string()));
let input = vec![text, json, OwnedValue::Integer(1), OwnedValue::Float(1.1)];
let result = json_array(&input).unwrap();
@ -1104,7 +1083,7 @@ mod tests {
let text_key = OwnedValue::build_text("text_key");
let text_value = OwnedValue::build_text("text_value");
let json_key = OwnedValue::build_text("json_key");
let json_value = OwnedValue::Text(Text::json(r#"{"json":"value","number":1}"#));
let json_value = OwnedValue::Text(Text::json(r#"{"json":"value","number":1}"#.to_string()));
let integer_key = OwnedValue::build_text("integer_key");
let integer_value = OwnedValue::Integer(1);
let float_key = OwnedValue::build_text("float_key");
@ -1138,7 +1117,7 @@ mod tests {
#[test]
fn test_json_object_json_value_is_rendered_as_json() {
let key = OwnedValue::build_text("key");
let value = OwnedValue::Text(Text::json(r#"{"json":"value"}"#));
let value = OwnedValue::Text(Text::json(r#"{"json":"value"}"#.to_string()));
let input = vec![key, value];
let result = json_object(&input).unwrap();

View file

@ -45,9 +45,9 @@ impl Text {
}
}
pub fn json(value: &str) -> Self {
pub fn json(value: String) -> Self {
Self {
value: Rc::new(value.as_bytes().to_vec()),
value: Rc::new(value.into_bytes()),
subtype: TextSubtype::Json,
}
}
@ -186,7 +186,7 @@ impl OwnedValue {
return Ok(OwnedValue::Null);
};
if v.is_json() {
Ok(OwnedValue::Text(Text::json(text)))
Ok(OwnedValue::Text(Text::json(text.to_string())))
} else {
Ok(OwnedValue::build_text(text))
}