limbo/core/json/jsonb.rs

use crate::{bail_parse_error, LimboError, Result};
use std::{fmt::Write, str::from_utf8};

use super::json_path::{JsonPath, PathElement};

const SIZE_MARKER_8BIT: u8 = 12;
const SIZE_MARKER_16BIT: u8 = 13;
const SIZE_MARKER_32BIT: u8 = 14;
const MAX_JSON_DEPTH: usize = 1000;
const INFINITY_CHAR_COUNT: u8 = 5;

const fn make_whitespace_table() -> [u8; 256] {
    let mut table = [0u8; 256];

    // Mark whitespace characters
    table[0x09] = 1; // Tab
    table[0x0A] = 1; // Line feed
    table[0x0D] = 1; // Carriage return
    table[0x20] = 1; // Space

    table
}

static WS_TABLE: [u8; 256] = make_whitespace_table();

const fn make_character_type_table() -> [u8; 256] {
    let mut table = [0u8; 256];

    // Mark whitespace characters
    table[0x09] = 1; // Tab
    table[0x0A] = 1; // Line feed
    table[0x0D] = 1; // Carriage return
    table[0x20] = 1; // Space

    // Mark numeric digits
    table[0x30] = 2; // 0
    table[0x31] = 2; // 1
    table[0x32] = 2; // 2
    table[0x33] = 2; // 3
    table[0x34] = 2; // 4
    table[0x35] = 2; // 5
    table[0x36] = 2; // 6
    table[0x37] = 2; // 7
    table[0x38] = 2; // 8
    table[0x39] = 2; // 9

    // Mark hex digits (a-f, A-F)
    table[0x41] = 3; // A
    table[0x42] = 3; // B
    table[0x43] = 3; // C
    table[0x44] = 3; // D
    table[0x45] = 3; // E
    table[0x46] = 3; // F
    table[0x61] = 3; // a
    table[0x62] = 3; // b
    table[0x63] = 3; // c
    table[0x64] = 3; // d
    table[0x65] = 3; // e
    table[0x66] = 3; // f

    table
}

static CHARACTER_TYPE: [u8; 256] = make_character_type_table();

const fn make_character_type_ok_table() -> [u8; 256] {
    let mut table = [0u8; 256];

    table[0x20] |= 4; // Space
    table[0x21] |= 4; // !
                      // Skipping 0x22 (") as it needs escaping
    table[0x23] |= 4; // #
    table[0x24] |= 4; // $
    table[0x25] |= 4; // %
    table[0x26] |= 4; // &
    table[0x27] |= 4; // '
    table[0x28] |= 4; // (
    table[0x29] |= 4; // )
    table[0x2A] |= 4; // *
    table[0x2B] |= 4; // +
    table[0x2C] |= 4; // ,
    table[0x2D] |= 4; // -
    table[0x2E] |= 4; // .
    table[0x2F] |= 4; // /
    table[0x30] |= 4; // 0
    table[0x31] |= 4; // 1
    table[0x32] |= 4; // 2
    table[0x33] |= 4; // 3
    table[0x34] |= 4; // 4
    table[0x35] |= 4; // 5
    table[0x36] |= 4; // 6
    table[0x37] |= 4; // 7
    table[0x38] |= 4; // 8
    table[0x39] |= 4; // 9
    table[0x3A] |= 4; // :
    table[0x3B] |= 4; // ;
    table[0x3C] |= 4; //
    table[0x3D] |= 4; // =
    table[0x3E] |= 4; // >
    table[0x3F] |= 4; // ?
    table[0x40] |= 4; // @
    table[0x41] |= 4; // A
    table[0x42] |= 4; // B
    table[0x43] |= 4; // C
    table[0x44] |= 4; // D
    table[0x45] |= 4; // E
    table[0x46] |= 4; // F
    table[0x47] |= 4; // G
    table[0x48] |= 4; // H
    table[0x49] |= 4; // I
    table[0x4A] |= 4; // J
    table[0x4B] |= 4; // K
    table[0x4C] |= 4; // L
    table[0x4D] |= 4; // M
    table[0x4E] |= 4; // N
    table[0x4F] |= 4; // O
    table[0x50] |= 4; // P
    table[0x51] |= 4; // Q
    table[0x52] |= 4; // R
    table[0x53] |= 4; // S
    table[0x54] |= 4; // T
    table[0x55] |= 4; // U
    table[0x56] |= 4; // V
    table[0x57] |= 4; // W
    table[0x58] |= 4; // X
    table[0x59] |= 4; // Y
    table[0x5A] |= 4; // Z
    table[0x5B] |= 4; // [
                      // Skipping 0x5C (\) as it needs escaping
    table[0x5D] |= 4; // ]
    table[0x5E] |= 4; // ^
    table[0x5F] |= 4; // _
    table[0x60] |= 4; // `
    table[0x61] |= 4; // a
    table[0x62] |= 4; // b
    table[0x63] |= 4; // c
    table[0x64] |= 4; // d
    table[0x65] |= 4; // e
    table[0x66] |= 4; // f
    table[0x67] |= 4; // g
    table[0x68] |= 4; // h
    table[0x69] |= 4; // i
    table[0x6A] |= 4; // j
    table[0x6B] |= 4; // k
    table[0x6C] |= 4; // l
    table[0x6D] |= 4; // m
    table[0x6E] |= 4; // n
    table[0x6F] |= 4; // o
    table[0x70] |= 4; // p
    table[0x71] |= 4; // q
    table[0x72] |= 4; // r
    table[0x73] |= 4; // s
    table[0x74] |= 4; // t
    table[0x75] |= 4; // u
    table[0x76] |= 4; // v
    table[0x77] |= 4; // w
    table[0x78] |= 4; // x
    table[0x79] |= 4; // y
    table[0x7A] |= 4; // z
    table[0x7B] |= 4; // {
    table[0x7C] |= 4; // |
    table[0x7D] |= 4; // }
    table[0x7E] |= 4; // ~

    table
}

static CHARACTER_TYPE_OK: [u8; 256] = make_character_type_ok_table();

#[derive(Debug, Clone)]
pub struct Jsonb {
    data: Vec<u8>,
}

#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum ElementType {
    NULL = 0,
    TRUE = 1,
    FALSE = 2,
    INT = 3,
    INT5 = 4,
    FLOAT = 5,
    FLOAT5 = 6,
    TEXT = 7,
    TEXTJ = 8,
    TEXT5 = 9,
    TEXTRAW = 10,
    ARRAY = 11,
    OBJECT = 12,
    RESERVED1 = 13,
    RESERVED2 = 14,
    RESERVED3 = 15,
}

impl Into<String> for ElementType {
    fn into(self) -> String {
        let result = match self {
            ElementType::ARRAY => "array",
            ElementType::OBJECT => "object",
            ElementType::NULL => "null",
            ElementType::TRUE => "true",
            ElementType::FALSE => "false",
            ElementType::FLOAT | ElementType::FLOAT5 => "real",
            ElementType::INT | ElementType::INT5 => "integer",
            ElementType::TEXT | ElementType::TEXT5 | ElementType::TEXTJ | ElementType::TEXTRAW => {
                "text"
            }
            _ => unreachable!(),
        };
        result.into()
    }
}

impl TryFrom<u8> for ElementType {
    type Error = LimboError;

    fn try_from(value: u8) -> std::result::Result<Self, Self::Error> {
        match value {
            0 => Ok(Self::NULL),
            1 => Ok(Self::TRUE),
            2 => Ok(Self::FALSE),
            3 => Ok(Self::INT),
            4 => Ok(Self::INT5),
            5 => Ok(Self::FLOAT),
            6 => Ok(Self::FLOAT5),
            7 => Ok(Self::TEXT),
            8 => Ok(Self::TEXTJ),
            9 => Ok(Self::TEXT5),
            10 => Ok(Self::TEXTRAW),
            11 => Ok(Self::ARRAY),
            12 => Ok(Self::OBJECT),
            13 => Ok(Self::RESERVED1),
            14 => Ok(Self::RESERVED2),
            15 => Ok(Self::RESERVED3),
            _ => bail_parse_error!("Failed to recognize jsonvalue type"),
        }
    }
}

type PayloadSize = usize;

#[derive(Debug, Clone, Copy)]
pub struct JsonbHeader(ElementType, PayloadSize);

pub(crate) enum HeaderFormat {
    Inline([u8; 1]),    // Small payloads embedded directly in the header
    OneByte([u8; 2]),   // Medium payloads with 1-byte size field
    TwoBytes([u8; 3]),  // Large payloads with 2-byte size field
    FourBytes([u8; 5]), // Extra large payloads with 4-byte size field
}

impl HeaderFormat {
    pub fn as_bytes(&self) -> &[u8] {
        match self {
            Self::Inline(bytes) => bytes,
            Self::OneByte(bytes) => bytes,
            Self::TwoBytes(bytes) => bytes,
            Self::FourBytes(bytes) => bytes,
        }
    }
}

impl JsonbHeader {
    fn new(element_type: ElementType, payload_size: PayloadSize) -> Self {
        Self(element_type, payload_size)
    }

    pub fn make_null() -> Self {
        Self(ElementType::NULL, 0)
    }

    fn from_slice(cursor: usize, slice: &[u8]) -> Result<(Self, usize)> {
        match slice.get(cursor) {
            Some(header_byte) => {
                // Extract first 4 bits (values 0-15)
                let element_type = header_byte & 15;
                if element_type > 12 {
                    bail_parse_error!("Invalid element type: {}", element_type);
                }
                // Get the last 4 bits for header_size
                let header_size = header_byte >> 4;
                let offset: usize;
                let total_size = match header_size {
                    size if size <= 11 => {
                        offset = 1;
                        size as usize
                    }

                    12 => match slice.get(cursor + 1) {
                        Some(value) => {
                            offset = 2;
                            *value as usize
                        }
                        None => bail_parse_error!("Failed to read 1-byte size"),
                    },

                    13 => match Self::get_size_bytes(slice, cursor + 1, 2) {
                        Ok(bytes) => {
                            offset = 3;
                            u16::from_be_bytes([bytes[0], bytes[1]]) as usize
                        }
                        Err(e) => return Err(e),
                    },

                    14 => match Self::get_size_bytes(slice, cursor + 1, 4) {
                        Ok(bytes) => {
                            offset = 5;
                            u32::from_be_bytes([bytes[0], bytes[1], bytes[2], bytes[3]]) as usize
                        }
                        Err(e) => return Err(e),
                    },

                    _ => unreachable!(),
                };

                Ok((Self(element_type.try_into()?, total_size), offset))
            }
            None => bail_parse_error!("Failed to read header byte"),
        }
    }

    pub fn into_bytes(self) -> HeaderFormat {
        let (element_type, payload_size) = (self.0, self.1);

        match payload_size {
            // Small payload (fits in 4 bits)
            size if size <= 11 => {
                HeaderFormat::Inline([(element_type as u8) | ((size as u8) << 4)])
            }

            // Medium payload (fits in 1 byte)
            size if size <= 0xFF => {
                HeaderFormat::OneByte([(element_type as u8) | (SIZE_MARKER_8BIT << 4), size as u8])
            }

            // Large payload (fits in 2 bytes)
            size if size <= 0xFFFF => {
                let size_bytes = (size as u16).to_be_bytes();
                HeaderFormat::TwoBytes([
                    (element_type as u8) | (SIZE_MARKER_16BIT << 4),
                    size_bytes[0],
                    size_bytes[1],
                ])
            }

            // Extra large payload (fits in 4 bytes)
            size if size <= 0xFFFFFFFF => {
                let size_bytes = (size as u32).to_be_bytes();
                HeaderFormat::FourBytes([
                    (element_type as u8) | (SIZE_MARKER_32BIT << 4),
                    size_bytes[0],
                    size_bytes[1],
                    size_bytes[2],
                    size_bytes[3],
                ])
            }

            // Payload too large
            _ => panic!("Payload size too large for encoding"),
        }
    }

    fn get_size_bytes(slice: &[u8], start: usize, count: usize) -> Result<&[u8]> {
        match slice.get(start..start + count) {
            Some(bytes) => Ok(bytes),
            None => bail_parse_error!("Failed to read header size"),
        }
    }
}

impl Jsonb {
    pub fn new(capacity: usize, data: Option<&[u8]>) -> Self {
        if let Some(data) = data {
            return Self {
                data: data.to_vec(),
            };
        }
        Self {
            data: Vec::with_capacity(capacity),
        }
    }

    pub fn len(&self) -> usize {
        self.data.len()
    }

    pub fn make_empty_array(size: usize) -> Self {
        let mut jsonb = Self {
            data: Vec::with_capacity(size),
        };
        jsonb
            .write_element_header(0, ElementType::ARRAY, 0)
            .unwrap();
        jsonb
    }

    pub fn append_to_array_unsafe(&mut self, data: &[u8]) {
        self.data.extend_from_slice(data);
    }

    pub fn finalize_array_unsafe(&mut self) -> Result<()> {
        self.write_element_header(0, ElementType::ARRAY, self.len() - 1)?;
        Ok(())
    }

    fn read_header(&self, cursor: usize) -> Result<(JsonbHeader, usize)> {
        let (header, offset) = JsonbHeader::from_slice(cursor, &self.data)?;

        Ok((header, offset))
    }

    pub fn is_valid(&self) -> Result<ElementType> {
        match self.read_header(0) {
            Ok((header, offset)) => {
                if let Some(_) = self.data.get(offset..offset + header.1) {
                    Ok(header.0)
                } else {
                    bail_parse_error!("malformed JSON")
                }
            }
            Err(_) => bail_parse_error!("malformed JSON"),
        }
    }

    pub fn to_string(&self) -> Result<String> {
        let mut result = String::with_capacity(self.data.len() * 2);
        self.write_to_string(&mut result)?;

        Ok(result)
    }

    fn write_to_string(&self, string: &mut String) -> Result<()> {
        let cursor = 0;
        let _ = self.serialize_value(string, cursor);
        Ok(())
    }

    fn serialize_value(&self, string: &mut String, cursor: usize) -> Result<usize> {
        let (header, skip_header) = self.read_header(cursor)?;
        let cursor = cursor + skip_header;

        let current_cursor = match header {
            JsonbHeader(ElementType::OBJECT, len) => self.serialize_object(string, cursor, len)?,
            JsonbHeader(ElementType::ARRAY, len) => self.serialize_array(string, cursor, len)?,
            JsonbHeader(ElementType::TEXT, len)
            | JsonbHeader(ElementType::TEXTRAW, len)
            | JsonbHeader(ElementType::TEXTJ, len)
            | JsonbHeader(ElementType::TEXT5, len) => {
                self.serialize_string(string, cursor, len, &header.0, true)?
            }
            JsonbHeader(ElementType::INT, len)
            | JsonbHeader(ElementType::INT5, len)
            | JsonbHeader(ElementType::FLOAT, len)
            | JsonbHeader(ElementType::FLOAT5, len) => {
                self.serialize_number(string, cursor, len, &header.0)?
            }

            JsonbHeader(ElementType::TRUE, _) => self.serialize_boolean(string, cursor, true),
            JsonbHeader(ElementType::FALSE, _) => self.serialize_boolean(string, cursor, false),
            JsonbHeader(ElementType::NULL, _) => self.serialize_null(string, cursor),
            JsonbHeader(_, _) => {
                unreachable!();
            }
        };
        Ok(current_cursor)
    }

    fn serialize_object(&self, string: &mut String, cursor: usize, len: usize) -> Result<usize> {
        let end_cursor = cursor + len;
        let mut current_cursor = cursor;
        string.push('{');
        while current_cursor < end_cursor {
            let (key_header, key_header_offset) = self.read_header(current_cursor)?;
            current_cursor += key_header_offset;
            let JsonbHeader(element_type, len) = key_header;

            match element_type {
                ElementType::TEXT
                | ElementType::TEXTRAW
                | ElementType::TEXTJ
                | ElementType::TEXT5 => {
                    current_cursor =
                        self.serialize_string(string, current_cursor, len, &element_type, true)?;
                }
                _ => bail_parse_error!("malformed JSON"),
            }

            string.push(':');
            current_cursor = self.serialize_value(string, current_cursor)?;
            if current_cursor < end_cursor {
                string.push(',');
            }
        }
        string.push('}');
        Ok(current_cursor)
    }

    fn serialize_array(&self, string: &mut String, cursor: usize, len: usize) -> Result<usize> {
        let end_cursor = cursor + len;
        let mut current_cursor = cursor;

        string.push('[');

        while current_cursor < end_cursor {
            current_cursor = self.serialize_value(string, current_cursor)?;
            if current_cursor < end_cursor {
                string.push(',');
            }
        }

        string.push(']');
        Ok(current_cursor)
    }

    fn serialize_string(
        &self,
        string: &mut String,
        cursor: usize,
        len: usize,
        kind: &ElementType,
        quote: bool,
    ) -> Result<usize> {
        let word_slice = &self.data[cursor..cursor + len];
        if quote {
            string.push('"');
        }

        match kind {
            // Can be serialized as is. Do not need escaping
            ElementType::TEXT => {
                let word = from_utf8(word_slice).map_err(|_| {
                    LimboError::ParseError("Failed to serialize string!".to_string())
                })?;
                string.push_str(word);
            }

            // Contain standard json escapes
            ElementType::TEXTJ => {
                let word = from_utf8(word_slice).map_err(|_| {
                    LimboError::ParseError("Failed to serialize string!".to_string())
                })?;
                string.push_str(word);
            }

            // We have to escape some JSON5 escape sequences
            ElementType::TEXT5 => {
                let mut i = 0;
                while i < word_slice.len() {
                    let ch = word_slice[i];

                    // Handle normal characters that don't need escaping
                    if is_json_ok(ch) || ch == b'\'' {
                        string.push(ch as char);
                        i += 1;
                        continue;
                    }

                    // Handle special cases
                    match ch {
                        // Double quotes need escaping
                        b'"' => {
                            string.push_str("\\\"");
                            i += 1;
                        }

                        // Control characters (0x00-0x1F)
                        ch if ch <= 0x1F => {
                            match ch {
                                // \b
                                0x08 => string.push_str("\\b"),
                                b'\t' => string.push_str("\\t"),
                                b'\n' => string.push_str("\\n"),
                                // \f
                                0x0C => string.push_str("\\f"),
                                b'\r' => string.push_str("\\r"),
                                _ => {
                                    // Format as \u00XX
                                    let hex = format!("\\u{:04x}", ch);
                                    string.push_str(&hex);
                                }
                            }
                            i += 1;
                        }

                        // Handle escape sequences
                        b'\\' if i + 1 < word_slice.len() => {
                            let next_ch = word_slice[i + 1];
                            match next_ch {
                                // Single quote
                                b'\'' => {
                                    string.push('\'');
                                    i += 2;
                                }

                                // Vertical tab
                                b'v' => {
                                    string.push_str("\\u0009");
                                    i += 2;
                                }

                                // Hex escapes like \x27
                                b'x' if i + 3 < word_slice.len() => {
                                    string.push_str("\\u00");
                                    string.push(word_slice[i + 2] as char);
                                    string.push(word_slice[i + 3] as char);
                                    i += 4;
                                }

                                // Null character
                                b'0' => {
                                    string.push_str("\\u0000");
                                    i += 2;
                                }

                                // CR line continuation
                                b'\r' => {
                                    if i + 2 < word_slice.len() && word_slice[i + 2] == b'\n' {
                                        i += 3; // Skip CRLF
                                    } else {
                                        i += 2; // Skip CR
                                    }
                                }

                                // LF line continuation
                                b'\n' => {
                                    i += 2;
                                }

                                // Unicode line separators (U+2028 and U+2029)
                                0xe2 if i + 3 < word_slice.len()
                                    && word_slice[i + 2] == 0x80
                                    && (word_slice[i + 3] == 0xa8 || word_slice[i + 3] == 0xa9) =>
                                {
                                    i += 4;
                                }

                                // All other escapes pass through
                                _ => {
                                    string.push('\\');
                                    string.push(next_ch as char);
                                    i += 2;
                                }
                            }
                        }

                        // Default case - just push the character
                        _ => {
                            string.push(ch as char);
                            i += 1;
                        }
                    }
                }
            }

            ElementType::TEXTRAW => {
                let word = from_utf8(word_slice).map_err(|_| {
                    LimboError::ParseError("Failed to serialize string!".to_string())
                })?;

                for ch in word.chars() {
                    match ch {
                        '"' => string.push_str("\\\""),
                        '\\' => string.push_str("\\\\"),
                        '\x08' => string.push_str("\\b"),
                        '\x0C' => string.push_str("\\f"),
                        '\n' => string.push_str("\\n"),
                        '\r' => string.push_str("\\r"),
                        '\t' => string.push_str("\\t"),
                        c if c <= '\u{001F}' => {
                            string.push_str(&format!("\\u{:04x}", c as u32));
                        }
                        _ => string.push(ch),
                    }
                }
            }

            _ => {
                unreachable!()
            }
        }
        if quote {
            string.push('"');
        }

        Ok(cursor + len)
    }

    fn serialize_number(
        &self,
        string: &mut String,
        cursor: usize,
        len: usize,
        kind: &ElementType,
    ) -> Result<usize> {
        let current_cursor = cursor + len;
        let num_slice = from_utf8(&self.data[cursor..current_cursor])
            .map_err(|_| LimboError::ParseError("Failed to parse integer".to_string()))?;

        match kind {
            ElementType::INT | ElementType::FLOAT => {
                string.push_str(num_slice);
            }
            ElementType::INT5 => {
                self.serialize_int5(string, num_slice)?;
            }
            ElementType::FLOAT5 => {
                self.serialize_float5(string, num_slice)?;
            }
            _ => unreachable!(),
        }
        Ok(current_cursor)
    }

    fn serialize_int5(&self, string: &mut String, hex_str: &str) -> Result<()> {
        // Check if number is hex
        if hex_str.len() > 2
            && (hex_str[..2].eq_ignore_ascii_case("0x")
                || (hex_str.starts_with("-") || hex_str.starts_with("+"))
                    && hex_str[1..3].eq_ignore_ascii_case("0x"))
        {
            let (sign, hex_part) = if hex_str.starts_with("-0x") || hex_str.starts_with("-0X") {
                ("-", &hex_str[3..])
            } else if hex_str.starts_with("+0x") || hex_str.starts_with("+0X") {
                ("", &hex_str[3..])
            } else {
                ("", &hex_str[2..])
            };

            // Add sign
            string.push_str(sign);

            let mut value = 0u64;

            for ch in hex_part.chars() {
                if !ch.is_ascii_hexdigit() {
                    bail_parse_error!("Failed to parse hex digit: {}", hex_part);
                }

                if (value >> 60) != 0 {
                    string.push_str("9.0e999");
                    return Ok(());
                }

                value = value * 16 + ch.to_digit(16).unwrap_or(0) as u64;
            }
            write!(string, "{}", value)
                .map_err(|_| LimboError::ParseError("Error writing string to json!".to_string()))?;
        } else {
            string.push_str(hex_str);
        }

        Ok(())
    }

    fn serialize_float5(&self, string: &mut String, float_str: &str) -> Result<()> {
        if float_str.len() < 2 {
            bail_parse_error!("Integer is less then 2 chars: {}", float_str);
        }
        match float_str {
            "9e999" | "-9e999" => {
                string.push_str(float_str);
            }
            val if val.starts_with("-.") => {
                string.push_str("-0.");
                string.push_str(&val[2..]);
            }
            val if val.starts_with("+.") => {
                string.push_str("0.");
                string.push_str(&val[2..]);
            }
            val if val.starts_with(".") => {
                string.push_str("0.");
                string.push_str(&val[1..]);
            }
            val if val
                .chars()
                .next()
                .map_or(false, |c| c.is_ascii_alphanumeric() || c == '+' || c == '-') =>
            {
                string.push_str(val);
                string.push('0');
            }
            _ => bail_parse_error!("Unable to serialize float5: {}", float_str),
        }

        Ok(())
    }

    fn serialize_boolean(&self, string: &mut String, cursor: usize, val: bool) -> usize {
        if val {
            string.push_str("true");
        } else {
            string.push_str("false");
        }

        cursor
    }

    fn serialize_null(&self, string: &mut String, cursor: usize) -> usize {
        string.push_str("null");
        cursor
    }

    fn deserialize_value(&mut self, input: &[u8], mut pos: usize, depth: usize) -> Result<usize> {
        if depth > MAX_JSON_DEPTH {
            bail_parse_error!("Too deep");
        }

        pos = skip_whitespace(input, pos);
        if pos >= input.len() {
            bail_parse_error!("Unexpected end of input")
        }

        match input[pos] {
            b'{' => {
                pos += 1; // consume '{'
                pos = self.deserialize_obj(input, pos, depth + 1)?;
            }
            b'[' => {
                pos += 1; // consume '['
                pos = self.deserialize_array(input, pos, depth + 1)?;
            }
            b't' => {
                pos = self.deserialize_true(input, pos)?;
            }
            b'f' => {
                pos = self.deserialize_false(input, pos)?;
            }
            b'n' => {
                pos = self.deserialize_null_or_nan(input, pos)?;
            }
            b'"' | b'\'' => {
                pos = self.deserialize_string(input, pos)?;
            }
            c if (c >= b'0' && c <= b'9')
                || c == b'-'
                || c == b'+'
                || c == b'.'
                || c.to_ascii_lowercase() == b'i' =>
            {
                pos = self.deserialize_number(input, pos)?;
            }
            _ => {
                bail_parse_error!("Unexpected character: {}", input[pos] as char);
            }
        }

        Ok(pos)
    }

    fn deserialize_obj(&mut self, input: &[u8], mut pos: usize, depth: usize) -> Result<usize> {
        if depth > MAX_JSON_DEPTH {
            bail_parse_error!("Too deep!");
        }
        if self.data.capacity() - self.data.len() < 50 {
            self.data.reserve(self.data.capacity());
        }
        if pos >= input.len() {
            bail_parse_error!("Unexpected end of input");
        }

        let header_pos = self.len();
        self.write_element_header(header_pos, ElementType::OBJECT, 0)?;
        let obj_start = self.len();
        let mut first = true;

        loop {
            pos = skip_whitespace(input, pos);
            if pos >= input.len() {
                bail_parse_error!("Unexpected end of input");
            }

            match input[pos] {
                b'}' => {
                    pos += 1; // consume '}'
                    if first {
                        return Ok(pos);
                    } else {
                        let obj_size = self.len() - obj_start;
                        self.write_element_header(header_pos, ElementType::OBJECT, obj_size)?;
                        return Ok(pos);
                    }
                }
                b',' if !first => {
                    pos += 1; // consume ','
                    pos = skip_whitespace(input, pos);
                    if input[pos] == b',' {
                        bail_parse_error!("2 commas in a row are not allowed")
                    }
                }
                _ => {
                    // Parse key (must be string)
                    pos = self.deserialize_string(input, pos)?;

                    pos = skip_whitespace(input, pos);
                    if pos >= input.len() || input[pos] != b':' {
                        bail_parse_error!("Expected ':' after object key");
                    }
                    pos += 1; // consume ':'

                    pos = skip_whitespace(input, pos);

                    // Parse value - can be any JSON value including another object
                    pos = self.deserialize_value(input, pos, depth + 1)?;
                    pos = skip_whitespace(input, pos);
                    if pos < input.len() && !matches!(input[pos], b',' | b'}') {
                        bail_parse_error!("Should be , or }}")
                    }
                    first = false;
                }
            }
        }
    }

    fn deserialize_array(&mut self, input: &[u8], mut pos: usize, depth: usize) -> Result<usize> {
        if depth > MAX_JSON_DEPTH {
            bail_parse_error!("Too deep");
        }

        let header_pos = self.len();
        self.write_element_header(header_pos, ElementType::ARRAY, 0)?;
        let arr_start = self.len();
        let mut first = true;

        loop {
            pos = skip_whitespace(input, pos);
            if pos >= input.len() {
                bail_parse_error!("Unexpected end of input");
            }

            match input[pos] {
                b']' => {
                    pos += 1; // consume ']'
                    if first {
                        return Ok(pos);
                    } else {
                        let arr_len = self.len() - arr_start;
                        self.write_element_header(header_pos, ElementType::ARRAY, arr_len)?;
                        return Ok(pos);
                    }
                }
                b',' if !first => {
                    pos += 1; // consume ','
                    pos = skip_whitespace(input, pos);
                }
                _ => {
                    pos = skip_whitespace(input, pos);

                    // Parse array element
                    pos = self.deserialize_value(input, pos, depth + 1)?;

                    first = false;
                }
            }
        }
    }

    fn deserialize_string(&mut self, input: &[u8], mut pos: usize) -> Result<usize> {
        if pos >= input.len() {
            bail_parse_error!("Unexpected end of input");
        }

        let string_start = self.len();
        let quote = input[pos];
        pos += 1; // consume quote

        let quoted = quote == b'"' || quote == b'\'';
        let mut len = 0;

        // Write placeholder header to be updated later
        self.write_element_header(string_start, ElementType::TEXT, 0)?;

        if pos >= input.len() {
            bail_parse_error!("Unexpected end of input in string");
        }

        let mut element_type = ElementType::TEXT;

        // Special case for unquoted JSON5 keys (identifiers)
        if !quoted {
            self.data.push(quote);
            len += 1;

            if pos < input.len() && input[pos] == b':' {
                self.write_element_header(string_start, element_type, len)?;
                return Ok(pos);
            }
        }

        let mut escape_buffer = [0u8; 6]; // Buffer for escape sequences

        while pos < input.len() {
            let c = input[pos];
            pos += 1;

            if quoted && c == quote {
                break; // End of string
            } else if !quoted && (c == b'"' || c == b'\'') {
                bail_parse_error!("Something gone wrong")
            } else if c == b'\\' {
                // Handle escape sequences
                if pos >= input.len() {
                    bail_parse_error!("Unexpected end of input in escape sequence");
                }

                let esc = input[pos];
                pos += 1;

                match esc {
                    b'b' => {
                        self.data.extend_from_slice(b"\\b");
                        len += 2;
                        element_type = ElementType::TEXTJ;
                    }
                    b'f' => {
                        self.data.extend_from_slice(b"\\f");
                        len += 2;
                        element_type = ElementType::TEXTJ;
                    }
                    b'n' => {
                        self.data.extend_from_slice(b"\\n");
                        len += 2;
                        element_type = ElementType::TEXTJ;
                    }
                    b'r' => {
                        self.data.extend_from_slice(b"\\r");
                        len += 2;
                        element_type = ElementType::TEXTJ;
                    }
                    b't' => {
                        self.data.extend_from_slice(b"\\t");
                        len += 2;
                        element_type = ElementType::TEXTJ;
                    }
                    b'\\' | b'"' | b'/' => {
                        self.data.push(b'\\');
                        self.data.push(esc);
                        len += 2;
                        element_type = ElementType::TEXTJ;
                    }
                    b'u' => {
                        // Unicode escape sequence
                        if pos + 4 > input.len() {
                            bail_parse_error!("Incomplete Unicode escape sequence");
                        }

                        escape_buffer[0] = b'\\';
                        escape_buffer[1] = b'u';

                        for i in 0..4 {
                            let h = input[pos + i];
                            if !is_hex_digit(h) {
                                bail_parse_error!("Invalid Unicode escape sequence");
                            }
                            escape_buffer[2 + i] = h;
                        }

                        self.data.extend_from_slice(&escape_buffer[0..6]);
                        len += 6;
                        pos += 4;
                        element_type = ElementType::TEXTJ;
                    }
                    // JSON5 extensions
                    b'\n' => {
                        self.data.extend_from_slice(b"\\\n");
                        len += 2;
                        element_type = ElementType::TEXT5;
                    }
                    b'\'' => {
                        self.data.extend_from_slice(b"\\\'");
                        len += 2;
                        element_type = ElementType::TEXT5;
                    }
                    b'0' => {
                        self.data.extend_from_slice(b"\\0");
                        len += 2;
                        element_type = ElementType::TEXT5;
                    }
                    b'v' => {
                        self.data.extend_from_slice(b"\\v");
                        len += 2;
                        element_type = ElementType::TEXT5;
                    }
                    b'x' => {
                        // Hex escape sequence (JSON5)
                        if pos + 2 > input.len() {
                            bail_parse_error!("Incomplete hex escape sequence");
                        }

                        escape_buffer[0] = b'\\';
                        escape_buffer[1] = b'x';

                        for i in 0..2 {
                            let h = input[pos + i];
                            if !is_hex_digit(h) {
                                bail_parse_error!("Invalid hex escape sequence");
                            }
                            escape_buffer[2 + i] = h;
                        }

                        self.data.extend_from_slice(&escape_buffer[0..4]);
                        len += 4;
                        pos += 2;
                        element_type = ElementType::TEXT5;
                    }

                    _ => {
                        bail_parse_error!("Invalid escape sequence: \\{}", esc as char);
                    }
                }
            } else if !quoted && (c == b':' || c.is_ascii_whitespace()) {
                // End of unquoted identifier
                pos -= 1; // Put back the terminating character
                break;
            } else if c <= 0x1F {
                // Control character
                element_type = ElementType::TEXT5;
                self.data.push(c);
                len += 1;
            } else {
                // Normal character
                self.data.push(c);
                len += 1;
            }
        }

        // Write final header with correct type and size
        self.write_element_header(string_start, element_type, len)?;

        Ok(pos)
    }

    fn deserialize_number(&mut self, input: &[u8], mut pos: usize) -> Result<usize> {
        let num_start = self.len();
        let start_pos = pos;
        let mut len = 0;
        let mut is_float = false;
        let mut is_json5 = false;

        // Write placeholder header
        self.write_element_header(num_start, ElementType::INT, 0)?;

        // Handle sign
        if pos < input.len() && (input[pos] == b'-' || input[pos] == b'+') {
            if input[pos] == b'+' {
                is_json5 = true;
                pos += 1;
            } else {
                self.data.push(input[pos]);
                pos += 1;
                len += 1;
            }
        }

        // Handle JSON5 float starting with dot
        if pos < input.len() && input[pos] == b'.' {
            is_json5 = true;
            is_float = true;
        }

        // Check for hex (JSON5)
        if pos < input.len() && input[pos] == b'0' && pos + 1 < input.len() {
            self.data.push(input[pos]);
            pos += 1;
            len += 1;

            if pos < input.len() && (input[pos] == b'x' || input[pos] == b'X') {
                // Hexadecimal number
                self.data.push(input[pos]);
                pos += 1;
                len += 1;

                let mut has_digit = false;
                while pos < input.len() && is_hex_digit(input[pos]) {
                    self.data.push(input[pos]);
                    pos += 1;
                    len += 1;
                    has_digit = true;
                }

                if !has_digit {
                    bail_parse_error!("Invalid hex number: no digits after 0x");
                }

                self.write_element_header(num_start, ElementType::INT5, len)?;
                return Ok(pos);
            } else if pos < input.len() && input[pos].is_ascii_digit() {
                // Leading zero followed by digit is not allowed in standard JSON
                bail_parse_error!("Leading zero is not allowed in number");
            }
        }

        // Check for Infinity
        if pos < input.len() && (input[pos] == b'I' || input[pos] == b'i') {
            // Try to match "Infinity"
            let infinity = b"infinity";
            let mut i = 0;

            while i < infinity.len() && pos + i < input.len() {
                if input[pos + i].to_ascii_lowercase() != infinity[i] {
                    bail_parse_error!("Invalid number: expected Infinity");
                }
                i += 1;
            }

            if i < infinity.len() {
                bail_parse_error!("Invalid number: incomplete Infinity");
            }

            pos += infinity.len();

            // Write Infinity as 9e999
            self.data.extend_from_slice(b"9e999");
            self.write_element_header(
                num_start,
                ElementType::FLOAT5,
                len + INFINITY_CHAR_COUNT as usize,
            )?;

            return Ok(pos);
        }

        // Regular number parsing
        while pos < input.len() {
            match input[pos] {
                b'0'..=b'9' => {
                    self.data.push(input[pos]);
                    pos += 1;
                    len += 1;
                }
                b'.' => {
                    is_float = true;
                    self.data.push(input[pos]);
                    pos += 1;
                    len += 1;

                    // Check for trailing dot
                    if pos >= input.len() || !input[pos].is_ascii_digit() {
                        is_json5 = true;
                    }
                }
                b'e' | b'E' => {
                    is_float = true;
                    self.data.push(input[pos]);
                    pos += 1;
                    len += 1;

                    // Optional sign after exponent
                    if pos < input.len() && (input[pos] == b'+' || input[pos] == b'-') {
                        self.data.push(input[pos]);
                        pos += 1;
                        len += 1;
                    }
                }
                _ => break,
            }
        }

        // No digits found
        if len == 0 && (!is_json5 || !is_float) {
            bail_parse_error!("Invalid number at position {}", start_pos);
        }

        // Determine the appropriate element type
        let element_type = if is_float {
            if is_json5 {
                ElementType::FLOAT5
            } else {
                ElementType::FLOAT
            }
        } else {
            if is_json5 {
                ElementType::INT5
            } else {
                ElementType::INT
            }
        };

        self.write_element_header(num_start, element_type, len)?;

        Ok(pos)
    }

    fn deserialize_true(&mut self, input: &[u8], mut pos: usize) -> Result<usize> {
        let true_lit = b"true";
        for i in 0..true_lit.len() {
            if pos + i >= input.len() || input[pos + i] != true_lit[i] {
                bail_parse_error!("Expected 'true'");
            }
        }

        pos += true_lit.len();
        self.data.push(ElementType::TRUE as u8);

        Ok(pos)
    }

    fn deserialize_false(&mut self, input: &[u8], mut pos: usize) -> Result<usize> {
        let false_lit = b"false";
        for i in 0..false_lit.len() {
            if pos + i >= input.len() || input[pos + i] != false_lit[i] {
                bail_parse_error!("Expected 'false'");
            }
        }

        pos += false_lit.len();
        self.data.push(ElementType::FALSE as u8);

        Ok(pos)
    }

    pub fn deserialize_null_or_nan(&mut self, input: &[u8], mut pos: usize) -> Result<usize> {
        // First check if we have enough bytes remaining
        if pos + 3 >= input.len() {
            bail_parse_error!("Unexpected end of input, expected 'null' or 'nan'");
        }

        // Fast path for "null"
        if pos + 4 <= input.len()
            && input[pos] == b'n'
            && input[pos + 1] == b'u'
            && input[pos + 2] == b'l'
            && input[pos + 3] == b'l'
        {
            pos += 4;
            self.data.push(ElementType::NULL as u8);
            return Ok(pos);
        }

        // Fast path for "nan"
        if pos + 3 <= input.len()
            && (input[pos] == b'n' || input[pos] == b'N')
            && (input[pos + 1] == b'a' || input[pos + 1] == b'A')
            && (input[pos + 2] == b'n' || input[pos + 2] == b'N')
        {
            pos += 3;
            self.data.push(ElementType::NULL as u8);
            return Ok(pos);
        }

        // If we get here, we didn't match either pattern
        bail_parse_error!("Expected 'null' or 'nan'");
    }

    fn write_element_header(
        &mut self,
        cursor: usize,
        element_type: ElementType,
        payload_size: usize,
    ) -> Result<usize> {
        if payload_size <= 11 {
            let header_byte = (element_type as u8) | ((payload_size as u8) << 4);
            if cursor == self.len() {
                self.data.push(header_byte);
            } else {
                self.data[cursor] = header_byte;
            }
            return Ok(1);
        }

        let header = JsonbHeader::new(element_type, payload_size).into_bytes();

        let header_bytes = header.as_bytes();
        let header_len = header_bytes.len();
        if cursor == self.len() {
            self.data.extend_from_slice(header_bytes);
        } else {
            // Calculate difference in length
            let old_len = 1; // We're replacing 1 byte
            let new_len = header_bytes.len();
            let diff = new_len as isize - old_len as isize;

            // Resize the Vec if needed
            if diff > 0 {
                // Need to make room
                self.data.resize(self.data.len() + diff as usize, 0);

                // Shift data after cursor to the right
                unsafe {
                    let ptr = self.data.as_mut_ptr();
                    std::ptr::copy(
                        ptr.add(cursor + old_len),
                        ptr.add(cursor + new_len),
                        self.data.len() - cursor - new_len,
                    );
                }
            } else if diff < 0 {
                // Need to shrink
                unsafe {
                    let ptr = self.data.as_mut_ptr();
                    std::ptr::copy(
                        ptr.add(cursor + old_len),
                        ptr.add(cursor + new_len),
                        self.data.len() - cursor - old_len,
                    );
                }
            }

            // Copy the header bytes
            for (i, &byte) in header_bytes.iter().enumerate() {
                self.data[cursor + i] = byte;
            }
        }
        Ok(header_len)
    }

    fn from_str(input: &str) -> Result<Self> {
        let mut result = Self::new(input.len(), None);
        let input = input.as_bytes();

        if input.is_empty() {
            bail_parse_error!("Empty input");
        }

        // Parse the first complete JSON value
        let mut pos = 0;
        pos = result.deserialize_value(input, pos, 0)?;

        // Skip any trailing whitespace
        pos = skip_whitespace(input, pos);

        // Check for any non-whitespace characters after the JSON value
        if pos < input.len() {
            bail_parse_error!("Unexpected trailing content after JSON value");
        }

        Ok(result)
    }

    pub fn from_raw_data(data: &[u8]) -> Self {
        Self::new(data.len(), Some(data))
    }

    pub fn data(self) -> Vec<u8> {
        self.data
    }

    pub fn get_by_path(&self, path: &JsonPath) -> Result<(Jsonb, ElementType)> {
        let mut pos = 0;
        let mut string_buffer = String::with_capacity(1024);
        for segment in path.elements.iter() {
            pos = self.navigate_to_segment(segment, pos, &mut string_buffer)?;
        }
        let (header, skip_header) = self.read_header(pos)?;
        let end = pos + skip_header + header.1;
        Ok((Jsonb::from_raw_data(&self.data[pos..end]), header.0))
    }

    pub fn get_by_path_raw(&self, path: &JsonPath) -> Result<&[u8]> {
        let mut pos = 0;
        let mut string_buffer = String::with_capacity(1024);
        for segment in path.elements.iter() {
            pos = self.navigate_to_segment(segment, pos, &mut string_buffer)?;
        }
        let (header, skip_header) = self.read_header(pos)?;
        let end = pos + skip_header + header.1;
        Ok(&self.data[pos..end])
    }

    pub fn array_len(&self) -> Result<usize> {
        let (header, header_skip) = self.read_header(0)?;
        if header.0 != ElementType::ARRAY {
            return Ok(0);
        }

        let mut count = 0;
        let mut pos = header_skip;
        while pos < header_skip + header.1 {
            pos = self.skip_element(pos)?;
            count += 1;
        }

        Ok(count)
    }

    fn navigate_to_segment(
        &self,
        segment: &PathElement,
        pos: usize,
        string_buffer: &mut String,
    ) -> Result<usize> {
        let (header, skip_header) = self.read_header(pos)?;
        let (parent_type, parent_size) = (header.0, header.1);
        let mut current_pos = pos + skip_header;

        match segment {
            PathElement::Root() => return Ok(0),
            PathElement::Key(path_key, is_raw) => {
                if parent_type != ElementType::OBJECT {
                    bail_parse_error!("parent is not object")
                };
                while current_pos < pos + parent_size {
                    let (key_header, skip_header) = self.read_header(current_pos)?;
                    let (key_type, key_size) = (key_header.0, key_header.1);
                    current_pos += skip_header;

                    if matches!(
                        key_header.0,
                        ElementType::TEXT
                            | ElementType::TEXT5
                            | ElementType::TEXTJ
                            | ElementType::TEXTRAW
                    ) {
                        string_buffer.clear();
                        current_pos = self.serialize_string(
                            string_buffer,
                            current_pos,
                            key_size,
                            &key_type,
                            false,
                        )?;

                        if compare((&string_buffer, key_type), (path_key, *is_raw)) {
                            return Ok(current_pos);
                        } else {
                            current_pos = self.skip_element(current_pos)?;
                        }
                    } else {
                        bail_parse_error!("Key is not text!")
                    };
                }
            }
            PathElement::ArrayLocator(idx) => {
                if parent_type != ElementType::ARRAY {
                    bail_parse_error!("parent is not array");
                };
                if let Some(id) = idx {
                    let id = id.to_owned();

                    if id >= 0 {
                        for _ in 0..id as usize {
                            if current_pos < pos + parent_size {
                                current_pos = self.skip_element(current_pos)?;
                            } else {
                                bail_parse_error!("Index is bigger then array size");
                            }
                        }
                        return Ok(current_pos);
                        // fix this after we remove serialized json
                    } else {
                        let mut temp_pos = current_pos;
                        let mut count_elements = 0;
                        while temp_pos < pos + parent_size {
                            temp_pos = self.skip_element(temp_pos)?;
                            count_elements += 1;
                        }
                        let corrected_idx = count_elements + id;
                        if corrected_idx < 0 {
                            bail_parse_error!("Index is bigger then array size")
                        }
                        for _ in 0..corrected_idx as usize {
                            if current_pos < pos + parent_size {
                                current_pos = self.skip_element(current_pos)?;
                            } else {
                                bail_parse_error!("Index is bigger then array size");
                            }
                        }
                        return Ok(current_pos);
                    }
                } else {
                    return Ok(pos);
                }
            }
        }

        bail_parse_error!("Did not find anything")
    }

    fn skip_element(&self, mut pos: usize) -> Result<usize> {
        let (header, skip_header) = self.read_header(pos)?;
        pos += skip_header + header.1;
        Ok(pos)
    }
}

impl std::str::FromStr for Jsonb {
    type Err = LimboError;

    fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
        Self::from_str(s)
    }
}

#[inline]
fn compare(key: (&str, ElementType), path_key: (&str, bool)) -> bool {
    let (key, element_type) = key;
    let (path_key, is_raw) = path_key;
    if !is_raw && element_type == ElementType::TEXT {
        if key.len() == path_key.len() {
            return key == path_key;
        } else {
            return false;
        }
    }
    if !is_raw {
        return unescape_string(key) == path_key;
    }
    match element_type {
        ElementType::TEXTJ | ElementType::TEXT5 | ElementType::TEXTRAW | ElementType::TEXT => {
            return unescape_string(key) == unescape_string(path_key);
        }
        _ => {}
    };

    return false;
}

#[inline]
pub fn unescape_string(input: &str) -> String {
    let mut result = String::with_capacity(input.len());
    let mut chars = input.chars().peekable();
    let mut code_point = String::with_capacity(5);

    while let Some(c) = chars.next() {
        if c == '\\' {
            match chars.next() {
                Some('n') => result.push('\n'),
                Some('r') => result.push('\r'),
                Some('t') => result.push('\t'),
                Some('\\') => result.push('\\'),
                Some('/') => result.push('/'),
                Some('"') => result.push('"'),
                Some('b') => result.push('\u{0008}'),
                Some('f') => result.push('\u{000C}'),
                Some('x') => {
                    code_point.clear();
                    for _ in 0..2 {
                        if let Some(hex_char) = chars.next() {
                            code_point.push(hex_char);
                        } else {
                            break;
                        }
                    }
                    if let Ok(code) = u16::from_str_radix(&code_point, 16) {
                        if let Some(ch) = char::from_u32(code as u32) {
                            result.push(ch)
                        }
                    }
                }
                // Handle \uXXXX format (JSON style)
                Some('u') => {
                    code_point.clear();
                    for _ in 0..4 {
                        if let Some(hex_char) = chars.next() {
                            code_point.push(hex_char);
                        } else {
                            break;
                        }
                    }

                    if let Ok(code) = u16::from_str_radix(&code_point, 16) {
                        // Check if this is a high surrogate
                        if matches!(code, 0xD800..=0xDBFF) {
                            if chars.next() == Some('\\') && chars.next() == Some('u') {
                                code_point.clear();
                                for _ in 0..4 {
                                    if let Some(hex_char) = chars.next() {
                                        code_point.push(hex_char);
                                    } else {
                                        break;
                                    }
                                }

                                if let Ok(low_code) = u16::from_str_radix(&code_point, 16) {
                                    if (0xDC00..=0xDFFF).contains(&low_code) {
                                        let high_ten_bits = (code - 0xD800) as u32;
                                        let low_ten_bits = (low_code - 0xDC00) as u32;
                                        let code_point = (high_ten_bits << 10) | low_ten_bits;
                                        let unicode_value = code_point + 0x10000;

                                        if let Some(unicode_char) = char::from_u32(unicode_value) {
                                            result.push(unicode_char);
                                        }
                                    } else {
                                        // If low surrogate is invalid, just push both as separate chars
                                        if let Some(c1) = char::from_u32(code as u32) {
                                            result.push(c1);
                                        }
                                        if let Some(c2) = char::from_u32(low_code as u32) {
                                            result.push(c2);
                                        }
                                    }
                                }
                            } else {
                                // No low surrogate, just push the high surrogate as is
                                if let Some(unicode_char) = char::from_u32(code as u32) {
                                    result.push(unicode_char);
                                }
                            }
                        } else {
                            // Not a surrogate pair, just a regular Unicode character
                            if let Some(unicode_char) = char::from_u32(code as u32) {
                                result.push(unicode_char);
                            }
                        }
                    }
                }

                Some(c) => {
                    // For any other escape sequence we don't recognize,
                    // just output the backslash and the character
                    result.push('\\');
                    result.push(c);
                }
                None => {
                    // Handle trailing backslash
                    result.push('\\');
                }
            }
        } else {
            result.push(c);
        }
    }

    result
}

#[inline]
pub fn skip_whitespace(input: &[u8], mut pos: usize) -> usize {
    let len = input.len();
    if pos >= len {
        return pos;
    }

    // Fast path for non-whitespace, non-comment
    if (WS_TABLE[input[pos] as usize] & 1) == 0 && input[pos] != b'/' {
        return pos;
    }

    // Process whitespace and comments
    while pos < len {
        let ch = input[pos];
        if (WS_TABLE[ch as usize] & 1) != 0 {
            // Skip whitespace
            pos += 1;
        } else if ch == b'/' && pos + 1 < len {
            // Handle JSON5 comments
            match input[pos + 1] {
                b'/' => {
                    // Line comment - skip until newline
                    pos += 2;
                    while pos < len && input[pos] != b'\n' {
                        pos += 1;
                    }
                    if pos < len {
                        pos += 1; // Skip the newline
                    }
                }
                b'*' => {
                    // Block comment - skip until "*/"
                    pos += 2;
                    while pos + 1 < len {
                        if input[pos] == b'*' && input[pos + 1] == b'/' {
                            pos += 2;
                            break;
                        }
                        pos += 1;
                    }
                }
                _ => {
                    // Not a comment
                    break;
                }
            }
        } else {
            // Not whitespace or comment
            break;
        }
    }

    pos
}

#[inline]
fn is_hex_digit(ch: u8) -> bool {
    (CHARACTER_TYPE[ch as usize] & 3) == 2 || (CHARACTER_TYPE[ch as usize] & 3) == 3
}

#[inline]
fn is_json_ok(ch: u8) -> bool {
    (CHARACTER_TYPE_OK[ch as usize] & 4) != 0
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_null_serialization() {
        // Create JSONB with null value
        let mut jsonb = Jsonb::new(10, None);
        jsonb.data.push(ElementType::NULL as u8);

        // Test serialization
        let json_str = jsonb.to_string().unwrap();
        assert_eq!(json_str, "null");

        // Test round-trip
        let reparsed = Jsonb::from_str("null").unwrap();
        assert_eq!(reparsed.data[0] as u8, ElementType::NULL as u8);
    }

    #[test]
    fn test_boolean_serialization() {
        // True
        let mut jsonb_true = Jsonb::new(10, None);
        jsonb_true.data.push(ElementType::TRUE as u8);
        assert_eq!(jsonb_true.to_string().unwrap(), "true");

        // False
        let mut jsonb_false = Jsonb::new(10, None);
        jsonb_false.data.push(ElementType::FALSE as u8);
        assert_eq!(jsonb_false.to_string().unwrap(), "false");

        // Round-trip
        let true_parsed = Jsonb::from_str("true").unwrap();
        assert_eq!(true_parsed.data[0] as u8, ElementType::TRUE as u8);

        let false_parsed = Jsonb::from_str("false").unwrap();
        assert_eq!(false_parsed.data[0] as u8, ElementType::FALSE as u8);
    }

    #[test]
    fn test_integer_serialization() {
        // Standard integer
        let parsed = Jsonb::from_str("42").unwrap();
        assert_eq!(parsed.to_string().unwrap(), "42");

        // Negative integer
        let parsed = Jsonb::from_str("-123").unwrap();
        assert_eq!(parsed.to_string().unwrap(), "-123");

        // Zero
        let parsed = Jsonb::from_str("0").unwrap();
        assert_eq!(parsed.to_string().unwrap(), "0");

        // Verify correct type
        let header = JsonbHeader::from_slice(0, &parsed.data).unwrap().0;
        assert!(matches!(header.0, ElementType::INT));
    }

    #[test]
    fn test_json5_integer_serialization() {
        // Hexadecimal notation
        let parsed = Jsonb::from_str("0x1A").unwrap();
        assert_eq!(parsed.to_string().unwrap(), "26"); // Should convert to decimal

        // Positive sign (JSON5)
        let parsed = Jsonb::from_str("+42").unwrap();
        assert_eq!(parsed.to_string().unwrap(), "42");

        // Negative hexadecimal
        let parsed = Jsonb::from_str("-0xFF").unwrap();
        assert_eq!(parsed.to_string().unwrap(), "-255");

        // Verify correct type
        let header = JsonbHeader::from_slice(0, &parsed.data).unwrap().0;
        assert!(matches!(header.0, ElementType::INT5));
    }

    #[test]
    fn test_float_serialization() {
        // Standard float
        let parsed = Jsonb::from_str("3.14159").unwrap();
        assert_eq!(parsed.to_string().unwrap(), "3.14159");

        // Negative float
        let parsed = Jsonb::from_str("-2.718").unwrap();
        assert_eq!(parsed.to_string().unwrap(), "-2.718");

        // Scientific notation
        let parsed = Jsonb::from_str("6.022e23").unwrap();
        assert_eq!(parsed.to_string().unwrap(), "6.022e23");

        // Verify correct type
        let header = JsonbHeader::from_slice(0, &parsed.data).unwrap().0;
        assert!(matches!(header.0, ElementType::FLOAT));
    }

    #[test]
    fn test_json5_float_serialization() {
        // Leading decimal point
        let parsed = Jsonb::from_str(".123").unwrap();
        assert_eq!(parsed.to_string().unwrap(), "0.123");

        // Trailing decimal point
        let parsed = Jsonb::from_str("42.").unwrap();
        assert_eq!(parsed.to_string().unwrap(), "42.0");

        // Plus sign in exponent
        let parsed = Jsonb::from_str("1.5e+10").unwrap();
        assert_eq!(parsed.to_string().unwrap(), "1.5e+10");

        // Infinity
        let parsed = Jsonb::from_str("Infinity").unwrap();
        assert_eq!(parsed.to_string().unwrap(), "9e999");

        // Negative Infinity
        let parsed = Jsonb::from_str("-Infinity").unwrap();
        assert_eq!(parsed.to_string().unwrap(), "-9e999");

        // Verify correct type
        let header = JsonbHeader::from_slice(0, &parsed.data).unwrap().0;
        assert!(matches!(header.0, ElementType::FLOAT5));
    }

    #[test]
    fn test_string_serialization() {
        // Simple string
        let parsed = Jsonb::from_str(r#""hello world""#).unwrap();
        assert_eq!(parsed.to_string().unwrap(), r#""hello world""#);

        // String with escaped characters
        let parsed = Jsonb::from_str(r#""hello\nworld""#).unwrap();
        assert_eq!(parsed.to_string().unwrap(), r#""hello\nworld""#);

        // Unicode escape
        let parsed = Jsonb::from_str(r#""hello\u0020world""#).unwrap();
        assert_eq!(parsed.to_string().unwrap(), r#""hello\u0020world""#);

        // Verify correct type
        let header = JsonbHeader::from_slice(0, &parsed.data).unwrap().0;
        assert!(matches!(header.0, ElementType::TEXTJ));
    }

    #[test]
    fn test_json5_string_serialization() {
        // Single quotes
        let parsed = Jsonb::from_str("'hello world'").unwrap();
        assert_eq!(parsed.to_string().unwrap(), r#""hello world""#);

        // Hex escape
        let parsed = Jsonb::from_str(r#"'\x41\x42\x43'"#).unwrap();
        assert_eq!(parsed.to_string().unwrap(), r#""\u0041\u0042\u0043""#);

        // Multiline string with line continuation
        let parsed = Jsonb::from_str(
            r#""hello \
world""#,
        )
        .unwrap();
        assert_eq!(parsed.to_string().unwrap(), r#""hello world""#);

        // Escaped single quote
        let parsed = Jsonb::from_str(r#"'Don\'t worry'"#).unwrap();
        assert_eq!(parsed.to_string().unwrap(), r#""Don't worry""#);

        // Verify correct type
        let header = JsonbHeader::from_slice(0, &parsed.data).unwrap().0;
        assert!(matches!(header.0, ElementType::TEXT5));
    }

    #[test]
    fn test_array_serialization() {
        // Empty array
        let parsed = Jsonb::from_str("[]").unwrap();
        assert_eq!(parsed.to_string().unwrap(), "[]");

        // Simple array
        let parsed = Jsonb::from_str("[1,2,3]").unwrap();
        assert_eq!(parsed.to_string().unwrap(), "[1,2,3]");

        // Nested array
        let parsed = Jsonb::from_str("[[1,2],[3,4]]").unwrap();
        assert_eq!(parsed.to_string().unwrap(), "[[1,2],[3,4]]");

        // Mixed types array
        let parsed = Jsonb::from_str(r#"[1,"text",true,null,{"key":"value"}]"#).unwrap();
        assert_eq!(
            parsed.to_string().unwrap(),
            r#"[1,"text",true,null,{"key":"value"}]"#
        );

        // Verify correct type
        let header = JsonbHeader::from_slice(0, &parsed.data).unwrap().0;
        assert!(matches!(header.0, ElementType::ARRAY));
    }

    #[test]
    fn test_json5_array_serialization() {
        // Trailing comma
        let parsed = Jsonb::from_str("[1,2,3,]").unwrap();
        assert_eq!(parsed.to_string().unwrap(), "[1,2,3]");

        // Comments in array
        let parsed = Jsonb::from_str("[1,/* comment */2,3]").unwrap();
        assert_eq!(parsed.to_string().unwrap(), "[1,2,3]");

        // Line comment in array
        let parsed = Jsonb::from_str("[1,// line comment\n2,3]").unwrap();
        assert_eq!(parsed.to_string().unwrap(), "[1,2,3]");
    }

    #[test]
    fn test_object_serialization() {
        // Empty object
        let parsed = Jsonb::from_str("{}").unwrap();
        assert_eq!(parsed.to_string().unwrap(), "{}");

        // Simple object
        let parsed = Jsonb::from_str(r#"{"key":"value"}"#).unwrap();
        assert_eq!(parsed.to_string().unwrap(), r#"{"key":"value"}"#);

        // Multiple properties
        let parsed = Jsonb::from_str(r#"{"a":1,"b":2,"c":3}"#).unwrap();
        assert_eq!(parsed.to_string().unwrap(), r#"{"a":1,"b":2,"c":3}"#);

        // Nested object
        let parsed = Jsonb::from_str(r#"{"outer":{"inner":"value"}}"#).unwrap();
        assert_eq!(
            parsed.to_string().unwrap(),
            r#"{"outer":{"inner":"value"}}"#
        );

        // Mixed values
        let parsed =
            Jsonb::from_str(r#"{"str":"text","num":42,"bool":true,"null":null,"arr":[1,2]}"#)
                .unwrap();
        assert_eq!(
            parsed.to_string().unwrap(),
            r#"{"str":"text","num":42,"bool":true,"null":null,"arr":[1,2]}"#
        );

        // Verify correct type
        let header = JsonbHeader::from_slice(0, &parsed.data).unwrap().0;
        assert!(matches!(header.0, ElementType::OBJECT));
    }

    #[test]
    fn test_json5_object_serialization() {
        // Unquoted keys
        let parsed = Jsonb::from_str("{key:\"value\"}").unwrap();
        assert_eq!(parsed.to_string().unwrap(), r#"{"key":"value"}"#);

        // Trailing comma
        let parsed = Jsonb::from_str(r#"{"a":1,"b":2,}"#).unwrap();
        assert_eq!(parsed.to_string().unwrap(), r#"{"a":1,"b":2}"#);

        // Comments in object
        let parsed = Jsonb::from_str(r#"{"a":1,/*comment*/"b":2}"#).unwrap();
        assert_eq!(parsed.to_string().unwrap(), r#"{"a":1,"b":2}"#);

        // Single quotes for keys and values
        let parsed = Jsonb::from_str("{'a':'value'}").unwrap();
        assert_eq!(parsed.to_string().unwrap(), r#"{"a":"value"}"#);
    }

    #[test]
    fn test_complex_json() {
        let complex_json = r#"{
            "string": "Hello, world!",
            "number": 42,
            "float": 3.14159,
            "boolean": true,
            "null": null,
            "array": [1, 2, 3, "text", {"nested": "object"}],
            "object": {
                "key1": "value1",
                "key2": [4, 5, 6],
                "key3": {
                    "nested": true
                }
            }
        }"#;

        let parsed = Jsonb::from_str(complex_json).unwrap();
        // Round-trip test
        let reparsed = Jsonb::from_str(&parsed.to_string().unwrap()).unwrap();
        assert_eq!(parsed.to_string().unwrap(), reparsed.to_string().unwrap());
    }

    #[test]
    fn test_error_handling() {
        // Invalid JSON syntax
        assert!(Jsonb::from_str("{").is_err());
        assert!(Jsonb::from_str("[").is_err());
        assert!(Jsonb::from_str("}").is_err());
        assert!(Jsonb::from_str("]").is_err());

        assert!(Jsonb::from_str(r#"{"a":"55,"b":72}"#).is_err());

        assert!(Jsonb::from_str(r#"{"a":"55",,"b":72}"#).is_err());

        // Unclosed string
        assert!(Jsonb::from_str(r#"{"key":"value"#).is_err());

        // Invalid number format
        assert!(Jsonb::from_str("01234").is_err()); // Leading zero not allowed in JSON

        // Invalid escape sequence
        assert!(Jsonb::from_str(r#""\z""#).is_err());

        // Missing colon in object
        assert!(Jsonb::from_str(r#"{"key" "value"}"#).is_err());

        // Trailing characters
        assert!(Jsonb::from_str(r#"{"key":"value"} extra"#).is_err());
    }

    #[test]
    fn test_depth_limit() {
        // Create a JSON string that exceeds MAX_JSON_DEPTH
        let mut deep_json = String::from("[");
        for _ in 0..MAX_JSON_DEPTH + 1 {
            deep_json.push_str("[");
        }
        for _ in 0..MAX_JSON_DEPTH + 1 {
            deep_json.push_str("]");
        }
        deep_json.push_str("]");

        // Should fail due to exceeding depth limit
        assert!(Jsonb::from_str(&deep_json).is_err());
    }

    #[test]
    fn test_header_encoding() {
        // Small payload (fits in 4 bits)
        let header = JsonbHeader::new(ElementType::TEXT, 5);
        let bytes = header.into_bytes().as_bytes().to_vec();
        assert_eq!(bytes[0], (5 << 4) | (ElementType::TEXT as u8));

        // Medium payload (8-bit)
        let header = JsonbHeader::new(ElementType::TEXT, 200);
        let bytes = header.into_bytes().as_bytes().to_vec();
        assert_eq!(
            bytes[0],
            (SIZE_MARKER_8BIT << 4) | (ElementType::TEXT as u8)
        );
        assert_eq!(bytes[1], 200);

        // Large payload (16-bit)
        let header = JsonbHeader::new(ElementType::TEXT, 40000);
        let bytes = header.into_bytes().as_bytes().to_vec();
        assert_eq!(
            bytes[0],
            (SIZE_MARKER_16BIT << 4) | (ElementType::TEXT as u8)
        );
        assert_eq!(bytes[1], (40000 >> 8) as u8);
        assert_eq!(bytes[2], (40000 & 0xFF) as u8);

        // Extra large payload (32-bit)
        let header = JsonbHeader::new(ElementType::TEXT, 70000);
        let bytes = header.into_bytes().as_bytes().to_vec();
        assert_eq!(
            bytes[0],
            (SIZE_MARKER_32BIT << 4) | (ElementType::TEXT as u8)
        );
        assert_eq!(bytes[1], (70000 >> 24) as u8);
        assert_eq!(bytes[2], ((70000 >> 16) & 0xFF) as u8);
        assert_eq!(bytes[3], ((70000 >> 8) & 0xFF) as u8);
        assert_eq!(bytes[4], (70000 & 0xFF) as u8);
    }

    #[test]
    fn test_header_decoding() {
        // Create sample data with various headers
        let data = vec![
            (5 << 4) | (ElementType::TEXT as u8),
            (SIZE_MARKER_8BIT << 4) | (ElementType::ARRAY as u8),
            150,
            (SIZE_MARKER_16BIT << 4) | (ElementType::OBJECT as u8),
            0x98,
            0x68,
        ];

        // Parse and verify each header
        let (header1, offset1) = JsonbHeader::from_slice(0, &data).unwrap();
        assert_eq!(offset1, 1);
        assert_eq!(header1.0, ElementType::TEXT);
        assert_eq!(header1.1, 5);

        let (header2, offset2) = JsonbHeader::from_slice(1, &data).unwrap();
        assert_eq!(offset2, 2);
        assert_eq!(header2.0, ElementType::ARRAY);
        assert_eq!(header2.1, 150);

        let (header3, offset3) = JsonbHeader::from_slice(3, &data).unwrap();
        assert_eq!(offset3, 3);
        assert_eq!(header3.0, ElementType::OBJECT);
        assert_eq!(header3.1, 0x9868); // 39000
    }

    #[test]
    fn test_unicode_escapes() {
        // Basic unicode escape
        let parsed = Jsonb::from_str(r#""\u00A9""#).unwrap(); // Copyright symbol
        assert_eq!(parsed.to_string().unwrap(), r#""\u00A9""#);

        // Non-BMP character (surrogate pair)
        let parsed = Jsonb::from_str(r#""\uD83D\uDE00""#).unwrap(); // Smiley emoji
        assert_eq!(parsed.to_string().unwrap(), r#""\uD83D\uDE00""#);
    }

    #[test]
    fn test_json5_comments() {
        // Line comments
        let parsed = Jsonb::from_str(
            r#"{
            // This is a line comment
            "key": "value"
        }"#,
        )
        .unwrap();
        assert_eq!(parsed.to_string().unwrap(), r#"{"key":"value"}"#);

        // Block comments
        let parsed = Jsonb::from_str(
            r#"{
            /* This is a
               block comment */
            "key": "value"
        }"#,
        )
        .unwrap();
        assert_eq!(parsed.to_string().unwrap(), r#"{"key":"value"}"#);

        // Comments inside array
        let parsed = Jsonb::from_str(
            r#"[1, // Comment
                                       2, /* Another comment */ 3]"#,
        )
        .unwrap();
        assert_eq!(parsed.to_string().unwrap(), "[1,2,3]");
    }

    #[test]
    fn test_whitespace_handling() {
        // Various whitespace patterns
        let json_with_whitespace = r#"
        {
            "key1"    :    "value1"   ,
             "key2": [   1,    2,    3   ]  ,
            "key3":   {
                "nested"   :   true
            }
        }
        "#;

        let parsed = Jsonb::from_str(json_with_whitespace).unwrap();
        assert_eq!(
            parsed.to_string().unwrap(),
            r#"{"key1":"value1","key2":[1,2,3],"key3":{"nested":true}}"#
        );
    }

    #[test]
    fn test_binary_roundtrip() {
        // Test that binary data can be round-tripped through the JSONB format
        let original = r#"{"test":"value","array":[1,2,3]}"#;
        let parsed = Jsonb::from_str(original).unwrap();
        let binary_data = parsed.data.clone();

        // Create a new Jsonb from the binary data
        let from_binary = Jsonb::new(0, Some(&binary_data));
        assert_eq!(from_binary.to_string().unwrap(), original);
    }

    #[test]
    fn test_large_json() {
        // Generate a large JSON with many elements
        let mut large_array = String::from("[");
        for i in 0..1000 {
            large_array.push_str(&format!("{}", i));
            if i < 999 {
                large_array.push_str(",");
            }
        }
        large_array.push_str("]");

        let parsed = Jsonb::from_str(&large_array).unwrap();
        assert!(parsed.to_string().unwrap().starts_with("[0,1,2,"));
        assert!(parsed.to_string().unwrap().ends_with("998,999]"));
    }

    #[test]
    fn test_jsonb_is_valid() {
        // Valid JSONB
        let jsonb = Jsonb::from_str(r#"{"test":"value"}"#).unwrap();
        assert!(jsonb.is_valid().is_ok());

        // Invalid JSONB (manually corrupted)
        let mut invalid = jsonb.data.clone();
        if !invalid.is_empty() {
            invalid[0] = 0xFF; // Invalid element type
            let jsonb = Jsonb::new(0, Some(&invalid));
            assert!(jsonb.is_valid().is_err());
        }
    }

    #[test]
    fn test_special_characters_in_strings() {
        // Test handling of various special characters
        let json = r#"{
            "escaped_quotes": "He said \"Hello\"",
            "backslashes": "C:\\Windows\\System32",
            "control_chars": "\b\f\n\r\t",
            "unicode": "\u00A9 2023"
        }"#;

        let parsed = Jsonb::from_str(json).unwrap();
        let result = parsed.to_string().unwrap();

        assert!(result.contains(r#""escaped_quotes":"He said \"Hello\"""#));
        assert!(result.contains(r#""backslashes":"C:\\Windows\\System32""#));
        assert!(result.contains(r#""control_chars":"\b\f\n\r\t""#));
        assert!(result.contains(r#""unicode":"\u00A9 2023""#));
    }
}