Merge 'core/btree: small refactoring + documentation tweaks' from Jussi Saurio

small follow up to https://github.com/tursodatabase/limbo/pull/539
contains:
- Variable renaming and comments to `btreecursor.insert_into_cell()`
- New utility methods `pagecontent.header_size()`,
`pagecontent.cell_pointer_array_size()`,
`pagecontent.unallocated_region_start()` and
`pagecontent.unallocated_region_size()` (see the sketch after this list)
- Refactor of `btreecursor.compute_free_space()` (plus comments and
variable renaming)
- Rename `pagecontent.cell_get_raw_pointer_region()` to
`pagecontent.cell_pointer_array_offset_and_size()` and remove its usage
in `btreecursor.defragment_page()`
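
The new helpers are thin arithmetic over the SQLite b-tree page header. For
orientation, here is a minimal self-contained sketch of how they fit together.
These are toy free functions, not the actual `pagecontent` methods: the header
sizes and 2-byte cell pointers follow the documented SQLite file format
(https://www.sqlite.org/fileformat.html), and `page_offset` stands in for the
page's offset field (100 on page 1, where the database file header precedes
the page header; 0 elsewhere).

    // Toy sketch of the new PageContent helpers; not the limbo implementation.
    const CELL_POINTER_SIZE_BYTES: usize = 2;

    #[allow(dead_code)]
    #[derive(Clone, Copy)]
    enum PageType {
        IndexInterior,
        TableInterior,
        IndexLeaf,
        TableLeaf,
    }

    // 8-byte header on leaf pages, 12 on interior pages (the extra 4 bytes
    // hold the rightmost child pointer).
    fn header_size(page_type: PageType) -> usize {
        match page_type {
            PageType::IndexInterior | PageType::TableInterior => 12,
            PageType::IndexLeaf | PageType::TableLeaf => 8,
        }
    }

    // One 2-byte pointer per cell, stored immediately after the page header.
    fn cell_pointer_array_offset_and_size(
        page_offset: usize,
        page_type: PageType,
        cell_count: usize,
    ) -> (usize, usize) {
        (
            page_offset + header_size(page_type),
            cell_count * CELL_POINTER_SIZE_BYTES,
        )
    }

    // The unallocated region sits between the end of the cell pointer array
    // and the start of the cell content area.
    fn unallocated_region_start(
        page_offset: usize,
        page_type: PageType,
        cell_count: usize,
    ) -> usize {
        let (start, size) = cell_pointer_array_offset_and_size(page_offset, page_type, cell_count);
        start + size
    }

    fn unallocated_region_size(
        page_offset: usize,
        page_type: PageType,
        cell_count: usize,
        cell_content_area: usize,
    ) -> usize {
        cell_content_area - unallocated_region_start(page_offset, page_type, cell_count)
    }

    fn main() {
        // Example: a table leaf page other than page 1 (page_offset = 0),
        // holding 3 cells, with the cell content area starting at byte 4000.
        let (start, size) = cell_pointer_array_offset_and_size(0, PageType::TableLeaf, 3);
        assert_eq!((start, size), (8, 6));
        assert_eq!(unallocated_region_start(0, PageType::TableLeaf, 3), 14);
        assert_eq!(unallocated_region_size(0, PageType::TableLeaf, 3, 4000), 3986);
    }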

Reviewed-by: Pere Diaz Bou <pere-altea@homail.com>

Closes #543
Pekka Enberg 2024-12-27 10:06:30 +02:00
commit 937779b8c0
2 changed files with 103 additions and 71 deletions

View file

@@ -742,7 +742,8 @@ impl BTreeCursor {
/// i.e. whether we need to balance the btree after the insert.
fn insert_into_cell(&self, page: &mut PageContent, payload: &[u8], cell_idx: usize) {
let free = self.compute_free_space(page, RefCell::borrow(&self.database_header));
let enough_space = payload.len() + 2 <= free as usize;
const CELL_POINTER_SIZE_BYTES: usize = 2;
let enough_space = payload.len() + CELL_POINTER_SIZE_BYTES <= free as usize;
if !enough_space {
// add to overflow cell
page.overflow_cells.push(OverflowCell {
@@ -753,27 +754,30 @@ impl BTreeCursor {
}
// TODO: insert into cell payload in internal page
let pc = self.allocate_cell_space(page, payload.len() as u16);
let new_cell_data_pointer = self.allocate_cell_space(page, payload.len() as u16);
let buf = page.as_ptr();
// copy data
buf[pc as usize..pc as usize + payload.len()].copy_from_slice(payload);
buf[new_cell_data_pointer as usize..new_cell_data_pointer as usize + payload.len()]
.copy_from_slice(payload);
// memmove(pIns+2, pIns, 2*(pPage->nCell - i));
let (pointer_area_pc_by_idx, _) = page.cell_get_raw_pointer_region();
let pointer_area_pc_by_idx = pointer_area_pc_by_idx + (2 * cell_idx);
let (cell_pointer_array_start, _) = page.cell_pointer_array_offset_and_size();
let cell_pointer_cur_idx = cell_pointer_array_start + (CELL_POINTER_SIZE_BYTES * cell_idx);
// move previous pointers forward and insert new pointer there
let n_cells_forward = 2 * (page.cell_count() - cell_idx);
if n_cells_forward > 0 {
// move existing pointers forward by CELL_POINTER_SIZE_BYTES...
let n_cells_forward = page.cell_count() - cell_idx;
let n_bytes_forward = CELL_POINTER_SIZE_BYTES * n_cells_forward;
if n_bytes_forward > 0 {
buf.copy_within(
pointer_area_pc_by_idx..pointer_area_pc_by_idx + n_cells_forward,
pointer_area_pc_by_idx + 2,
cell_pointer_cur_idx..cell_pointer_cur_idx + n_bytes_forward,
cell_pointer_cur_idx + CELL_POINTER_SIZE_BYTES,
);
}
page.write_u16(pointer_area_pc_by_idx - page.offset, pc);
// ...and insert new cell pointer at the current index
page.write_u16(cell_pointer_cur_idx - page.offset, new_cell_data_pointer);
// update first byte of content area
page.write_u16(PAGE_HEADER_OFFSET_CELL_CONTENT_AREA, pc);
// update first byte of content area (cell data always appended to the left, so cell content area pointer moves to point to the new cell data)
page.write_u16(PAGE_HEADER_OFFSET_CELL_CONTENT_AREA, new_cell_data_pointer);
// update cell count
let new_n_cells = (page.cell_count() + 1) as u16;
@@ -1228,7 +1232,7 @@ impl BTreeCursor {
if is_page_1 {
// Remove header from child and set offset to 0
let contents = child.get().contents.as_mut().unwrap();
let (cell_pointer_offset, _) = contents.cell_get_raw_pointer_region();
let (cell_pointer_offset, _) = contents.cell_pointer_array_offset_and_size();
// change cell pointers
for cell_idx in 0..contents.cell_count() {
let cell_pointer_offset = cell_pointer_offset + (2 * cell_idx) - offset;
@@ -1284,7 +1288,7 @@ impl BTreeCursor {
fn allocate_cell_space(&self, page_ref: &PageContent, amount: u16) -> u16 {
let amount = amount as usize;
let (cell_offset, _) = page_ref.cell_get_raw_pointer_region();
let (cell_offset, _) = page_ref.cell_pointer_array_offset_and_size();
let gap = cell_offset + 2 * page_ref.cell_count();
let mut top = page_ref.cell_content_area() as usize;
@@ -1326,10 +1330,7 @@ impl BTreeCursor {
// TODO: implement fast algorithm
let last_cell = usable_space - 4;
let first_cell = {
let (start, end) = cloned_page.cell_get_raw_pointer_region();
start + end
};
let first_cell = cloned_page.unallocated_region_start() as u64;
if cloned_page.cell_count() > 0 {
let page_type = page.page_type();
@@ -1411,10 +1412,12 @@ impl BTreeCursor {
#[allow(unused_assignments)]
fn compute_free_space(&self, page: &PageContent, db_header: Ref<DatabaseHeader>) -> u16 {
// TODO(pere): maybe free space is not calculated correctly with offset
let buf = page.as_ptr();
// Usable space, not the same as free space, simply means:
// space that is not reserved for extensions by sqlite. Usually reserved_space is 0.
let usable_space = (db_header.page_size - db_header.reserved_space as u16) as usize;
let mut first_byte_in_cell_content = page.cell_content_area();
let mut cell_content_area_start = page.cell_content_area();
// A zero value for the cell content area pointer is interpreted as 65536.
// See https://www.sqlite.org/fileformat.html
// The max page size for a sqlite database is 64kiB i.e. 65536 bytes.
@@ -1424,26 +1427,23 @@ impl BTreeCursor {
// 1. the page size is 64kiB
// 2. there are no cells on the page
// 3. there is no reserved space at the end of the page
if first_byte_in_cell_content == 0 {
first_byte_in_cell_content = u16::MAX;
if cell_content_area_start == 0 {
cell_content_area_start = u16::MAX;
}
let fragmented_free_bytes = page.num_frag_free_bytes();
let free_block_pointer = page.first_freeblock();
let ncell = page.cell_count();
// 8 + 4 == header end
let child_pointer_size = if page.is_leaf() { 0 } else { 4 };
let first_cell = (page.offset + 8 + child_pointer_size + (2 * ncell)) as u16;
// The amount of free space is the sum of:
// 1. 0..first_byte_in_cell_content (everything to the left of the cell content area pointer is unused free space)
// 2. fragmented_free_bytes.
let mut nfree = fragmented_free_bytes as usize + first_byte_in_cell_content as usize;
// #1. the size of the unallocated region
// #2. fragments (isolated 1-3 byte chunks of free space within the cell content area)
// #3. freeblocks (linked list of blocks of at least 4 bytes within the cell content area that are not in use due to e.g. deletions)
let mut pc = free_block_pointer as usize;
if pc > 0 {
if pc < first_byte_in_cell_content as usize {
let mut free_space_bytes =
page.unallocated_region_size() as usize + page.num_frag_free_bytes() as usize;
// #3 is computed by iterating over the freeblocks linked list
let mut cur_freeblock_ptr = page.first_freeblock() as usize;
let page_buf = page.as_ptr();
if cur_freeblock_ptr > 0 {
if cur_freeblock_ptr < cell_content_area_start as usize {
// Freeblocks exist in the cell content area e.g. after deletions
// They should never exist in the unused area of the page.
todo!("corrupted page");
@@ -1453,30 +1453,47 @@ impl BTreeCursor {
let mut size = 0;
loop {
// TODO: check corruption icellast
next = u16::from_be_bytes(buf[pc..pc + 2].try_into().unwrap()) as usize;
size = u16::from_be_bytes(buf[pc + 2..pc + 4].try_into().unwrap()) as usize;
nfree += size;
if next <= pc + size + 3 {
next = u16::from_be_bytes(
page_buf[cur_freeblock_ptr..cur_freeblock_ptr + 2]
.try_into()
.unwrap(),
) as usize; // first 2 bytes in freeblock = next freeblock pointer
size = u16::from_be_bytes(
page_buf[cur_freeblock_ptr + 2..cur_freeblock_ptr + 4]
.try_into()
.unwrap(),
) as usize; // next 2 bytes in freeblock = size of current freeblock
free_space_bytes += size;
// Freeblocks are in order from left to right on the page,
// so the next pointer should be > the current pointer + its size, or 0 if no next block exists.
if next <= cur_freeblock_ptr + size + 3 {
break;
}
pc = next;
cur_freeblock_ptr = next;
}
if next > 0 {
todo!("corrupted page ascending order");
}
// Next should always be 0 (NULL) at this point since we have reached the end of the freeblocks linked list
assert!(
next == 0,
"corrupted page: freeblocks list not in ascending order"
);
if pc + size > usable_space {
todo!("corrupted page last freeblock extends last page end");
}
assert!(
cur_freeblock_ptr + size <= usable_space,
"corrupted page: last freeblock extends last page end"
);
}
assert!(
free_space_bytes <= usable_space,
"corrupted page: free space is greater than usable space"
);
// if( nFree>usableSize || nFree<iCellFirst ){
// return SQLITE_CORRUPT_PAGE(pPage);
// }
// don't count header and cell pointers?
nfree -= first_cell as usize;
nfree as u16
free_space_bytes as u16
}
/// Fill in the cell payload with the record.

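For reference, the quantity that the refactored `compute_free_space()` tracks
is the sum of the three regions named in its comments: the unallocated region,
the fragment bytes, and the freeblock list. Below is a worked sketch of that
accounting over a raw page buffer. It is a hypothetical standalone
`free_space()` function (illustrative name), assuming a leaf page that is not
page 1 and skipping the reserved-space and corruption handling the real method
performs:

    // Free space = unallocated region + fragmented bytes + freeblocks.
    // Header offsets follow the SQLite b-tree page header layout.
    fn free_space(page: &[u8], usable_space: usize) -> usize {
        let read_u16 = |off: usize| u16::from_be_bytes([page[off], page[off + 1]]) as usize;

        let first_freeblock = read_u16(1);  // bytes 1..3: first freeblock pointer
        let cell_count = read_u16(3);       // bytes 3..5: number of cells
        let mut content_area = read_u16(5); // bytes 5..7: start of cell content area
        let frag_bytes = page[7] as usize;  // byte 7: fragmented free bytes
        if content_area == 0 {
            content_area = 65536; // a zero pointer means 65536
        }

        // #1: unallocated region between the cell pointer array and the content
        // area (8-byte leaf header plus one 2-byte pointer per cell).
        let unallocated = content_area - (8 + 2 * cell_count);

        // #2: fragments, plus #3: walk the freeblock linked list.
        let mut free = unallocated + frag_bytes;
        let mut ptr = first_freeblock;
        while ptr != 0 {
            let next = read_u16(ptr);     // first 2 bytes: next freeblock pointer
            let size = read_u16(ptr + 2); // next 2 bytes: size of this freeblock
            free += size;
            ptr = next;
        }
        debug_assert!(free <= usable_space);
        free
    }

    fn main() {
        // 4096-byte table leaf page: 2 cells, content area at 4000,
        // 1 fragmented byte, and a single 20-byte freeblock at offset 3900.
        let mut page = vec![0u8; 4096];
        page[0] = 13; // page type: table leaf
        page[1..3].copy_from_slice(&3900u16.to_be_bytes());
        page[3..5].copy_from_slice(&2u16.to_be_bytes());
        page[5..7].copy_from_slice(&4000u16.to_be_bytes());
        page[7] = 1;
        page[3900..3902].copy_from_slice(&0u16.to_be_bytes()); // no next freeblock
        page[3902..3904].copy_from_slice(&20u16.to_be_bytes()); // freeblock size
        // unallocated (4000 - 12) + fragments (1) + freeblock (20) = 4009
        assert_eq!(free_space(&page, 4096), 4009);
    }

Unlike this sketch, the real method also checks that freeblocks appear in
ascending order and stay within the usable area, treating violations as page
corruption.
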
View file

@@ -485,10 +485,29 @@ impl PageContent {
self.read_u16(1)
}
/// The number of cells on the page.
pub fn cell_count(&self) -> usize {
self.read_u16(3) as usize
}
/// The size of the cell pointer array in bytes.
/// 2 bytes per cell pointer
pub fn cell_pointer_array_size(&self) -> usize {
const CELL_POINTER_SIZE_BYTES: usize = 2;
self.cell_count() * CELL_POINTER_SIZE_BYTES
}
/// The start of the unallocated region.
/// Effectively: the offset after the page header + the cell pointer array.
pub fn unallocated_region_start(&self) -> usize {
let (cell_ptr_array_start, cell_ptr_array_size) = self.cell_pointer_array_offset_and_size();
cell_ptr_array_start + cell_ptr_array_size
}
pub fn unallocated_region_size(&self) -> usize {
self.cell_content_area() as usize - self.unallocated_region_start()
}
/// The start of the cell content area.
/// SQLite strives to place cells as far toward the end of the b-tree page as it can,
/// in order to leave space for future growth of the cell pointer array.
@@ -497,6 +516,17 @@ impl PageContent {
self.read_u16(5)
}
/// The size of the page header in bytes.
/// 8 bytes for leaf pages, 12 bytes for interior pages (due to storing rightmost child pointer)
pub fn header_size(&self) -> usize {
match self.page_type() {
PageType::IndexInterior => 12,
PageType::TableInterior => 12,
PageType::IndexLeaf => 8,
PageType::TableLeaf => 8,
}
}
/// The total number of bytes in all fragments is stored in the fifth field of the b-tree page header.
/// Fragments are isolated groups of 1, 2, or 3 unused bytes within the cell content area.
pub fn num_frag_free_bytes(&self) -> u8 {
@@ -526,12 +556,7 @@ impl PageContent {
let ncells = self.cell_count();
// the page header is 12 bytes for interior pages, 8 bytes for leaf pages
// this is because the 4 last bytes in the interior page's header are used for the rightmost pointer.
let cell_pointer_array_start = match self.page_type() {
PageType::IndexInterior => 12,
PageType::TableInterior => 12,
PageType::IndexLeaf => 8,
PageType::TableLeaf => 8,
};
let cell_pointer_array_start = self.header_size();
assert!(idx < ncells, "cell_get: idx out of bounds");
let cell_pointer = cell_pointer_array_start + (idx * 2);
let cell_pointer = self.read_u16(cell_pointer) as usize;
@@ -552,14 +577,9 @@ impl PageContent {
/// The cell pointers are arranged in key order with:
/// - left-most cell (the cell with the smallest key) first and
/// - the right-most cell (the cell with the largest key) last.
pub fn cell_get_raw_pointer_region(&self) -> (usize, usize) {
let cell_start = match self.page_type() {
PageType::IndexInterior => 12,
PageType::TableInterior => 12,
PageType::IndexLeaf => 8,
PageType::TableLeaf => 8,
};
(self.offset + cell_start, self.cell_count() * 2)
pub fn cell_pointer_array_offset_and_size(&self) -> (usize, usize) {
let header_size = self.header_size();
(self.offset + header_size, self.cell_pointer_array_size())
}
/* Get region of a cell's payload */
@@ -572,12 +592,7 @@
) -> (usize, usize) {
let buf = self.as_ptr();
let ncells = self.cell_count();
let cell_pointer_array_start = match self.page_type() {
PageType::IndexInterior => 12,
PageType::TableInterior => 12,
PageType::IndexLeaf => 8,
PageType::TableLeaf => 8,
};
let cell_pointer_array_start = self.header_size();
assert!(idx < ncells, "cell_get: idx out of bounds");
let cell_pointer = cell_pointer_array_start + (idx * 2); // pointers are 2 bytes each
let cell_pointer = self.read_u16(cell_pointer) as usize;
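
The last two hunks above share the same cell lookup, now routed through
`header_size()`: index into the cell pointer array and read the 2-byte
big-endian pointer to the cell's content. A toy version of that lookup with an
illustrative name, assuming `read_u16()` is relative to the page's offset
(made explicit here as `page_offset`) and omitting the bounds checks:

    // Resolve a cell index to the byte offset of its content within the page.
    fn cell_content_offset(page: &[u8], page_offset: usize, is_leaf: bool, idx: usize) -> usize {
        let header_size = if is_leaf { 8 } else { 12 };
        // Cell pointers are 2 bytes each and start right after the page header.
        let pointer_offset = page_offset + header_size + idx * 2;
        u16::from_be_bytes([page[pointer_offset], page[pointer_offset + 1]]) as usize
    }

    fn main() {
        // Leaf page (not page 1) whose first cell pointer, stored at bytes
        // 8..10, says the cell's content starts at byte 4050.
        let mut page = vec![0u8; 4096];
        page[8..10].copy_from_slice(&4050u16.to_be_bytes());
        assert_eq!(cell_content_offset(&page, 0, true, 0), 4050);
    }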