mirror of
https://github.com/tursodatabase/limbo.git
synced 2025-08-04 01:58:16 +00:00
Merge 'core/btree: small refactoring + documentation tweaks' from Jussi Saurio
Small follow-up to https://github.com/tursodatabase/limbo/pull/539. Contains:
- Variable renaming and comments in `btreecursor.insert_into_cell()`
- New utility methods `pagecontent.header_size()`, `pagecontent.cell_pointer_array_size()`, `pagecontent.unallocated_region_start()` and `pagecontent.unallocated_region_size()`
- Refactor of `btreecursor.compute_free_space()` (plus comments and variable renaming)
- Rename of `pagecontent.cell_get_raw_pointer_region()` to `pagecontent.cell_pointer_array_offset_and_size()`, and removal of its usage in `btreecursor.defragment_page()`

Reviewed-by: Pere Diaz Bou <pere-altea@homail.com>
Closes #543
This commit is contained in:
commit
937779b8c0
2 changed files with 103 additions and 71 deletions
|
@ -742,7 +742,8 @@ impl BTreeCursor {
|
|||
/// i.e. whether we need to balance the btree after the insert.
|
||||
fn insert_into_cell(&self, page: &mut PageContent, payload: &[u8], cell_idx: usize) {
|
||||
let free = self.compute_free_space(page, RefCell::borrow(&self.database_header));
|
||||
let enough_space = payload.len() + 2 <= free as usize;
|
||||
const CELL_POINTER_SIZE_BYTES: usize = 2;
|
||||
let enough_space = payload.len() + CELL_POINTER_SIZE_BYTES <= free as usize;
|
||||
if !enough_space {
|
||||
// add to overflow cell
|
||||
page.overflow_cells.push(OverflowCell {
|
||||
|
@ -753,27 +754,30 @@ impl BTreeCursor {
|
|||
}
|
||||
|
||||
// TODO: insert into cell payload in internal page
|
||||
let pc = self.allocate_cell_space(page, payload.len() as u16);
|
||||
let new_cell_data_pointer = self.allocate_cell_space(page, payload.len() as u16);
|
||||
let buf = page.as_ptr();
|
||||
|
||||
// copy data
|
||||
buf[pc as usize..pc as usize + payload.len()].copy_from_slice(payload);
|
||||
buf[new_cell_data_pointer as usize..new_cell_data_pointer as usize + payload.len()]
|
||||
.copy_from_slice(payload);
|
||||
// memmove(pIns+2, pIns, 2*(pPage->nCell - i));
|
||||
let (pointer_area_pc_by_idx, _) = page.cell_get_raw_pointer_region();
|
||||
let pointer_area_pc_by_idx = pointer_area_pc_by_idx + (2 * cell_idx);
|
||||
let (cell_pointer_array_start, _) = page.cell_pointer_array_offset_and_size();
|
||||
let cell_pointer_cur_idx = cell_pointer_array_start + (CELL_POINTER_SIZE_BYTES * cell_idx);
|
||||
|
||||
// move previous pointers forward and insert new pointer there
|
||||
let n_cells_forward = 2 * (page.cell_count() - cell_idx);
|
||||
if n_cells_forward > 0 {
|
||||
// move existing pointers forward by CELL_POINTER_SIZE_BYTES...
|
||||
let n_cells_forward = page.cell_count() - cell_idx;
|
||||
let n_bytes_forward = CELL_POINTER_SIZE_BYTES * n_cells_forward;
|
||||
if n_bytes_forward > 0 {
|
||||
buf.copy_within(
|
||||
pointer_area_pc_by_idx..pointer_area_pc_by_idx + n_cells_forward,
|
||||
pointer_area_pc_by_idx + 2,
|
||||
cell_pointer_cur_idx..cell_pointer_cur_idx + n_bytes_forward,
|
||||
cell_pointer_cur_idx + CELL_POINTER_SIZE_BYTES,
|
||||
);
|
||||
}
|
||||
page.write_u16(pointer_area_pc_by_idx - page.offset, pc);
|
||||
// ...and insert new cell pointer at the current index
|
||||
page.write_u16(cell_pointer_cur_idx - page.offset, new_cell_data_pointer);
|
||||
|
||||
// update first byte of content area
|
||||
page.write_u16(PAGE_HEADER_OFFSET_CELL_CONTENT_AREA, pc);
|
||||
// update first byte of content area (cell data always appended to the left, so cell content area pointer moves to point to the new cell data)
|
||||
page.write_u16(PAGE_HEADER_OFFSET_CELL_CONTENT_AREA, new_cell_data_pointer);
|
||||
|
||||
// update cell count
|
||||
let new_n_cells = (page.cell_count() + 1) as u16;
|
||||
|
@ -1228,7 +1232,7 @@ impl BTreeCursor {
|
|||
if is_page_1 {
|
||||
// Remove header from child and set offset to 0
|
||||
let contents = child.get().contents.as_mut().unwrap();
|
||||
let (cell_pointer_offset, _) = contents.cell_get_raw_pointer_region();
|
||||
let (cell_pointer_offset, _) = contents.cell_pointer_array_offset_and_size();
|
||||
// change cell pointers
|
||||
for cell_idx in 0..contents.cell_count() {
|
||||
let cell_pointer_offset = cell_pointer_offset + (2 * cell_idx) - offset;
|
||||
|
@ -1284,7 +1288,7 @@ impl BTreeCursor {
|
|||
fn allocate_cell_space(&self, page_ref: &PageContent, amount: u16) -> u16 {
|
||||
let amount = amount as usize;
|
||||
|
||||
let (cell_offset, _) = page_ref.cell_get_raw_pointer_region();
|
||||
let (cell_offset, _) = page_ref.cell_pointer_array_offset_and_size();
|
||||
let gap = cell_offset + 2 * page_ref.cell_count();
|
||||
let mut top = page_ref.cell_content_area() as usize;
|
||||
|
||||
|
@ -1326,10 +1330,7 @@ impl BTreeCursor {
|
|||
// TODO: implement fast algorithm
|
||||
|
||||
let last_cell = usable_space - 4;
|
||||
let first_cell = {
|
||||
let (start, end) = cloned_page.cell_get_raw_pointer_region();
|
||||
start + end
|
||||
};
|
||||
let first_cell = cloned_page.unallocated_region_start() as u64;
|
||||
|
||||
if cloned_page.cell_count() > 0 {
|
||||
let page_type = page.page_type();
|
||||
|
@ -1411,10 +1412,12 @@ impl BTreeCursor {
|
|||
#[allow(unused_assignments)]
|
||||
fn compute_free_space(&self, page: &PageContent, db_header: Ref<DatabaseHeader>) -> u16 {
|
||||
// TODO(pere): maybe free space is not calculated correctly with offset
|
||||
let buf = page.as_ptr();
|
||||
|
||||
// Usable space, not the same as free space, simply means:
|
||||
// space that is not reserved for extensions by sqlite. Usually reserved_space is 0.
|
||||
let usable_space = (db_header.page_size - db_header.reserved_space as u16) as usize;
|
||||
let mut first_byte_in_cell_content = page.cell_content_area();
|
||||
|
||||
let mut cell_content_area_start = page.cell_content_area();
|
||||
// A zero value for the cell content area pointer is interpreted as 65536.
|
||||
// See https://www.sqlite.org/fileformat.html
|
||||
// The max page size for a sqlite database is 64kiB i.e. 65536 bytes.
|
||||
|
@ -1424,26 +1427,23 @@ impl BTreeCursor {
|
|||
// 1. the page size is 64kiB
|
||||
// 2. there are no cells on the page
|
||||
// 3. there is no reserved space at the end of the page
|
||||
if first_byte_in_cell_content == 0 {
|
||||
first_byte_in_cell_content = u16::MAX;
|
||||
if cell_content_area_start == 0 {
|
||||
cell_content_area_start = u16::MAX;
|
||||
}
|
||||
|
||||
let fragmented_free_bytes = page.num_frag_free_bytes();
|
||||
let free_block_pointer = page.first_freeblock();
|
||||
let ncell = page.cell_count();
|
||||
|
||||
// 8 + 4 == header end
|
||||
let child_pointer_size = if page.is_leaf() { 0 } else { 4 };
|
||||
let first_cell = (page.offset + 8 + child_pointer_size + (2 * ncell)) as u16;
|
||||
|
||||
// The amount of free space is the sum of:
|
||||
// 1. 0..first_byte_in_cell_content (everything to the left of the cell content area pointer is unused free space)
|
||||
// 2. fragmented_free_bytes.
|
||||
let mut nfree = fragmented_free_bytes as usize + first_byte_in_cell_content as usize;
|
||||
// #1. the size of the unallocated region
|
||||
// #2. fragments (isolated 1-3 byte chunks of free space within the cell content area)
|
||||
// #3. freeblocks (linked list of blocks of at least 4 bytes within the cell content area that are not in use due to e.g. deletions)
|
||||
|
||||
let mut pc = free_block_pointer as usize;
|
||||
if pc > 0 {
|
||||
if pc < first_byte_in_cell_content as usize {
|
||||
let mut free_space_bytes =
|
||||
page.unallocated_region_size() as usize + page.num_frag_free_bytes() as usize;
|
||||
|
||||
// #3 is computed by iterating over the freeblocks linked list
|
||||
let mut cur_freeblock_ptr = page.first_freeblock() as usize;
|
||||
let page_buf = page.as_ptr();
|
||||
if cur_freeblock_ptr > 0 {
|
||||
if cur_freeblock_ptr < cell_content_area_start as usize {
|
||||
// Freeblocks exist in the cell content area e.g. after deletions
|
||||
// They should never exist in the unused area of the page.
|
||||
todo!("corrupted page");
|
||||
|
@ -1453,30 +1453,47 @@ impl BTreeCursor {
|
|||
let mut size = 0;
|
||||
loop {
|
||||
// TODO: check corruption icellast
|
||||
next = u16::from_be_bytes(buf[pc..pc + 2].try_into().unwrap()) as usize;
|
||||
size = u16::from_be_bytes(buf[pc + 2..pc + 4].try_into().unwrap()) as usize;
|
||||
nfree += size;
|
||||
if next <= pc + size + 3 {
|
||||
next = u16::from_be_bytes(
|
||||
page_buf[cur_freeblock_ptr..cur_freeblock_ptr + 2]
|
||||
.try_into()
|
||||
.unwrap(),
|
||||
) as usize; // first 2 bytes in freeblock = next freeblock pointer
|
||||
size = u16::from_be_bytes(
|
||||
page_buf[cur_freeblock_ptr + 2..cur_freeblock_ptr + 4]
|
||||
.try_into()
|
||||
.unwrap(),
|
||||
) as usize; // next 2 bytes in freeblock = size of current freeblock
|
||||
free_space_bytes += size;
|
||||
// Freeblocks are in order from left to right on the page,
|
||||
// so the next pointer should be > current pointer + its size, or 0 if no next block exists.
|
||||
if next <= cur_freeblock_ptr + size + 3 {
|
||||
break;
|
||||
}
|
||||
pc = next;
|
||||
cur_freeblock_ptr = next;
|
||||
}
|
||||
|
||||
if next > 0 {
|
||||
todo!("corrupted page ascending order");
|
||||
}
|
||||
// Next should always be 0 (NULL) at this point since we have reached the end of the freeblocks linked list
|
||||
assert!(
|
||||
next == 0,
|
||||
"corrupted page: freeblocks list not in ascending order"
|
||||
);
|
||||
|
||||
if pc + size > usable_space {
|
||||
todo!("corrupted page last freeblock extends last page end");
|
||||
}
|
||||
assert!(
|
||||
cur_freeblock_ptr + size <= usable_space,
|
||||
"corrupted page: last freeblock extends last page end"
|
||||
);
|
||||
}
|
||||
|
||||
assert!(
|
||||
free_space_bytes <= usable_space,
|
||||
"corrupted page: free space is greater than usable space"
|
||||
);
|
||||
|
||||
// if( nFree>usableSize || nFree<iCellFirst ){
|
||||
// return SQLITE_CORRUPT_PAGE(pPage);
|
||||
// }
|
||||
// don't count header and cell pointers?
|
||||
nfree -= first_cell as usize;
|
||||
nfree as u16
|
||||
|
||||
free_space_bytes as u16
|
||||
}
|
||||
|
||||
/// Fill in the cell payload with the record.
|
||||
|
|
|
@ -485,10 +485,29 @@ impl PageContent {
|
|||
self.read_u16(1)
|
||||
}
|
||||
|
||||
/// The number of cells on the page.
|
||||
pub fn cell_count(&self) -> usize {
|
||||
self.read_u16(3) as usize
|
||||
}
|
||||
|
||||
/// The size of the cell pointer array in bytes.
|
||||
/// 2 bytes per cell pointer
|
||||
pub fn cell_pointer_array_size(&self) -> usize {
|
||||
const CELL_POINTER_SIZE_BYTES: usize = 2;
|
||||
self.cell_count() * CELL_POINTER_SIZE_BYTES
|
||||
}
|
||||
|
||||
/// The start of the unallocated region.
|
||||
/// Effectively: the offset after the page header + the cell pointer array.
|
||||
pub fn unallocated_region_start(&self) -> usize {
|
||||
let (cell_ptr_array_start, cell_ptr_array_size) = self.cell_pointer_array_offset_and_size();
|
||||
cell_ptr_array_start + cell_ptr_array_size
|
||||
}
|
||||
|
||||
pub fn unallocated_region_size(&self) -> usize {
|
||||
self.cell_content_area() as usize - self.unallocated_region_start()
|
||||
}
|
||||
|
||||
/// The start of the cell content area.
|
||||
/// SQLite strives to place cells as far toward the end of the b-tree page as it can,
|
||||
/// in order to leave space for future growth of the cell pointer array.
|
||||
|
@ -497,6 +516,17 @@ impl PageContent {
|
|||
self.read_u16(5)
|
||||
}
|
||||
|
||||
/// The size of the page header in bytes.
|
||||
/// 8 bytes for leaf pages, 12 bytes for interior pages (due to storing rightmost child pointer)
|
||||
pub fn header_size(&self) -> usize {
|
||||
match self.page_type() {
|
||||
PageType::IndexInterior => 12,
|
||||
PageType::TableInterior => 12,
|
||||
PageType::IndexLeaf => 8,
|
||||
PageType::TableLeaf => 8,
|
||||
}
|
||||
}
|
||||
|
||||
/// The total number of bytes in all fragments is stored in the fifth field of the b-tree page header.
|
||||
/// Fragments are isolated groups of 1, 2, or 3 unused bytes within the cell content area.
|
||||
pub fn num_frag_free_bytes(&self) -> u8 {
|
||||
|
@ -526,12 +556,7 @@ impl PageContent {
|
|||
let ncells = self.cell_count();
|
||||
// the page header is 12 bytes for interior pages, 8 bytes for leaf pages
|
||||
// this is because the 4 last bytes in the interior page's header are used for the rightmost pointer.
|
||||
let cell_pointer_array_start = match self.page_type() {
|
||||
PageType::IndexInterior => 12,
|
||||
PageType::TableInterior => 12,
|
||||
PageType::IndexLeaf => 8,
|
||||
PageType::TableLeaf => 8,
|
||||
};
|
||||
let cell_pointer_array_start = self.header_size();
|
||||
assert!(idx < ncells, "cell_get: idx out of bounds");
|
||||
let cell_pointer = cell_pointer_array_start + (idx * 2);
|
||||
let cell_pointer = self.read_u16(cell_pointer) as usize;
|
||||
|
@ -552,14 +577,9 @@ impl PageContent {
|
|||
/// The cell pointers are arranged in key order with:
|
||||
/// - left-most cell (the cell with the smallest key) first and
|
||||
/// - the right-most cell (the cell with the largest key) last.
|
||||
pub fn cell_get_raw_pointer_region(&self) -> (usize, usize) {
|
||||
let cell_start = match self.page_type() {
|
||||
PageType::IndexInterior => 12,
|
||||
PageType::TableInterior => 12,
|
||||
PageType::IndexLeaf => 8,
|
||||
PageType::TableLeaf => 8,
|
||||
};
|
||||
(self.offset + cell_start, self.cell_count() * 2)
|
||||
pub fn cell_pointer_array_offset_and_size(&self) -> (usize, usize) {
|
||||
let header_size = self.header_size();
|
||||
(self.offset + header_size, self.cell_pointer_array_size())
|
||||
}
|
||||
|
||||
/* Get region of a cell's payload */
|
||||
|
@ -572,12 +592,7 @@ impl PageContent {
|
|||
) -> (usize, usize) {
|
||||
let buf = self.as_ptr();
|
||||
let ncells = self.cell_count();
|
||||
let cell_pointer_array_start = match self.page_type() {
|
||||
PageType::IndexInterior => 12,
|
||||
PageType::TableInterior => 12,
|
||||
PageType::IndexLeaf => 8,
|
||||
PageType::TableLeaf => 8,
|
||||
};
|
||||
let cell_pointer_array_start = self.header_size();
|
||||
assert!(idx < ncells, "cell_get: idx out of bounds");
|
||||
let cell_pointer = cell_pointer_array_start + (idx * 2); // pointers are 2 bytes each
|
||||
let cell_pointer = self.read_u16(cell_pointer) as usize;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue