Fix bug: left join null flag not being cleared

In left joins, even if the join condition is not matched, the system
must emit a row for every row of the outer table:

-- this must return t1.count() rows, with NULLs for all columns of t2
SELECT * FROM t1 LEFT JOIN t2 ON FALSE;

Our logic for clearing the null flag was to do it in Next/Prev. However,
this is problematic for a few reasons:

- If the inner table of the left join is using SeekRowid, then Next/Prev
  is never called on its cursor, so the null flag doesn't get cleared.
- If the inner table of the left join is using a non-covering index seek,
  i.e. it iterates its rows using an index, but seeks to the main table
  to fetch data, then Next/Prev is never called on the main table, and the
  main table's null flag doesn't get cleared.

What this results in is NULL values incorrectly being emitted for the
inner table after the first correct NULL row, since the null flag is
correctly set to true, but never cleared.

This PR fixes the issue by clearing the null flag whenever seek() is
invoked on the cursor. Hence, the null flag is now cleared on:

- next()
- prev()
- seek()
This commit is contained in:
Jussi Saurio 2025-04-19 13:47:17 +03:00
parent ac8ffa645d
commit 83c509a613
3 changed files with 34 additions and 14 deletions

View file

@ -3416,6 +3416,10 @@ impl BTreeCursor {
pub fn seek(&mut self, key: SeekKey<'_>, op: SeekOp) -> Result<CursorResult<bool>> {
assert!(self.mv_cursor.is_none());
// We need to clear the null flag for the table cursor before seeking,
// because it might have been set to false by an unmatched left-join row during the previous iteration
// on the outer loop.
self.set_null_flag(false);
let rowid = return_if_io!(self.do_seek(key, op));
self.rowid.replace(rowid);
self.empty_record.replace(rowid.is_none());
@ -3789,10 +3793,15 @@ impl BTreeCursor {
}
}
/// In outer joins, whenever the right-side table has no matching row, the query must still return a row
/// for each left-side row. In order to achieve this, we set the null flag on the right-side table cursor
/// so that it returns NULL for all columns until cleared.
#[inline(always)]
pub fn set_null_flag(&mut self, flag: bool) {
self.null_flag = flag;
}
#[inline(always)]
pub fn get_null_flag(&self) -> bool {
self.null_flag
}

View file

@ -853,24 +853,26 @@ pub fn close_loop(
// If the left join match flag has been set to 1, we jump to the next row on the outer table,
// i.e. continue to the next row of t1 in our example.
program.resolve_label(lj_meta.label_match_flag_check_value, program.offset());
let jump_offset = program.offset().add(3u32);
let label_when_right_table_notnull = program.allocate_label();
program.emit_insn(Insn::IfPos {
reg: lj_meta.reg_match_flag,
target_pc: jump_offset,
target_pc: label_when_right_table_notnull,
decrement_by: 0,
});
// If the left join match flag is still 0, it means there was no match on the right table,
// but since it's a LEFT JOIN, we still need to emit a row with NULLs for the right table.
// In that case, we now enter the routine that does exactly that.
// First we set the right table cursor's "pseudo null bit" on, which means any Insn::Column will return NULL
let right_cursor_id = match &table.op {
Operation::Scan { .. } => program.resolve_cursor_id(&table.identifier),
Operation::Search { .. } => program.resolve_cursor_id(&table.identifier),
_ => unreachable!(),
};
program.emit_insn(Insn::NullRow {
cursor_id: right_cursor_id,
});
// First we set the right table cursor's "pseudo null bit" on, which means any Insn::Column will return NULL.
// This needs to be set for both the table and the index cursor, if present,
// since even if the iteration cursor is the index cursor, it might fetch values from the table cursor.
[table_cursor_id, index_cursor_id]
.iter()
.filter_map(|maybe_cursor_id| maybe_cursor_id.as_ref())
.for_each(|cursor_id| {
program.emit_insn(Insn::NullRow {
cursor_id: *cursor_id,
});
});
// Then we jump to setting the left join match flag to 1 again,
// but this time the right table cursor will set everything to null.
// This leads to emitting a row with cols from the left + nulls from the right,
@ -880,8 +882,7 @@ pub fn close_loop(
program.emit_insn(Insn::Goto {
target_pc: lj_meta.label_match_flag_set_true,
});
assert_eq!(program.offset(), jump_offset);
program.resolve_label(label_when_right_table_notnull, program.offset());
}
}
}

View file

@ -272,4 +272,14 @@ do_execsql_test natural-join-and-using-join {
select u.id, u2.id, p.id from users u natural join products p join users u2 using (first_name) limit 3;
} {"1|1|1
1|1204|1
1|1261|1"}
1|1261|1"}
# regression test for a backwards iteration left join case,
# where the null flag of the right table was not cleared after a previous unmatched row.
do_execsql_test left-join-backwards-iteration {
select users.id, users.first_name as user_name, products.name as product_name
from users left join products on users.id = products.id
where users.id < 13 order by users.id desc limit 3;
} {12|Alan|
11|Travis|accessories
10|Daniel|coat}