diff --git a/crates/lsh-gen/src/compiler.rs b/crates/lsh-gen/src/compiler.rs index 33cf733..bd35caf 100644 --- a/crates/lsh-gen/src/compiler.rs +++ b/crates/lsh-gen/src/compiler.rs @@ -96,24 +96,34 @@ impl<'a> Compiler<'a> { _ = write!(output, " N{node_cell:p}"); match node.instr { + IRI::Add { dst: Register::Zero, src: Register::Zero, imm: 0 } => { + output.push_str("[noop]"); + } + IRI::Add { dst: Register::HighlightKind, src: Register::Zero, imm } => { + _ = write!(output, "[\"hk = {:?}\"]", unsafe { + HighlightKind::from_usize(imm) + }); + } IRI::Add { dst, src, imm } => { - if dst == Register::Zero && src == Register::Zero && imm == 0 { - _ = write!(output, "[noop]"); - } else if dst == Register::HighlightKind && src == Register::Zero { - _ = write!(output, "[\"hk = {:?}\"]", unsafe { - HighlightKind::from_usize(imm) - }); - } else { - _ = write!(output, "[\"{} = ", dst.mnemonic()); - if src != Register::Zero { - _ = write!(output, "{} + ", src.mnemonic()); + _ = write!(output, "[\"{} = ", dst.mnemonic()); + match (src, imm) { + (Register::Zero, 0) => { + _ = write!(output, "0"); } - if imm == usize::MAX { - _ = write!(output, "max\"]"); - } else { - _ = write!(output, "{imm}\"]"); + (Register::Zero, usize::MAX) => { + _ = write!(output, "max"); + } + (Register::Zero, _) => { + _ = write!(output, "{imm}"); + } + (_, 0) => { + _ = write!(output, "{}", src.mnemonic()); + } + _ => { + _ = write!(output, "{} + {}", src.mnemonic(), imm); } } + output.push_str("\"]"); } IRI::If { condition, then } => { match condition { @@ -134,10 +144,10 @@ impl<'a> Compiler<'a> { _ = write!(output, "[\"Call {name}\"]"); } IRI::Return => { - _ = write!(output, "[return]"); + output.push_str("[return]"); } IRI::Flush => { - _ = write!(output, "[flush]"); + output.push_str("[flush]"); } IRI::Loop { dst } => { _ = write!(output, "[loop] --> N{dst:p}"); @@ -216,6 +226,7 @@ pub struct IR<'a> { pub type IRCell<'a> = &'a RefCell<IR<'a>>; +// IRI = Intermediate Representation Instruction
#[derive(Debug)] pub enum IRI<'a> { Add { dst: Register, src: Register, imm: usize }, diff --git a/crates/lsh-gen/src/frontend.rs b/crates/lsh-gen/src/frontend.rs index 39174a3..00079e8 100644 --- a/crates/lsh-gen/src/frontend.rs +++ b/crates/lsh-gen/src/frontend.rs @@ -155,6 +155,7 @@ impl<'a, 'c, 'src> Parser<'a, 'c, 'src> { break; } + // Gobble the "else" token. self.advance(); // The else branch has a block? Connect it with the if. @@ -166,6 +167,7 @@ impl<'a, 'c, 'src> Parser<'a, 'c, 'src> { break; } + // Otherwise, we expect an "if" in the next iteration to form an "else if". else_branch = Some(re.dst_bad); } diff --git a/crates/lsh-gen/src/lib.rs b/crates/lsh-gen/src/lib.rs index a6adeb5..636a1d7 100644 --- a/crates/lsh-gen/src/lib.rs +++ b/crates/lsh-gen/src/lib.rs @@ -234,18 +234,21 @@ impl Instruction { pub fn encode(&self) -> u32 { match *self { Instruction::Add { dst, src, imm } => { - Self::cast_imm(imm) | (src as u32) << 8 | (dst as u32) << 4 | 0b0000 + Self::cast_imm(imm) + | Self::cast_bits(src as usize, 4, 8) + | Self::cast_bits(dst as usize, 4, 4) + | 0b0000 } Instruction::Call { dst } => Self::cast_imm(dst) | 0b0001, Instruction::Return => 0b0010, Instruction::JumpIfMatchCharset { idx, dst } => { - Self::cast_imm(dst) | (idx as u32) << 4 | 0b0011 + Self::cast_imm(dst) | Self::cast_bits(idx, 8, 4) | 0b0011 } Instruction::JumpIfMatchPrefix { idx, dst } => { - Self::cast_imm(dst) | (idx as u32) << 4 | 0b0100 + Self::cast_imm(dst) | Self::cast_bits(idx, 8, 4) | 0b0100 } Instruction::JumpIfMatchPrefixInsensitive { idx, dst } => { - Self::cast_imm(dst) | (idx as u32) << 4 | 0b0101 + Self::cast_imm(dst) | Self::cast_bits(idx, 8, 4) | 0b0101 } Instruction::FlushHighlight => 0b0110, Instruction::Loop { dst } => Self::cast_imm(dst) | 0b0111, @@ -290,6 +293,11 @@ impl Instruction { (imm << 12) as u32 } } + + fn cast_bits(val: usize, bits: usize, shift: usize) -> u32 { + assert!(val < (1 << bits)); + (val as u32) << shift + } } #[derive(Clone, PartialEq, Eq, 
Hash)] diff --git a/crates/lsh-gen/src/optimizer.rs b/crates/lsh-gen/src/optimizer.rs index 7b8402b..a8bc6d5 100644 --- a/crates/lsh-gen/src/optimizer.rs +++ b/crates/lsh-gen/src/optimizer.rs @@ -8,6 +8,7 @@ pub fn optimize<'a>(compiler: &mut Compiler<'a>) { optimize_noop(compiler); } +// Removes no-op instructions from the IR. fn optimize_noop<'a>(compiler: &mut Compiler<'a>) { let scratch = scratch_arena(None); let mut candidates = Vec::new_in(&*scratch); diff --git a/crates/lsh-gen/src/regex.rs b/crates/lsh-gen/src/regex.rs index 86f987a..f32747d 100644 --- a/crates/lsh-gen/src/regex.rs +++ b/crates/lsh-gen/src/regex.rs @@ -235,28 +235,24 @@ fn transform_concat<'a>(compiler: &mut Compiler<'a>, dst: IRCell<'a>, hirs: &[Hi str }); - let node = if let Some(str) = prefix_insensitive { + let dst = compiler.alloc_noop(); + let src = if let Some(str) = prefix_insensitive { let str = compiler.intern_string(&str); compiler.alloc_iri(IRI::If { condition: Condition::PrefixInsensitive(str), then: dst }) } else { transform(compiler, dst, hir) }; if first.is_none() { - first = Some(node); + first = Some(src); } if let Some(last) = &last { - let mut last = last.borrow_mut(); - match last.instr { - IRI::Add { .. } => { - last.next = Some(node); - } - IRI::If { ref mut then, .. 
} => { - *then = node; - } - _ => unreachable!(), - } + last.borrow_mut().set_next(src); } - last = Some(node); + last = Some(dst); + } + + if let Some(last) = &last { + last.borrow_mut().set_next(dst); } first.unwrap() @@ -268,7 +264,6 @@ fn transform_alt<'a>(compiler: &mut Compiler<'a>, dst: IRCell<'a>, hirs: &[Hir]) let mut last: Option<IRCell<'a>> = None; for hir in hirs { - // TODO: needs to write into the else branch let node = transform(compiler, dst, hir); if first.is_none() { first = Some(node); diff --git a/lsh/diff.lsh b/lsh/diff.lsh index c4baad4..0f632be 100644 --- a/lsh/diff.lsh +++ b/lsh/diff.lsh @@ -1,10 +1,6 @@ pub fn diff() { loop { - if /diff.*/ { - yield BrightBlue; - } else if /---.*/ { - yield BrightBlue; - } else if /\+\+\+.*/ { + if /(?:diff|---|\+\+\+).*/ { yield BrightBlue; } else if /-.*/ { yield BrightRed;