use crate::parser::Progress::*; use crate::parser::{BadInputError, Progress}; use bumpalo::Bump; use roc_region::all::{Position, Region}; use std::fmt; /// A position in a source file. #[derive(Clone)] pub struct State<'a> { /// The raw input bytes from the file. /// Beware: bytes[0] always points the the current byte the parser is examining. bytes: &'a [u8], original_bytes: &'a [u8], /// Length of the original input in bytes input_len: usize, line_start: Position, /// Current position within the input (line/column) pub xyzlcol: JustColumn, /// Current indentation level, in columns /// (so no indent is col 1 - this saves an arithmetic operation.) pub indent_column: u16, } #[derive(Clone, Copy)] pub struct JustColumn { pub column: u16, } impl<'a> State<'a> { pub fn new(bytes: &'a [u8]) -> State<'a> { State { bytes, original_bytes: bytes, input_len: bytes.len(), line_start: Position::zero(), xyzlcol: JustColumn { column: 0 }, indent_column: 0, } } pub fn bytes(&self) -> &'a [u8] { self.bytes } pub fn column(&self) -> u16 { assert_eq!( self.xyzlcol.column as u32, self.pos().offset - self.line_start.offset, "between {:?} and {:?}", std::str::from_utf8(&self.original_bytes[..self.pos().offset as usize]).unwrap(), std::str::from_utf8(&self.original_bytes[self.pos().offset as usize..]).unwrap(), ); self.xyzlcol.column } #[must_use] pub fn advance(&self, offset: usize) -> State<'a> { let mut state = self.clone(); // debug_assert!(!state.bytes[..offset].iter().any(|b| *b == b'\n')); state.bytes = &state.bytes[offset..]; state } #[must_use] pub fn advance_newline(&self) -> State<'a> { let mut state = self.clone(); state.bytes = &state.bytes[1..]; state.line_start = state.pos(); state } /// Returns the current position pub const fn pos(&self) -> Position { Position::new((self.input_len - self.bytes.len()) as u32) } /// Returns whether the parser has reached the end of the input pub const fn has_reached_end(&self) -> bool { self.bytes.is_empty() } /// Use advance_spaces to advance with indenting. /// This assumes we are *not* advancing with spaces, or at least that /// any spaces on the line were preceded by non-spaces - which would mean /// they weren't eligible to indent anyway. pub fn advance_without_indenting_e( self, quantity: usize, to_error: TE, ) -> Result where TE: Fn(BadInputError, Position) -> E, { self.advance_without_indenting_ee(quantity, |p| to_error(BadInputError::LineTooLong, p)) } pub fn advance_without_indenting_ee( self, quantity: usize, to_error: TE, ) -> Result where TE: Fn(Position) -> E, { match (self.xyzlcol.column as usize).checked_add(quantity) { Some(column_usize) if column_usize <= u16::MAX as usize => { Ok(State { bytes: &self.bytes[quantity..], xyzlcol: JustColumn { column: column_usize as u16, }, // Once we hit a nonspace character, we are no longer indenting. ..self }) } _ => Err((NoProgress, to_error(self.pos()), self)), } } /// Returns a Region corresponding to the current state, but /// with the the end column advanced by the given amount. This is /// useful when parsing something "manually" (using input.chars()) /// and thus wanting a Region while not having access to loc(). pub fn len_region(&self, length: u16) -> Region { Region::new( self.pos(), self.pos().bump_column(length), ) } /// Return a failing ParseResult for the given FailReason pub fn fail( self, _arena: &'a Bump, progress: Progress, reason: X, ) -> Result<(Progress, T, Self), (Progress, X, Self)> { Err((progress, reason, self)) } } impl<'a> fmt::Debug for State<'a> { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!(f, "State {{")?; match std::str::from_utf8(self.bytes) { Ok(string) => write!(f, "\n\tbytes: [utf8] {:?}", string)?, Err(_) => write!(f, "\n\tbytes: [invalid utf8] {:?}", self.bytes)?, } write!( f, "\n\t(col): {},", self.xyzlcol.column )?; write!(f, "\n\tindent_column: {}", self.indent_column)?; write!(f, "\n}}") } } #[test] fn state_size() { // State should always be under 8 machine words, so it fits in a typical // cache line. let state_size = std::mem::size_of::(); let maximum = std::mem::size_of::() * 8; assert!(state_size <= maximum, "{:?} <= {:?}", state_size, maximum); }