Always buffer files before lexing them

This commit is contained in:
Tad Hardesty 2020-05-02 15:30:41 -07:00
parent c6a8af20d2
commit 5e514214b0
11 changed files with 122 additions and 88 deletions

View file

@ -394,11 +394,12 @@ impl fmt::Display for ConstFn {
pub fn evaluate_str(location: Location, input: &[u8]) -> Result<Constant, DMError> {
use super::lexer::{Lexer, from_utf8_or_latin1_borrowed};
let mut bytes = input.iter().map(|&x| Ok(x));
let ctx = Context::default();
let expr = crate::parser::parse_expression(&ctx, location, Lexer::new(&ctx, location.file, &mut bytes))?;
if bytes.next().is_some() {
return Err(DMError::new(location, format!("leftover: {:?} {}", from_utf8_or_latin1_borrowed(&input), bytes.len())));
let mut lexer = Lexer::new(&ctx, location.file, input);
let expr = crate::parser::parse_expression(&ctx, location, &mut lexer)?;
let leftover = lexer.remaining();
if !leftover.is_empty() {
return Err(DMError::new(location, format!("leftover: {:?} {}", from_utf8_or_latin1_borrowed(&input), leftover.len())));
}
expr.simple_evaluate(location)
}

View file

@ -1,5 +1,5 @@
//! The lexer/tokenizer.
use std::io;
use std::io::Read;
use std::str::FromStr;
use std::fmt;
use std::borrow::Cow;
@ -469,25 +469,61 @@ enum Directive {
Stringy,
}
fn buffer_read<R: Read>(file: FileId, mut read: R) -> Result<Vec<u8>, DMError> {
let mut buffer = Vec::new();
if let Err(error) = read.read_to_end(&mut buffer) {
let mut tracker = LocationTracker::new(file, buffer.as_slice().into());
tracker.by_ref().count();
return Err(DMError::new(tracker.location(), "i/o error reading file").with_cause(error));
}
Ok(buffer)
}
/// Attempt to read an entire file into memory, returning a line and column if
/// an I/O error occurs.
pub fn buffer_file(file: FileId, path: &std::path::Path) -> Result<Vec<u8>, DMError> {
let mut buffer = match std::fs::metadata(path) {
Ok(metadata) => Vec::with_capacity(metadata.len() as usize),
Err(_) => Vec::new(),
};
let mut read = match std::fs::File::open(path) {
Ok(read) => read,
Err(error) => return Err(DMError::new(Location { file, line: 1, column: 1 }, "i/o error opening file").with_cause(error)),
};
if let Err(error) = read.read_to_end(&mut buffer) {
let mut tracker = LocationTracker::new(file, buffer.as_slice().into());
tracker.by_ref().count();
return Err(DMError::new(tracker.location(), "i/o error reading file").with_cause(error));
}
Ok(buffer)
}
/// A wrapper for an input stream which tracks line and column numbers.
///
/// All characters, including tabs, are considered to occupy one column
/// regardless of position.
///
/// `io::Error`s are converted to `DMError`s which include the location.
pub struct LocationTracker<I> {
inner: I,
pub struct LocationTracker<'a> {
inner: Cow<'a, [u8]>,
offset: usize,
/// The location of the last character returned by `next()`.
location: Location,
at_line_end: bool,
}
impl<I> LocationTracker<I> {
pub fn new(file_number: FileId, inner: I) -> LocationTracker<I> {
impl<'a> LocationTracker<'a> {
pub fn new(file: FileId, inner: Cow<'a, [u8]>) -> LocationTracker<'a> {
LocationTracker {
inner,
offset: 0,
location: Location {
file: file_number,
file,
line: 0,
column: 0,
},
@ -498,22 +534,27 @@ impl<I> LocationTracker<I> {
/// Return a copy of the location currently recorded by this tracker.
pub fn location(&self) -> Location {
self.location
}
/// Borrow the portion of the input from the current offset to the end,
/// i.e. the bytes that have not yet been stepped over.
pub fn remaining(&self) -> &[u8] {
    let (_consumed, rest) = self.inner.split_at(self.offset);
    rest
}
}
impl<I> fmt::Debug for LocationTracker<I> {
impl<'a> fmt::Debug for LocationTracker<'a> {
fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
fmt.debug_struct("LocationTracker")
// inner omitted
.field("offset", &self.offset)
.field("location", &self.location)
.field("at_line_end", &self.at_line_end)
.finish()
}
}
impl<I: Iterator<Item=io::Result<u8>>> Iterator for LocationTracker<I> {
type Item = Result<u8, DMError>;
impl<'a> Iterator for LocationTracker<'a> {
type Item = u8;
fn next(&mut self) -> Option<Result<u8, DMError>> {
fn next(&mut self) -> Option<u8> {
if self.at_line_end {
self.at_line_end = false;
match self.location.line.checked_add(1) {
@ -523,27 +564,27 @@ impl<I: Iterator<Item=io::Result<u8>>> Iterator for LocationTracker<I> {
self.location.column = 0;
}
match self.inner.next() {
None => None,
Some(Ok(ch)) => {
if ch == b'\n' {
self.at_line_end = true;
}
match self.location.column.checked_add(1) {
Some(new) => self.location.column = new,
None => panic!("per-line column limit of {} exceeded", self.location.column),
}
Some(Ok(ch))
}
Some(Err(e)) => Some(Err(DMError::new(self.location, "i/o error").with_cause(e))),
let ch = match self.inner.get(self.offset) {
Some(&ch) => ch,
None => return None,
};
self.offset += 1;
if ch == b'\n' {
self.at_line_end = true;
}
match self.location.column.checked_add(1) {
Some(new) => self.location.column = new,
None => panic!("per-line column limit of {} exceeded", self.location.column),
}
Some(ch)
}
}
/// The lexer, which serves as a source of tokens through iteration.
pub struct Lexer<'ctx, I> {
pub struct Lexer<'ctx> {
context: &'ctx Context,
input: LocationTracker<I>,
input: LocationTracker<'ctx>,
next: Option<u8>,
final_newline: bool,
at_line_head: bool,
@ -552,7 +593,7 @@ pub struct Lexer<'ctx, I> {
interp_stack: Vec<Interpolation>,
}
impl<'ctx, I> fmt::Debug for Lexer<'ctx, I> {
impl<'ctx> fmt::Debug for Lexer<'ctx> {
fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
fmt.debug_struct("Lexer")
.field("context", self.context)
@ -566,26 +607,19 @@ impl<'ctx, I> fmt::Debug for Lexer<'ctx, I> {
}
}
impl<'ctx, I: Iterator<Item=io::Result<u8>>> HasLocation for Lexer<'ctx, I> {
impl<'ctx> HasLocation for Lexer<'ctx> {
#[inline]
fn location(&self) -> Location {
self.input.location
}
}
impl<'ctx, R: io::Read> Lexer<'ctx, io::Bytes<R>> {
/// Create a new lexer from a reader.
pub fn from_read(context: &'ctx Context, file_number: FileId, source: R) -> Lexer<io::Bytes<R>> {
Lexer::new(context, file_number, source.bytes())
}
}
impl<'ctx, I: Iterator<Item=io::Result<u8>>> Lexer<'ctx, I> {
impl<'ctx> Lexer<'ctx> {
/// Create a new lexer from a byte stream.
pub fn new(context: &'ctx Context, file_number: FileId, input: I) -> Lexer<I> {
pub fn new<I: Into<Cow<'ctx, [u8]>>>(context: &'ctx Context, file_number: FileId, input: I) -> Self {
Lexer {
context,
input: LocationTracker::new(file_number, input),
input: LocationTracker::new(file_number, input.into()),
next: None,
final_newline: false,
at_line_head: true,
@ -595,6 +629,20 @@ impl<'ctx, I: Iterator<Item=io::Result<u8>>> Lexer<'ctx, I> {
}
}
/// Create a new lexer over the full contents of a reader.
///
/// The reader is buffered into memory via `buffer_read` before the lexer
/// is constructed; any error from that step is returned unchanged.
pub fn from_read<R: Read>(context: &'ctx Context, file: FileId, read: R) -> Result<Self, DMError> {
    buffer_read(file, read).map(|buffer| Lexer::new(context, file, buffer))
}
/// Create a new lexer over the contents of the file at `path`.
///
/// The file is buffered into memory via `buffer_file` before the lexer is
/// constructed; any error from that step is returned unchanged.
pub fn from_file(context: &'ctx Context, file: FileId, path: &std::path::Path) -> Result<Self, DMError> {
    buffer_file(file, path).map(|buffer| Lexer::new(context, file, buffer))
}
/// Borrow the input bytes that the underlying location tracker has not yet
/// stepped over.
///
/// NOTE(review): a byte parked in the lexer's own `next` lookahead slot has
/// presumably already been taken from the tracker and so would not appear
/// here — confirm against callers that check for leftover input.
pub fn remaining(&self) -> &[u8] {
self.input.remaining()
}
fn next(&mut self) -> Option<u8> {
if let Some(next) = self.next.take() {
return Some(next);
@ -608,17 +656,12 @@ impl<'ctx, I: Iterator<Item=io::Result<u8>>> Lexer<'ctx, I> {
}
match result {
None => None,
Some(Ok(ch)) => {
Some(ch) => {
if ch != b'\t' && ch != b' ' {
self.at_line_head = false;
}
Some(ch)
}
Some(Err(err)) => {
// I/O error is effectively EOF.
self.context.register_error(err);
None
}
}
}
@ -1049,7 +1092,7 @@ impl<'ctx, I: Iterator<Item=io::Result<u8>>> Lexer<'ctx, I> {
}
}
impl<'ctx, I: Iterator<Item=io::Result<u8>>> Iterator for Lexer<'ctx, I> {
impl<'ctx> Iterator for Lexer<'ctx> {
type Item = LocatedToken;
fn next(&mut self) -> Option<LocatedToken> {

View file

@ -50,7 +50,7 @@ impl Context {
/// Will only return failure on an `io::Error`. Compilation failures will
/// return a best-effort parse. Call `print_all_errors` to pretty-print
/// errors to standard error.
pub fn parse_environment(&self, dme: &Path) -> io::Result<objtree::ObjectTree> {
pub fn parse_environment(&self, dme: &Path) -> Result<objtree::ObjectTree, DMError> {
Ok(parser::parse(self,
indents::IndentProcessor::new(self,
preprocessor::Preprocessor::new(self, dme.to_owned())?

View file

@ -246,7 +246,7 @@ enum Include<'ctx> {
File {
path: PathBuf,
file: FileId,
lexer: Lexer<'ctx, io::Bytes<Box<dyn io::Read>>>,
lexer: Lexer<'ctx>,
},
Expansion {
name: String,
@ -256,11 +256,20 @@ enum Include<'ctx> {
}
impl<'ctx> Include<'ctx> {
fn from_read(context: &'ctx Context, path: PathBuf, read: Box<dyn io::Read>) -> Include {
/// Build a file include by registering `path` with the context and lexing
/// the file found there.
///
/// The path is registered before the file is opened, so a file id is
/// allocated even if buffering the file subsequently fails.
fn from_path(context: &'ctx Context, path: PathBuf) -> Result<Include<'ctx>, DMError> {
    let file = context.register_file(&path);
    let lexer = Lexer::from_file(context, file, &path)?;
    Ok(Include::File { path, file, lexer })
}
fn from_buffer(context: &'ctx Context, path: PathBuf, buffer: Cow<'ctx, [u8]>) -> Include<'ctx> {
let idx = context.register_file(&path);
Include::File {
file: idx,
lexer: Lexer::from_read(context, idx, read),
lexer: Lexer::new(context, idx, buffer),
path,
}
}
@ -387,26 +396,12 @@ impl<'ctx> HasLocation for Preprocessor<'ctx> {
}
}
/// Read the whole file at `path` into memory.
///
/// The buffer is pre-sized from the file's metadata when that can be
/// queried; otherwise it starts empty. Errors from opening or reading the
/// file are returned as-is.
fn buffer_file(path: &Path) -> io::Result<Vec<u8>> {
    use std::io::Read;

    let capacity = std::fs::metadata(path)
        .map(|metadata| metadata.len() as usize)
        .unwrap_or(0);
    let mut contents = Vec::with_capacity(capacity);
    File::open(path)?.read_to_end(&mut contents)?;
    Ok(contents)
}
impl<'ctx> Preprocessor<'ctx> {
/// Create a new preprocessor from the given Context and environment file.
pub fn new(context: &'ctx Context, env_file: PathBuf) -> io::Result<Self> {
pub fn new(context: &'ctx Context, env_file: PathBuf) -> Result<Self, DMError> {
// Buffer the entire environment file. Large environments take a while
// to load and locking it for the whole time is somewhat inconvenient.
let buffer = buffer_file(&env_file)?;
let include = Include::from_read(context, env_file.clone(), Box::new(io::Cursor::new(buffer)));
let include = Include::from_path(context, env_file.clone())?;
Ok(Preprocessor {
context,
@ -436,7 +431,7 @@ impl<'ctx> Preprocessor<'ctx> {
Cow::Borrowed(s) => Cow::Borrowed(s.as_bytes()),
Cow::Owned(s) => Cow::Owned(s.into_bytes()),
};
let include = Include::from_read(context, env_file.clone(), Box::new(io::Cursor::new(cow_u8)));
let include = Include::from_buffer(context, env_file.clone(), cow_u8);
Preprocessor {
context,
env_file,
@ -499,14 +494,14 @@ impl<'ctx> Preprocessor<'ctx> {
*/
/// Push a DM file to the top of this preprocessor's stack.
pub fn push_file<R: io::Read + 'static>(&mut self, path: PathBuf, read: R) -> FileId {
pub fn push_file<R: io::Read + 'static>(&mut self, path: PathBuf, read: R) -> Result<FileId, DMError> {
let idx = self.context.register_file(&path);
self.include_stack.stack.push(Include::File {
lexer: Lexer::from_read(self.context, idx, Box::new(read)),
lexer: Lexer::from_read(self.context, idx, read)?,
file: idx,
path,
});
idx
Ok(idx)
}
/// Enable source file annotations.
@ -642,7 +637,7 @@ impl<'ctx> Preprocessor<'ctx> {
Ok(Include::File {
path,
file: file_id,
lexer: Lexer::from_read(&self.context, file_id, Box::new(read)),
lexer: Lexer::from_read(&self.context, file_id, read)?,
})
}
}

View file

@ -22,7 +22,7 @@ fn annotation_basic() {
"##.trim();
let context = Default::default();
let lexer = Lexer::new(&context, Default::default(), code.bytes().map(Ok));
let lexer = Lexer::new(&context, Default::default(), code.as_bytes());
let indent = IndentProcessor::new(&context, lexer);
let mut annotations = AnnotationTree::default();
Parser::new(&context, indent).parse_annotations_only(&mut annotations);

View file

@ -6,7 +6,7 @@ use dm::ast::*;
fn parse_expr(f: &str) -> Expression {
let context = Default::default();
let lexer = Lexer::new(&context, Default::default(), f.bytes().map(Ok));
let lexer = Lexer::new(&context, Default::default(), f.as_bytes());
let result = parse_expression(&context, Default::default(), lexer).expect("failed to parse expression");
context.assert_success();
result

View file

@ -6,7 +6,7 @@ use dm::lexer::Punctuation::*;
fn lex(f: &str) -> Vec<Token> {
let context = Default::default();
let result = Lexer::new(&context, Default::default(), f.bytes().map(Ok))
let result = Lexer::new(&context, Default::default(), f.as_bytes())
.map(|t| t.token)
.collect();
context.assert_success();

View file

@ -18,7 +18,7 @@ fn simple_location_test() {
"##.trim();
let context = Default::default();
let located_tokens: Vec<_> = Lexer::new(&context, Default::default(), code.bytes().map(Ok)).collect();
let located_tokens: Vec<_> = Lexer::new(&context, Default::default(), code.as_bytes()).collect();
context.assert_success();
assert_eq!(located_tokens[0].location.line, 1);

View file

@ -471,7 +471,7 @@ impl<'a> Engine<'a> {
None => (FileId::default(), defines.branch_at_end(&self.context)),
};
let contents = self.docs.read(url).map_err(invalid_request)?;
let file_id = preprocessor.push_file(stripped.to_owned(), contents);
let file_id = preprocessor.push_file(stripped.to_owned(), contents).map_err(invalid_request)?;
preprocessor.enable_annotations();
let mut annotations = AnnotationTree::default();
{

View file

@ -7,7 +7,7 @@ use std::fmt;
use ndarray::{self, Array3, Axis};
use linked_hash_map::LinkedHashMap;
use dm::{DMError, Location};
use dm::DMError;
use dm::constants::Constant;
use crate::dmi::Dir;
@ -158,9 +158,7 @@ impl Map {
dictionary: Default::default(),
grid: Array3::default((1, 1, 1)),
};
read::parse_map(&mut map, File::open(path).map_err(|e| {
DMError::new(Location::default(), "i/o error").with_cause(e)
})?)?;
read::parse_map(&mut map, path)?;
Ok(map)
}

View file

@ -1,7 +1,5 @@
//! Map parser, supporting standard DMM or TGM-format files.
use std::collections::BTreeMap;
use std::fs::File;
use std::io::{Read, BufReader};
use std::cmp::max;
use ndarray::Array3;
@ -16,8 +14,9 @@ fn take<T: Default>(t: &mut T) -> T {
std::mem::replace(t, T::default())
}
pub fn parse_map(map: &mut Map, f: File) -> Result<(), DMError> {
let mut chars = LocationTracker::new(Default::default(), BufReader::new(f).bytes());
pub fn parse_map(map: &mut Map, path: &std::path::Path) -> Result<(), DMError> {
let file_id = Default::default();
let mut chars = LocationTracker::new(file_id, dm::lexer::buffer_file(file_id, path)?.into());
let mut in_comment_line = false;
let mut comment_trigger = false;
@ -39,7 +38,6 @@ pub fn parse_map(map: &mut Map, f: File) -> Result<(), DMError> {
let mut skip_whitespace = false;
while let Some(ch) = chars.next() {
let ch = ch?;
if ch == b'\n' || ch == b'\r' {
in_comment_line = false;
comment_trigger = false;
@ -178,7 +176,6 @@ pub fn parse_map(map: &mut Map, f: File) -> Result<(), DMError> {
let mut adjust_y = true;
while let Some(ch) = chars.next() {
let ch = ch?;
if in_coord_block {
if ch == b',' {
if reading_coord == Coord::X {