mirror of
https://github.com/SpaceManiac/SpacemanDMM.git
synced 2025-12-23 05:36:47 +00:00
Always buffer files before lexing them
This commit is contained in:
parent
c6a8af20d2
commit
5e514214b0
11 changed files with 122 additions and 88 deletions
|
|
@ -394,11 +394,12 @@ impl fmt::Display for ConstFn {
|
|||
pub fn evaluate_str(location: Location, input: &[u8]) -> Result<Constant, DMError> {
|
||||
use super::lexer::{Lexer, from_utf8_or_latin1_borrowed};
|
||||
|
||||
let mut bytes = input.iter().map(|&x| Ok(x));
|
||||
let ctx = Context::default();
|
||||
let expr = crate::parser::parse_expression(&ctx, location, Lexer::new(&ctx, location.file, &mut bytes))?;
|
||||
if bytes.next().is_some() {
|
||||
return Err(DMError::new(location, format!("leftover: {:?} {}", from_utf8_or_latin1_borrowed(&input), bytes.len())));
|
||||
let mut lexer = Lexer::new(&ctx, location.file, input);
|
||||
let expr = crate::parser::parse_expression(&ctx, location, &mut lexer)?;
|
||||
let leftover = lexer.remaining();
|
||||
if !leftover.is_empty() {
|
||||
return Err(DMError::new(location, format!("leftover: {:?} {}", from_utf8_or_latin1_borrowed(&input), leftover.len())));
|
||||
}
|
||||
expr.simple_evaluate(location)
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
//! The lexer/tokenizer.
|
||||
use std::io;
|
||||
use std::io::Read;
|
||||
use std::str::FromStr;
|
||||
use std::fmt;
|
||||
use std::borrow::Cow;
|
||||
|
|
@ -469,25 +469,61 @@ enum Directive {
|
|||
Stringy,
|
||||
}
|
||||
|
||||
fn buffer_read<R: Read>(file: FileId, mut read: R) -> Result<Vec<u8>, DMError> {
|
||||
let mut buffer = Vec::new();
|
||||
|
||||
if let Err(error) = read.read_to_end(&mut buffer) {
|
||||
let mut tracker = LocationTracker::new(file, buffer.as_slice().into());
|
||||
tracker.by_ref().count();
|
||||
return Err(DMError::new(tracker.location(), "i/o error reading file").with_cause(error));
|
||||
}
|
||||
|
||||
Ok(buffer)
|
||||
}
|
||||
|
||||
/// Attempt to read an entire file into memory, returning a line and column if
|
||||
/// an I/O error occurs.
|
||||
pub fn buffer_file(file: FileId, path: &std::path::Path) -> Result<Vec<u8>, DMError> {
|
||||
let mut buffer = match std::fs::metadata(path) {
|
||||
Ok(metadata) => Vec::with_capacity(metadata.len() as usize),
|
||||
Err(_) => Vec::new(),
|
||||
};
|
||||
|
||||
let mut read = match std::fs::File::open(path) {
|
||||
Ok(read) => read,
|
||||
Err(error) => return Err(DMError::new(Location { file, line: 1, column: 1 }, "i/o error opening file").with_cause(error)),
|
||||
};
|
||||
|
||||
if let Err(error) = read.read_to_end(&mut buffer) {
|
||||
let mut tracker = LocationTracker::new(file, buffer.as_slice().into());
|
||||
tracker.by_ref().count();
|
||||
return Err(DMError::new(tracker.location(), "i/o error reading file").with_cause(error));
|
||||
}
|
||||
|
||||
Ok(buffer)
|
||||
}
|
||||
|
||||
/// A wrapper for an input stream which tracks line and column numbers.
|
||||
///
|
||||
/// All characters, including tabs, are considered to occupy one column
|
||||
/// regardless of position.
|
||||
///
|
||||
/// `io::Error`s are converted to `DMError`s which include the location.
|
||||
pub struct LocationTracker<I> {
|
||||
inner: I,
|
||||
pub struct LocationTracker<'a> {
|
||||
inner: Cow<'a, [u8]>,
|
||||
offset: usize,
|
||||
/// The location of the last character returned by `next()`.
|
||||
location: Location,
|
||||
at_line_end: bool,
|
||||
}
|
||||
|
||||
impl<I> LocationTracker<I> {
|
||||
pub fn new(file_number: FileId, inner: I) -> LocationTracker<I> {
|
||||
impl<'a> LocationTracker<'a> {
|
||||
pub fn new(file: FileId, inner: Cow<'a, [u8]>) -> LocationTracker<'a> {
|
||||
LocationTracker {
|
||||
inner,
|
||||
offset: 0,
|
||||
location: Location {
|
||||
file: file_number,
|
||||
file,
|
||||
line: 0,
|
||||
column: 0,
|
||||
},
|
||||
|
|
@ -498,22 +534,27 @@ impl<I> LocationTracker<I> {
|
|||
pub fn location(&self) -> Location {
|
||||
self.location
|
||||
}
|
||||
|
||||
pub fn remaining(&self) -> &[u8] {
|
||||
&self.inner[self.offset..]
|
||||
}
|
||||
}
|
||||
|
||||
impl<I> fmt::Debug for LocationTracker<I> {
|
||||
impl<'a> fmt::Debug for LocationTracker<'a> {
|
||||
fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
|
||||
fmt.debug_struct("LocationTracker")
|
||||
// inner omitted
|
||||
.field("offset", &self.offset)
|
||||
.field("location", &self.location)
|
||||
.field("at_line_end", &self.at_line_end)
|
||||
.finish()
|
||||
}
|
||||
}
|
||||
|
||||
impl<I: Iterator<Item=io::Result<u8>>> Iterator for LocationTracker<I> {
|
||||
type Item = Result<u8, DMError>;
|
||||
impl<'a> Iterator for LocationTracker<'a> {
|
||||
type Item = u8;
|
||||
|
||||
fn next(&mut self) -> Option<Result<u8, DMError>> {
|
||||
fn next(&mut self) -> Option<u8> {
|
||||
if self.at_line_end {
|
||||
self.at_line_end = false;
|
||||
match self.location.line.checked_add(1) {
|
||||
|
|
@ -523,27 +564,27 @@ impl<I: Iterator<Item=io::Result<u8>>> Iterator for LocationTracker<I> {
|
|||
self.location.column = 0;
|
||||
}
|
||||
|
||||
match self.inner.next() {
|
||||
None => None,
|
||||
Some(Ok(ch)) => {
|
||||
if ch == b'\n' {
|
||||
self.at_line_end = true;
|
||||
}
|
||||
match self.location.column.checked_add(1) {
|
||||
Some(new) => self.location.column = new,
|
||||
None => panic!("per-line column limit of {} exceeded", self.location.column),
|
||||
}
|
||||
Some(Ok(ch))
|
||||
}
|
||||
Some(Err(e)) => Some(Err(DMError::new(self.location, "i/o error").with_cause(e))),
|
||||
let ch = match self.inner.get(self.offset) {
|
||||
Some(&ch) => ch,
|
||||
None => return None,
|
||||
};
|
||||
self.offset += 1;
|
||||
|
||||
if ch == b'\n' {
|
||||
self.at_line_end = true;
|
||||
}
|
||||
match self.location.column.checked_add(1) {
|
||||
Some(new) => self.location.column = new,
|
||||
None => panic!("per-line column limit of {} exceeded", self.location.column),
|
||||
}
|
||||
Some(ch)
|
||||
}
|
||||
}
|
||||
|
||||
/// The lexer, which serves as a source of tokens through iteration.
|
||||
pub struct Lexer<'ctx, I> {
|
||||
pub struct Lexer<'ctx> {
|
||||
context: &'ctx Context,
|
||||
input: LocationTracker<I>,
|
||||
input: LocationTracker<'ctx>,
|
||||
next: Option<u8>,
|
||||
final_newline: bool,
|
||||
at_line_head: bool,
|
||||
|
|
@ -552,7 +593,7 @@ pub struct Lexer<'ctx, I> {
|
|||
interp_stack: Vec<Interpolation>,
|
||||
}
|
||||
|
||||
impl<'ctx, I> fmt::Debug for Lexer<'ctx, I> {
|
||||
impl<'ctx> fmt::Debug for Lexer<'ctx> {
|
||||
fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
|
||||
fmt.debug_struct("Lexer")
|
||||
.field("context", self.context)
|
||||
|
|
@ -566,26 +607,19 @@ impl<'ctx, I> fmt::Debug for Lexer<'ctx, I> {
|
|||
}
|
||||
}
|
||||
|
||||
impl<'ctx, I: Iterator<Item=io::Result<u8>>> HasLocation for Lexer<'ctx, I> {
|
||||
impl<'ctx> HasLocation for Lexer<'ctx> {
|
||||
#[inline]
|
||||
fn location(&self) -> Location {
|
||||
self.input.location
|
||||
}
|
||||
}
|
||||
|
||||
impl<'ctx, R: io::Read> Lexer<'ctx, io::Bytes<R>> {
|
||||
/// Create a new lexer from a reader.
|
||||
pub fn from_read(context: &'ctx Context, file_number: FileId, source: R) -> Lexer<io::Bytes<R>> {
|
||||
Lexer::new(context, file_number, source.bytes())
|
||||
}
|
||||
}
|
||||
|
||||
impl<'ctx, I: Iterator<Item=io::Result<u8>>> Lexer<'ctx, I> {
|
||||
impl<'ctx> Lexer<'ctx> {
|
||||
/// Create a new lexer from an in-memory byte buffer.
|
||||
pub fn new(context: &'ctx Context, file_number: FileId, input: I) -> Lexer<I> {
|
||||
pub fn new<I: Into<Cow<'ctx, [u8]>>>(context: &'ctx Context, file_number: FileId, input: I) -> Self {
|
||||
Lexer {
|
||||
context,
|
||||
input: LocationTracker::new(file_number, input),
|
||||
input: LocationTracker::new(file_number, input.into()),
|
||||
next: None,
|
||||
final_newline: false,
|
||||
at_line_head: true,
|
||||
|
|
@ -595,6 +629,20 @@ impl<'ctx, I: Iterator<Item=io::Result<u8>>> Lexer<'ctx, I> {
|
|||
}
|
||||
}
|
||||
|
||||
/// Create a new lexer from a reader.
|
||||
pub fn from_read<R: Read>(context: &'ctx Context, file: FileId, read: R) -> Result<Self, DMError> {
|
||||
Ok(Lexer::new(context, file, buffer_read(file, read)?))
|
||||
}
|
||||
|
||||
/// Create a new lexer from a reader.
|
||||
pub fn from_file(context: &'ctx Context, file: FileId, path: &std::path::Path) -> Result<Self, DMError> {
|
||||
Ok(Lexer::new(context, file, buffer_file(file, path)?))
|
||||
}
|
||||
|
||||
pub fn remaining(&self) -> &[u8] {
|
||||
self.input.remaining()
|
||||
}
|
||||
|
||||
fn next(&mut self) -> Option<u8> {
|
||||
if let Some(next) = self.next.take() {
|
||||
return Some(next);
|
||||
|
|
@ -608,17 +656,12 @@ impl<'ctx, I: Iterator<Item=io::Result<u8>>> Lexer<'ctx, I> {
|
|||
}
|
||||
match result {
|
||||
None => None,
|
||||
Some(Ok(ch)) => {
|
||||
Some(ch) => {
|
||||
if ch != b'\t' && ch != b' ' {
|
||||
self.at_line_head = false;
|
||||
}
|
||||
Some(ch)
|
||||
}
|
||||
Some(Err(err)) => {
|
||||
// I/O error is effectively EOF.
|
||||
self.context.register_error(err);
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -1049,7 +1092,7 @@ impl<'ctx, I: Iterator<Item=io::Result<u8>>> Lexer<'ctx, I> {
|
|||
}
|
||||
}
|
||||
|
||||
impl<'ctx, I: Iterator<Item=io::Result<u8>>> Iterator for Lexer<'ctx, I> {
|
||||
impl<'ctx> Iterator for Lexer<'ctx> {
|
||||
type Item = LocatedToken;
|
||||
|
||||
fn next(&mut self) -> Option<LocatedToken> {
|
||||
|
|
|
|||
|
|
@ -50,7 +50,7 @@ impl Context {
|
|||
/// Will only return failure on an `io::Error`. Compilation failures will
|
||||
/// return a best-effort parse. Call `print_all_errors` to pretty-print
|
||||
/// errors to standard error.
|
||||
pub fn parse_environment(&self, dme: &Path) -> io::Result<objtree::ObjectTree> {
|
||||
pub fn parse_environment(&self, dme: &Path) -> Result<objtree::ObjectTree, DMError> {
|
||||
Ok(parser::parse(self,
|
||||
indents::IndentProcessor::new(self,
|
||||
preprocessor::Preprocessor::new(self, dme.to_owned())?
|
||||
|
|
|
|||
|
|
@ -246,7 +246,7 @@ enum Include<'ctx> {
|
|||
File {
|
||||
path: PathBuf,
|
||||
file: FileId,
|
||||
lexer: Lexer<'ctx, io::Bytes<Box<dyn io::Read>>>,
|
||||
lexer: Lexer<'ctx>,
|
||||
},
|
||||
Expansion {
|
||||
name: String,
|
||||
|
|
@ -256,11 +256,20 @@ enum Include<'ctx> {
|
|||
}
|
||||
|
||||
impl<'ctx> Include<'ctx> {
|
||||
fn from_read(context: &'ctx Context, path: PathBuf, read: Box<dyn io::Read>) -> Include {
|
||||
fn from_path(context: &'ctx Context, path: PathBuf) -> Result<Include<'ctx>, DMError> {
|
||||
let idx = context.register_file(&path);
|
||||
Ok(Include::File {
|
||||
file: idx,
|
||||
lexer: Lexer::from_file(context, idx, &path)?,
|
||||
path,
|
||||
})
|
||||
}
|
||||
|
||||
fn from_buffer(context: &'ctx Context, path: PathBuf, buffer: Cow<'ctx, [u8]>) -> Include<'ctx> {
|
||||
let idx = context.register_file(&path);
|
||||
Include::File {
|
||||
file: idx,
|
||||
lexer: Lexer::from_read(context, idx, read),
|
||||
lexer: Lexer::new(context, idx, buffer),
|
||||
path,
|
||||
}
|
||||
}
|
||||
|
|
@ -387,26 +396,12 @@ impl<'ctx> HasLocation for Preprocessor<'ctx> {
|
|||
}
|
||||
}
|
||||
|
||||
fn buffer_file(path: &Path) -> io::Result<Vec<u8>> {
|
||||
use std::io::Read;
|
||||
|
||||
let mut buffer = if let Ok(metadata) = std::fs::metadata(path) {
|
||||
Vec::with_capacity(metadata.len() as usize)
|
||||
} else {
|
||||
Vec::new()
|
||||
};
|
||||
let mut file = File::open(path)?;
|
||||
file.read_to_end(&mut buffer)?;
|
||||
Ok(buffer)
|
||||
}
|
||||
|
||||
impl<'ctx> Preprocessor<'ctx> {
|
||||
/// Create a new preprocessor from the given Context and environment file.
|
||||
pub fn new(context: &'ctx Context, env_file: PathBuf) -> io::Result<Self> {
|
||||
pub fn new(context: &'ctx Context, env_file: PathBuf) -> Result<Self, DMError> {
|
||||
// Buffer the entire environment file. Large environments take a while
|
||||
// to load and locking it for the whole time is somewhat inconvenient.
|
||||
let buffer = buffer_file(&env_file)?;
|
||||
let include = Include::from_read(context, env_file.clone(), Box::new(io::Cursor::new(buffer)));
|
||||
let include = Include::from_path(context, env_file.clone())?;
|
||||
|
||||
Ok(Preprocessor {
|
||||
context,
|
||||
|
|
@ -436,7 +431,7 @@ impl<'ctx> Preprocessor<'ctx> {
|
|||
Cow::Borrowed(s) => Cow::Borrowed(s.as_bytes()),
|
||||
Cow::Owned(s) => Cow::Owned(s.into_bytes()),
|
||||
};
|
||||
let include = Include::from_read(context, env_file.clone(), Box::new(io::Cursor::new(cow_u8)));
|
||||
let include = Include::from_buffer(context, env_file.clone(), cow_u8);
|
||||
Preprocessor {
|
||||
context,
|
||||
env_file,
|
||||
|
|
@ -499,14 +494,14 @@ impl<'ctx> Preprocessor<'ctx> {
|
|||
*/
|
||||
|
||||
/// Push a DM file to the top of this preprocessor's stack.
|
||||
pub fn push_file<R: io::Read + 'static>(&mut self, path: PathBuf, read: R) -> FileId {
|
||||
pub fn push_file<R: io::Read + 'static>(&mut self, path: PathBuf, read: R) -> Result<FileId, DMError> {
|
||||
let idx = self.context.register_file(&path);
|
||||
self.include_stack.stack.push(Include::File {
|
||||
lexer: Lexer::from_read(self.context, idx, Box::new(read)),
|
||||
lexer: Lexer::from_read(self.context, idx, read)?,
|
||||
file: idx,
|
||||
path,
|
||||
});
|
||||
idx
|
||||
Ok(idx)
|
||||
}
|
||||
|
||||
/// Enable source file annotations.
|
||||
|
|
@ -642,7 +637,7 @@ impl<'ctx> Preprocessor<'ctx> {
|
|||
Ok(Include::File {
|
||||
path,
|
||||
file: file_id,
|
||||
lexer: Lexer::from_read(&self.context, file_id, Box::new(read)),
|
||||
lexer: Lexer::from_read(&self.context, file_id, read)?,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -22,7 +22,7 @@ fn annotation_basic() {
|
|||
"##.trim();
|
||||
|
||||
let context = Default::default();
|
||||
let lexer = Lexer::new(&context, Default::default(), code.bytes().map(Ok));
|
||||
let lexer = Lexer::new(&context, Default::default(), code.as_bytes());
|
||||
let indent = IndentProcessor::new(&context, lexer);
|
||||
let mut annotations = AnnotationTree::default();
|
||||
Parser::new(&context, indent).parse_annotations_only(&mut annotations);
|
||||
|
|
|
|||
|
|
@ -6,7 +6,7 @@ use dm::ast::*;
|
|||
|
||||
fn parse_expr(f: &str) -> Expression {
|
||||
let context = Default::default();
|
||||
let lexer = Lexer::new(&context, Default::default(), f.bytes().map(Ok));
|
||||
let lexer = Lexer::new(&context, Default::default(), f.as_bytes());
|
||||
let result = parse_expression(&context, Default::default(), lexer).expect("failed to parse expression");
|
||||
context.assert_success();
|
||||
result
|
||||
|
|
|
|||
|
|
@ -6,7 +6,7 @@ use dm::lexer::Punctuation::*;
|
|||
|
||||
fn lex(f: &str) -> Vec<Token> {
|
||||
let context = Default::default();
|
||||
let result = Lexer::new(&context, Default::default(), f.bytes().map(Ok))
|
||||
let result = Lexer::new(&context, Default::default(), f.as_bytes())
|
||||
.map(|t| t.token)
|
||||
.collect();
|
||||
context.assert_success();
|
||||
|
|
|
|||
|
|
@ -18,7 +18,7 @@ fn simple_location_test() {
|
|||
"##.trim();
|
||||
|
||||
let context = Default::default();
|
||||
let located_tokens: Vec<_> = Lexer::new(&context, Default::default(), code.bytes().map(Ok)).collect();
|
||||
let located_tokens: Vec<_> = Lexer::new(&context, Default::default(), code.as_bytes()).collect();
|
||||
context.assert_success();
|
||||
|
||||
assert_eq!(located_tokens[0].location.line, 1);
|
||||
|
|
|
|||
|
|
@ -471,7 +471,7 @@ impl<'a> Engine<'a> {
|
|||
None => (FileId::default(), defines.branch_at_end(&self.context)),
|
||||
};
|
||||
let contents = self.docs.read(url).map_err(invalid_request)?;
|
||||
let file_id = preprocessor.push_file(stripped.to_owned(), contents);
|
||||
let file_id = preprocessor.push_file(stripped.to_owned(), contents).map_err(invalid_request)?;
|
||||
preprocessor.enable_annotations();
|
||||
let mut annotations = AnnotationTree::default();
|
||||
{
|
||||
|
|
|
|||
|
|
@ -7,7 +7,7 @@ use std::fmt;
|
|||
use ndarray::{self, Array3, Axis};
|
||||
use linked_hash_map::LinkedHashMap;
|
||||
|
||||
use dm::{DMError, Location};
|
||||
use dm::DMError;
|
||||
use dm::constants::Constant;
|
||||
use crate::dmi::Dir;
|
||||
|
||||
|
|
@ -158,9 +158,7 @@ impl Map {
|
|||
dictionary: Default::default(),
|
||||
grid: Array3::default((1, 1, 1)),
|
||||
};
|
||||
read::parse_map(&mut map, File::open(path).map_err(|e| {
|
||||
DMError::new(Location::default(), "i/o error").with_cause(e)
|
||||
})?)?;
|
||||
read::parse_map(&mut map, path)?;
|
||||
Ok(map)
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,7 +1,5 @@
|
|||
//! Map parser, supporting standard DMM or TGM-format files.
|
||||
use std::collections::BTreeMap;
|
||||
use std::fs::File;
|
||||
use std::io::{Read, BufReader};
|
||||
use std::cmp::max;
|
||||
|
||||
use ndarray::Array3;
|
||||
|
|
@ -16,8 +14,9 @@ fn take<T: Default>(t: &mut T) -> T {
|
|||
std::mem::replace(t, T::default())
|
||||
}
|
||||
|
||||
pub fn parse_map(map: &mut Map, f: File) -> Result<(), DMError> {
|
||||
let mut chars = LocationTracker::new(Default::default(), BufReader::new(f).bytes());
|
||||
pub fn parse_map(map: &mut Map, path: &std::path::Path) -> Result<(), DMError> {
|
||||
let file_id = Default::default();
|
||||
let mut chars = LocationTracker::new(file_id, dm::lexer::buffer_file(file_id, path)?.into());
|
||||
|
||||
let mut in_comment_line = false;
|
||||
let mut comment_trigger = false;
|
||||
|
|
@ -39,7 +38,6 @@ pub fn parse_map(map: &mut Map, f: File) -> Result<(), DMError> {
|
|||
let mut skip_whitespace = false;
|
||||
|
||||
while let Some(ch) = chars.next() {
|
||||
let ch = ch?;
|
||||
if ch == b'\n' || ch == b'\r' {
|
||||
in_comment_line = false;
|
||||
comment_trigger = false;
|
||||
|
|
@ -178,7 +176,6 @@ pub fn parse_map(map: &mut Map, f: File) -> Result<(), DMError> {
|
|||
let mut adjust_y = true;
|
||||
|
||||
while let Some(ch) = chars.next() {
|
||||
let ch = ch?;
|
||||
if in_coord_block {
|
||||
if ch == b',' {
|
||||
if reading_coord == Coord::X {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue