Organize crates
This commit is contained in: parent 6ddef21fec, commit f9d91aa38e
71 changed files with 6 additions and 14 deletions
18 compiler/erg_parser/.gitignore (vendored) Normal file
@@ -0,0 +1,18 @@
# Generated by Cargo
# will have compiled files and executables
/target/

# Remove Cargo.lock from gitignore if creating an executable, leave it for libraries
# More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html
Cargo.lock

# These are backup files generated by rustfmt
**/*.rs.bk
*.pyc
/.vscode/
/.VSCodeCounter/
/.vs/
/.DS_Store
/*/.DS_Store
/.idea/
/timeit.dat
24 compiler/erg_parser/Cargo.toml Normal file
@@ -0,0 +1,24 @@
[package]
name = "erg_parser"
version = "0.1.1"
description = "The Erg parser"
authors = ["mtshiba <sbym1346@gmail.com>"]
license = "MIT OR Apache-2.0"
edition = "2021"
repository = "https://github.com/erg-lang/erg/tree/main/src/erg_compiler/erg_parser"
documentation = "https://docs.rs/erg_parser"
homepage = "https://erg-lang.github.io/"

[features]
debug = [ "erg_common/debug" ]
japanese = [ "erg_common/japanese" ]

[dependencies]
erg_common = { version = "0.1.4", path = "../erg_common" }

[lib]
path = "lib.rs"

[[bin]]
name = "ergp"
path = "main.rs"
5 compiler/erg_parser/README.md Normal file
@@ -0,0 +1,5 @@
# Erg parser

## Why isn't this a module but a crate?

For maintainability. This crate has its own tests.
1856 compiler/erg_parser/ast.rs Normal file
File diff suppressed because it is too large
152 compiler/erg_parser/desugar.rs Normal file
@@ -0,0 +1,152 @@
//! Desugaring syntax sugars.
//!
//! e.g. Literal parameters, Multi assignment
//! No validation such as type checking is performed here.
#![allow(dead_code)]

use erg_common::{enum_unwrap, set};
use erg_common::Str;
use erg_common::set::Set;
use erg_common::traits::{Stream, Locational};

use crate::token::{Token, TokenKind};
use crate::ast::{
    Module, Expr, Lambda, Call, Def, Accessor,
    LambdaSignature, PosArg, Signature, SubrSignature, Args,
    ParamPattern, NonDefaultParamSignature, Params, VarName,
    TypeBoundSpecs, DefBody, Block, VarPattern
};

#[derive(Debug)]
pub struct Desugarer {
    desugared: Set<Str>,
}

impl Desugarer {
    pub fn new() -> Desugarer { Self { desugared: Set::default() } }

    pub fn desugar(&mut self, module: Module) -> Module {
        self.desugar_multiple_pattern_def(module)
    }

    fn desugar_ubar_lambda(&self, _module: Module) -> Module {
        todo!()
    }

    /// `fib 0 = 0; fib 1 = 1; fib n = fib(n-1) + fib(n-2)`
    /// -> `fib n = match n, (0 -> 0), (1 -> 1), n -> fib(n-1) + fib(n-2)`
    fn desugar_multiple_pattern_def(&self, mut module: Module) -> Module {
        let mut new = Module::with_capacity(module.len());
        while let Some(chunk) = module.lpop() {
            match chunk {
                Expr::Def(def) if def.is_subr() => {
                    if let Some(Expr::Def(previous)) = new.last() {
                        if previous.is_subr() && previous.sig.name_as_str() == def.sig.name_as_str() {
                            let mut previous = enum_unwrap!(new.pop().unwrap(), Expr::Def);
                            let name = def.sig.name().unwrap().clone();
                            let op = Token::from_str(TokenKind::FuncArrow, "->");
                            let (call, return_t_spec) = if previous.body.block.len() == 1
                                && previous.body.block.first().unwrap().is_match_call() {
                                let mut call = enum_unwrap!(previous.body.block.remove(0), Expr::Call);
                                let sig = enum_unwrap!(def.sig, Signature::Subr);
                                let return_t_spec = sig.return_t_spec;
                                let first_arg = sig.params.non_defaults.first().unwrap();
                                // use the last definition's parameter name as the whole function's parameter name
                                if let Some(name) = first_arg.inspect() {
                                    call.args.remove_pos(0);
                                    let arg = PosArg::new(Expr::local(name, first_arg.ln_begin().unwrap(), first_arg.col_begin().unwrap()));
                                    call.args.insert_pos(0, arg);
                                }
                                let sig = LambdaSignature::new(sig.params, return_t_spec.clone(), sig.bounds);
                                let new_branch = Lambda::new(sig, op, def.body.block, def.body.id);
                                call.args.push_pos(PosArg::new(Expr::Lambda(new_branch)));
                                (call, return_t_spec)
                            } else {
                                let sig = enum_unwrap!(previous.sig, Signature::Subr);
                                let match_symbol = Expr::static_local("match");
                                let sig = LambdaSignature::new(sig.params, sig.return_t_spec, sig.bounds);
                                let first_branch = Lambda::new(sig, op.clone(), previous.body.block, previous.body.id);
                                let sig = enum_unwrap!(def.sig, Signature::Subr);
                                let return_t_spec = sig.return_t_spec;
                                let sig = LambdaSignature::new(sig.params, return_t_spec.clone(), sig.bounds);
                                let second_branch = Lambda::new(sig, op, def.body.block, def.body.id);
                                let args = Args::new(vec![
                                    PosArg::new(Expr::dummy_local("_")), // dummy argument, will be removed below
                                    PosArg::new(Expr::Lambda(first_branch)),
                                    PosArg::new(Expr::Lambda(second_branch))
                                ], vec![], None);
                                let call = Call::new(match_symbol, args);
                                (call, return_t_spec)
                            };
                            let param_name = enum_unwrap!(&call.args.pos_args().iter().next().unwrap().expr, Expr::Accessor:(Accessor::Local:(_))).inspect();
                            // FIXME: multiple params
                            let param = VarName::new(Token::new(
                                TokenKind::Symbol,
                                param_name,
                                name.ln_begin().unwrap(),
                                name.col_end().unwrap() + 1 // HACK: assumes the form `(name) %x = ...`
                            ));
                            let param = NonDefaultParamSignature::new(ParamPattern::VarName(param), None);
                            let params = Params::new(vec![param], vec![], None);
                            let sig = Signature::Subr(SubrSignature::new(set!{}, name, params, return_t_spec, TypeBoundSpecs::empty()));
                            let body = DefBody::new(def.body.op, Block::new(vec![Expr::Call(call)]), def.body.id);
                            let def = Def::new(sig, body);
                            new.push(Expr::Def(def));
                        } else {
                            new.push(Expr::Def(def));
                        }
                    } else {
                        new.push(Expr::Def(def));
                    }
                },
                other => { new.push(other); },
            }
        }
        new
    }

    /// `f 0 = 1` -> `f _: {0} = 1`
    fn desugar_literal_pattern(&self, _mod: Module) -> Module {
        todo!()
    }

    /// `[i, j] = [1, 2]` -> `i = 1; j = 2`
    /// `[i, j] = l` -> `i = l[0]; j = l[1]`
    /// `[i, [j, k]] = l` -> `i = l[0]; j = l[1][0]; k = l[1][1]`
    /// `(i, j) = t` -> `i = t.0; j = t.1`
    fn desugar_nest_vars_pattern(&self, mut module: Module) -> Module {
        let mut new = Module::with_capacity(module.len());
        while let Some(chunk) = module.lpop() {
            match chunk {
                Expr::Def(def) => {
                    if let Signature::Var(v) = &def.sig {
                        match &v.pat {
                            VarPattern::Array(_a) => {}
                            VarPattern::Record(_r) => {},
                            _ => {}
                        }
                    }
                    new.push(Expr::Def(def));
                },
                other => { new.push(other); },
            }
        }
        new
    }

    /// `{i; j} = s` -> `i = s.i; j = s.j`
    fn desugar_record_pattern(&self, _mod: Module) -> Module {
        todo!()
    }

    /// `F(I | I > 0)` -> `F(I: {I: Int | I > 0})`
    fn desugar_refinement_pattern(&self, _mod: Module) -> Module {
        todo!()
    }

    /// `show! x: Show := print! x` -> `show! x: '1 | '1 <: Show := print! x`
    fn desugar_trait_parameter(&self, _mod: Module) -> Module {
        todo!()
    }
}
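
A minimal sketch of driving the desugarer above; `parse_module` is a hypothetical helper standing in for this crate's `Parser`, which is assumed to yield a `Module`:

// Hypothetical driver: rewrite a multi-clause definition into one
// `match`-based definition, as documented on desugar_multiple_pattern_def.
let module = parse_module("fib 0 = 0\nfib 1 = 1\nfib n = fib(n-1) + fib(n-2)");
let mut desugarer = Desugarer::new();
let module = desugarer.desugar(module); // now a single `fib n = match n, ...` def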
104 compiler/erg_parser/error.rs Normal file
@@ -0,0 +1,104 @@
//! defines `ParseError` and others.
//!
//! Defines the errors the parser emits.
use erg_common::{impl_stream_for_wrapper, switch_lang};
use erg_common::Str;
use erg_common::config::Input;
use erg_common::error::{ErrorCore, ErrorDisplay, MultiErrorDisplay, Location, ErrorKind::*};
use erg_common::traits::Stream;

#[derive(Debug)]
pub struct LexError(ErrorCore);

#[derive(Debug)]
pub struct LexErrors(Vec<LexError>);

impl_stream_for_wrapper!(LexErrors, LexError);

impl LexError {
    pub const fn new(core: ErrorCore) -> Self { Self(core) }

    pub fn compiler_bug(errno: usize, loc: Location, fn_name: &str, line: u32) -> Self {
        Self::new(ErrorCore::new(errno, CompilerSystemError, loc, switch_lang!(
            format!("this is a bug of the Erg compiler, please report it to https://github.com/mtshiba/erg\ncaused from: {fn_name}:{line}"),
            format!("これはErg compilerのバグです、開発者に報告して下さい (https://github.com/mtshiba/erg)\n{fn_name}:{line}より発生")
        ), None))
    }

    pub fn feature_error(errno: usize, loc: Location, name: &str) -> Self {
        Self::new(ErrorCore::new(errno, FeatureError, loc, switch_lang!(
            format!("this feature({name}) is not implemented yet"),
            format!("この機能({name})はまだ正式に提供されていません")
        ), None))
    }

    pub fn simple_syntax_error(errno: usize, loc: Location) -> Self {
        Self::new(ErrorCore::new(errno, SyntaxError, loc, switch_lang!("invalid syntax", "不正な構文です"), None))
    }

    pub fn syntax_error<S: Into<Str>>(errno: usize, loc: Location, desc: S, hint: Option<Str>) -> Self {
        Self::new(ErrorCore::new(errno, SyntaxError, loc, desc, hint))
    }

    pub fn syntax_warning<S: Into<Str>>(errno: usize, loc: Location, desc: S, hint: Option<Str>) -> Self {
        Self::new(ErrorCore::new(errno, SyntaxWarning, loc, desc, hint))
    }
}

pub type LexResult<T> = Result<T, LexError>;

pub type ParseError = LexError;
pub type ParseErrors = LexErrors;
pub type ParseResult<T> = Result<T, ParseError>;

#[derive(Debug)]
pub struct DesugaringError {
    pub core: ErrorCore,
}

impl DesugaringError {
    pub const fn new(core: ErrorCore) -> Self { Self { core } }
}

#[derive(Debug)]
pub struct DesugaringErrors(Vec<DesugaringError>);

impl_stream_for_wrapper!(DesugaringErrors, DesugaringError);

pub type DesugaringResult<T> = Result<T, DesugaringError>;

#[derive(Debug)]
pub struct ParserRunnerError {
    pub core: ErrorCore,
    pub input: Input,
}

impl ErrorDisplay for ParserRunnerError {
    fn core(&self) -> &ErrorCore { &self.core }
    fn input(&self) -> &Input { &self.input }
    fn caused_by(&self) -> &str { "" }
    fn ref_inner(&self) -> Option<&Box<Self>> { None }
}

impl ParserRunnerError {
    pub const fn new(core: ErrorCore, input: Input) -> Self { Self { core, input } }
}

#[derive(Debug)]
pub struct ParserRunnerErrors(Vec<ParserRunnerError>);

impl_stream_for_wrapper!(ParserRunnerErrors, ParserRunnerError);

impl MultiErrorDisplay<ParserRunnerError> for ParserRunnerErrors {}

impl ParserRunnerErrors {
    pub fn convert(input: &Input, errs: ParseErrors) -> Self {
        Self(errs.into_iter().map(|err| ParserRunnerError::new(err.0, input.clone())).collect())
    }
}

pub type ParserRunnerResult<T> = Result<T, ParserRunnerError>;

pub type LexerRunnerError = ParserRunnerError;
pub type LexerRunnerErrors = ParserRunnerErrors;
pub type LexerRunnerResult<T> = Result<T, LexerRunnerError>;
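
A small sketch of raising one of the errors defined above from lexer code; the location value is an assumption here (real call sites pass `token.loc()`):

// Sketch: building a syntax error with a hint, using the signatures above.
let err = ParseError::syntax_error(
    0,
    Location::Unknown, // placeholder; callers normally use token.loc()
    "unexpected token",
    Some(Str::ever("did you forget a newline?")),
);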
735 compiler/erg_parser/lex.rs Normal file
@@ -0,0 +1,735 @@
//! defines and implements `Lexer` (Tokenizer).
use erg_common::cache::Cache;
use erg_common::Str;
use erg_common::{fn_name_full, switch_lang, debug_power_assert, normalize_newline};
use erg_common::config::Input;
use erg_common::config::ErgConfig;
use erg_common::traits::{Locational, Runnable, Stream};

use crate::error::{LexerRunnerError, LexerRunnerErrors, LexError, LexErrors, LexResult};
use crate::token::{Token, TokenCategory, TokenKind, TokenStream};
use TokenKind::*;

/// A Lexer is single-use, so a Runner is provided as the reusable entry point
pub struct LexerRunner {
    cfg: ErgConfig,
}

impl Runnable for LexerRunner {
    type Err = LexerRunnerError;
    type Errs = LexerRunnerErrors;

    #[inline]
    fn new(cfg: ErgConfig) -> Self { Self { cfg } }

    #[inline]
    fn input(&self) -> &Input { &self.cfg.input }

    #[inline]
    fn start_message(&self) -> String { "Erg lexer\n".to_string() }

    #[inline]
    fn finish(&mut self) {}

    #[inline]
    fn clear(&mut self) {}

    fn eval(&mut self, src: Str) -> Result<String, LexerRunnerErrors> {
        let lexer = Lexer::from_str(src);
        if cfg!(feature = "debug") {
            let ts = lexer.lex().map_err(|errs| LexerRunnerErrors::convert(self.input(), errs))?;
            println!("{ts}");
            Ok(ts.to_string())
        } else {
            Ok(lexer.lex().map_err(|errs| LexerRunnerErrors::convert(self.input(), errs))?.to_string())
        }
    }
}
/// Lexes source code and iterates over tokens.
///
/// This can be used as an iterator or to generate a `TokenStream`.
#[derive(Debug)]
pub struct Lexer /*<'a>*/ {
    str_cache: Cache<str>,
    chars: Vec<char>,
    indent_stack: Vec<usize>,
    /// indicates the position in the entire source code
    cursor: usize,
    /// to determine the type of operators, etc.
    prev_token: Token,
    /// 0-origin, but Token.lineno will be 1-origin
    lineno_token_starts: usize,
    /// 0-origin, indicates the column number at which the token appears
    col_token_starts: usize,
}

impl Lexer /*<'a>*/ {
    pub fn new(input: Input) -> Self {
        let normed = normalize_newline(&input.read());
        Lexer {
            str_cache: Cache::new(),
            chars: normed.chars().collect::<Vec<char>>(),
            indent_stack: vec![],
            cursor: 0,
            prev_token: Token::new(TokenKind::BOF, "", 0, 0),
            lineno_token_starts: 0,
            col_token_starts: 0,
        }
    }

    pub fn from_str(src: Str) -> Self {
        let escaped = normalize_newline(&src);
        Lexer {
            str_cache: Cache::new(),
            chars: escaped.chars().collect::<Vec<char>>(),
            indent_stack: vec![],
            cursor: 0,
            prev_token: Token::new(TokenKind::BOF, "", 0, 0),
            lineno_token_starts: 0,
            col_token_starts: 0,
        }
    }

    pub fn lex(self) -> Result<TokenStream, LexErrors> {
        let mut result = TokenStream::empty();
        let mut errs = LexErrors::empty();
        for i in self.into_iter() {
            match i {
                Ok(token) => { result.push(token) }
                Err(err) => { errs.push(err); }
            }
        }
        if errs.is_empty() { Ok(result) } else { Err(errs) }
    }
    fn emit_token(&mut self, kind: TokenKind, cont: &str) -> Token {
        let cont = self.str_cache.get(cont);
        // cannot use String::len() for multi-byte characters
        let cont_len = cont.chars().count();
        let token = Token::new(kind, cont, self.lineno_token_starts + 1, self.col_token_starts);
        self.prev_token = token.clone();
        self.col_token_starts += cont_len;
        token
    }

    #[inline]
    fn accept(&mut self, kind: TokenKind, cont: &str) -> Option<LexResult<Token>> {
        Some(Ok(self.emit_token(kind, cont)))
    }

    fn deny_feature(&mut self, cont: &str, feat_name: &str) -> Option<LexResult<Token>> {
        let token = self.emit_token(Illegal, cont);
        Some(Err(LexError::feature_error(0, token.loc(), feat_name)))
    }

    const fn is_valid_symbol_ch(c: char) -> bool {
        match c {
            '0'..='9' => true,
            // control characters
            '\0' | '\u{0009}'..='\u{001F}' => false,
            // white spaces
            ' ' | '\u{00A0}' => false,
            '\u{007F}' | '\u{0085}' | '\u{05C1}' | '\u{05C2}' => false,
            '\u{0701}'..='\u{070d}' => false,
            '\u{07B2}'..='\u{07BF}' => false,
            '\u{1680}' | '\u{180E}' => false,
            '\u{2000}'..='\u{200F}' => false,
            '\u{2028}'..='\u{202F}' => false,
            '\u{205F}'..='\u{206F}' => false,
            '\u{3000}' | '\u{3164}' | '\u{FEFF}' => false,
            // operator characters + special markers
            '<' | '>' | '$' | '%' | '.' | ',' | ':' | ';' | '+' | '-' | '*' | '/' | '=' | '#'
            | '&' | '|' | '^' | '~' | '@' | '!' | '?' | '\\' => false,
            // enclosures
            '[' | ']' | '(' | ')' | '{' | '}' | '\"' | '\'' | '`' => false,
            _ => true,
        }
    }

    /// Detects whether `c` is a bidirectional overriding character.
    /// [CVE-2021-42574: homoglyph attack](https://blog.rust-lang.org/2021/11/01/cve-2021-42574.html) countermeasures.
    pub fn is_bidi(c: char) -> bool {
        match c {
            '\u{200F}' | '\u{202B}' | '\u{202E}' | '\u{2067}' => true,
            _ => false,
        }
    }

    #[inline]
    fn is_definable_operator(s: &str) -> bool {
        match s {
            "+" | "-" | "*" | "/" | "//" | "**" | "%" | ".." | "..=" | "~" | "&&" | "||" | "^^"
            | ">>" | "<<" | "==" | "!=" | ">" | "<" | ">=" | "<="
            | "dot" | "cross" => true,
            _ => false,
        }
    }

    // +, -, * etc. may be pre/bin
    // and, or, is, isnot, in, notin, as, dot, cross may be bin/function
    const fn is_bin_position(&self) -> Option<bool> {
        match self.prev_token.category() {
            // unary: `[ +`, `= +`, `+ +`, `, +`, `:: +`
            TokenCategory::LEnclosure
            | TokenCategory::BinOp
            | TokenCategory::UnaryOp
            | TokenCategory::Separator
            | TokenCategory::SpecialBinOp
            | TokenCategory::DefOp
            | TokenCategory::LambdaOp => Some(false),
            // bin: `] +`, `1 +`, `true and[true]`
            TokenCategory::REnclosure | TokenCategory::Literal => Some(true),
            // bin: `fn +1`
            // NOTE: if semantic analysis shows `fn` is a function, should this be rewritten to be unary?
            TokenCategory::Symbol => Some(true),
            _ => None,
        }
    }

    fn is_zero(s: &str) -> bool { s.replace("-0", "").replace("0", "").is_empty() }

    /// col_token_starts is advanced all at once in emit_token, so it is not advanced here
    fn consume(&mut self) -> Option<char> {
        let now = self.cursor;
        self.cursor += 1;
        self.chars.get(now).map(|x| *x)
    }

    fn peek_prev_ch(&self) -> Option<char> {
        if self.cursor == 0 {
            None
        } else {
            self.chars.get(self.cursor - 1).map(|x| *x)
        }
    }

    #[inline]
    fn peek_cur_ch(&self) -> Option<char> {
        self.chars.get(self.cursor).map(|x| *x)
    }

    #[inline]
    fn peek_next_ch(&self) -> Option<char> {
        self.chars.get(self.cursor + 1).map(|x| *x)
    }

    fn lex_comment(&mut self) -> LexResult<()> {
        // debug_power_assert!(self.consume(), ==, Some('#'));
        let mut s = "".to_string();
        while self.peek_cur_ch().map(|cur| cur != '\n').unwrap_or(false) {
            if Self::is_bidi(self.peek_cur_ch().unwrap()) {
                let comment = self.emit_token(Illegal, &s);
                return Err(LexError::syntax_error(0, comment.loc(), switch_lang!(
                    "invalid unicode character (bi-directional override) in comments",
                    "不正なユニコード文字(双方向オーバーライド)がコメント中に使用されています"
                ), None))
            }
            s.push(self.consume().unwrap());
        }
        Ok(())
    }
    fn lex_space_indent_dedent(&mut self) -> Option<LexResult<Token>> {
        let is_toplevel = self.cursor > 0
            && !self.indent_stack.is_empty()
            && self.peek_prev_ch() == Some('\n')
            && self.peek_cur_ch() != Some(' ');
        if is_toplevel {
            let dedent = self.emit_token(Dedent, "");
            self.indent_stack.pop();
            self.col_token_starts = 0;
            return Some(Ok(dedent))
        }
        let mut spaces = "".to_string();
        while let Some(' ') = self.peek_cur_ch() {
            spaces.push(self.consume().unwrap());
        }
        // indent in the first line: error
        if !spaces.is_empty() && self.cursor == 0 {
            let space = self.emit_token(Illegal, &spaces);
            Some(Err(LexError::syntax_error(0, space.loc(), switch_lang!(
                "invalid indent",
                "インデントが不正です"
            ), None)))
        } else if self.prev_token.is(Newline) {
            self.lex_indent_dedent(spaces)
        } else {
            self.col_token_starts += spaces.len();
            None
        }
    }

    /// The semantic correctness of the use of indent/dedent is analyzed by the `Parser`
    fn lex_indent_dedent(&mut self, spaces: String) -> Option<LexResult<Token>> {
        // same as CPython's limit
        if spaces.len() > 100 {
            let token = self.emit_token(Indent, &spaces);
            return Some(Err(LexError::syntax_error(0, token.loc(), switch_lang!(
                "indentation is too deep",
                "インデントが深すぎます"
            ), Some(switch_lang!(
                "The code is too complicated. Please split the process.",
                "コードが複雑すぎます。処理を分割してください"
            ).into()))))
        }
        // ignore indents if the current line is a comment
        if let Some('#') = self.peek_cur_ch() {
            if let Err(e) = self.lex_comment() { return Some(Err(e)) }
        }
        let mut is_valid_dedent = false;
        let calc_indent_and_validate = |sum: usize, x: &usize| {
            if sum + *x == spaces.len() { is_valid_dedent = true; }
            sum + *x
        };
        let sum_indent = self.indent_stack.iter().fold(0, calc_indent_and_validate);
        if sum_indent < spaces.len() {
            let indent_len = spaces.len() - sum_indent;
            self.col_token_starts += sum_indent;
            let indent = self.emit_token(Indent, &" ".repeat(indent_len));
            self.indent_stack.push(indent_len);
            Some(Ok(indent))
        } else if sum_indent > spaces.len() {
            if is_valid_dedent {
                let dedent = self.emit_token(Dedent, "");
                self.indent_stack.pop();
                Some(Ok(dedent))
            } else {
                let invalid_dedent = self.emit_token(Dedent, "");
                Some(Err(LexError::syntax_error(0, invalid_dedent.loc(), switch_lang!(
                    "invalid indent",
                    "インデントが不正です"
                ), None)))
            }
        } else /* if sum_indent == spaces.len() */ {
            self.col_token_starts += spaces.len();
            None
        }
    }

    fn lex_exponent(&mut self, mantissa: String) -> LexResult<Token> {
        let mut num = mantissa;
        debug_power_assert!(self.peek_cur_ch(), ==, Some('e'));
        num.push(self.consume().unwrap()); // e
        num.push(self.consume().unwrap()); // + | -
        while let Some(cur) = self.peek_cur_ch() {
            if cur.is_ascii_digit() || cur == '_' {
                num.push(self.consume().unwrap());
            } else {
                break;
            }
        }
        Ok(self.emit_token(RatioLit, &num))
    }

    /// `_` will be removed at compile time
    fn lex_num(&mut self, first_ch: char) -> LexResult<Token> {
        let mut num = first_ch.to_string();
        while let Some(ch) = self.peek_cur_ch() {
            match ch {
                // `.` may be a dot operator, don't consume
                '.' => { return self.lex_num_dot(num) },
                n if n.is_ascii_digit() || n == '_' => {
                    num.push(self.consume().unwrap());
                }
                c if Self::is_valid_symbol_ch(c) => {
                    // exponent (e.g. 10e+3)
                    if c == 'e'
                        && (self.peek_next_ch() == Some('+') || self.peek_next_ch() == Some('-')) {
                        return self.lex_exponent(num)
                    } else {
                        // IntLit * Symbol (e.g. 3x + 1)
                        let token = self.emit_token(Illegal, &(num + &c.to_string()));
                        return Err(LexError::feature_error(0, token.loc(), "*-less multiply"))
                    }
                }
                _ => {
                    break;
                }
            }
        }
        let kind = if num.starts_with('-') && !Self::is_zero(&num) { IntLit } else { NatLit };
        Ok(self.emit_token(kind, &num))
    }

    /// number '.' ~~
    /// Possibilities: a RatioLit, or a method call / range on an Int/NatLit
    fn lex_num_dot(&mut self, mut num: String) -> LexResult<Token> {
        match self.peek_next_ch() {
            // RatioLit
            Some(n) if n.is_ascii_digit() => {
                num.push(self.consume().unwrap());
                self.lex_ratio(num)
            }
            // method call of IntLit
            // or range operator (e.g. 1..)
            Some(c) if Self::is_valid_symbol_ch(c) || c == '.' => {
                let kind = if num.starts_with('-') && !Self::is_zero(&num) { IntLit } else { NatLit };
                Ok(self.emit_token(kind, &num))
            },
            Some('_') => {
                self.consume();
                let token = self.emit_token(Illegal, &(num + "_"));
                Err(LexError::simple_syntax_error(0, token.loc()))
            }
            // RatioLit without a fractional digit (e.g. 3.)
            _ => {
                num.push(self.consume().unwrap());
                self.lex_ratio(num)
            }
        }
    }

    /// intpart_and_point must be like `12.`
    fn lex_ratio(&mut self, intpart_and_point: String) -> LexResult<Token> {
        let mut num = intpart_and_point;
        while let Some(cur) = self.peek_cur_ch() {
            if cur.is_ascii_digit() || cur == '_' {
                num.push(self.consume().unwrap());
            } else if cur == 'e' {
                return self.lex_exponent(num)
            } else {
                break;
            }
        }
        Ok(self.emit_token(RatioLit, &num))
    }

    fn lex_symbol(&mut self, first_ch: char) -> LexResult<Token> {
        let mut cont = first_ch.to_string();
        while let Some(c) = self.peek_cur_ch() {
            if Self::is_valid_symbol_ch(c) {
                cont.push(self.consume().unwrap());
            } else {
                break;
            }
        }
        if let Some('!') = self.peek_cur_ch() {
            cont.push(self.consume().unwrap());
        }
        if cont.is_empty() {
            let token = self.emit_token(Illegal, &self.peek_cur_ch().unwrap().to_string());
            return Err(LexError::compiler_bug(0, token.loc(), fn_name_full!(), line!()))
        }
        // dot: scalar product, cross: vector product
        // An alphabetical operator can also be declared as a function, so this check is necessary
        // e.g. and(true, true, true) = true
        let kind = match &cont[..] {
            "and" => AndOp,
            "or" => OrOp,
            "in" => InOp,
            "notin" => NotInOp,
            "is" => IsOp,
            "isnot" => IsNotOp,
            "dot" => DotOp,
            "cross" => CrossOp,
            // these are constants rather than literals, but are treated as literals for convenience
            "True" | "False" => BoolLit,
            "None" => NoneLit,
            "NotImplemented" => NoImplLit,
            "Ellipsis" => EllipsisLit,
            "Inf" => InfLit,
            "_" => UBar,
            _ => Symbol,
        };
        Ok(self.emit_token(kind, &cont))
    }

    fn lex_str(&mut self) -> LexResult<Token> {
        let mut s = "\"".to_string();
        while let Some(c) = self.peek_cur_ch() {
            if c == '\"' && s.chars().last() != Some('\\') {
                s.push(self.consume().unwrap());
                let token = self.emit_token(StrLit, &s);
                return Ok(token)
            } else {
                let c = self.consume().unwrap();
                s.push(c);
                if Self::is_bidi(c) {
                    let token = self.emit_token(Illegal, &s);
                    return Err(LexError::syntax_error(0, token.loc(), switch_lang!(
                        "invalid unicode character (bi-directional override) in string literal",
                        "不正なユニコード文字(双方向オーバーライド)が文字列中に使用されています"
                    ), None))
                }
            }
        }
        let token = self.emit_token(Illegal, &s);
        Err(LexError::syntax_error(0, token.loc(), switch_lang!(
            "the string is not closed by \"",
            "文字列が\"によって閉じられていません"
        ), None))
    }
}
impl Iterator for Lexer /*<'a>*/ {
    type Item = LexResult<Token>;

    fn next(&mut self) -> Option<Self::Item> {
        if self.prev_token.is(TokenKind::EOF) {
            return None
        }
        let indent_dedent = self.lex_space_indent_dedent();
        if indent_dedent.is_some() {
            return indent_dedent
        }
        if let Some('#') = self.peek_cur_ch() {
            if let Err(e) = self.lex_comment() { return Some(Err(e)) }
        }
        match self.consume() {
            Some('(') => self.accept(LParen, "("),
            Some(')') => self.accept(RParen, ")"),
            Some('[') => self.accept(LSqBr, "["),
            Some(']') => self.accept(RSqBr, "]"),
            Some('{') => self.accept(LBrace, "{"),
            Some('}') => self.accept(RBrace, "}"),
            Some('<') => match self.peek_cur_ch() {
                Some('.') => {
                    self.consume();
                    if let Some('.') = self.peek_cur_ch() {
                        self.consume();
                        match self.peek_cur_ch() {
                            Some('<') => {
                                self.consume();
                                self.accept(Open, "<..<")
                            }
                            _ => self.accept(LeftOpen, "<.."),
                        }
                    } else {
                        let token = self.emit_token(Illegal, "<.");
                        Some(Err(LexError::syntax_error(0, token.loc(), switch_lang!(
                            "no such operator: <.",
                            "<.という演算子はありません"
                        ), None)))
                    }
                }
                Some('=') => {
                    self.consume();
                    self.accept(LessEq, "<=")
                }
                Some('<') => {
                    self.consume();
                    self.accept(Shl, "<<")
                }
                _ => self.accept(Less, "<"),
            },
            Some('>') => match self.peek_cur_ch() {
                Some('=') => {
                    self.consume();
                    self.accept(GreEq, ">=")
                }
                Some('>') => {
                    self.consume();
                    self.accept(Shr, ">>")
                }
                _ => self.accept(Gre, ">"),
            },
            Some('.') => {
                match self.peek_cur_ch() {
                    Some('.') => {
                        self.consume();
                        match self.peek_cur_ch() {
                            Some('<') => {
                                self.consume();
                                self.accept(RightOpen, "..<")
                            },
                            Some('.') => {
                                self.consume();
                                self.accept(EllipsisLit, "...")
                            },
                            _ => {
                                self.accept(Closed, "..")
                            }
                        }
                    }
                    Some(c) if c.is_ascii_digit() => {
                        Some(self.lex_ratio(".".into()))
                    }
                    _ => self.accept(Dot, ".")
                }
            }
            Some(',') => self.accept(Comma, ","),
            Some(':') => match self.peek_cur_ch() {
                Some(':') => {
                    self.consume();
                    self.accept(DblColon, "::")
                }
                Some('>') => {
                    self.consume();
                    self.accept(SupertypeOf, ":>")
                }
                _ => self.accept(Colon, ":"),
            },
            Some(';') => self.accept(Semi, ";"),
            Some('&') => {
                if let Some('&') = self.peek_cur_ch() {
                    self.consume();
                    self.accept(BitAnd, "&&")
                } else {
                    // let kind = if self.is_bin_position().unwrap() { Amper } else { PreAmp };
                    self.accept(Amper, "&")
                }
            }
            Some('|') => {
                match self.peek_cur_ch() {
                    Some('|') => {
                        self.consume();
                        self.accept(BitOr, "||")
                    }
                    Some('=') => {
                        self.consume();
                        self.accept(OrEqual, "|=")
                    }
                    _ => {
                        self.accept(VBar, "|")
                    }
                }
            }
            Some('^') => {
                if let Some('^') = self.peek_cur_ch() {
                    self.consume();
                    self.accept(BitXor, "^^")
                } else {
                    self.accept(Caret, "^")
                }
            }
            Some('~') => self.accept(PreBitNot, "~"),
            // TODO:
            Some('$') => self.deny_feature("$", "shared variables"),
            Some('@') => self.accept(AtSign, "@"),
            Some('=') => match self.peek_cur_ch() {
                Some('=') => {
                    self.consume();
                    self.accept(DblEq, "==")
                }
                Some('>') => {
                    self.consume();
                    self.accept(ProcArrow, "=>")
                }
                _ => self.accept(Equal, "="),
            },
            Some('!') => {
                if let Some('=') = self.peek_cur_ch() {
                    self.consume();
                    self.accept(NotEq, "!=")
                } else {
                    self.accept(Mutate, "!")
                }
            }
            Some('?') => self.accept(Try, "?"),
            Some('+') => {
                let kind = if self.is_bin_position().unwrap() {
                    Plus
                } else {
                    PrePlus
                };
                self.accept(kind, "+")
            }
            Some('-') => match self.peek_cur_ch() {
                Some('>') => {
                    self.consume();
                    self.accept(FuncArrow, "->")
                }
                _ => {
                    if self.is_bin_position().unwrap() {
                        self.accept(Minus, "-")
                    } else {
                        // IntLit (negative number)
                        if self.peek_cur_ch().map(|t| t.is_ascii_digit()).unwrap_or(false) {
                            Some(self.lex_num('-'))
                        } else {
                            self.accept(Minus, "-")
                        }
                    }
                }
            },
            Some('*') => match self.peek_cur_ch() {
                Some('*') => {
                    self.consume();
                    self.accept(Pow, "**")
                }
                _ => {
                    let kind = if self.is_bin_position().unwrap() {
                        Star
                    } else {
                        PreStar
                    };
                    self.accept(kind, "*")
                }
            },
            Some('/') => match self.peek_cur_ch() {
                Some('/') => {
                    self.consume();
                    self.accept(FloorDiv, "//")
                }
                _ => self.accept(Slash, "/"),
            },
            Some('%') => self.accept(Mod, "%"),
            // Newline
            // all newline marks are replaced with \n when the Lexer is created
            Some('\n') => {
                let token = self.emit_token(Newline, "\n");
                self.lineno_token_starts += 1;
                self.col_token_starts = 0;
                Some(Ok(token))
            }
            Some('\t') => {
                let token = self.emit_token(Illegal, "\t");
                Some(Err(LexError::syntax_error(0, token.loc(), switch_lang!(
                    "cannot use a tab as a space",
                    "タブ文字は使用できません"
                ), Some(switch_lang!("use spaces", "スペースを使用してください").into()))))
            }
            // TODO:
            Some('\\') => self.deny_feature("\\", "ignoring line break"),
            // StrLit
            Some('\"') => Some(self.lex_str()),
            // TODO:
            Some('\'') => self.deny_feature("'", "raw identifier"),
            // Symbolized operators
            // e.g. `-`(l, r) = l + (-r)
            Some('`') => {
                let mut op = "".to_string();
                while let Some(c) = self.consume() {
                    if c == '`' {
                        if Self::is_definable_operator(&op[..]) {
                            return self.accept(Symbol, &op)
                        } else {
                            let token = self.emit_token(Illegal, &op);
                            return Some(Err(LexError::syntax_error(0, token.loc(), switch_lang!(
                                format!("`{}` cannot be defined by user", &token.content),
                                format!("`{}`はユーザー定義できません", &token.content)
                            ), None)))
                        }
                    }
                    op.push(c);
                }
                let token = self.emit_token(Illegal, &op);
                Some(Err(LexError::syntax_error(0, token.loc(), switch_lang!(
                    format!("back quotes (`) not closed"),
                    format!("バッククォート(`)が閉じられていません")
                ), None)))
            }
            // IntLit or RatioLit
            Some(n) if n.is_ascii_digit() => Some(self.lex_num(n)),
            // Symbol (includes '_')
            Some(c) if Self::is_valid_symbol_ch(c) => Some(self.lex_symbol(c)),
            // Invalid character (e.g. space-like character)
            Some(invalid) => {
                let token = self.emit_token(Illegal, &invalid.to_string());
                Some(Err(LexError::syntax_error(0, token.loc(), switch_lang!(
                    format!("invalid character: '{invalid}'"),
                    format!("この文字は使用できません: '{invalid}'")
                ), None)))
            }
            None => {
                if self.indent_stack.len() == 0 {
                    self.accept(EOF, "")
                } else {
                    self.indent_stack.pop();
                    self.accept(Dedent, "")
                }
            }
        }
    }
}
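
A minimal sketch of tokenizing a string with the `Lexer` above; the `Str::rc` constructor matches its use elsewhere in this commit, and the error-count accessor is an assumption about the `Stream` wrapper:

// Sketch: lex a one-liner and print the resulting token stream.
let lexer = Lexer::from_str(Str::rc("x = 1 + 2\n"));
match lexer.lex() {
    Ok(tokens) => println!("{tokens}"), // TokenStream is displayable via its wrapper macro
    Err(errs) => eprintln!("failed with {} lex error(s)", errs.len()),
}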
12 compiler/erg_parser/lib.rs Normal file
@@ -0,0 +1,12 @@
//! Implements `Parser` for Erg. `Parser` parses the source code to generate `AST`,
//! and performs type checking and other optimizations if necessary.
extern crate erg_common;

pub mod desugar;
pub mod error;
pub mod ast;
pub mod lex;
pub mod parse;
pub mod token;

pub use parse::{Parser, ParserRunner};
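
A short sketch of consuming the crate through these re-exports, mirroring the `Runnable` usage in main.rs below; that `run` constructs the runner and evaluates its input is an assumption based on that usage:

use erg_common::config::ErgConfig;
use erg_common::traits::Runnable;
use erg_parser::ParserRunner;

// Sketch: parse whatever input the configuration selects (file, REPL, ...).
fn parse_with(cfg: ErgConfig) {
    ParserRunner::run(cfg); // assumed to drive new() + eval() end to end
}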
22 compiler/erg_parser/main.rs Normal file
@@ -0,0 +1,22 @@
extern crate erg_common;
extern crate erg_parser;

use std::process;

use erg_common::config::ErgConfig;
use erg_common::traits::Runnable;

use erg_parser::lex::LexerRunner;
use erg_parser::ParserRunner;

fn main() {
    let cfg = ErgConfig::parse();
    match cfg.mode {
        "lex" => { LexerRunner::run(cfg); }
        "parse" | "exec" => { ParserRunner::run(cfg); }
        other => {
            println!("invalid mode: {other}");
            process::exit(1);
        }
    }
}
1375 compiler/erg_parser/parse.rs Normal file
File diff suppressed because it is too large
26 compiler/erg_parser/tests/ast_example.txt Normal file
@@ -0,0 +1,26 @@
1 + 2 * 3

[1, 2, 3, *, +]

x =
    y = 1
    z = 2
    3

[x y, 1, =, ;, z, 2, =, ;, 3, =]

f
    1
    2
    3

# If the preceding token is =, :=, ->, => or do!, a newline is interpreted as a semicolon; otherwise it is interpreted as a comma
[1, 2, ',', 3, ',', f]

add! x, y =
    print! x, y
    print! x + y
    x + y

add!(x, y) = (print! x, y; print! x + y; x + y)
[add(x,y), NewBlock, x, y, ',', print!, ;, x, y, +, print!, ;, x, y, +, BlockEnd, =
4 compiler/erg_parser/tests/dependent.er Normal file
@@ -0,0 +1,4 @@
concat|T: Type, M, N: Nat|(l: [T; M], r: [T; N]): [T; M + N] = l + r

l: [Nat; 6] = concat [1, 2, 3], [4, 5, 6]
assert l == [1, 2, 3, 4, 5, 6]
6 compiler/erg_parser/tests/fib.er Normal file
@@ -0,0 +1,6 @@
fib 0 = 0
fib 1 = 1
fib(n: 2..<Inf): Nat = fib(n-1) + fib(n-2)

print! fib 10
assert fib(10) == 55
1 compiler/erg_parser/tests/hello_world.er Normal file
@@ -0,0 +1 @@
print! "Hello, world!"
53 compiler/erg_parser/tests/stack.er Normal file
@@ -0,0 +1,53 @@
a =
 b =
  c =
   d =
    e =
     f =
      g =
       h =
        i =
         j =
          k =
           l =
            m =
             n =
              o =
               p =
                q =
                 r =
                  s =
                   t =
                    u =
                     v =
                      w =
                       x =
                        y =
                         z =
                          aa =
                           ab =
                            ac =
                             ad =
                              ae =
                               af =
                                ag =
                                 ah =
                                  ai =
                                   aj =
                                    ak =
                                     al =
                                      am =
                                       an =
                                        ao =
                                         ap =
                                          aq =
                                           ar =
                                            as =
                                             at =
                                              au =
                                               av =
                                                aw =
                                                 ax =
                                                  ay =
                                                   az =
                                                    None
146 compiler/erg_parser/tests/test.rs Normal file
@@ -0,0 +1,146 @@
extern crate erg_common;
extern crate erg_parser;

mod tests {
    use std::iter::Iterator;

    use erg_common::config::{ErgConfig, Input};
    use erg_common::error::MultiErrorFmt;
    use erg_common::traits::Runnable;

    // use erg_compiler::parser;

    use erg_parser::error::*;
    use erg_parser::lex::Lexer;
    use erg_parser::token::*;
    use erg_parser::ParserRunner;
    use TokenKind::*;

    const FILE1: &str = "src/compiler/parser/tests/test1_basic_syntax.er";

    #[test]
    fn test_lexer() -> ParseResult<()> {
        let mut lexer = Lexer::new(Input::File(FILE1));
        let newline = "\n";
        let /*mut*/ token_array = vec![
            (Symbol, "_a"),
            (Equal, "="),
            (IntLit, "1234"),
            (Plus, "+"),
            (RatioLit, "1113.0"),
            (Plus, "+"),
            (RatioLit, "0.30102"),
            // (Symbol, "a"),
            (Newline, newline),
            (Symbol, "a"),
            (Comma, ","),
            (UBar, "_"),
            (Comma, ","),
            (Spread, "..."),
            (Symbol, "b"),
            (Equal, "="),
            (Symbol, "five_elem_tuple"),
            (Newline, newline),
            (Symbol, "if!"),
            (Symbol, "True"),
            (Comma, ","),
            (Symbol, "do!"),
            (Newline, newline),
            (Indent, "    "),
            (Symbol, "print!"),
            // (LParen, "("),
            (StrLit, "\\\\hello, world\\\""),
            // (RParen, ")"),
            (Newline, newline),
            (IntLit, "10"),
            (Dot, "."),
            (Symbol, "times!"),
            // (LParen, "("),
            // (RParen, ")"),
            (Symbol, "do!"),
            (Newline, newline),
            (Indent, "    "),
            (Symbol, "if!"),
            (Symbol, "True"),
            (Comma, ","),
            (Symbol, "do!"),
            (Newline, newline),
            (Indent, "    "),
            (Symbol, "print!"),
            (StrLit, ""),
            (Newline, newline),
            // (Comment, " illegal indent"),
            // (Illegal, "DEDENT"),
            // (Symbol, "do_nothing"),
            (Dedent, ""),
            (Newline, newline),
            (Newline, newline),
            (Symbol, "Hello"),
            (Equal, "="),
            (Symbol, "S2c"),
            // (LParen, "("),
            (StrLit, "hello"),
            // (RParen, ")"),
            (Newline, newline),
            (Dedent, ""),
            (Dedent, ""),
            (Symbol, "aあ아"),
            (Equal, "="),
            (Newline, newline),
            (Indent, "    "),
            (Newline, newline),
            (StrLit, "aaa"),
            (Newline, newline),
            (Dedent, ""),
            (Symbol, "x"),
            (Semi, ";"),
            (Symbol, "x"),
            (Semi, ";"),
            (Semi, ";"),
            (Symbol, "x"),
            (Semi, ";"),
            (Newline, newline),
            (IntLit, "10"),
            (Closed, ".."),
            (Symbol, "twelve"),
            (Semi, ";"),
            (Newline, newline),
            (EOF, "EOF"),
        ];

        let mut tok: Token;
        for i in token_array.into_iter() {
            tok = lexer.next().unwrap().unwrap();
            assert_eq!(tok, Token::without_loc(i.0, i.1));
            println!("{tok}");
        }
        Ok(())
    }

    #[test]
    fn test_op_prec() {
        assert_eq!(Mod.precedence(), Some(170));
        assert_eq!(LParen.precedence(), Some(0));
        assert_eq!(Illegal.precedence(), None);
    }

    #[test]
    fn test_parser1() -> Result<(), ParseErrors> {
        let mut cfg = ErgConfig::default();
        cfg.input = Input::File(FILE1);
        let lexer = Lexer::new(cfg.input.clone());
        let mut parser = ParserRunner::new(cfg);
        match parser.parse(lexer.lex()?) {
            Ok(module) => {
                println!("{module}");
                Ok(())
            }
            Err(e) => {
                e.fmt_all_stderr();
                Err(e)
            }
        }
    }
}
20 compiler/erg_parser/tests/test1_basic_syntax.er Normal file
@@ -0,0 +1,20 @@
# Check that the parser can pass the basic syntax

_a = 1_234 + 1113.* 3_000.2e-4 ** 0003 * .4
a, _, ...b = five_elem_tuple
f x, y =
    x + y
if! True, do!
    print! "\\hello, world\""
10.times! do!
    if! x.y.z, do!
        print! ""
    # illegal indent
    # do_nothing!
Hello = S2c "hello"
aあ아 =
    # comment
    "aaa"
x; x;; x;
10..twelve;
28 compiler/erg_parser/tests/test2_advanced_syntax.er Normal file
@@ -0,0 +1,28 @@
# Check that the parser can pass the advanced syntax

# overloading
f x = 1 + x + 2
f x, y =
    1 + x + y
f x, y, z =
    1 + x + y + z
assert 4 == f 1
assert 3 == f 1, 1
assert 4 == f 1, 1, 1

# pattern overloading
fib 0 = 0
fib 1 = 1
fib(n: Nat) -> Nat = fib(n-1) + fib(n-2)

# keyword arguments
t = if True:
    then: 1
    else: 2
assert t == 1

# import
math = import "math"
# {*} = "math" # use all
{pi} = import "math"
418 compiler/erg_parser/token.rs Normal file
@@ -0,0 +1,418 @@
//! defines `Token` (the minimum unit in the Erg source code that serves as input to the parser).
use std::fmt;
use std::hash::{Hash, Hasher};

use erg_common::str::Str;
use erg_common::error::Location;
use erg_common::impl_displayable_stream_for_wrapper;
use erg_common::traits::{Stream, Locational};
use erg_common::value::ValueObj;
use erg_common::ty::Type;

/// Semantic names and the names of the symbols themselves are mixed here; this is a holdover from Python
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[repr(u8)]
pub enum TokenKind {
    /// e.g. i, p!, $s, T, `+`, `and`, 'd/dx'
    Symbol,
    // e.g. 0, 1
    NatLit,
    // e.g. -1, -2
    IntLit,
    RatioLit,
    BoolLit,
    StrLit,
    NoneLit,
    NoImplLit,
    EllipsisLit,
    InfLit,
    /// `+` (unary)
    PrePlus,
    /// `-` (unary)
    PreMinus,
    /// `*` (unary)
    PreStar,
    /// `~` (unary)
    PreBitNot,
    // PreAmp, // & (unary)
    // PreAt,  // @ (unary)
    /// `!` (unary)
    Mutate,
    /// `?` (postfix)
    Try,
    /// `+`
    Plus,
    /// `-`
    Minus,
    /// `*`
    Star,
    /// `/`
    Slash,
    /// `//`
    FloorDiv,
    /// `**`
    Pow,
    /// `%`
    Mod,
    /// `..`
    Closed,
    /// `..<`
    RightOpen,
    /// `<..`
    LeftOpen,
    /// `<..<`
    Open,
    /// `&&`
    BitAnd,
    /// `||`
    BitOr,
    /// `^^`
    BitXor,
    /// `<<`
    Shl,
    /// `>>`
    Shr,
    /// `<`
    Less,
    /// `>`
    Gre,
    /// `<=`
    LessEq,
    /// `>=`
    GreEq,
    /// `==`
    DblEq,
    /// `!=`
    NotEq,
    /// `in`
    InOp,
    /// `notin`
    NotInOp,
    /// `sub` (subtype of)
    SubOp,
    /// `is`
    IsOp,
    /// `isnot`
    IsNotOp,
    /// `and`
    AndOp,
    /// `or`
    OrOp,
    /// `dot` (scalar product)
    DotOp,
    /// `cross` (vector product)
    CrossOp,
    /// `=`
    Equal,
    /// `|=`
    OrEqual,
    /// `->`
    FuncArrow,
    /// `=>`
    ProcArrow,
    /// `(`
    LParen,
    /// `)`
    RParen,
    /// `[`
    LSqBr,
    /// `]`
    RSqBr,
    /// `{`
    LBrace,
    /// `}`
    RBrace,
    Indent,
    Dedent,
    /// `.`
    Dot,
    /// `|>`
    Pipe,
    /// `:`
    Colon,
    /// `::`
    DblColon,
    /// `:>`
    SupertypeOf,
    /// `<:`
    SubtypeOf,
    /// `,`
    Comma,
    /// `^`
    Caret,
    /// `&`
    Amper,
    /// `@`
    AtSign,
    /// `|`
    VBar,
    /// `_`
    UBar,
    /// `...`
    Spread,
    /// `\n`
    Newline,
    /// `;`
    Semi,
    Illegal,
    /// Beginning Of File
    BOF,
    EOF,
}

use TokenKind::*;
impl From<TokenKind> for Type {
    #[inline]
    fn from(tok: TokenKind) -> Type {
        match tok {
            NatLit => Type::Nat,
            IntLit => Type::Int,
            RatioLit => Type::Ratio,
            StrLit => Type::Str,
            BoolLit => Type::Bool,
            NoneLit => Type::NoneType,
            NoImplLit => Type::NotImplemented,
            EllipsisLit => Type::Ellipsis,
            InfLit => Type::Inf,
            other => panic!("this has no type: {other}"),
        }
    }
}

impl From<&ValueObj> for TokenKind {
    fn from(c: &ValueObj) -> TokenKind {
        match c {
            ValueObj::Int(_) => TokenKind::IntLit,
            ValueObj::Nat(_) => TokenKind::NatLit,
            ValueObj::Float(_) => TokenKind::RatioLit,
            ValueObj::Str(_) => TokenKind::StrLit,
            ValueObj::True => TokenKind::BoolLit,
            ValueObj::False => TokenKind::BoolLit,
            ValueObj::None => TokenKind::NoneLit,
            ValueObj::Ellipsis => TokenKind::EllipsisLit,
            ValueObj::Inf => TokenKind::InfLit,
            _ => TokenKind::Illegal,
        }
    }
}

#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TokenCategory {
    Symbol,
    Literal,
    BinOp,
    UnaryOp,
    /// ? <.. ..
    PostfixOp,
    /// ( [ { Indent
    LEnclosure,
    /// ) ] } Dedent
    REnclosure,
    /// , : :: :> <: . |> |=
    SpecialBinOp,
    /// =
    DefOp,
    /// -> =>
    LambdaOp,
    /// \n ;
    Separator,
    /// ^ (reserved)
    Caret,
    /// &
    Amper,
    /// @
    AtSign,
    /// |
    VBar,
    /// _
    UBar,
    EOF,
    Illegal,
}
impl TokenCategory {
    pub const fn is_block_op(&self) -> bool {
        matches!(self, Self::DefOp | Self::LambdaOp)
    }
}

impl TokenKind {
    pub const fn category(&self) -> TokenCategory {
        match self {
            Symbol => TokenCategory::Symbol,
            NatLit | IntLit | RatioLit | StrLit | BoolLit
            | NoneLit | EllipsisLit | NoImplLit | InfLit => TokenCategory::Literal,
            PrePlus | PreMinus | PreStar | PreBitNot | Mutate => TokenCategory::UnaryOp,
            Try => TokenCategory::PostfixOp,
            Comma | Colon | DblColon | SupertypeOf | SubtypeOf | Dot | Pipe | OrEqual => TokenCategory::SpecialBinOp,
            Equal => TokenCategory::DefOp,
            FuncArrow | ProcArrow => TokenCategory::LambdaOp,
            Semi | Newline => TokenCategory::Separator,
            LParen | LBrace | LSqBr | Indent => TokenCategory::LEnclosure,
            RParen | RBrace | RSqBr | Dedent => TokenCategory::REnclosure,
            Caret => TokenCategory::Caret,
            Amper => TokenCategory::Amper,
            AtSign => TokenCategory::AtSign,
            VBar => TokenCategory::VBar,
            UBar => TokenCategory::UBar,
            EOF => TokenCategory::EOF,
            Illegal | BOF => TokenCategory::Illegal,
            _ => TokenCategory::BinOp,
        }
    }

    pub const fn precedence(&self) -> Option<usize> {
        let prec = match self {
            Dot | DblColon => 200, // . ::
            Pow => 190, // **
            PrePlus | PreMinus | PreBitNot => 180, // (unary) + - ~
            Star | Slash | FloorDiv | Mod | CrossOp | DotOp => 170, // * / // % cross dot
            Plus | Minus => 160, // + -
            Shl | Shr => 150, // << >>
            BitAnd => 140, // &&
            BitXor => 130, // ^^
            BitOr => 120, // ||
            Closed | LeftOpen | RightOpen | Open => 100, // range operators
            Less | Gre | LessEq | GreEq | DblEq | NotEq
            | InOp | NotInOp | IsOp | IsNotOp => 90, // < > <= >= == != in notin is isnot
            AndOp => 80, // and
            OrOp => 70, // or
            FuncArrow | ProcArrow => 60, // -> =>
            Colon | SupertypeOf | SubtypeOf => 50, // : :> <:
            Comma => 40, // ,
            Equal | OrEqual => 20, // = |=
            Newline | Semi => 10, // \n ;
            LParen | LBrace | LSqBr | Indent => 0, // ( { [ Indent
            _ => { return None },
        };
        Some(prec)
    }

    pub const fn is_right_associative(&self) -> bool {
        match self {
            FuncArrow | ProcArrow | Equal => true,
            // PreDollar | PreAt => true,
            _ => false,
        }
    }
}

impl fmt::Display for TokenKind {
    #[inline]
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!(f, "{self:?}") }
}
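
A small sketch of how the table above can drive precedence climbing in a Pratt-style parser; `binds_tighter` is a hypothetical helper, not part of this crate:

// Hypothetical: decide whether to keep consuming operators at this binding level.
fn binds_tighter(op: TokenKind, min_prec: usize) -> bool {
    op.precedence().map_or(false, |p| p >= min_prec)
}

// `*` (170) binds tighter than `+` (160), so `1 + 2 * 3` parses as `1 + (2 * 3)`.
assert!(Star.precedence() > Plus.precedence());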
#[derive(Clone, Eq)]
pub struct Token {
    pub kind: TokenKind,
    pub content: Str,
    /// 1-origin
    // TODO: multi-line string literals also exist, so a tuple may be more appropriate?
    pub lineno: usize,
    /// the column at which the token starts (0-origin)
    pub col_begin: usize,
}

impl From<Token> for ValueObj {
    #[inline]
    fn from(tok: Token) -> ValueObj {
        ValueObj::from_str(Type::from(tok.kind), tok.content)
    }
}

impl From<&Token> for ValueObj {
    #[inline]
    fn from(tok: &Token) -> ValueObj {
        ValueObj::from_str(Type::from(tok.kind), tok.content.clone())
    }
}

impl fmt::Debug for Token {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.debug_struct("Token")
            .field("kind", &self.kind)
            .field("content", &self.content.replace("\n", "\\n"))
            .field("lineno", &self.lineno)
            .field("col_begin", &self.col_begin)
            .finish()
    }
}

impl fmt::Display for Token {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "{:?} {}", self.kind, self.content.replace("\n", "\\n"))
    }
}

// the values of lineno and col are not relevant for comparison
impl PartialEq for Token {
    #[inline]
    fn eq(&self, other: &Self) -> bool { self.is(other.kind) && self.content == other.content }
}

impl Hash for Token {
    fn hash<H: Hasher>(&self, state: &mut H) {
        self.kind.hash(state);
        self.content.hash(state);
    }
}

impl Locational for Token {
    fn loc(&self) -> Location {
        if self.lineno == 0 { Location::Unknown } else {
            Location::range(
                self.lineno,
                self.col_begin,
                self.lineno,
                self.col_begin + self.content.len(),
            )
        }
    }

    #[inline]
    fn col_end(&self) -> Option<usize> { Some(self.col_begin + self.content.len()) }
}

impl Token {
    #[inline]
    pub fn dummy() -> Self {
        Token { kind: TokenKind::Illegal, content: "DUMMY".into(), lineno: 1, col_begin: 0 }
    }

    #[inline]
    pub fn new<S: Into<Str>>(kind: TokenKind, cont: S, lineno: usize, col_begin: usize) -> Self {
        Token { kind, content: cont.into(), lineno, col_begin }
    }

    #[inline]
    pub fn from_str(kind: TokenKind, cont: &str) -> Self {
        Token { kind, content: Str::rc(cont), lineno: 0, col_begin: 0 }
    }

    #[inline]
    pub fn symbol(cont: &str) -> Self { Self::from_str(TokenKind::Symbol, cont) }

    pub const fn static_symbol(s: &'static str) -> Self {
        Token { kind: TokenKind::Symbol, content: Str::ever(s), lineno: 0, col_begin: 0 }
    }

    pub const fn category(&self) -> TokenCategory { self.kind.category() }

    pub fn category_is(&self, category: TokenCategory) -> bool { self.kind.category() == category }

    pub fn is(&self, kind: TokenKind) -> bool { self.kind == kind }

    pub const fn is_block_op(&self) -> bool { self.category().is_block_op() }

    pub const fn inspect(&self) -> &Str { &self.content }

    pub fn is_procedural(&self) -> bool { self.inspect().ends_with("!") }
}

#[derive(Debug, Clone)]
pub struct TokenStream(Vec<Token>);

impl_displayable_stream_for_wrapper!(TokenStream, Token);
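
A short sketch of the equality semantics defined above: location is ignored, so a located token and a location-free template compare equal, which is what the table-driven lexer test earlier in this commit relies on:

// Tokens compare by kind and content only; lineno/col_begin are ignored.
let located = Token::new(TokenKind::Symbol, "x", 3, 7);
let template = Token::from_str(TokenKind::Symbol, "x"); // lineno 0, so loc() is Location::Unknown
assert_eq!(located, template);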