reorganize compiler crates

This commit is contained in:
Jeong YunWon 2022-08-22 04:02:00 +09:00
parent ffacac05bb
commit 53c48bf6b9
66 changed files with 12079 additions and 152 deletions

View file

@ -1,22 +1,12 @@
[package]
name = "rustpython-compiler-core"
name = "rustpython-compiler"
version = "0.1.2"
description = "Compiler for python code into bytecode for the rustpython VM."
description = "A usability wrapper around rustpython-parser and rustpython-compiler-core"
authors = ["RustPython Team"]
repository = "https://github.com/RustPython/RustPython"
license = "MIT"
edition = "2021"
[dependencies]
indexmap = "1.8.1"
itertools = "0.10.3"
rustpython-bytecode = { path = "../bytecode", version = "0.1.1" }
rustpython-ast = { path = "../ast", features = ["unparse"] }
num-complex = { version = "0.4.0", features = ["serde"] }
num-traits = "0.2.14"
log = "0.4.16"
ahash = "0.7.6"
[dev-dependencies]
rustpython-parser = { path = "../parser" }
insta = "1.14.0"
thiserror = "1.0"
rustpython-codegen = { path = "codegen" }
rustpython-parser = { path = "parser" }
rustpython-bytecode = { path = "bytecode" }

16
ast/Cargo.toml Normal file
View file

@ -0,0 +1,16 @@
[package]
name = "rustpython-ast"
version = "0.1.0"
authors = ["RustPython Team"]
edition = "2021"
[features]
default = ["constant-optimization", "fold"]
constant-optimization = ["fold"]
fold = []
unparse = ["rustpython-common"]
[dependencies]
num-bigint = "0.4.3"
rustpython-common = { path = "../../common", optional = true }
rustpython-bytecode = { path = "../bytecode"}

144
ast/Python.asdl Normal file
View file

@ -0,0 +1,144 @@
-- ASDL's 4 builtin types are:
-- identifier, int, string, constant
module Python
{
mod = Module(stmt* body, type_ignore* type_ignores)
| Interactive(stmt* body)
| Expression(expr body)
| FunctionType(expr* argtypes, expr returns)
stmt = FunctionDef(identifier name, arguments args,
stmt* body, expr* decorator_list, expr? returns,
string? type_comment)
| AsyncFunctionDef(identifier name, arguments args,
stmt* body, expr* decorator_list, expr? returns,
string? type_comment)
| ClassDef(identifier name,
expr* bases,
keyword* keywords,
stmt* body,
expr* decorator_list)
| Return(expr? value)
| Delete(expr* targets)
| Assign(expr* targets, expr value, string? type_comment)
| AugAssign(expr target, operator op, expr value)
-- 'simple' indicates that we annotate simple name without parens
| AnnAssign(expr target, expr annotation, expr? value, int simple)
-- use 'orelse' because else is a keyword in target languages
| For(expr target, expr iter, stmt* body, stmt* orelse, string? type_comment)
| AsyncFor(expr target, expr iter, stmt* body, stmt* orelse, string? type_comment)
| While(expr test, stmt* body, stmt* orelse)
| If(expr test, stmt* body, stmt* orelse)
| With(withitem* items, stmt* body, string? type_comment)
| AsyncWith(withitem* items, stmt* body, string? type_comment)
| Match(expr subject, match_case* cases)
| Raise(expr? exc, expr? cause)
| Try(stmt* body, excepthandler* handlers, stmt* orelse, stmt* finalbody)
| Assert(expr test, expr? msg)
| Import(alias* names)
| ImportFrom(identifier? module, alias* names, int? level)
| Global(identifier* names)
| Nonlocal(identifier* names)
| Expr(expr value)
| Pass | Break | Continue
-- col_offset is the byte offset in the utf8 string the parser uses
attributes (int lineno, int col_offset, int? end_lineno, int? end_col_offset)
-- BoolOp() can use left & right?
expr = BoolOp(boolop op, expr* values)
| NamedExpr(expr target, expr value)
| BinOp(expr left, operator op, expr right)
| UnaryOp(unaryop op, expr operand)
| Lambda(arguments args, expr body)
| IfExp(expr test, expr body, expr orelse)
| Dict(expr* keys, expr* values)
| Set(expr* elts)
| ListComp(expr elt, comprehension* generators)
| SetComp(expr elt, comprehension* generators)
| DictComp(expr key, expr value, comprehension* generators)
| GeneratorExp(expr elt, comprehension* generators)
-- the grammar constrains where yield expressions can occur
| Await(expr value)
| Yield(expr? value)
| YieldFrom(expr value)
-- need sequences for compare to distinguish between
-- x < 4 < 3 and (x < 4) < 3
| Compare(expr left, cmpop* ops, expr* comparators)
| Call(expr func, expr* args, keyword* keywords)
| FormattedValue(expr value, int conversion, expr? format_spec)
| JoinedStr(expr* values)
| Constant(constant value, string? kind)
-- the following expression can appear in assignment context
| Attribute(expr value, identifier attr, expr_context ctx)
| Subscript(expr value, expr slice, expr_context ctx)
| Starred(expr value, expr_context ctx)
| Name(identifier id, expr_context ctx)
| List(expr* elts, expr_context ctx)
| Tuple(expr* elts, expr_context ctx)
-- can appear only in Subscript
| Slice(expr? lower, expr? upper, expr? step)
-- col_offset is the byte offset in the utf8 string the parser uses
attributes (int lineno, int col_offset, int? end_lineno, int? end_col_offset)
expr_context = Load | Store | Del
boolop = And | Or
operator = Add | Sub | Mult | MatMult | Div | Mod | Pow | LShift
| RShift | BitOr | BitXor | BitAnd | FloorDiv
unaryop = Invert | Not | UAdd | USub
cmpop = Eq | NotEq | Lt | LtE | Gt | GtE | Is | IsNot | In | NotIn
comprehension = (expr target, expr iter, expr* ifs, int is_async)
excepthandler = ExceptHandler(expr? type, identifier? name, stmt* body)
attributes (int lineno, int col_offset, int? end_lineno, int? end_col_offset)
arguments = (arg* posonlyargs, arg* args, arg? vararg, arg* kwonlyargs,
expr* kw_defaults, arg? kwarg, expr* defaults)
arg = (identifier arg, expr? annotation, string? type_comment)
attributes (int lineno, int col_offset, int? end_lineno, int? end_col_offset)
-- keyword arguments supplied to call (NULL identifier for **kwargs)
keyword = (identifier? arg, expr value)
attributes (int lineno, int col_offset, int? end_lineno, int? end_col_offset)
-- import name with optional 'as' alias.
alias = (identifier name, identifier? asname)
attributes (int lineno, int col_offset, int? end_lineno, int? end_col_offset)
withitem = (expr context_expr, expr? optional_vars)
match_case = (pattern pattern, expr? guard, stmt* body)
pattern = MatchValue(expr value)
| MatchSingleton(constant value)
| MatchSequence(pattern* patterns)
| MatchMapping(expr* keys, pattern* patterns, identifier? rest)
| MatchClass(expr cls, pattern* patterns, identifier* kwd_attrs, pattern* kwd_patterns)
| MatchStar(identifier? name)
-- The optional "rest" MatchMapping parameter handles capturing extra mapping keys
| MatchAs(pattern? pattern, identifier? name)
| MatchOr(pattern* patterns)
attributes (int lineno, int col_offset, int end_lineno, int end_col_offset)
type_ignore = TypeIgnore(int lineno, string tag)
}

385
ast/asdl.py Normal file
View file

@ -0,0 +1,385 @@
#-------------------------------------------------------------------------------
# Parser for ASDL [1] definition files. Reads in an ASDL description and parses
# it into an AST that describes it.
#
# The EBNF we're parsing here: Figure 1 of the paper [1]. Extended to support
# modules and attributes after a product. Words starting with Capital letters
# are terminals. Literal tokens are in "double quotes". Others are
# non-terminals. Id is either TypeId or ConstructorId.
#
# module ::= "module" Id "{" [definitions] "}"
# definitions ::= { TypeId "=" type }
# type ::= product | sum
# product ::= fields ["attributes" fields]
# fields ::= "(" { field, "," } field ")"
# field ::= TypeId ["?" | "*"] [Id]
# sum ::= constructor { "|" constructor } ["attributes" fields]
# constructor ::= ConstructorId [fields]
#
# [1] "The Zephyr Abstract Syntax Description Language" by Wang et al. See
# http://asdl.sourceforge.net/
#-------------------------------------------------------------------------------
from collections import namedtuple
import re
__all__ = [
'builtin_types', 'parse', 'AST', 'Module', 'Type', 'Constructor',
'Field', 'Sum', 'Product', 'VisitorBase', 'Check', 'check']
# The following classes define nodes into which the ASDL description is parsed.
# Note: this is a "meta-AST". ASDL files (such as Python.asdl) describe the AST
# structure used by a programming language. But ASDL files themselves need to be
# parsed. This module parses ASDL files and uses a simple AST to represent them.
# See the EBNF at the top of the file to understand the logical connection
# between the various node types.
# Names of the primitive ASDL types. check() accepts a field of one of these
# types even though the module being checked never defines it.
builtin_types = {'identifier', 'string', 'int', 'constant'}
class AST:
    """Common base of the meta-AST node classes.

    The base class has no printable form: ``__repr__`` raises
    NotImplementedError, so each concrete node class supplies its own.
    """

    def __repr__(self):
        raise NotImplementedError()
class Module(AST):
    """Root node: a named module holding a list of type definitions.

    ``types`` maps each definition's name to its value for direct lookup.
    """

    def __init__(self, name, dfns):
        self.name = name
        self.dfns = dfns
        lookup = {}
        for dfn in dfns:
            lookup[dfn.name] = dfn.value
        self.types = lookup

    def __repr__(self):
        return 'Module({}, {})'.format(self.name, self.dfns)
class Type(AST):
    """One ``name = value`` definition inside a module."""

    def __init__(self, name, value):
        self.name, self.value = name, value

    def __repr__(self):
        return 'Type({}, {})'.format(self.name, self.value)
class Constructor(AST):
    """A named alternative of a sum type, with a (possibly empty) field list."""

    def __init__(self, name, fields=None):
        self.name = name
        # A missing or empty field list is stored as a fresh empty list.
        self.fields = fields if fields else []

    def __repr__(self):
        return 'Constructor({}, {})'.format(self.name, self.fields)
class Field(AST):
    """A typed field, optionally named, optionally a sequence or optional.

    When both ``seq`` and ``opt`` are set, ``seq`` wins in the printed forms.
    """

    def __init__(self, type, name=None, seq=False, opt=False):
        self.type = type
        self.name = name
        self.seq = seq
        self.opt = opt

    def __str__(self):
        # ASDL surface syntax: "type* name", "type? name" or "type name".
        quantifier = "*" if self.seq else ("?" if self.opt else "")
        return "{}{} {}".format(self.type, quantifier, self.name)

    def __repr__(self):
        if self.seq:
            flag = ", seq=True"
        elif self.opt:
            flag = ", opt=True"
        else:
            flag = ""
        text = 'Field({}'.format(self.type)
        if self.name is not None:
            text += ', {}'.format(self.name)
        return text + flag + ')'
class Sum(AST):
    """A sum type: a list of constructors plus optional shared attributes."""

    def __init__(self, types, attributes=None):
        self.types = types
        self.attributes = attributes if attributes else []

    def __repr__(self):
        # Attributes are only printed when there are any.
        if not self.attributes:
            return 'Sum({})'.format(self.types)
        return 'Sum({}, {})'.format(self.types, self.attributes)
class Product(AST):
    """A product type: a list of fields plus optional attributes."""

    def __init__(self, fields, attributes=None):
        self.fields = fields
        self.attributes = attributes if attributes else []

    def __repr__(self):
        # Attributes are only printed when there are any.
        if not self.attributes:
            return 'Product({})'.format(self.fields)
        return 'Product({}, {})'.format(self.fields, self.attributes)
# A generic visitor for the meta-AST that describes ASDL. This can be used by
# emitters. Note that this visitor does not provide a generic visit method, so a
# subclass needs to define visit methods from visitModule to as deep as the
# interesting node.
# We also define a Check visitor that makes sure the parsed ASDL is well-formed.
class VisitorBase:
    """Dispatches ``visit(obj)`` to a ``visit<ClassName>`` method by name.

    Lookups are remembered per class in ``self.cache``. Objects whose class
    has no matching method are silently ignored.
    """

    def __init__(self):
        self.cache = {}

    def visit(self, obj, *args):
        node_cls = obj.__class__
        handler = self.cache.get(node_cls)
        if handler is None:
            handler = getattr(self, "visit" + node_cls.__name__, None)
            self.cache[node_cls] = handler
        if not handler:
            return
        try:
            handler(obj, *args)
        except Exception as exc:
            # Report which node failed, then let the error propagate.
            print("Error visiting %r: %s" % (obj, exc))
            raise
class Check(VisitorBase):
    """Visitor that validates a parsed ASDL tree.

    It records which type owns each constructor (``cons``), which names use
    each field type (``types``), and counts problems in ``errors``. Duplicate
    constructor names are printed as they are found.
    """

    def __init__(self):
        super().__init__()
        self.cons = {}
        self.errors = 0
        self.types = {}

    def visitModule(self, mod):
        for definition in mod.dfns:
            self.visit(definition)

    def visitType(self, type):
        self.visit(type.value, str(type.name))

    def visitSum(self, sum, name):
        for constructor in sum.types:
            self.visit(constructor, name)

    def visitConstructor(self, cons, name):
        key = str(cons.name)
        previous_owner = self.cons.get(key)
        if previous_owner is not None:
            print('Redefinition of constructor {}'.format(key))
            print('Defined in {} and {}'.format(previous_owner, name))
            self.errors += 1
        else:
            self.cons[key] = name
        for field in cons.fields:
            self.visit(field, key)

    def visitField(self, field, name):
        # Remember every place a type is used so check() can report it.
        self.types.setdefault(str(field.type), []).append(name)

    def visitProduct(self, prod, name):
        for field in prod.fields:
            self.visit(field, name)
def check(mod):
    """Validate a parsed ASDL module.

    Runs the Check visitor, then reports every field type that is neither
    defined in ``mod`` nor a builtin. Returns True when no errors were found;
    otherwise the errors have been printed and False is returned.
    """
    checker = Check()
    checker.visit(mod)
    for type_name, users in checker.types.items():
        if type_name in mod.types or type_name in builtin_types:
            continue
        checker.errors += 1
        print('Undefined type {}, used in {}'.format(type_name, ", ".join(users)))
    return not checker.errors
# The ASDL parser itself comes next. The only interesting external interface
# here is the top-level parse function.
def parse(filename):
    """Read the UTF-8 file ``filename`` and return the Module node it describes."""
    with open(filename, encoding="utf-8") as stream:
        return ASDLParser().parse(stream.read())
# Types for describing tokens in an ASDL specification.
class TokenKind:
    """Namespace for the integer codes that identify each kind of token."""

    ConstructorId = 0
    TypeId = 1
    Equals = 2
    Comma = 3
    Question = 4
    Pipe = 5
    Asterisk = 6
    LParen = 7
    RParen = 8
    LBrace = 9
    RBrace = 10

    # Single-character operators and the token kind each one produces.
    operator_table = {
        '=': Equals,
        ',': Comma,
        '?': Question,
        '|': Pipe,
        '(': LParen,
        ')': RParen,
        '*': Asterisk,
        '{': LBrace,
        '}': RBrace,
    }
# A lexed token: its TokenKind code, its source text, and its 1-based line.
Token = namedtuple('Token', ['kind', 'value', 'lineno'])
class ASDLSyntaxError(Exception):
    """Raised for malformed ASDL input.

    ``lineno`` falls back to the string '<unknown>' when no (or a falsy)
    line number is supplied.
    """

    def __init__(self, msg, lineno=None):
        self.msg = msg
        self.lineno = lineno if lineno else '<unknown>'

    def __str__(self):
        return 'Syntax error on line {}: {}'.format(self.lineno, self.msg)
def tokenize_asdl(buf):
    """Lazily yield a Token for every lexeme in ``buf``.

    Words starting with an uppercase letter become ConstructorId tokens and
    other words starting with a letter become TypeId tokens. A ``--`` comment
    ends the current line. Any other lexeme must be a key of
    TokenKind.operator_table, otherwise ASDLSyntaxError is raised with the
    offending line number.
    """
    for lineno, raw_line in enumerate(buf.splitlines(), start=1):
        for match in re.finditer(r'\s*(\w+|--.*|.)', raw_line.strip()):
            lexeme = match.group(1)
            first = lexeme[0]
            if first.isalpha():
                kind = (TokenKind.ConstructorId if first.isupper()
                        else TokenKind.TypeId)
                yield Token(kind, lexeme, lineno)
                continue
            if lexeme[:2] == '--':
                # The rest of the line is a comment.
                break
            try:
                kind = TokenKind.operator_table[lexeme]
            except KeyError:
                raise ASDLSyntaxError('Invalid operator %s' % lexeme, lineno)
            yield Token(kind, lexeme, lineno)
class ASDLParser:
    """Recursive-descent parser for ASDL, fed by tokenize_asdl.

    Create an instance, then call parse() on a string containing ASDL.

    ``cur_token`` is the lookahead token, or None once the tokenizer is
    exhausted. Running out of tokens in the middle of a construct (including
    a completely empty buffer) is reported as ASDLSyntaxError; it never
    surfaces as an AttributeError on None.
    """

    def __init__(self):
        self._tokenizer = None
        self.cur_token = None

    def parse(self, buf):
        """Parse the ASDL text in ``buf`` and return its Module root node.

        Raises ASDLSyntaxError on malformed or truncated input.
        """
        self._tokenizer = tokenize_asdl(buf)
        self._advance()
        return self._parse_module()

    def _parse_module(self):
        if not self._at_keyword('module'):
            if self.cur_token is None:
                raise ASDLSyntaxError('Expected "module" (found end of input)')
            raise ASDLSyntaxError(
                'Expected "module" (found {})'.format(self.cur_token.value),
                self.cur_token.lineno)
        self._advance()
        name = self._match(self._id_kinds)
        self._match(TokenKind.LBrace)
        defs = self._parse_definitions()
        self._match(TokenKind.RBrace)
        return Module(name, defs)

    def _parse_definitions(self):
        defs = []
        while self._cur_kind() == TokenKind.TypeId:
            typename = self._advance()
            self._match(TokenKind.Equals)
            type = self._parse_type()
            defs.append(Type(typename, type))
        return defs

    def _parse_type(self):
        if self._cur_kind() == TokenKind.LParen:
            # If we see a (, it's a product
            return self._parse_product()
        # Otherwise it's a sum. Look for ConstructorId
        sumlist = [Constructor(self._match(TokenKind.ConstructorId),
                               self._parse_optional_fields())]
        while self._cur_kind() == TokenKind.Pipe:
            # More constructors
            self._advance()
            sumlist.append(Constructor(
                self._match(TokenKind.ConstructorId),
                self._parse_optional_fields()))
        return Sum(sumlist, self._parse_optional_attributes())

    def _parse_product(self):
        return Product(self._parse_fields(), self._parse_optional_attributes())

    def _parse_fields(self):
        fields = []
        self._match(TokenKind.LParen)
        while self._cur_kind() == TokenKind.TypeId:
            typename = self._advance()
            is_seq, is_opt = self._parse_optional_field_quantifier()
            id = (self._advance() if self._cur_kind() in self._id_kinds
                  else None)
            fields.append(Field(typename, id, seq=is_seq, opt=is_opt))
            if self._cur_kind() == TokenKind.RParen:
                break
            elif self._cur_kind() == TokenKind.Comma:
                self._advance()
        self._match(TokenKind.RParen)
        return fields

    def _parse_optional_fields(self):
        if self._cur_kind() == TokenKind.LParen:
            return self._parse_fields()
        else:
            return None

    def _parse_optional_attributes(self):
        if self._at_keyword('attributes'):
            self._advance()
            return self._parse_fields()
        else:
            return None

    def _parse_optional_field_quantifier(self):
        is_seq, is_opt = False, False
        if self._cur_kind() == TokenKind.Asterisk:
            is_seq = True
            self._advance()
        elif self._cur_kind() == TokenKind.Question:
            is_opt = True
            self._advance()
        return is_seq, is_opt

    def _cur_kind(self):
        """Return the kind of the lookahead token, or None at end of input.

        None never equals a TokenKind code, so callers can compare the result
        directly without a separate end-of-input check.
        """
        return None if self.cur_token is None else self.cur_token.kind

    def _advance(self):
        """ Return the value of the current token and read the next one into
            self.cur_token.
        """
        cur_val = None if self.cur_token is None else self.cur_token.value
        try:
            self.cur_token = next(self._tokenizer)
        except StopIteration:
            self.cur_token = None
        return cur_val

    _id_kinds = (TokenKind.ConstructorId, TokenKind.TypeId)

    def _match(self, kind):
        """The 'match' primitive of RD parsers.

        * Verifies that the current token is of the given kind (kind can
          be a tuple, in which the kind must match one of its members).
        * Returns the value of the current token
        * Reads in the next token

        Raises ASDLSyntaxError when the token does not match or when the
        input has already ended.
        """
        if self.cur_token is None:
            raise ASDLSyntaxError(
                'Unmatched {} (found end of input)'.format(kind))
        if (isinstance(kind, tuple) and self.cur_token.kind in kind or
            self.cur_token.kind == kind
            ):
            value = self.cur_token.value
            self._advance()
            return value
        else:
            raise ASDLSyntaxError(
                'Unmatched {} (found {})'.format(kind, self.cur_token.kind),
                self.cur_token.lineno)

    def _at_keyword(self, keyword):
        # At end of input there is no token, hence no keyword.
        return (self.cur_token is not None and
                self.cur_token.kind == TokenKind.TypeId and
                self.cur_token.value == keyword)

726
ast/asdl_rs.py Executable file
View file

@ -0,0 +1,726 @@
#! /usr/bin/env python
"""Generate Rust code from an ASDL description."""
import sys
import json
import textwrap
from argparse import ArgumentParser
from pathlib import Path
import asdl
# Number of spaces per indentation level in the emitted output.
TABSIZE = 4
# Banner template; "{}" is a str.format placeholder (presumably for the name
# of the generating script -- confirm at the call site).
AUTOGEN_MESSAGE = "// File automatically generated by {}.\n"
# Rust type name emitted for each ASDL builtin type.
builtin_type_mapping = {
"identifier": "Ident",
"string": "String",
"int": "usize",
"constant": "Constant",
}
# Guard: the mapping must cover exactly the builtin types asdl.py declares.
assert builtin_type_mapping.keys() == asdl.builtin_types
def get_rust_type(name):
    """Return the Rust type name to use for the ASDL type ``name``.

    Builtin ASDL types go through builtin_type_mapping. An all-lowercase name
    is turned into CamelCase by capitalizing each ``_``-separated part. Any
    other name is returned unchanged.
    """
    if name in asdl.builtin_types:
        return builtin_type_mapping[name]
    if not name.islower():
        return name
    pieces = [piece.capitalize() for piece in name.split("_")]
    return "".join(pieces)
def is_simple(sum):
    """Return True when no constructor of ``sum`` carries any fields.

    Example of a simple sum:
        unaryop = Invert | Not | UAdd | USub
    """
    return not any(constructor.fields for constructor in sum.types)
def asdl_of(name, obj):
    """Render ``obj`` back into ASDL-like text under the given ``name``.

    Products and constructors print as ``name(field, ...)``, or just ``name``
    when there are no fields. Anything else is treated as a sum and prints as
    ``name = A | B`` when simple, or with one constructor per line otherwise.
    """
    if isinstance(obj, (asdl.Product, asdl.Constructor)):
        joined = ", ".join(str(field) for field in obj.fields)
        suffix = "({})".format(joined) if joined else joined
        return f"{name}{suffix}"
    if is_simple(obj):
        alternatives = " | ".join(cons.name for cons in obj.types)
    else:
        # Continuation lines are indented so each "|" sits under the "=".
        separator = "\n{}| ".format(" " * (len(name) + 1))
        alternatives = separator.join(
            asdl_of(cons.name, cons) for cons in obj.types
        )
    return f"{name} = {alternatives}"
class EmitVisitor(asdl.VisitorBase):
    """Visitor base that writes indented lines to a file-like object."""

    def __init__(self, file):
        self.file = file
        self.identifiers = set()
        super().__init__()

    def emit_identifier(self, name):
        # Each identifier is emitted at most once.
        text = str(name)
        if text not in self.identifiers:
            self.emit("_Py_IDENTIFIER(%s);" % text, 0)
            self.identifiers.add(text)

    def emit(self, line, depth):
        # Empty lines are written without indentation.
        prefix = " " * TABSIZE * depth if line else ""
        self.file.write(prefix + line + "\n")
class TypeInfo:
    """Per-type bookkeeping gathered before code emission.

    ``has_userdata`` is None until decided, ``children`` is a set of
    ``(type_name, is_sequence)`` pairs, and ``boxed`` starts out False.
    """

    def __init__(self, name):
        self.name = name
        self.has_userdata = None
        self.children = set()
        self.boxed = False

    def __repr__(self):
        return "<TypeInfo: {}>".format(self.name)

    def determine_userdata(self, typeinfo, stack):
        """Resolve and return ``has_userdata``, consulting child types.

        ``stack`` holds the names currently being resolved. A type that is
        already on it returns None, which breaks cycles. An undecided type
        becomes True as soon as any non-builtin child reports True.
        """
        if self.name in stack:
            return None
        stack.add(self.name)
        for child_name, _is_seq in self.children:
            if child_name in asdl.builtin_types:
                continue
            inherited = typeinfo[child_name].determine_userdata(typeinfo, stack)
            if inherited is True and self.has_userdata is None:
                self.has_userdata = True
        stack.remove(self.name)
        return self.has_userdata
class FindUserdataTypesVisitor(asdl.VisitorBase):
    """Fills ``typeinfo`` with a TypeInfo per definition.

    After all definitions are visited, every TypeInfo resolves its
    ``has_userdata`` flag through its children.
    """

    def __init__(self, typeinfo):
        self.typeinfo = typeinfo
        super().__init__()

    def visitModule(self, mod):
        for definition in mod.dfns:
            self.visit(definition)
        in_progress = set()
        for entry in self.typeinfo.values():
            entry.determine_userdata(self.typeinfo, in_progress)

    def visitType(self, type):
        self.typeinfo[type.name] = TypeInfo(type.name)
        self.visit(type.value, type.name)

    def visitSum(self, sum, name):
        entry = self.typeinfo[name]
        if is_simple(sum):
            entry.has_userdata = False
        else:
            if len(sum.types) > 1:
                entry.boxed = True
            if sum.attributes:
                # attributes means Located, which has the `custom: U` field
                entry.has_userdata = True
        for constructor in sum.types:
            self.add_children(name, constructor.fields)

    def visitProduct(self, product, name):
        entry = self.typeinfo[name]
        if product.attributes:
            # attributes means Located, which has the `custom: U` field
            entry.has_userdata = True
        if len(product.fields) > 2:
            entry.boxed = True
        self.add_children(name, product.fields)

    def add_children(self, name, fields):
        # Record each field's type together with whether it is a sequence.
        pairs = ((field.type, field.seq) for field in fields)
        self.typeinfo[name].children.update(pairs)
def rust_field(field_name):
    """Return ``field_name``, except that "type" is mapped to "type_"."""
    return "type_" if field_name == "type" else field_name
class TypeInfoEmitVisitor(EmitVisitor):
    """EmitVisitor that also carries the collected ``typeinfo`` table."""

    def __init__(self, file, typeinfo):
        self.typeinfo = typeinfo
        super().__init__(file)

    def has_userdata(self, typ):
        return self.typeinfo[typ].has_userdata

    def get_generics(self, typ, *generics):
        """Return one string per entry of ``generics``.

        Each entry is wrapped as ``<entry>`` when ``typ`` has userdata;
        otherwise every entry becomes the empty string.
        """
        if not self.has_userdata(typ):
            return [""] * len(generics)
        return ["<{}>".format(generic) for generic in generics]
class StructVisitor(TypeInfoEmitVisitor):
    """Emits the Rust enum and struct definitions for each ASDL type."""

    def visitModule(self, mod):
        for definition in mod.dfns:
            self.visit(definition)

    def visitType(self, type, depth=0):
        self.visit(type.value, type.name, depth)

    def visitSum(self, sum, name, depth):
        handler = self.simple_sum if is_simple(sum) else self.sum_with_constructors
        handler(sum, name, depth)

    def emit_attrs(self, depth):
        self.emit("#[derive(Debug, PartialEq)]", depth)

    def simple_sum(self, sum, name, depth):
        rust_name = get_rust_type(name)
        self.emit_attrs(depth)
        self.emit(f"pub enum {rust_name} {{", depth)
        inner = depth + 1
        for variant in sum.types:
            self.emit(f"{variant.name},", inner)
        self.emit("}", depth)
        self.emit("", depth)

    def sum_with_constructors(self, sum, name, depth):
        info = self.typeinfo[name]
        generics, generics_applied = self.get_generics(name, "U = ()", "U")
        rust_name = get_rust_type(name)
        # all the attributes right now are for location, so if it has attrs we
        # can just wrap it in Located<>
        enum_name = rust_name + "Kind" if sum.attributes else rust_name
        self.emit_attrs(depth)
        self.emit(f"pub enum {enum_name}{generics} {{", depth)
        for constructor in sum.types:
            self.visit(constructor, info, depth + 1)
        self.emit("}", depth)
        if sum.attributes:
            self.emit(
                f"pub type {rust_name}<U = ()> = Located<{enum_name}{generics_applied}, U>;",
                depth,
            )
        self.emit("", depth)

    def visitConstructor(self, cons, parent, depth):
        if not cons.fields:
            self.emit(f"{cons.name},", depth)
            return
        self.emit(f"{cons.name} {{", depth)
        for field in cons.fields:
            self.visit(field, parent, "", depth + 1)
        self.emit("},", depth)

    def visitField(self, field, parent, vis, depth):
        rust_type = get_rust_type(field.type)
        info = self.typeinfo.get(field.type)
        if info and info.has_userdata:
            rust_type = f"{rust_type}<U>"
        # don't box if we're doing Vec<T>, but do box if we're doing Vec<Option<Box<T>>>
        if info and info.boxed and (not field.seq or field.opt):
            rust_type = f"Box<{rust_type}>"
        if field.opt:
            rust_type = f"Option<{rust_type}>"
        if field.seq:
            rust_type = f"Vec<{rust_type}>"
        field_name = rust_field(field.name)
        self.emit(f"{vis}{field_name}: {rust_type},", depth)

    def visitProduct(self, product, name, depth):
        info = self.typeinfo[name]
        generics, generics_applied = self.get_generics(name, "U = ()", "U")
        rust_name = get_rust_type(name)
        data_name = rust_name + "Data" if product.attributes else rust_name
        self.emit_attrs(depth)
        # The generic parameter is only attached when some field is not a
        # plain identifier.
        has_expr = any(field.type != "identifier" for field in product.fields)
        struct_head = f"{data_name}{generics}" if has_expr else data_name
        self.emit(f"pub struct {struct_head} {{", depth)
        for field in product.fields:
            self.visit(field, info, "pub ", depth + 1)
        self.emit("}", depth)
        if product.attributes:
            # attributes should just be location info
            if not has_expr:
                generics_applied = ""
            self.emit(
                f"pub type {rust_name}<U = ()> = Located<{data_name}{generics_applied}, U>;",
                depth,
            )
        self.emit("", depth)
class FoldTraitDefVisitor(TypeInfoEmitVisitor):
    """Emits the ``Fold<U>`` trait with one ``fold_<type>`` method per type."""

    def visitModule(self, mod, depth):
        self.emit("pub trait Fold<U> {", depth)
        for associated in ("type TargetU;", "type Error;"):
            self.emit(associated, depth + 1)
        self.emit(
            "fn map_user(&mut self, user: U) -> Result<Self::TargetU, Self::Error>;",
            depth + 2,
        )
        for definition in mod.dfns:
            self.visit(definition, depth + 2)
        self.emit("}", depth)

    def visitType(self, type, depth):
        type_name = type.name
        apply_u, apply_target_u = self.get_generics(type_name, "U", "Self::TargetU")
        rust_name = get_rust_type(type_name)
        signature = (
            f"fn fold_{type_name}(&mut self, node: {rust_name}{apply_u})"
            f" -> Result<{rust_name}{apply_target_u}, Self::Error> {{"
        )
        self.emit(signature, depth)
        # The default body delegates to the free function of the same name.
        self.emit(f"fold_{type_name}(self, node)", depth + 1)
        self.emit("}", depth)
class FoldImplVisitor(TypeInfoEmitVisitor):
    """Emits ``Foldable`` impls and the free ``fold_<type>`` functions."""

    def visitModule(self, mod, depth):
        self.emit(
            "fn fold_located<U, F: Fold<U> + ?Sized, T, MT>(folder: &mut F, node: Located<T, U>, f: impl FnOnce(&mut F, T) -> Result<MT, F::Error>) -> Result<Located<MT, F::TargetU>, F::Error> {",
            depth,
        )
        self.emit(
            "Ok(Located { custom: folder.map_user(node.custom)?, location: node.location, node: f(folder, node.node)? })",
            depth + 1,
        )
        self.emit("}", depth)
        for definition in mod.dfns:
            self.visit(definition, depth)

    def visitType(self, type, depth=0):
        self.visit(type.value, type.name, depth)

    def _emit_fold_header(self, name, depth):
        """Emit the Foldable impl and open ``fold_<name>``; return the Rust name."""
        apply_t, apply_u, apply_target_u = self.get_generics(
            name, "T", "U", "F::TargetU"
        )
        rust_name = get_rust_type(name)
        body_depth = depth + 1
        self.emit(f"impl<T, U> Foldable<T, U> for {rust_name}{apply_t} {{", depth)
        self.emit(f"type Mapped = {rust_name}{apply_u};", body_depth)
        self.emit(
            "fn fold<F: Fold<T, TargetU = U> + ?Sized>(self, folder: &mut F) -> Result<Self::Mapped, F::Error> {",
            body_depth,
        )
        self.emit(f"folder.fold_{name}(self)", depth + 2)
        self.emit("}", body_depth)
        self.emit("}", depth)
        self.emit(
            f"pub fn fold_{name}<U, F: Fold<U> + ?Sized>(#[allow(unused)] folder: &mut F, node: {rust_name}{apply_u}) -> Result<{rust_name}{apply_target_u}, F::Error> {{",
            depth,
        )
        return rust_name

    def visitSum(self, sum, name, depth):
        is_located = bool(sum.attributes)
        enum_name = self._emit_fold_header(name, depth)
        if is_located:
            # Located sums are folded through fold_located on their Kind enum.
            self.emit("fold_located(folder, node, |folder, node| {", depth)
            enum_name += "Kind"
        self.emit("match node {", depth + 1)
        for cons in sum.types:
            variant_path = f"{enum_name}::{cons.name}"
            pattern = self.make_pattern(cons.fields)
            self.emit(f"{variant_path} {{ {pattern} }} => {{", depth + 2)
            self.gen_construction(variant_path, cons.fields, depth + 3)
            self.emit("}", depth + 2)
        self.emit("}", depth + 1)
        if is_located:
            self.emit("})", depth)
        self.emit("}", depth)

    def visitProduct(self, product, name, depth):
        is_located = bool(product.attributes)
        struct_name = self._emit_fold_header(name, depth)
        if is_located:
            # Located products are folded through fold_located on their Data struct.
            self.emit("fold_located(folder, node, |folder, node| {", depth)
            struct_name += "Data"
        pattern = self.make_pattern(product.fields)
        self.emit(f"let {struct_name} {{ {pattern} }} = node;", depth + 1)
        self.gen_construction(struct_name, product.fields, depth + 1)
        if is_located:
            self.emit("})", depth)
        self.emit("}", depth)

    def make_pattern(self, fields):
        names = [rust_field(field.name) for field in fields]
        return ",".join(names)

    def gen_construction(self, cons_path, fields, depth):
        self.emit(f"Ok({cons_path} {{", depth)
        inner = depth + 1
        for field in fields:
            field_name = rust_field(field.name)
            self.emit(f"{field_name}: Foldable::fold({field_name}, folder)?,", inner)
        self.emit("})", depth)
class FoldModuleVisitor(TypeInfoEmitVisitor):
    """Emits the feature-gated ``fold`` module: trait first, then the impls."""

    def visitModule(self, mod):
        outer, inner = 0, 1
        self.emit('#[cfg(feature = "fold")]', outer)
        self.emit("pub mod fold {", outer)
        self.emit("use super::*;", inner)
        self.emit("use crate::fold_helpers::Foldable;", inner)
        trait_visitor = FoldTraitDefVisitor(self.file, self.typeinfo)
        trait_visitor.visit(mod, inner)
        impl_visitor = FoldImplVisitor(self.file, self.typeinfo)
        impl_visitor.visit(mod, inner)
        self.emit("}", outer)
class ClassDefVisitor(EmitVisitor):
    """Emits one ``#[pyclass]`` struct per ASDL sum, constructor and product."""

    def visitModule(self, mod):
        for definition in mod.dfns:
            self.visit(definition)

    def visitType(self, type, depth=0):
        self.visit(type.value, type.name, depth)

    def visitSum(self, sum, name, depth):
        # The sum itself becomes a NodeKind* class that its constructors extend.
        kind_struct = "NodeKind" + get_rust_type(name)
        self.emit(
            f'#[pyclass(module = "_ast", name = {json.dumps(name)}, base = "AstNode")]',
            depth,
        )
        self.emit(f"struct {kind_struct};", depth)
        self.emit("#[pyclass(flags(HAS_DICT, BASETYPE))]", depth)
        self.emit(f"impl {kind_struct} {{}}", depth)
        for cons in sum.types:
            self.visit(cons, sum.attributes, kind_struct, depth)

    def visitConstructor(self, cons, attrs, base, depth):
        self.gen_classdef(cons.name, cons.fields, attrs, depth, base)

    def visitProduct(self, product, name, depth):
        self.gen_classdef(name, product.fields, product.attributes, depth)

    def gen_classdef(self, name, fields, attrs, depth, base="AstNode"):
        def as_str_list(items):
            # Comma-separated ``ctx.new_str(...)`` expressions, one per name.
            return ",".join(
                f"ctx.new_str(ascii!({json.dumps(item.name)})).into()"
                for item in items
            )

        node_struct = "Node" + get_rust_type(name)
        method_depth = depth + 1
        stmt_depth = depth + 2
        self.emit(
            f'#[pyclass(module = "_ast", name = {json.dumps(name)}, base = {json.dumps(base)})]',
            depth,
        )
        self.emit(f"struct {node_struct};", depth)
        self.emit("#[pyclass(flags(HAS_DICT, BASETYPE))]", depth)
        self.emit(f"impl {node_struct} {{", depth)
        self.emit("#[extend_class]", method_depth)
        self.emit(
            "fn extend_class_with_fields(ctx: &Context, class: &'static Py<PyType>) {",
            method_depth,
        )
        field_items = as_str_list(fields)
        self.emit(
            f"class.set_attr(identifier!(ctx, _fields), ctx.new_list(vec![{field_items}]).into());",
            stmt_depth,
        )
        attr_items = as_str_list(attrs)
        self.emit(
            f"class.set_attr(identifier!(ctx, _attributes), ctx.new_list(vec![{attr_items}]).into());",
            stmt_depth,
        )
        self.emit("}", method_depth)
        self.emit("}", depth)
class ExtendModuleVisitor(EmitVisitor):
    """Emits ``extend_module_nodes``, which registers every generated class."""

    def visitModule(self, mod):
        outer = 0
        self.emit(
            "pub fn extend_module_nodes(vm: &VirtualMachine, module: &PyObject) {",
            outer,
        )
        self.emit("extend_module!(vm, module, {", outer + 1)
        for definition in mod.dfns:
            self.visit(definition, outer + 2)
        self.emit("})", outer + 1)
        self.emit("}", outer)

    def visitType(self, type, depth):
        self.visit(type.value, type.name, depth)

    def visitSum(self, sum, name, depth):
        # One entry for the sum's NodeKind* class, then one per constructor.
        kind_name = get_rust_type(name)
        self.emit(
            f"{json.dumps(name)} => NodeKind{kind_name}::make_class(&vm.ctx),", depth
        )
        for constructor in sum.types:
            self.visit(constructor, depth)

    def visitConstructor(self, cons, depth):
        self.gen_extension(cons.name, depth)

    def visitProduct(self, product, name, depth):
        self.gen_extension(name, depth)

    def gen_extension(self, name, depth):
        node_name = get_rust_type(name)
        entry = f"{json.dumps(name)} => Node{node_name}::make_class(&vm.ctx),"
        self.emit(entry, depth)
class TraitImplVisitor(EmitVisitor):
def visitModule(self, mod):
    """Visit every definition of ``mod`` in order."""
    for definition in mod.dfns:
        self.visit(definition)
def visitType(self, type, depth=0):
    """Dispatch on the definition's value, passing its name and ``depth``."""
    value, type_name = type.value, type.name
    self.visit(value, type_name, depth)
def visitSum(self, sum, name, depth):
    """Emit the ``NamedNode`` and ``Node`` impls for a sum type.

    When the sum has attributes, the impls target its ``*Kind`` enum.
    """
    enum_name = get_rust_type(name)
    if sum.attributes:
        enum_name += "Kind"
    method_depth = depth + 1
    body_depth = depth + 2

    self.emit(f"impl NamedNode for ast::{enum_name} {{", depth)
    self.emit(f"const NAME: &'static str = {json.dumps(name)};", method_depth)
    self.emit("}", depth)

    self.emit(f"impl Node for ast::{enum_name} {{", depth)
    self.emit(
        "fn ast_to_object(self, _vm: &VirtualMachine) -> PyObjectRef {", method_depth
    )
    self.emit("match self {", body_depth)
    for constructor in sum.types:
        self.constructor_to_object(constructor, enum_name, depth + 3)
    self.emit("}", body_depth)
    self.emit("}", method_depth)
    self.emit(
        "fn ast_from_object(_vm: &VirtualMachine, _object: PyObjectRef) -> PyResult<Self> {",
        method_depth,
    )
    self.gen_sum_fromobj(sum, name, enum_name, body_depth)
    self.emit("}", method_depth)
    self.emit("}", depth)
def constructor_to_object(self, cons, enumname, depth):
    """Emit the ``match`` arm that converts one enum variant to an object."""
    pattern = self.make_pattern(cons.fields)
    arm_head = f"ast::{enumname}::{cons.name} {{ {pattern} }} => {{"
    self.emit(arm_head, depth)
    self.make_node(cons.name, cons.fields, depth + 1)
    self.emit("}", depth)
def visitProduct(self, product, name, depth):
structname = get_rust_type(name)
if product.attributes:
structname += "Data"
self.emit(f"impl NamedNode for ast::{structname} {{", depth)
self.emit(f"const NAME: &'static str = {json.dumps(name)};", depth + 1)
self.emit("}", depth)
self.emit(f"impl Node for ast::{structname} {{", depth)
self.emit(
"fn ast_to_object(self, _vm: &VirtualMachine) -> PyObjectRef {", depth + 1
)
fields_pattern = self.make_pattern(product.fields)
self.emit(f"let ast::{structname} {{ {fields_pattern} }} = self;", depth + 2)
self.make_node(name, product.fields, depth + 2)
self.emit("}", depth + 1)
self.emit(
"fn ast_from_object(_vm: &VirtualMachine, _object: PyObjectRef) -> PyResult<Self> {",
depth + 1,
)
self.gen_product_fromobj(product, name, structname, depth + 2)
self.emit("}", depth + 1)
self.emit("}", depth)
def make_node(self, variant, fields, depth):
rust_variant = get_rust_type(variant)
self.emit(
f"let _node = AstNode.into_ref_with_type(_vm, Node{rust_variant}::static_type().to_owned()).unwrap();",
depth,
)
if fields:
self.emit("let _dict = _node.as_object().dict().unwrap();", depth)
for f in fields:
self.emit(
f"_dict.set_item({json.dumps(f.name)}, {rust_field(f.name)}.ast_to_object(_vm), _vm).unwrap();",
depth,
)
self.emit("_node.into()", depth)
def make_pattern(self, fields):
return ",".join(rust_field(f.name) for f in fields)
def gen_sum_fromobj(self, sum, sumname, enumname, depth):
if sum.attributes:
self.extract_location(sumname, depth)
self.emit("let _cls = _object.class();", depth)
self.emit("Ok(", depth)
for cons in sum.types:
self.emit(f"if _cls.is(Node{cons.name}::static_type()) {{", depth)
self.gen_construction(f"{enumname}::{cons.name}", cons, sumname, depth + 1)
self.emit("} else", depth)
self.emit("{", depth)
msg = f'format!("expected some sort of {sumname}, but got {{}}",_object.repr(_vm)?)'
self.emit(f"return Err(_vm.new_type_error({msg}));", depth + 1)
self.emit("})", depth)
def gen_product_fromobj(self, product, prodname, structname, depth):
if product.attributes:
self.extract_location(prodname, depth)
self.emit("Ok(", depth)
self.gen_construction(structname, product, prodname, depth + 1)
self.emit(")", depth)
def gen_construction(self, cons_path, cons, name, depth):
self.emit(f"ast::{cons_path} {{", depth)
for field in cons.fields:
self.emit(
f"{rust_field(field.name)}: {self.decode_field(field, name)},",
depth + 1,
)
self.emit("}", depth)
def extract_location(self, typename, depth):
row = self.decode_field(asdl.Field("int", "lineno"), typename)
column = self.decode_field(asdl.Field("int", "col_offset"), typename)
self.emit(f"let _location = ast::Location::new({row}, {column});", depth)
def wrap_located_node(self, depth):
self.emit(f"let node = ast::Located::new(_location, node);", depth)
def decode_field(self, field, typename):
name = json.dumps(field.name)
if field.opt and not field.seq:
return f"get_node_field_opt(_vm, &_object, {name})?.map(|obj| Node::ast_from_object(_vm, obj)).transpose()?"
else:
return f"Node::ast_from_object(_vm, get_node_field(_vm, &_object, {name}, {json.dumps(typename)})?)?"
class ChainOfVisitors:
    """Runs several visitors over the same object, one after another."""

    def __init__(self, *visitors):
        self.visitors = visitors

    def visit(self, object):
        """Let each visitor visit `object`, then have it emit an empty line at depth 0."""
        for visitor in self.visitors:
            visitor.visit(object)
            visitor.emit("", 0)
def write_ast_def(mod, typeinfo, f):
    """Write the Rust AST definitions for `mod` to the open file `f`.

    Output order: a fixed prelude, the attributes emitted by
    `StructVisitor.emit_attrs`, the generic `Located` wrapper, and finally
    the output of `StructVisitor` followed by `FoldModuleVisitor`.
    """
    # Fixed prelude: lint allowance, re-exports and the `Ident` alias.
    f.write(
        textwrap.dedent(
            """
#![allow(clippy::derive_partial_eq_without_eq)]
pub use crate::constant::*;
pub use crate::location::Location;
type Ident = String;
\n
"""
        )
    )
    # Attributes that precede the `Located` struct definition written below.
    StructVisitor(f, typeinfo).emit_attrs(0)
    f.write(
        textwrap.dedent(
            """
pub struct Located<T, U = ()> {
pub location: Location,
pub custom: U,
pub node: T,
}
impl<T> Located<T> {
pub fn new(location: Location, node: T) -> Self {
Self { location, custom: (), node }
}
}
\n
""".lstrip()
        )
    )
    c = ChainOfVisitors(StructVisitor(f, typeinfo), FoldModuleVisitor(f, typeinfo))
    c.visit(mod)
def write_ast_mod(mod, f):
    """Write the generated Rust node-class module for `mod` to the open file `f`.

    Output order: a fixed prelude, then the output of `ClassDefVisitor`,
    `TraitImplVisitor` and `ExtendModuleVisitor`.
    """
    f.write(
        textwrap.dedent(
            """
#![allow(clippy::all)]
use super::*;
use crate::common::ascii;
"""
        )
    )
    c = ChainOfVisitors(ClassDefVisitor(f), TraitImplVisitor(f), ExtendModuleVisitor(f))
    c.visit(mod)
def main(input_filename, ast_mod_filename, ast_def_filename, dump_module=False):
    """Parse the ASDL file and regenerate both Rust output files.

    Exits with status 1 when `asdl.check` rejects the parsed module. Each
    output file starts with the auto-generation notice.
    """
    script_name = "/".join(Path(__file__).parts[-2:])
    header = AUTOGEN_MESSAGE.format(script_name)

    module = asdl.parse(input_filename)
    if dump_module:
        print("Parsed Module:")
        print(module)
    if not asdl.check(module):
        sys.exit(1)

    userdata_types = {}
    FindUserdataTypesVisitor(userdata_types).visit(module)

    with ast_def_filename.open("w") as def_out, ast_mod_filename.open("w") as mod_out:
        def_out.write(header)
        write_ast_def(module, userdata_types, def_out)

        mod_out.write(header)
        write_ast_mod(module, mod_out)

    print(f"{ast_def_filename}, {ast_mod_filename} regenerated.")
# Command-line entry point: one positional ASDL input file plus the two
# required output paths (-M for the module file, -D for the definitions file).
if __name__ == "__main__":
    parser = ArgumentParser()
    parser.add_argument("input_file", type=Path)
    parser.add_argument("-M", "--mod-file", type=Path, required=True)
    parser.add_argument("-D", "--def-file", type=Path, required=True)
    # -d prints the parsed module before generating.
    parser.add_argument("-d", "--dump-module", action="store_true")

    args = parser.parse_args()
    main(args.input_file, args.mod_file, args.def_file, args.dump_module)

1282
ast/src/ast_gen.rs Normal file

File diff suppressed because it is too large Load diff

223
ast/src/constant.rs Normal file
View file

@ -0,0 +1,223 @@
use num_bigint::BigInt;
pub use rustpython_bytecode::ConversionFlag;
/// A constant value as it appears in the AST.
///
/// `Tuple` nests further constants; `Complex` stores its real and imaginary
/// parts as two `f64` values.
#[derive(Debug, PartialEq)]
pub enum Constant {
    None,
    Bool(bool),
    Str(String),
    Bytes(Vec<u8>),
    Int(BigInt),
    Tuple(Vec<Constant>),
    Float(f64),
    Complex { real: f64, imag: f64 },
    Ellipsis,
}
impl From<String> for Constant {
fn from(s: String) -> Constant {
Self::Str(s)
}
}
impl From<Vec<u8>> for Constant {
fn from(b: Vec<u8>) -> Constant {
Self::Bytes(b)
}
}
impl From<bool> for Constant {
fn from(b: bool) -> Constant {
Self::Bool(b)
}
}
impl From<BigInt> for Constant {
fn from(i: BigInt) -> Constant {
Self::Int(i)
}
}
#[cfg(feature = "rustpython-common")]
impl std::fmt::Display for Constant {
    /// Writes the constant in Python literal syntax.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Constant::None => f.pad("None"),
            Constant::Bool(true) => f.pad("True"),
            Constant::Bool(false) => f.pad("False"),
            Constant::Str(s) => rustpython_common::str::repr(s).fmt(f),
            Constant::Bytes(b) => f.pad(&rustpython_common::bytes::repr(b)),
            Constant::Int(i) => i.fmt(f),
            Constant::Tuple(items) => match items.as_slice() {
                // A one-element tuple needs its trailing comma.
                [only] => write!(f, "({},)", only),
                items => {
                    f.write_str("(")?;
                    let mut remaining = items.iter();
                    if let Some(head) = remaining.next() {
                        head.fmt(f)?;
                    }
                    for item in remaining {
                        f.write_str(", ")?;
                        item.fmt(f)?;
                    }
                    f.write_str(")")
                }
            },
            Constant::Float(fp) => f.pad(&rustpython_common::float_ops::to_string(*fp)),
            Constant::Complex { real, imag } => {
                if *real != 0.0 {
                    write!(f, "({}{:+}j)", real, imag)
                } else {
                    // A zero real part is omitted entirely.
                    write!(f, "{}j", imag)
                }
            }
            Constant::Ellipsis => f.pad("..."),
        }
    }
}
/// Folder that collapses tuples made only of constants into a single
/// tuple constant. It carries no state.
#[cfg(feature = "constant-optimization")]
#[non_exhaustive]
#[derive(Default)]
pub struct ConstantOptimizer {}

#[cfg(feature = "constant-optimization")]
impl ConstantOptimizer {
    /// Creates a new optimizer; identical to `ConstantOptimizer::default()`.
    #[inline]
    pub fn new() -> Self {
        Self::default()
    }
}
#[cfg(feature = "constant-optimization")]
impl<U> crate::fold::Fold<U> for ConstantOptimizer {
    type TargetU = U;
    type Error = std::convert::Infallible;

    /// User data passes through untouched.
    #[inline]
    fn map_user(&mut self, user: U) -> Result<Self::TargetU, Self::Error> {
        Ok(user)
    }

    /// Folds a tuple expression whose elements are all constants (after
    /// folding them) into one `Constant::Tuple`; every other expression is
    /// handed to the default `fold_expr`.
    fn fold_expr(&mut self, node: crate::Expr<U>) -> Result<crate::Expr<U>, Self::Error> {
        let (elts, ctx) = match node.node {
            crate::ExprKind::Tuple { elts, ctx } => (elts, ctx),
            _ => return crate::fold::fold_expr(self, node),
        };

        // Fold the children first so nested constant tuples collapse bottom-up.
        let mut folded = Vec::with_capacity(elts.len());
        for elt in elts {
            folded.push(self.fold_expr(elt)?);
        }

        let has_non_constant = folded
            .iter()
            .any(|e| !matches!(e.node, crate::ExprKind::Constant { .. }));
        let kind = if has_non_constant {
            crate::ExprKind::Tuple { elts: folded, ctx }
        } else {
            let mut values = Vec::with_capacity(folded.len());
            for elt in folded {
                match elt.node {
                    crate::ExprKind::Constant { value, .. } => values.push(value),
                    _ => unreachable!(),
                }
            }
            crate::ExprKind::Constant {
                value: Constant::Tuple(values),
                kind: None,
            }
        };

        Ok(crate::Expr {
            node: kind,
            custom: node.custom,
            location: node.location,
        })
    }
}
#[cfg(test)]
mod tests {
    /// A tuple `(1, 2, (3, 4, 5))` of integer constants must fold into one
    /// nested tuple constant.
    #[cfg(feature = "constant-optimization")]
    #[test]
    fn test_constant_opt() {
        use super::*;
        use crate::fold::Fold;
        use crate::*;

        let location = Location::new(0, 0);
        let custom = ();

        // Small builders so the fixture reads like the tuple it represents.
        let int_expr = |n: i32| Located {
            location,
            custom,
            node: ExprKind::Constant {
                value: BigInt::from(n).into(),
                kind: None,
            },
        };
        let tuple_expr = |elts| Located {
            location,
            custom,
            node: ExprKind::Tuple {
                ctx: ExprContext::Load,
                elts,
            },
        };

        let ast = tuple_expr(vec![
            int_expr(1),
            int_expr(2),
            tuple_expr(vec![int_expr(3), int_expr(4), int_expr(5)]),
        ]);

        let new_ast = ConstantOptimizer::new()
            .fold_expr(ast)
            .unwrap_or_else(|e| match e {});

        let expected = Located {
            location,
            custom,
            node: ExprKind::Constant {
                value: Constant::Tuple(vec![
                    BigInt::from(1).into(),
                    BigInt::from(2).into(),
                    Constant::Tuple(vec![
                        BigInt::from(3).into(),
                        BigInt::from(4).into(),
                        BigInt::from(5).into(),
                    ]),
                ]),
                kind: None,
            },
        };
        assert_eq!(new_ast, expected);
    }
}

66
ast/src/fold_helpers.rs Normal file
View file

@ -0,0 +1,66 @@
use crate::constant;
use crate::fold::Fold;
/// A value that can be consumed and rebuilt by a [`Fold`] whose source user
/// type is `T` and whose target user type is `U`.
pub(crate) trait Foldable<T, U> {
    /// The type produced by folding `Self`.
    type Mapped;

    /// Consumes `self` and returns its folded form, or the folder's error.
    fn fold<F: Fold<T, TargetU = U> + ?Sized>(
        self,
        folder: &mut F,
    ) -> Result<Self::Mapped, F::Error>;
}
impl<T, U, X> Foldable<T, U> for Vec<X>
where
    X: Foldable<T, U>,
{
    type Mapped = Vec<X::Mapped>;

    /// Folds the elements in order, stopping at the first error.
    fn fold<F: Fold<T, TargetU = U> + ?Sized>(
        self,
        folder: &mut F,
    ) -> Result<Self::Mapped, F::Error> {
        let mut mapped = Vec::with_capacity(self.len());
        for item in self {
            mapped.push(item.fold(folder)?);
        }
        Ok(mapped)
    }
}
impl<T, U, X> Foldable<T, U> for Option<X>
where
    X: Foldable<T, U>,
{
    type Mapped = Option<X::Mapped>;

    /// Folds the contained value if there is one; `None` stays `None`.
    fn fold<F: Fold<T, TargetU = U> + ?Sized>(
        self,
        folder: &mut F,
    ) -> Result<Self::Mapped, F::Error> {
        match self {
            Some(inner) => inner.fold(folder).map(Some),
            None => Ok(None),
        }
    }
}
impl<T, U, X> Foldable<T, U> for Box<X>
where
    X: Foldable<T, U>,
{
    type Mapped = Box<X::Mapped>;

    /// Moves the value out of the box, folds it and boxes the result.
    fn fold<F: Fold<T, TargetU = U> + ?Sized>(
        self,
        folder: &mut F,
    ) -> Result<Self::Mapped, F::Error> {
        let unboxed = *self;
        let mapped = unboxed.fold(folder)?;
        Ok(Box::new(mapped))
    }
}
/// Implements [`Foldable`] for each listed type as the identity: `fold`
/// ignores the folder and returns `Ok(self)`.
macro_rules! simple_fold {
    ($($t:ty),+$(,)?) => {
        $(impl<T, U> $crate::fold_helpers::Foldable<T, U> for $t {
            type Mapped = Self;
            #[inline]
            fn fold<F: Fold<T, TargetU = U> + ?Sized>(
                self,
                _folder: &mut F,
            ) -> Result<Self::Mapped, F::Error> {
                Ok(self)
            }
        })+
    };
}

// Types that are returned unchanged when folded.
simple_fold!(usize, String, bool, constant::Constant);

60
ast/src/impls.rs Normal file
View file

@ -0,0 +1,60 @@
use crate::{Constant, ExprKind};
impl<U> ExprKind<U> {
    /// Returns a short name for the node suitable for use in error messages.
    pub fn name(&self) -> &'static str {
        match self {
            // Operators and operator-like expressions.
            ExprKind::BoolOp { .. } | ExprKind::BinOp { .. } | ExprKind::UnaryOp { .. } => {
                "operator"
            }
            ExprKind::Compare { .. } => "comparison",
            ExprKind::Subscript { .. } => "subscript",
            ExprKind::Attribute { .. } => "attribute",
            ExprKind::Call { .. } => "function call",
            ExprKind::Await { .. } => "await expression",
            ExprKind::Yield { .. } | ExprKind::YieldFrom { .. } => "yield expression",

            // Constants are named after the kind of value they hold.
            ExprKind::Constant {
                value: Constant::Tuple(_),
                ..
            } => "tuple",
            ExprKind::Constant {
                value: Constant::Bool(true),
                ..
            } => "True",
            ExprKind::Constant {
                value: Constant::Bool(false),
                ..
            } => "False",
            ExprKind::Constant {
                value: Constant::None,
                ..
            } => "None",
            ExprKind::Constant {
                value: Constant::Ellipsis,
                ..
            } => "ellipsis",
            ExprKind::Constant {
                value:
                    Constant::Str(_)
                    | Constant::Int(_)
                    | Constant::Float(_)
                    | Constant::Complex { .. }
                    | Constant::Bytes(_),
                ..
            } => "literal",

            // Displays and comprehensions.
            ExprKind::List { .. } => "list",
            ExprKind::Tuple { .. } => "tuple",
            ExprKind::Dict { .. } => "dict display",
            ExprKind::Set { .. } => "set display",
            ExprKind::ListComp { .. } => "list comprehension",
            ExprKind::DictComp { .. } => "dict comprehension",
            ExprKind::SetComp { .. } => "set comprehension",
            ExprKind::GeneratorExp { .. } => "generator expression",
            ExprKind::Starred { .. } => "starred",
            ExprKind::Slice { .. } => "slice",

            // f-strings.
            ExprKind::JoinedStr { values } => {
                let nests_joined_str = values
                    .iter()
                    .any(|part| matches!(part.node, ExprKind::JoinedStr { .. }));
                if nests_joined_str {
                    "f-string expression"
                } else {
                    "literal"
                }
            }
            ExprKind::FormattedValue { .. } => "f-string expression",

            ExprKind::Name { .. } => "name",
            ExprKind::Lambda { .. } => "lambda",
            ExprKind::IfExp { .. } => "conditional expression",
            ExprKind::NamedExpr { .. } => "named expression",
        }
    }
}

13
ast/src/lib.rs Normal file
View file

@ -0,0 +1,13 @@
mod ast_gen;
mod constant;
#[cfg(feature = "fold")]
mod fold_helpers;
mod impls;
mod location;
#[cfg(feature = "unparse")]
mod unparse;
pub use ast_gen::*;
pub use location::Location;
pub type Suite<U = ()> = Vec<Stmt<U>>;

79
ast/src/location.rs Normal file
View file

@ -0,0 +1,79 @@
//! Datatypes to support source location information.
use std::fmt;
/// A location somewhere in the sourcecode.
///
/// Holds a `row` and a `column`. [`Location::reset`] and
/// [`Location::newline`] both place the column at `1`, while
/// `Location::default()` is row `0`, column `0`.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub struct Location {
    row: usize,
    column: usize,
}

impl fmt::Display for Location {
    /// Formats the location as `line {row} column {column}`.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(f, "line {} column {}", self.row, self.column)
    }
}

impl Location {
    /// Returns a displayable value that prints `desc`, a newline, `line`, and
    /// then a `^` right-aligned in a field `column` characters wide.
    ///
    /// Nothing is written between `line` and the caret, so the caret only
    /// lands on its own row when `line` already ends with a newline.
    pub fn visualize<'a>(
        &self,
        line: &'a str,
        desc: impl fmt::Display + 'a,
    ) -> impl fmt::Display + 'a {
        struct Visualize<'a, D: fmt::Display> {
            loc: Location,
            line: &'a str,
            desc: D,
        }
        impl<D: fmt::Display> fmt::Display for Visualize<'_, D> {
            fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
                write!(
                    f,
                    "{}\n{}{arrow:>pad$}",
                    self.desc,
                    self.line,
                    pad = self.loc.column,
                    arrow = "^",
                )
            }
        }
        Visualize {
            loc: *self,
            line,
            desc,
        }
    }
}

impl Location {
    /// Creates a location from a row and a column.
    pub fn new(row: usize, column: usize) -> Self {
        Location { row, column }
    }

    /// Returns the row.
    pub fn row(&self) -> usize {
        self.row
    }

    /// Returns the column.
    pub fn column(&self) -> usize {
        self.column
    }

    /// Moves to row `1`, column `1`.
    pub fn reset(&mut self) {
        self.row = 1;
        self.column = 1;
    }

    /// Moves one column to the right.
    pub fn go_right(&mut self) {
        self.column += 1;
    }

    /// Moves one column to the left, stopping at column `0`.
    ///
    /// The subtraction saturates: a plain `-= 1` on a `usize` of `0` panics
    /// in debug builds and wraps around to `usize::MAX` in release builds.
    pub fn go_left(&mut self) {
        self.column = self.column.saturating_sub(1);
    }

    /// Moves to column `1` of the next row.
    pub fn newline(&mut self) {
        self.row += 1;
        self.column = 1;
    }
}

530
ast/src/unparse.rs Normal file
View file

@ -0,0 +1,530 @@
use crate::{
Arg, Arguments, Boolop, Cmpop, Comprehension, Constant, ConversionFlag, Expr, ExprKind,
Operator,
};
use std::fmt;
/// Precedence levels used to decide where parentheses are needed.
///
/// The constants are numbered from `0` in the order they are listed, so a
/// name listed later has a larger value.
mod precedence {
    // Recursive counter: the first rule seeds the index with 0, the second
    // defines one constant and recurses with the index plus one, and the
    // third stops once no names are left.
    macro_rules! precedence {
        ($($op:ident,)*) => {
            precedence!(@0, $($op,)*);
        };
        (@$i:expr, $op1:ident, $($op:ident,)*) => {
            pub const $op1: u8 = $i;
            precedence!(@$i + 1, $($op,)*);
        };
        (@$i:expr,) => {};
    }
    precedence!(
        TUPLE, TEST, OR, AND, NOT, CMP, // "EXPR" =
        BOR, BXOR, BAND, SHIFT, ARITH, TERM, FACTOR, POWER, AWAIT, ATOM,
    );
    /// Alias for the level of `BOR`.
    pub const EXPR: u8 = BOR;
}
/// Writes expressions back out as source text through a wrapped formatter.
///
/// The struct is `#[repr(transparent)]` over its only field, which is what
/// `Unparser::new` relies on for its pointer cast.
#[repr(transparent)]
struct Unparser<'a> {
    f: fmt::Formatter<'a>,
}
impl<'a> Unparser<'a> {
/// Reinterprets a mutable `fmt::Formatter` reference as an `Unparser` reference.
fn new<'b>(f: &'b mut fmt::Formatter<'a>) -> &'b mut Unparser<'a> {
    // SAFETY: `Unparser<'a>` is `#[repr(transparent)]` over a single
    // `fmt::Formatter<'a>` field, so both types have the same layout, and the
    // returned reference keeps the lifetime `'b` of the borrow passed in.
    unsafe { &mut *(f as *mut fmt::Formatter<'a> as *mut Unparser<'a>) }
}
fn p(&mut self, s: &str) -> fmt::Result {
self.f.write_str(s)
}
/// Writes `s` only when `cond` holds; otherwise does nothing.
fn p_if(&mut self, cond: bool, s: &str) -> fmt::Result {
    if !cond {
        return Ok(());
    }
    self.f.write_str(s)
}
/// Writes the delimiter `s` before every item except the first one.
///
/// `first` is always left set to `false`.
fn p_delim(&mut self, first: &mut bool, s: &str) -> fmt::Result {
    let was_first = std::mem::replace(first, false);
    self.p_if(!was_first, s)
}
/// Forwards formatted output to the underlying formatter, which lets
/// `write!(self, …)` be used on an `Unparser`.
fn write_fmt(&mut self, f: fmt::Arguments<'_>) -> fmt::Result {
    let out = &mut self.f;
    out.write_fmt(f)
}
/// Writes `ast` as source text.
///
/// `level` is the precedence level required by the surrounding context: an
/// expression whose own level is lower than `level` is wrapped in
/// parentheses (see `group_if!`).
fn unparse_expr<U>(&mut self, ast: &Expr<U>, level: u8) -> fmt::Result {
    // Maps an operator enum value to `(text, precedence)`. Binary operators
    // get a space on each side of their text; unary operators are used as is.
    macro_rules! opprec {
        ($opty:ident, $x:expr, $enu:path, $($var:ident($op:literal, $prec:ident)),*$(,)?) => {
            match $x {
                $(<$enu>::$var => (opprec!(@space $opty, $op), precedence::$prec),)*
            }
        };
        (@space bin, $op:literal) => {
            concat!(" ", $op, " ")
        };
        (@space un, $op:literal) => {
            $op
        };
    }
    // Runs `$body`, surrounded by parentheses when the caller's `level` is
    // higher than `$lvl`.
    macro_rules! group_if {
        ($lvl:expr, $body:block) => {{
            let group = level > $lvl;
            self.p_if(group, "(")?;
            let ret = $body;
            self.p_if(group, ")")?;
            ret
        }};
    }
    match &ast.node {
        ExprKind::BoolOp { op, values } => {
            let (op, prec) = opprec!(bin, op, Boolop, And("and", AND), Or("or", OR));
            group_if!(prec, {
                let mut first = true;
                for val in values {
                    self.p_delim(&mut first, op)?;
                    self.unparse_expr(val, prec + 1)?;
                }
            })
        }
        ExprKind::NamedExpr { target, value } => {
            group_if!(precedence::TUPLE, {
                self.unparse_expr(target, precedence::ATOM)?;
                self.p(" := ")?;
                self.unparse_expr(value, precedence::ATOM)?;
            })
        }
        ExprKind::BinOp { left, op, right } => {
            // `**` is the only right-associative operator here: the operand on
            // its non-associating side is unparsed one level higher.
            let rassoc = matches!(op, Operator::Pow);
            let (op, prec) = opprec!(
                bin,
                op,
                Operator,
                Add("+", ARITH),
                Sub("-", ARITH),
                Mult("*", TERM),
                MatMult("@", TERM),
                Div("/", TERM),
                Mod("%", TERM),
                Pow("**", POWER),
                LShift("<<", SHIFT),
                RShift(">>", SHIFT),
                BitOr("|", BOR),
                BitXor("^", BXOR),
                BitAnd("&", BAND),
                FloorDiv("//", TERM),
            );
            group_if!(prec, {
                self.unparse_expr(left, prec + rassoc as u8)?;
                self.p(op)?;
                self.unparse_expr(right, prec + !rassoc as u8)?;
            })
        }
        ExprKind::UnaryOp { op, operand } => {
            let (op, prec) = opprec!(
                un,
                op,
                crate::Unaryop,
                Invert("~", FACTOR),
                Not("not ", NOT),
                UAdd("+", FACTOR),
                USub("-", FACTOR)
            );
            group_if!(prec, {
                self.p(op)?;
                self.unparse_expr(operand, prec)?;
            })
        }
        ExprKind::Lambda { args, body } => {
            group_if!(precedence::TEST, {
                // The space after `lambda` is only written when there is at
                // least one positional parameter.
                let npos = args.args.len() + args.posonlyargs.len();
                self.p(if npos > 0 { "lambda " } else { "lambda" })?;
                self.unparse_args(args)?;
                write!(self, ": {}", **body)?;
            })
        }
        ExprKind::IfExp { test, body, orelse } => {
            group_if!(precedence::TEST, {
                self.unparse_expr(body, precedence::TEST + 1)?;
                self.p(" if ")?;
                self.unparse_expr(test, precedence::TEST + 1)?;
                self.p(" else ")?;
                self.unparse_expr(orelse, precedence::TEST)?;
            })
        }
        ExprKind::Dict { keys, values } => {
            self.p("{")?;
            let mut first = true;
            // The first `keys.len()` values are paired with keys; any values
            // after that are written as `**value` entries.
            let (packed, unpacked) = values.split_at(keys.len());
            for (k, v) in keys.iter().zip(packed) {
                self.p_delim(&mut first, ", ")?;
                write!(self, "{}: {}", *k, *v)?;
            }
            for d in unpacked {
                self.p_delim(&mut first, ", ")?;
                write!(self, "**{}", *d)?;
            }
            self.p("}")?;
        }
        ExprKind::Set { elts } => {
            self.p("{")?;
            let mut first = true;
            for v in elts {
                self.p_delim(&mut first, ", ")?;
                self.unparse_expr(v, precedence::TEST)?;
            }
            self.p("}")?;
        }
        ExprKind::ListComp { elt, generators } => {
            self.p("[")?;
            self.unparse_expr(elt, precedence::TEST)?;
            self.unparse_comp(generators)?;
            self.p("]")?;
        }
        ExprKind::SetComp { elt, generators } => {
            self.p("{")?;
            self.unparse_expr(elt, precedence::TEST)?;
            self.unparse_comp(generators)?;
            self.p("}")?;
        }
        ExprKind::DictComp {
            key,
            value,
            generators,
        } => {
            self.p("{")?;
            self.unparse_expr(key, precedence::TEST)?;
            self.p(": ")?;
            self.unparse_expr(value, precedence::TEST)?;
            self.unparse_comp(generators)?;
            self.p("}")?;
        }
        ExprKind::GeneratorExp { elt, generators } => {
            self.p("(")?;
            self.unparse_expr(elt, precedence::TEST)?;
            self.unparse_comp(generators)?;
            self.p(")")?;
        }
        ExprKind::Await { value } => {
            group_if!(precedence::AWAIT, {
                self.p("await ")?;
                self.unparse_expr(value, precedence::ATOM)?;
            })
        }
        // Yield expressions are always written inside parentheses.
        ExprKind::Yield { value } => {
            if let Some(value) = value {
                write!(self, "(yield {})", **value)?;
            } else {
                self.p("(yield)")?;
            }
        }
        ExprKind::YieldFrom { value } => {
            write!(self, "(yield from {})", **value)?;
        }
        ExprKind::Compare {
            left,
            ops,
            comparators,
        } => {
            group_if!(precedence::CMP, {
                let new_lvl = precedence::CMP + 1;
                self.unparse_expr(left, new_lvl)?;
                for (op, cmp) in ops.iter().zip(comparators) {
                    let op = match op {
                        Cmpop::Eq => " == ",
                        Cmpop::NotEq => " != ",
                        Cmpop::Lt => " < ",
                        Cmpop::LtE => " <= ",
                        Cmpop::Gt => " > ",
                        Cmpop::GtE => " >= ",
                        Cmpop::Is => " is ",
                        Cmpop::IsNot => " is not ",
                        Cmpop::In => " in ",
                        Cmpop::NotIn => " not in ",
                    };
                    self.p(op)?;
                    self.unparse_expr(cmp, new_lvl)?;
                }
            })
        }
        ExprKind::Call {
            func,
            args,
            keywords,
        } => {
            self.unparse_expr(func, precedence::ATOM)?;
            self.p("(")?;
            if let (
                [Expr {
                    node: ExprKind::GeneratorExp { elt, generators },
                    ..
                }],
                [],
            ) = (&**args, &**keywords)
            {
                // make sure a single genexp doesn't get double parens
                self.unparse_expr(elt, precedence::TEST)?;
                self.unparse_comp(generators)?;
            } else {
                let mut first = true;
                for arg in args {
                    self.p_delim(&mut first, ", ")?;
                    self.unparse_expr(arg, precedence::TEST)?;
                }
                for kw in keywords {
                    self.p_delim(&mut first, ", ")?;
                    // A keyword without a name is written as `**value`.
                    if let Some(arg) = &kw.node.arg {
                        self.p(arg)?;
                        self.p("=")?;
                    } else {
                        self.p("**")?;
                    }
                    self.unparse_expr(&kw.node.value, precedence::TEST)?;
                }
            }
            self.p(")")?;
        }
        ExprKind::FormattedValue {
            value,
            conversion,
            format_spec,
        } => self.unparse_formatted(value, *conversion, format_spec.as_deref())?,
        ExprKind::JoinedStr { values } => self.unparse_joinedstr(values, false)?,
        ExprKind::Constant { value, kind } => {
            if let Some(kind) = kind {
                self.p(kind)?;
            }
            // Infinite floats are written as `1e309`; the assert checks that
            // the largest decimal exponent of `f64` is 308.
            assert_eq!(f64::MAX_10_EXP, 308);
            let inf_str = "1e309";
            match value {
                Constant::Float(f) if f.is_infinite() => self.p(inf_str)?,
                Constant::Complex { real, imag }
                    if real.is_infinite() || imag.is_infinite() =>
                {
                    self.p(&value.to_string().replace("inf", inf_str))?
                }
                _ => fmt::Display::fmt(value, &mut self.f)?,
            }
        }
        ExprKind::Attribute { value, attr, .. } => {
            self.unparse_expr(value, precedence::ATOM)?;
            // An integer constant receiver gets a space before the period
            // (presumably so the output is not read as a float literal —
            // TODO confirm).
            let period = if let ExprKind::Constant {
                value: Constant::Int(_),
                ..
            } = &value.node
            {
                " ."
            } else {
                "."
            };
            self.p(period)?;
            self.p(attr)?;
        }
        ExprKind::Subscript { value, slice, .. } => {
            self.unparse_expr(value, precedence::ATOM)?;
            // A tuple index containing a starred element is unparsed one
            // level higher, which makes the tuple parenthesized.
            let mut lvl = precedence::TUPLE;
            if let ExprKind::Tuple { elts, .. } = &slice.node {
                if elts
                    .iter()
                    .any(|expr| matches!(expr.node, ExprKind::Starred { .. }))
                {
                    lvl += 1
                }
            }
            self.p("[")?;
            self.unparse_expr(slice, lvl)?;
            self.p("]")?;
        }
        ExprKind::Starred { value, .. } => {
            self.p("*")?;
            self.unparse_expr(value, precedence::EXPR)?;
        }
        ExprKind::Name { id, .. } => self.p(id)?,
        ExprKind::List { elts, .. } => {
            self.p("[")?;
            let mut first = true;
            for elt in elts {
                self.p_delim(&mut first, ", ")?;
                self.unparse_expr(elt, precedence::TEST)?;
            }
            self.p("]")?;
        }
        ExprKind::Tuple { elts, .. } => {
            if elts.is_empty() {
                self.p("()")?;
            } else {
                group_if!(precedence::TUPLE, {
                    let mut first = true;
                    for elt in elts {
                        self.p_delim(&mut first, ", ")?;
                        self.unparse_expr(elt, precedence::TEST)?;
                    }
                    // A one-element tuple needs its trailing comma.
                    self.p_if(elts.len() == 1, ",")?;
                })
            }
        }
        ExprKind::Slice { lower, upper, step } => {
            if let Some(lower) = lower {
                self.unparse_expr(lower, precedence::TEST)?;
            }
            self.p(":")?;
            if let Some(upper) = upper {
                self.unparse_expr(upper, precedence::TEST)?;
            }
            // The second colon is only written when a step is present.
            if let Some(step) = step {
                self.p(":")?;
                self.unparse_expr(step, precedence::TEST)?;
            }
        }
    }
    Ok(())
}
/// Writes a parameter list: positional-only and regular parameters, the
/// `*` marker or `*vararg`, keyword-only parameters, then `**kwarg`.
fn unparse_args<U>(&mut self, args: &Arguments<U>) -> fmt::Result {
    let mut first = true;
    // `defaults` is matched against the tail of the positional parameters:
    // the parameter at index `defaults_start` takes `defaults[0]`.
    let defaults_start = args.posonlyargs.len() + args.args.len() - args.defaults.len();
    for (i, arg) in args.posonlyargs.iter().chain(&args.args).enumerate() {
        self.p_delim(&mut first, ", ")?;
        self.unparse_arg(arg)?;
        if let Some(i) = i.checked_sub(defaults_start) {
            write!(self, "={}", &args.defaults[i])?;
        }
        // Close the positional-only section right after its last parameter.
        self.p_if(i + 1 == args.posonlyargs.len(), ", /")?;
    }
    // A `*` is written for a vararg and also, on its own, when there are
    // keyword-only parameters.
    if args.vararg.is_some() || !args.kwonlyargs.is_empty() {
        self.p_delim(&mut first, ", ")?;
        self.p("*")?;
    }
    if let Some(vararg) = &args.vararg {
        self.unparse_arg(vararg)?;
    }
    // `kw_defaults` is matched against the tail of the keyword-only
    // parameters in the same way.
    let defaults_start = args.kwonlyargs.len() - args.kw_defaults.len();
    for (i, kwarg) in args.kwonlyargs.iter().enumerate() {
        self.p_delim(&mut first, ", ")?;
        self.unparse_arg(kwarg)?;
        if let Some(default) = i
            .checked_sub(defaults_start)
            .and_then(|i| args.kw_defaults.get(i))
        {
            write!(self, "={}", default)?;
        }
    }
    if let Some(kwarg) = &args.kwarg {
        self.p_delim(&mut first, ", ")?;
        self.p("**")?;
        self.unparse_arg(kwarg)?;
    }
    Ok(())
}
/// Writes one parameter name, followed by `: annotation` when it has one.
fn unparse_arg<U>(&mut self, arg: &Arg<U>) -> fmt::Result {
    let data = &arg.node;
    self.p(&data.arg)?;
    match &data.annotation {
        Some(ann) => write!(self, ": {}", **ann),
        None => Ok(()),
    }
}
/// Writes every ` for target in iter` clause of a comprehension together
/// with its ` if cond` filters.
fn unparse_comp<U>(&mut self, generators: &[Comprehension<U>]) -> fmt::Result {
    for comp in generators {
        let keyword = if comp.is_async > 0 {
            " async for "
        } else {
            " for "
        };
        self.p(keyword)?;
        self.unparse_expr(&comp.target, precedence::TUPLE)?;
        self.p(" in ")?;
        self.unparse_expr(&comp.iter, precedence::TEST + 1)?;
        comp.ifs.iter().try_for_each(|cond| {
            self.p(" if ")?;
            self.unparse_expr(cond, precedence::TEST + 1)
        })?;
    }
    Ok(())
}
/// Writes each f-string element in order, stopping at the first error.
fn unparse_fstring_body<U>(&mut self, values: &[Expr<U>], is_spec: bool) -> fmt::Result {
    values
        .iter()
        .try_for_each(|value| self.unparse_fstring_elem(value, is_spec))
}
/// Writes one `{value!conversion:spec}` replacement field of an f-string.
fn unparse_formatted<U>(
    &mut self,
    val: &Expr<U>,
    conversion: usize,
    spec: Option<&Expr<U>>,
) -> fmt::Result {
    // The value is unparsed into a buffer first so its first character can
    // be inspected before anything is written.
    let buffered = to_string_fmt(|f| Unparser::new(f).unparse_expr(val, precedence::TEST + 1));
    // put a space to avoid escaping the bracket
    self.p(if buffered.starts_with('{') { "{ " } else { "{" })?;
    self.p(&buffered)?;
    drop(buffered);

    let has_conversion = conversion != ConversionFlag::None as usize;
    if has_conversion {
        self.p("!")?;
        // The conversion flag is written as the single character it encodes.
        let byte = [conversion as u8];
        self.p(std::str::from_utf8(&byte).unwrap())?;
    }

    if let Some(spec) = spec {
        self.p(":")?;
        self.unparse_fstring_elem(spec, true)?;
    }

    self.p("}")
}
/// Writes one element of an f-string body.
///
/// Only string constants, joined strings and formatted values are handled;
/// any other expression hits `unreachable!()`.
fn unparse_fstring_elem<U>(&mut self, expr: &Expr<U>, is_spec: bool) -> fmt::Result {
    match &expr.node {
        ExprKind::Constant {
            value: Constant::Str(s),
            ..
        } => self.unparse_fstring_str(s),
        ExprKind::JoinedStr { values } => self.unparse_joinedstr(values, is_spec),
        ExprKind::FormattedValue {
            value,
            conversion,
            format_spec,
        } => self.unparse_formatted(value, *conversion, format_spec.as_deref()),
        _ => unreachable!(),
    }
}
/// Writes literal f-string text with every `{` and `}` doubled.
fn unparse_fstring_str(&mut self, s: &str) -> fmt::Result {
    let mut escaped = String::with_capacity(s.len());
    for ch in s.chars() {
        match ch {
            '{' => escaped.push_str("{{"),
            '}' => escaped.push_str("}}"),
            other => escaped.push(other),
        }
    }
    self.p(&escaped)
}
/// Writes a joined string.
///
/// Inside a format spec only the body is written. Otherwise the body is
/// rendered to a buffer and written as `f` followed by its quoted repr.
fn unparse_joinedstr<U>(&mut self, values: &[Expr<U>], is_spec: bool) -> fmt::Result {
    if is_spec {
        return self.unparse_fstring_body(values, is_spec);
    }
    self.p("f")?;
    let body = to_string_fmt(|f| Unparser::new(f).unparse_fstring_body(values, is_spec));
    let quoted = rustpython_common::str::repr(&body);
    fmt::Display::fmt(&quoted, &mut self.f)
}
}
impl<U> fmt::Display for Expr<U> {
    /// Unparses the expression at the `TEST` precedence level.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let unparser = Unparser::new(f);
        unparser.unparse_expr(self, precedence::TEST)
    }
}
/// Runs `f` against a fresh formatter and returns everything it wrote.
///
/// `f` is an `FnOnce` while `Display::fmt` only receives `&self`, so the
/// closure is parked in a `Cell<Option<_>>` and moved out on the single call.
fn to_string_fmt(f: impl FnOnce(&mut fmt::Formatter) -> fmt::Result) -> String {
    use std::cell::Cell;

    struct OnceDisplay<W>(Cell<Option<W>>);

    impl<W> fmt::Display for OnceDisplay<W>
    where
        W: FnOnce(&mut fmt::Formatter) -> fmt::Result,
    {
        fn fmt(&self, out: &mut fmt::Formatter<'_>) -> fmt::Result {
            let writer = self.0.replace(None).unwrap();
            writer(out)
        }
    }

    let adapter = OnceDisplay(Cell::new(Some(f)));
    adapter.to_string()
}

20
bytecode/Cargo.toml Normal file
View file

@ -0,0 +1,20 @@
[package]
name = "rustpython-bytecode"
description = "RustPython specific bytecode."
version = "0.1.2"
authors = ["RustPython Team"]
edition = "2021"
repository = "https://github.com/RustPython/RustPython"
license = "MIT"
[dependencies]
bincode = "1.3.3"
bitflags = "1.3.2"
lz4_flex = "0.9.2"
num-bigint = { version = "0.4.3", features = ["serde"] }
num-complex = { version = "0.4.0", features = ["serde"] }
serde = { version = "1.0.136", features = ["derive"] }
itertools = "0.10.3"
bstr = "0.2.17"
static_assertions = "1.1.0"

1395
bytecode/src/lib.rs Normal file

File diff suppressed because it is too large Load diff

31
parser/Cargo.toml Normal file
View file

@ -0,0 +1,31 @@
[package]
name = "rustpython-parser"
version = "0.1.2"
description = "Parser for python code."
authors = [ "RustPython Team" ]
build = "build.rs"
repository = "https://github.com/RustPython/RustPython"
license = "MIT"
edition = "2021"
[build-dependencies]
anyhow = "1.0.45"
lalrpop = { version = "0.19.8", optional = true }
phf_codegen = "0.10"
tiny-keccak = { version = "2", features = ["sha3"] }
[dependencies]
ahash = "0.7.6"
itertools = "0.10.3"
lalrpop-util = "0.19.8"
log = "0.4.16"
num-bigint = "0.4.3"
num-traits = "0.2.14"
phf = "0.10.1"
rustpython-ast = { path = "../ast" }
unic-emoji-char = "0.9.0"
unic-ucd-ident = "0.9.0"
unicode_names2 = "0.5.0"
[dev-dependencies]
insta = "1.14.0"

66
parser/README.md Normal file
View file

@ -0,0 +1,66 @@
# RustPython/parser
This directory has the code for python lexing, parsing and generating Abstract Syntax Trees (AST).
The steps are:
- Lexical analysis: splits the source code into tokens.
- Parsing and generating the AST: transforms those tokens into an AST. Uses `LALRPOP`, a Rust parser generator framework.
This crate is published on [https://docs.rs/rustpython-parser](https://docs.rs/rustpython-parser).
We wrote [a blog post](https://rustpython.github.io/2020/04/02/thing-explainer-parser.html) with screenshots and an explanation to help you understand the steps by seeing them in action.
For more information on LALRPOP, here is a link to the [LALRPOP book](https://github.com/lalrpop/lalrpop).
There is a readme in the `src` folder with the details of each file.
## Directory content
`build.rs`: The build script.
`Cargo.toml`: The config file.
The `src` directory has:
**lib.rs**
This is the crate's root.
**lexer.rs**
This module takes care of lexing python source text. This means source code is translated into separate tokens.
**parser.rs**
A python parsing module. Use this module to parse python code into an AST. There are three ways to parse python code. You could parse a whole program, a single statement, or a single expression.
**ast.rs**
Implements abstract syntax tree (AST) nodes for the python language. Roughly equivalent to [the python AST](https://docs.python.org/3/library/ast.html).
**python.lalrpop**
Python grammar.
**token.rs**
Different token definitions. Loosely based on token.h from CPython source.
**error.rs**
Define internal parse error types. The goal is to provide a matching and a safe error API, masking errors from LALR.
**fstring.rs**
Format strings.
**function.rs**
Collection of functions for parsing parameters, arguments.
**location.rs**
Datatypes to support source location information.
**mode.rs**
Execution mode check. Allowed modes are `exec`, `eval` or `single`.
## How to use
For example, one could do this:
```
use rustpython_parser::{parser, ast};
let python_source = "print('Hello world')";
let python_ast = parser::parse_expression(python_source).unwrap();
```

143
parser/build.rs Normal file
View file

@ -0,0 +1,143 @@
use std::fmt::Write as _;
use std::fs::File;
use std::io::{BufRead, BufReader, BufWriter, Write};
use std::path::PathBuf;
use tiny_keccak::{Hasher, Sha3};
/// Build script entry point: regenerates the parser if needed, then writes
/// the keyword table.
fn main() -> anyhow::Result<()> {
    const GRAMMAR: &str = "python.lalrpop";
    const GENERATED: &str = "python.rs";

    println!("cargo:rerun-if-changed={GRAMMAR}");

    try_lalrpop(GRAMMAR, GENERATED)?;
    gen_phf();

    Ok(())
}
/// Returns `true` when `target` is missing or when the `// sha3: ` digest
/// recorded in it does not match the SHA3-256 of `source`.
///
/// `source` is hashed line by line with every line ending normalised to
/// `\n`, so `\r\n` and `\n` files give the same digest.
fn requires_lalrpop(source: &str, target: &str) -> bool {
    let generated = match File::open(target) {
        Ok(file) => file,
        Err(_) => {
            println!("cargo:warning=python.rs doesn't exist. regenerate.");
            return true;
        }
    };

    let sha_prefix = "// sha3: ";
    let sha3_line = BufReader::with_capacity(128, generated)
        .lines()
        .map(|line| line.unwrap())
        .find(|line| line.starts_with(sha_prefix))
        .expect("no sha3 line?");
    let expected_sha3_str = sha3_line.strip_prefix(sha_prefix).unwrap();

    let mut hasher = Sha3::v256();
    let mut reader = BufReader::new(File::open(source).unwrap());
    let mut line = String::new();
    loop {
        line.clear();
        if reader.read_line(&mut line).unwrap() == 0 {
            break;
        }
        // Drop a trailing `\n`, and a `\r` only when it preceded that `\n`.
        let content = match line.strip_suffix('\n') {
            Some(without_lf) => without_lf.strip_suffix('\r').unwrap_or(without_lf),
            None => line.as_str(),
        };
        hasher.update(content.as_bytes());
        hasher.update(b"\n");
    }
    let mut actual_sha3 = [0u8; 32];
    hasher.finalize(&mut actual_sha3);

    if sha_equal(expected_sha3_str, &actual_sha3) {
        return false;
    }

    println!("cargo:warning=python.rs hash expected: {expected_sha3_str}");
    let mut actual_sha3_str = String::new();
    for byte in actual_sha3 {
        write!(actual_sha3_str, "{byte:02x}").unwrap();
    }
    println!("cargo:warning=python.rs hash actual: {actual_sha3_str}");
    true
}
/// Regenerates the parser with lalrpop when `requires_lalrpop` says the
/// generated file is missing or stale; otherwise does nothing.
///
/// # Panics
///
/// Panics when regeneration is needed but the `lalrpop` feature is not
/// enabled, or when running lalrpop fails.
fn try_lalrpop(source: &str, target: &str) -> anyhow::Result<()> {
    if !requires_lalrpop(source, target) {
        return Ok(());
    }
    // Exactly one of the two cfg-gated tails below is compiled in.
    #[cfg(feature = "lalrpop")]
    {
        lalrpop::process_root().expect("running lalrpop failed");
        Ok(())
    }
    #[cfg(not(feature = "lalrpop"))]
    panic!("try: cargo build --manifest-path=compiler/parser/Cargo.toml --features=lalrpop");
}
/// Compares a hex-encoded 32-byte digest with raw digest bytes.
///
/// # Panics
///
/// Panics when `expected_sha3_str` is not 64 bytes long or when any of its
/// two-character groups fails to parse as a base-16 `u8`.
fn sha_equal(expected_sha3_str: &str, actual_sha3: &[u8; 32]) -> bool {
    if expected_sha3_str.len() != 64 {
        panic!("lalrpop version? hash bug is fixed in 0.19.8");
    }

    // Decode the whole string first so a malformed group panics even when
    // an earlier byte already differs.
    let decoded: Vec<u8> = (0..actual_sha3.len())
        .map(|idx| {
            let start = idx * 2;
            u8::from_str_radix(&expected_sha3_str[start..start + 2], 16).unwrap()
        })
        .collect();

    decoded.as_slice() == actual_sha3.as_slice()
}
/// Writes the keyword → token map, generated with `phf_codegen`, to
/// `keywords.rs` inside the directory named by the `OUT_DIR` variable.
///
/// # Panics
///
/// Panics when `OUT_DIR` is not set or when the file cannot be created,
/// written or flushed.
fn gen_phf() {
    let out_dir = PathBuf::from(std::env::var_os("OUT_DIR").unwrap());
    let mut kwds = phf_codegen::Map::new();
    let kwds = kwds
        // Alphabetical keywords:
        .entry("...", "Tok::Ellipsis")
        .entry("False", "Tok::False")
        .entry("None", "Tok::None")
        .entry("True", "Tok::True")
        // moreso "standard" keywords
        .entry("and", "Tok::And")
        .entry("as", "Tok::As")
        .entry("assert", "Tok::Assert")
        .entry("async", "Tok::Async")
        .entry("await", "Tok::Await")
        .entry("break", "Tok::Break")
        .entry("class", "Tok::Class")
        .entry("continue", "Tok::Continue")
        .entry("def", "Tok::Def")
        .entry("del", "Tok::Del")
        .entry("elif", "Tok::Elif")
        .entry("else", "Tok::Else")
        .entry("except", "Tok::Except")
        .entry("finally", "Tok::Finally")
        .entry("for", "Tok::For")
        .entry("from", "Tok::From")
        .entry("global", "Tok::Global")
        .entry("if", "Tok::If")
        .entry("import", "Tok::Import")
        .entry("in", "Tok::In")
        .entry("is", "Tok::Is")
        .entry("lambda", "Tok::Lambda")
        .entry("nonlocal", "Tok::Nonlocal")
        .entry("not", "Tok::Not")
        .entry("or", "Tok::Or")
        .entry("pass", "Tok::Pass")
        .entry("raise", "Tok::Raise")
        .entry("return", "Tok::Return")
        .entry("try", "Tok::Try")
        .entry("while", "Tok::While")
        .entry("with", "Tok::With")
        .entry("yield", "Tok::Yield")
        .build();
    let mut out = BufWriter::new(File::create(out_dir.join("keywords.rs")).unwrap());
    writeln!(out, "{kwds}").unwrap();
    // Flush explicitly: dropping a `BufWriter` ignores any error from the
    // final flush, which would leave a truncated file unnoticed.
    out.flush().unwrap();
}

1352
parser/python.lalrpop Normal file

File diff suppressed because it is too large Load diff

235
parser/src/error.rs Normal file
View file

@ -0,0 +1,235 @@
//! Define internal parse error types.
//! The goal is to provide a matching and safe error API, masking errors from LALR.
use lalrpop_util::ParseError as LalrpopError;
use crate::ast::Location;
use crate::token::Tok;
use std::error::Error;
use std::fmt;
/// Represents an error during lexical scanning.
///
/// Pairs the kind of error with the source location it is reported at.
#[derive(Debug, PartialEq)]
pub struct LexicalError {
/// What went wrong.
pub error: LexicalErrorType,
/// Source location the error is attached to.
pub location: Location,
}
/// The kind of a [`LexicalError`].
///
/// The human-readable text for each variant is produced by its `Display`
/// implementation; the notes below paraphrase those messages.
#[derive(Debug, PartialEq)]
pub enum LexicalErrorType {
/// An unexpected string was encountered.
StringError,
/// Unexpected unicode was encountered.
UnicodeError,
/// Unexpected nesting was encountered.
NestingError,
/// An unindent does not match any outer indentation level.
IndentationError,
/// Tabs and spaces are used inconsistently in indentation.
TabError,
/// Tabs appear as part of indentation after spaces.
TabsAfterSpaces,
/// A non-default argument follows a default argument.
DefaultArgumentError,
/// A positional argument follows a keyword argument.
PositionalArgumentError,
/// A keyword argument is repeated.
DuplicateKeywordArgumentError,
/// An unexpected character `tok` was encountered.
UnrecognizedToken { tok: char },
/// An error inside an f-string, with its more specific kind.
FStringError(FStringErrorType),
/// An unexpected character follows a line continuation character.
LineContinuationError,
/// Unexpected end of input.
Eof,
/// Any other error, carrying its message verbatim.
OtherError(String),
}
impl fmt::Display for LexicalErrorType {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
LexicalErrorType::StringError => write!(f, "Got unexpected string"),
LexicalErrorType::FStringError(error) => write!(f, "Got error in f-string: {}", error),
LexicalErrorType::UnicodeError => write!(f, "Got unexpected unicode"),
LexicalErrorType::NestingError => write!(f, "Got unexpected nesting"),
LexicalErrorType::IndentationError => {
write!(f, "unindent does not match any outer indentation level")
}
LexicalErrorType::TabError => {
write!(f, "inconsistent use of tabs and spaces in indentation")
}
LexicalErrorType::TabsAfterSpaces => {
write!(f, "Tabs not allowed as part of indentation after spaces")
}
LexicalErrorType::DefaultArgumentError => {
write!(f, "non-default argument follows default argument")
}
LexicalErrorType::DuplicateKeywordArgumentError => {
write!(f, "keyword argument repeated")
}
LexicalErrorType::PositionalArgumentError => {
write!(f, "positional argument follows keyword argument")
}
LexicalErrorType::UnrecognizedToken { tok } => {
write!(f, "Got unexpected token {}", tok)
}
LexicalErrorType::LineContinuationError => {
write!(f, "unexpected character after line continuation character")
}
LexicalErrorType::Eof => write!(f, "unexpected EOF while parsing"),
LexicalErrorType::OtherError(msg) => write!(f, "{}", msg),
}
}
}
// TODO: consolidate these with ParseError
/// An error produced while parsing an f-string, together with the location
/// it is reported at.
#[derive(Debug, PartialEq)]
pub struct FStringError {
/// What went wrong inside the f-string.
pub error: FStringErrorType,
/// Source location the error is attached to.
pub location: Location,
}
/// The kind of an [`FStringError`].
#[derive(Debug, PartialEq)]
pub enum FStringErrorType {
/// A `{` was never closed.
UnclosedLbrace,
/// A `}` appeared without a matching `{`.
UnopenedRbrace,
/// A `}` was expected after a conversion flag.
ExpectedRbrace,
/// The embedded expression failed to parse; carries the parse error kind.
InvalidExpression(Box<ParseErrorType>),
/// The conversion flag after `!` is not valid.
InvalidConversionFlag,
/// The replacement field contains no expression.
EmptyExpression,
/// A closing delimiter does not match the most recent opening one.
MismatchedDelimiter,
/// Expressions are nested too deeply.
ExpressionNestedTooDeeply,
}
impl fmt::Display for FStringErrorType {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
FStringErrorType::UnclosedLbrace => write!(f, "Unclosed '{{'"),
FStringErrorType::UnopenedRbrace => write!(f, "Unopened '}}'"),
FStringErrorType::ExpectedRbrace => write!(f, "Expected '}}' after conversion flag."),
FStringErrorType::InvalidExpression(error) => {
write!(f, "Invalid expression: {}", error)
}
FStringErrorType::InvalidConversionFlag => write!(f, "Invalid conversion flag"),
FStringErrorType::EmptyExpression => write!(f, "Empty expression"),
FStringErrorType::MismatchedDelimiter => write!(f, "Mismatched delimiter"),
FStringErrorType::ExpressionNestedTooDeeply => {
write!(f, "expressions nested too deeply")
}
}
}
}
impl From<FStringError> for LalrpopError<Location, Tok, LexicalError> {
fn from(err: FStringError) -> Self {
lalrpop_util::ParseError::User {
error: LexicalError {
error: LexicalErrorType::FStringError(err.error),
location: err.location,
},
}
}
}
/// Represents an error during parsing
///
/// Pairs the kind of error with the source location it is reported at.
#[derive(Debug, PartialEq)]
pub struct ParseError {
/// What went wrong.
pub error: ParseErrorType,
/// Source location the error is attached to.
pub location: Location,
}
/// The kind of a [`ParseError`].
#[derive(Debug, PartialEq)]
pub enum ParseErrorType {
/// Parser encountered an unexpected end of input
Eof,
/// Parser encountered an extra token
ExtraToken(Tok),
/// Parser encountered an invalid token
InvalidToken,
/// Parser encountered an unexpected token
///
/// The second field is the expected token name, when one is recorded.
UnrecognizedToken(Tok, Option<String>),
/// Maps to `User` type from `lalrpop-util`
Lexical(LexicalErrorType),
}
/// Convert `lalrpop_util::ParseError` to our internal type
impl From<LalrpopError<Location, Tok, LexicalError>> for ParseError {
fn from(err: LalrpopError<Location, Tok, LexicalError>) -> Self {
match err {
// TODO: Are there cases where this isn't an EOF?
LalrpopError::InvalidToken { location } => ParseError {
error: ParseErrorType::Eof,
location,
},
LalrpopError::ExtraToken { token } => ParseError {
error: ParseErrorType::ExtraToken(token.1),
location: token.0,
},
LalrpopError::User { error } => ParseError {
error: ParseErrorType::Lexical(error.error),
location: error.location,
},
LalrpopError::UnrecognizedToken { token, expected } => {
// Hacky, but it's how CPython does it. See PyParser_AddToken,
// in particular "Only one possible expected token" comment.
let expected = (expected.len() == 1).then(|| expected[0].clone());
ParseError {
error: ParseErrorType::UnrecognizedToken(token.1, expected),
location: token.0,
}
}
LalrpopError::UnrecognizedEOF { location, .. } => ParseError {
error: ParseErrorType::Eof,
location,
},
}
}
}
impl fmt::Display for ParseError {
    /// Formats as `<error> at <location>`.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let Self { error, location } = self;
        write!(f, "{} at {}", error, location)
    }
}
impl fmt::Display for ParseErrorType {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match *self {
ParseErrorType::Eof => write!(f, "Got unexpected EOF"),
ParseErrorType::ExtraToken(ref tok) => write!(f, "Got extraneous token: {:?}", tok),
ParseErrorType::InvalidToken => write!(f, "Got invalid token"),
ParseErrorType::UnrecognizedToken(ref tok, ref expected) => {
if *tok == Tok::Indent {
write!(f, "unexpected indent")
} else if expected.as_deref() == Some("Indent") {
write!(f, "expected an indented block")
} else {
write!(f, "invalid syntax. Got unexpected token {}", tok)
}
}
ParseErrorType::Lexical(ref error) => write!(f, "{}", error),
}
}
}
// Marks the error kind itself as a standard error type; all trait methods keep their defaults.
impl Error for ParseErrorType {}
impl ParseErrorType {
    /// Returns `true` when this error is an indentation problem.
    ///
    /// That is the case for a lexical `IndentationError`, for an unexpected
    /// `Indent` token, and for an unrecognized token where `"Indent"` was the
    /// expected token.
    pub fn is_indentation_error(&self) -> bool {
        match self {
            ParseErrorType::Lexical(LexicalErrorType::IndentationError) => true,
            ParseErrorType::UnrecognizedToken(token, expected) => {
                // Compare through a borrowed `&str` instead of cloning the
                // option and allocating a `String` just for the comparison.
                *token == Tok::Indent || expected.as_deref() == Some("Indent")
            }
            _ => false,
        }
    }

    /// Returns `true` when this error is a lexical `TabError` or
    /// `TabsAfterSpaces`.
    pub fn is_tab_error(&self) -> bool {
        matches!(
            self,
            ParseErrorType::Lexical(LexicalErrorType::TabError)
                | ParseErrorType::Lexical(LexicalErrorType::TabsAfterSpaces)
        )
    }
}
// Dereferencing a `ParseError` yields its `ParseErrorType`, so the kind's
// methods can be called directly on the error.
impl std::ops::Deref for ParseError {
type Target = ParseErrorType;
fn deref(&self) -> &Self::Target {
&self.error
}
}
impl Error for ParseError {
// No underlying cause is exposed for a parse error.
fn source(&self) -> Option<&(dyn Error + 'static)> {
None
}
}

427
parser/src/fstring.rs Normal file
View file

@ -0,0 +1,427 @@
use self::FStringErrorType::*;
use crate::{
ast::{Constant, ConversionFlag, Expr, ExprKind, Location},
error::{FStringError, FStringErrorType, ParseError},
parser::parse_expression,
};
use std::{iter, mem, str};
/// Character-by-character parser for the contents of one f-string.
struct FStringParser<'a> {
// Remaining characters of the f-string body, with one-character lookahead.
chars: iter::Peekable<str::Chars<'a>>,
// Location stamped onto every expression node this parser creates itself.
str_location: Location,
// Nesting depth of this parser; `parse` rejects a depth of 2 or more.
recurse_lvl: u8,
}
impl<'a> FStringParser<'a> {
    /// Creates a parser over `source`.
    ///
    /// `str_location` is attached to every node the parser builds itself, and
    /// `recurse_lvl` is the nesting depth (0 for a top-level f-string).
    fn new(source: &'a str, str_location: Location, recurse_lvl: u8) -> Self {
        Self {
            chars: source.chars().peekable(),
            str_location,
            recurse_lvl,
        }
    }

    /// Wraps `node` in an `Expr` placed at the f-string's own location.
    #[inline]
    fn expr(&self, node: ExprKind) -> Expr {
        Expr::new(self.str_location, node)
    }

    /// Parses one replacement field; the opening `{` has already been consumed.
    ///
    /// Returns the expressions the field expands to: a single
    /// `FormattedValue`, or, for the self-documenting `expr=` form, two
    /// constants (the expression text followed by `=`, then any trailing
    /// spaces) and the `FormattedValue`.
    ///
    /// # Errors
    /// - `EmptyExpression` when the field holds no expression (whitespace only
    ///   counts as empty).
    /// - `InvalidConversionFlag` / `ExpectedRbrace` for a malformed `!` conversion.
    /// - `MismatchedDelimiter` for unbalanced brackets inside the expression.
    /// - `UnclosedLbrace` when the input ends before the closing `}`.
    /// - `InvalidExpression` when the expression text fails to parse.
    /// - Any error from parsing a nested field inside the format spec.
    fn parse_formatted_value(&mut self) -> Result<Vec<Expr>, FStringErrorType> {
        let mut expression = String::new();
        let mut spec = None;
        let mut delims = Vec::new();
        let mut conversion = ConversionFlag::None;
        let mut self_documenting = false;
        let mut trailing_seq = String::new();

        while let Some(ch) = self.chars.next() {
            match ch {
                // can be integrated better with the remaining code, but as a starting point ok
                // in general I would do here a tokenizing of the fstrings to omit this peeking.
                // These two-character operators must be recognized before the
                // single-character `!` and `=` handling below.
                '!' if self.chars.peek() == Some(&'=') => {
                    expression.push_str("!=");
                    self.chars.next();
                }
                '=' if self.chars.peek() == Some(&'=') => {
                    expression.push_str("==");
                    self.chars.next();
                }
                '>' if self.chars.peek() == Some(&'=') => {
                    expression.push_str(">=");
                    self.chars.next();
                }
                '<' if self.chars.peek() == Some(&'=') => {
                    expression.push_str("<=");
                    self.chars.next();
                }
                // Conversion flag: `!s`, `!a` or `!r`, only outside brackets.
                '!' if delims.is_empty() && self.chars.peek() != Some(&'=') => {
                    if expression.trim().is_empty() {
                        return Err(EmptyExpression);
                    }
                    conversion = match self.chars.next() {
                        Some('s') => ConversionFlag::Str,
                        Some('a') => ConversionFlag::Ascii,
                        Some('r') => ConversionFlag::Repr,
                        Some(_) => {
                            return Err(InvalidConversionFlag);
                        }
                        None => {
                            return Err(ExpectedRbrace);
                        }
                    };
                    // Only the end of the field or a format spec may follow.
                    match self.chars.peek() {
                        Some('}') | Some(':') => {}
                        _ => return Err(ExpectedRbrace),
                    }
                }
                // match a python 3.8 self documenting expression
                // format '{' PYTHON_EXPRESSION '=' FORMAT_SPECIFIER? '}'
                '=' if self.chars.peek() != Some(&'=') && delims.is_empty() => {
                    self_documenting = true;
                }
                // Format spec: runs until the `}` that closes the field and
                // may itself contain nested replacement fields.
                ':' if delims.is_empty() => {
                    let mut nested = 0;
                    let mut spec_constructor = Vec::new();
                    let mut constant_piece = String::new();
                    let mut formatted_value_piece = String::new();
                    while let Some(&next) = self.chars.peek() {
                        match next {
                            '{' if nested > 0 => {
                                nested += 1;
                                formatted_value_piece.push(next);
                            }
                            '}' if nested > 0 => {
                                nested -= 1;
                                if nested == 0 {
                                    // The nested field is complete: parse it
                                    // with a parser one level deeper.
                                    formatted_value_piece.push(next);
                                    spec_constructor.push(
                                        self.expr(ExprKind::FormattedValue {
                                            value: Box::new(
                                                FStringParser::new(
                                                    &formatted_value_piece,
                                                    Location::default(),
                                                    self.recurse_lvl + 1,
                                                )
                                                .parse()?,
                                            ),
                                            conversion: ConversionFlag::None as _,
                                            format_spec: None,
                                        }),
                                    );
                                    formatted_value_piece.clear();
                                } else {
                                    formatted_value_piece.push(next);
                                }
                            }
                            _ if nested > 0 => {
                                formatted_value_piece.push(next);
                            }
                            '{' => {
                                nested += 1;
                                // Emit the literal text gathered so far (even
                                // when empty) and start a nested field.
                                spec_constructor.push(self.expr(ExprKind::Constant {
                                    value: mem::take(&mut constant_piece).into(),
                                    kind: None,
                                }));
                                formatted_value_piece.push(next);
                                formatted_value_piece.push(' ');
                            }
                            // Leave the closing brace for the outer loop.
                            '}' => break,
                            _ => {
                                constant_piece.push(next);
                            }
                        }
                        self.chars.next();
                    }
                    spec_constructor.push(self.expr(ExprKind::Constant {
                        value: constant_piece.into(),
                        kind: None,
                    }));
                    if nested > 0 {
                        return Err(UnclosedLbrace);
                    }
                    spec = Some(Box::new(self.expr(ExprKind::JoinedStr {
                        values: spec_constructor,
                    })))
                }
                '(' | '{' | '[' => {
                    expression.push(ch);
                    delims.push(ch);
                }
                ')' => {
                    if delims.pop() != Some('(') {
                        return Err(MismatchedDelimiter);
                    }
                    expression.push(ch);
                }
                ']' => {
                    if delims.pop() != Some('[') {
                        return Err(MismatchedDelimiter);
                    }
                    expression.push(ch);
                }
                '}' if !delims.is_empty() => {
                    if delims.pop() != Some('{') {
                        return Err(MismatchedDelimiter);
                    }
                    expression.push(ch);
                }
                // End of the replacement field.
                '}' => {
                    // Whitespace alone is not an expression. Without the trim,
                    // `{ }` would be parsed as `( )`, i.e. an empty tuple.
                    if expression.trim().is_empty() {
                        return Err(EmptyExpression);
                    }
                    let ret = if !self_documenting {
                        vec![self.expr(ExprKind::FormattedValue {
                            value: Box::new(
                                parse_fstring_expr(&expression)
                                    .map_err(|e| InvalidExpression(Box::new(e.error)))?,
                            ),
                            conversion: conversion as _,
                            format_spec: spec,
                        })]
                    } else {
                        vec![
                            self.expr(ExprKind::Constant {
                                value: Constant::Str(expression.clone() + "="),
                                kind: None,
                            }),
                            self.expr(ExprKind::Constant {
                                value: trailing_seq.into(),
                                kind: None,
                            }),
                            self.expr(ExprKind::FormattedValue {
                                value: Box::new(
                                    parse_fstring_expr(&expression)
                                        .map_err(|e| InvalidExpression(Box::new(e.error)))?,
                                ),
                                // With neither a conversion nor a format spec,
                                // the self-documenting form defaults to repr.
                                conversion: (if conversion == ConversionFlag::None && spec.is_none()
                                {
                                    ConversionFlag::Repr
                                } else {
                                    conversion
                                }) as _,
                                format_spec: spec,
                            }),
                        ]
                    };
                    return Ok(ret);
                }
                // String literal inside the expression: copy it through up to
                // the matching quote so its contents are not interpreted.
                '"' | '\'' => {
                    expression.push(ch);
                    for next in &mut self.chars {
                        expression.push(next);
                        if next == ch {
                            break;
                        }
                    }
                }
                ' ' if self_documenting => {
                    trailing_seq.push(ch);
                }
                _ => {
                    // After the `=` of a self-documenting field only spaces,
                    // a conversion, a format spec or `}` may follow.
                    if self_documenting {
                        return Err(ExpectedRbrace);
                    }
                    expression.push(ch);
                }
            }
        }
        Err(UnclosedLbrace)
    }

    /// Parses the whole f-string body into a `JoinedStr` expression.
    ///
    /// Literal text becomes `Constant` nodes and each replacement field is
    /// expanded by `parse_formatted_value`. `{{` and `}}` produce literal braces.
    ///
    /// # Errors
    /// - `ExpressionNestedTooDeeply` when `recurse_lvl` is 2 or more.
    /// - `UnopenedRbrace` for a single `}` outside a replacement field.
    /// - Any error from `parse_formatted_value`.
    fn parse(mut self) -> Result<Expr, FStringErrorType> {
        if self.recurse_lvl >= 2 {
            return Err(ExpressionNestedTooDeeply);
        }

        let mut content = String::new();
        let mut values = vec![];

        while let Some(ch) = self.chars.next() {
            match ch {
                '{' => {
                    if let Some('{') = self.chars.peek() {
                        self.chars.next();
                        content.push('{');
                    } else {
                        // Flush pending literal text before the field.
                        if !content.is_empty() {
                            values.push(self.expr(ExprKind::Constant {
                                value: mem::take(&mut content).into(),
                                kind: None,
                            }));
                        }
                        values.extend(self.parse_formatted_value()?);
                    }
                }
                '}' => {
                    if let Some('}') = self.chars.peek() {
                        self.chars.next();
                        content.push('}');
                    } else {
                        return Err(UnopenedRbrace);
                    }
                }
                _ => {
                    content.push(ch);
                }
            }
        }

        if !content.is_empty() {
            values.push(self.expr(ExprKind::Constant {
                value: content.into(),
                kind: None,
            }))
        }

        Ok(self.expr(ExprKind::JoinedStr { values }))
    }
}
/// Parses the expression text of a replacement field.
///
/// The text is wrapped in parentheses before being passed to
/// `parse_expression`.
fn parse_fstring_expr(source: &str) -> Result<Expr, ParseError> {
    parse_expression(&format!("({})", source))
}
/// Parse an fstring from a string, located at a certain position in the sourcecode.
/// In case of errors, we will get the location and the error returned.
pub fn parse_located_fstring(source: &str, location: Location) -> Result<Expr, FStringError> {
FStringParser::new(source, location, 0)
.parse()
.map_err(|error| FStringError { error, location })
}
// Unit tests for the f-string parser. Most of them compare the parsed AST
// against a stored insta snapshot.
#[cfg(test)]
mod tests {
use super::*;
// Parses `source` as a top-level f-string body at the default location.
fn parse_fstring(source: &str) -> Result<Expr, FStringErrorType> {
FStringParser::new(source, Location::default(), 0).parse()
}
// Plain fields, a field with surrounding spaces, and escaped braces.
#[test]
fn test_parse_fstring() {
let source = "{a}{ b }{{foo}}";
let parse_ast = parse_fstring(source).unwrap();
insta::assert_debug_snapshot!(parse_ast);
}
// A format spec that itself contains a replacement field.
#[test]
fn test_parse_fstring_nested_spec() {
let source = "{foo:{spec}}";
let parse_ast = parse_fstring(source).unwrap();
insta::assert_debug_snapshot!(parse_ast);
}
// A format spec made of literal text only.
#[test]
fn test_parse_fstring_not_nested_spec() {
let source = "{foo:spec}";
let parse_ast = parse_fstring(source).unwrap();
insta::assert_debug_snapshot!(parse_ast);
}
#[test]
fn test_parse_empty_fstring() {
insta::assert_debug_snapshot!(parse_fstring("").unwrap());
}
// Self-documenting `expr=` form.
#[test]
fn test_fstring_parse_selfdocumenting_base() {
let src = "{user=}";
let parse_ast = parse_fstring(src).unwrap();
insta::assert_debug_snapshot!(parse_ast);
}
// Self-documenting fields mixed with literal text.
#[test]
fn test_fstring_parse_selfdocumenting_base_more() {
let src = "mix {user=} with text and {second=}";
let parse_ast = parse_fstring(src).unwrap();
insta::assert_debug_snapshot!(parse_ast);
}
// Self-documenting field followed by a format spec.
#[test]
fn test_fstring_parse_selfdocumenting_format() {
let src = "{user=:>10}";
let parse_ast = parse_fstring(src).unwrap();
insta::assert_debug_snapshot!(parse_ast);
}
// Each malformed input must produce the specific error kind listed.
#[test]
fn test_parse_invalid_fstring() {
assert_eq!(parse_fstring("{5!a"), Err(ExpectedRbrace));
assert_eq!(parse_fstring("{5!a1}"), Err(ExpectedRbrace));
assert_eq!(parse_fstring("{5!"), Err(ExpectedRbrace));
assert_eq!(parse_fstring("abc{!a 'cat'}"), Err(EmptyExpression));
assert_eq!(parse_fstring("{!a"), Err(EmptyExpression));
assert_eq!(parse_fstring("{ !a}"), Err(EmptyExpression));
assert_eq!(parse_fstring("{5!}"), Err(InvalidConversionFlag));
assert_eq!(parse_fstring("{5!x}"), Err(InvalidConversionFlag));
assert_eq!(parse_fstring("{a:{a:{b}}}"), Err(ExpressionNestedTooDeeply));
assert_eq!(parse_fstring("{a:b}}"), Err(UnopenedRbrace));
assert_eq!(parse_fstring("}"), Err(UnopenedRbrace));
assert_eq!(parse_fstring("{a:{b}"), Err(UnclosedLbrace));
assert_eq!(parse_fstring("{"), Err(UnclosedLbrace));
assert_eq!(parse_fstring("{}"), Err(EmptyExpression));
// TODO: check for InvalidExpression enum?
assert!(parse_fstring("{class}").is_err());
}
// `!=` inside a field is a comparison, not a conversion flag.
#[test]
fn test_parse_fstring_not_equals() {
let source = "{1 != 2}";
let parse_ast = parse_fstring(source).unwrap();
insta::assert_debug_snapshot!(parse_ast);
}
// `==` inside a field is a comparison, not the self-documenting marker.
#[test]
fn test_parse_fstring_equals() {
let source = "{42 == 42}";
let parse_ast = parse_fstring(source).unwrap();
insta::assert_debug_snapshot!(parse_ast);
}
// Space before the `=` of a self-documenting field.
#[test]
fn test_parse_fstring_selfdoc_prec_space() {
let source = "{x =}";
let parse_ast = parse_fstring(source).unwrap();
insta::assert_debug_snapshot!(parse_ast);
}
// Spaces after the `=` of a self-documenting field.
#[test]
fn test_parse_fstring_selfdoc_trailing_space() {
let source = "{x= }";
let parse_ast = parse_fstring(source).unwrap();
insta::assert_debug_snapshot!(parse_ast);
}
#[test]
fn test_parse_fstring_yield_expr() {
let source = "{yield}";
let parse_ast = parse_fstring(source).unwrap();
insta::assert_debug_snapshot!(parse_ast);
}
}

96
parser/src/function.rs Normal file
View file

@ -0,0 +1,96 @@
use ahash::RandomState;
use std::collections::HashSet;
use crate::ast;
use crate::error::{LexicalError, LexicalErrorType};
/// The arguments of a call, split into positional and keyword arguments.
pub struct ArgumentList {
/// Arguments passed without a keyword, in call order.
pub args: Vec<ast::Expr>,
/// Arguments passed with a keyword entry, in call order.
pub keywords: Vec<ast::Keyword>,
}
// Result of `parse_params`: (first parameter group, second parameter group, collected defaults).
type ParameterDefs = (Vec<ast::Arg>, Vec<ast::Arg>, Vec<ast::Expr>);
// One parameter together with its optional default value.
type ParameterDef = (ast::Arg, Option<ast::Expr>);
/// Splits two groups of parameter definitions into their arguments and one
/// shared list of default values.
///
/// The groups are processed in order (`params.0`, then `params.1`) and the
/// defaults of both end up in a single list.
///
/// # Errors
/// Returns `DefaultArgumentError`, located at the offending parameter, when a
/// parameter without a default follows one that has a default.
pub fn parse_params(
    params: (Vec<ParameterDef>, Vec<ParameterDef>),
) -> Result<ParameterDefs, LexicalError> {
    let (first_group, second_group) = params;
    let mut defaults = Vec::new();

    // Separates one group into its arguments while appending the defaults to
    // the shared list.
    let mut split_group = |group: Vec<ParameterDef>| -> Result<Vec<ast::Arg>, LexicalError> {
        let mut args = Vec::with_capacity(group.len());
        for (arg, default) in group {
            match default {
                Some(value) => defaults.push(value),
                None if defaults.is_empty() => {}
                // Once we have started with defaults, all remaining arguments
                // must have defaults
                None => {
                    return Err(LexicalError {
                        error: LexicalErrorType::DefaultArgumentError,
                        location: arg.location,
                    })
                }
            }
            args.push(arg);
        }
        Ok(args)
    };

    let posonly = split_group(first_group)?;
    let names = split_group(second_group)?;
    Ok((posonly, names, defaults))
}
// One call argument: `None` for a positional argument, or `Some((location, name))`
// for a keyword entry (the name itself is optional), paired with the value expression.
type FunctionArgument = (Option<(ast::Location, Option<String>)>, ast::Expr);
pub fn parse_args(func_args: Vec<FunctionArgument>) -> Result<ArgumentList, LexicalError> {
let mut args = vec![];
let mut keywords = vec![];
let mut keyword_names = HashSet::with_capacity_and_hasher(func_args.len(), RandomState::new());
for (name, value) in func_args {
match name {
Some((location, name)) => {
if let Some(keyword_name) = &name {
if keyword_names.contains(keyword_name) {
return Err(LexicalError {
error: LexicalErrorType::DuplicateKeywordArgumentError,
location,
});
}
keyword_names.insert(keyword_name.clone());
}
keywords.push(ast::Keyword::new(
location,
ast::KeywordData {
arg: name,
value: Box::new(value),
},
));
}
None => {
// Allow starred args after keyword arguments.
if !keywords.is_empty() && !is_starred(&value) {
return Err(LexicalError {
error: LexicalErrorType::PositionalArgumentError,
location: value.location,
});
}
args.push(value);
}
}
}
Ok(ArgumentList { args, keywords })
}
/// Reports whether `exp` is a starred expression.
fn is_starred(exp: &ast::Expr) -> bool {
    match &exp.node {
        ast::ExprKind::Starred { .. } => true,
        _ => false,
    }
}

1712
parser/src/lexer.rs Normal file

File diff suppressed because it is too large Load diff

34
parser/src/lib.rs Normal file
View file

@ -0,0 +1,34 @@
//! This crate can be used to parse Python source code into a
//! so-called AST (abstract syntax tree).
//!
//! The stages involved in this process are lexical analysis and
//! parsing. The lexical analysis splits the sourcecode into
//! tokens, and the parsing transforms those tokens into an AST.
//!
//! For example, one could do this:
//!
//! ```
//! use rustpython_parser::{parser, ast};
//!
//! let python_source = "print('Hello world')";
//! let python_ast = parser::parse_expression(python_source).unwrap();
//!
//! ```
#![doc(html_logo_url = "https://raw.githubusercontent.com/RustPython/RustPython/main/logo.png")]
#![doc(html_root_url = "https://docs.rs/rustpython-parser/")]
#[macro_use]
extern crate log;
pub use rustpython_ast as ast;
pub mod error;
mod fstring;
mod function;
pub mod lexer;
pub mod mode;
pub mod parser;
#[rustfmt::skip]
mod python;
mod string;
pub mod token;

40
parser/src/mode.rs Normal file
View file

@ -0,0 +1,40 @@
use crate::token::Tok;
/// Selects which kind of input the parser should expect.
///
/// Each variant is turned into a matching start-marker token by `to_marker`.
#[derive(Clone, Copy)]
pub enum Mode {
/// Marked with `Tok::StartModule`.
Module,
/// Marked with `Tok::StartInteractive`.
Interactive,
/// Marked with `Tok::StartExpression`.
Expression,
}
impl Mode {
pub(crate) fn to_marker(self) -> Tok {
match self {
Self::Module => Tok::StartModule,
Self::Interactive => Tok::StartInteractive,
Self::Expression => Tok::StartExpression,
}
}
}
/// Parses a mode name: `"exec"` and `"single"` both give `Mode::Module`,
/// `"eval"` gives `Mode::Expression`, and anything else is an error.
impl std::str::FromStr for Mode {
    type Err = ModeParseError;

    fn from_str(s: &str) -> Result<Self, ModeParseError> {
        let mode = match s {
            "eval" => Mode::Expression,
            "exec" | "single" => Mode::Module,
            _ => return Err(ModeParseError { _priv: () }),
        };
        Ok(mode)
    }
}
/// Error returned when a string does not name a known [`Mode`].
#[derive(Debug)]
pub struct ModeParseError {
// Private unit field: code outside this module cannot build the struct with a literal.
_priv: (),
}
impl std::fmt::Display for ModeParseError {
    /// Writes a fixed message listing the accepted mode names.
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        f.write_str(r#"mode should be "exec", "eval", or "single""#)
    }
}

202
parser/src/parser.rs Normal file
View file

@ -0,0 +1,202 @@
//! Python parsing.
//!
//! Use this module to parse python code into an AST.
//! There are three ways to parse python code. You could
//! parse a whole program, a single statement, or a single
//! expression.
use std::iter;
use crate::ast;
use crate::error::ParseError;
use crate::lexer;
pub use crate::mode::Mode;
use crate::python;
/*
* Parse python code.
* Grammar may be inspired by antlr grammar for python:
* https://github.com/antlr/grammars-v4/tree/master/python3
*/
/// Parses a complete Python program, which usually spans several lines,
/// and returns its statements.
///
/// # Errors
/// Returns the `ParseError` reported by [`parse`] in `Mode::Module`.
pub fn parse_program(source: &str) -> Result<ast::Suite, ParseError> {
    match parse(source, Mode::Module)? {
        ast::Mod::Module { body, .. } => Ok(body),
        _ => unreachable!(),
    }
}
/// Parses a single Python expression and returns its AST node.
///
/// # Errors
/// Returns the `ParseError` reported by [`parse`] in `Mode::Expression`.
///
/// # Example
/// ```
/// extern crate num_bigint;
/// use rustpython_parser::{parser, ast};
/// let expr = parser::parse_expression("1 + 2").unwrap();
///
/// assert_eq!(
/// expr,
/// ast::Expr {
/// location: ast::Location::new(1, 3),
/// custom: (),
/// node: ast::ExprKind::BinOp {
/// left: Box::new(ast::Expr {
/// location: ast::Location::new(1, 1),
/// custom: (),
/// node: ast::ExprKind::Constant {
/// value: ast::Constant::Int(1.into()),
/// kind: None,
/// }
/// }),
/// op: ast::Operator::Add,
/// right: Box::new(ast::Expr {
/// location: ast::Location::new(1, 5),
/// custom: (),
/// node: ast::ExprKind::Constant {
/// value: ast::Constant::Int(2.into()),
/// kind: None,
/// }
/// })
/// }
/// },
/// );
///
/// ```
pub fn parse_expression(source: &str) -> Result<ast::Expr, ParseError> {
    match parse(source, Mode::Expression)? {
        ast::Mod::Expression { body } => Ok(*body),
        _ => unreachable!(),
    }
}
// Parse a given source code
/// Parses `source` according to `mode`.
///
/// The lexer's token stream is prefixed with the start-marker token of
/// `mode` before it is handed to the generated parser.
///
/// # Errors
/// Any lalrpop error is converted into a `ParseError`.
pub fn parse(source: &str, mode: Mode) -> Result<ast::Mod, ParseError> {
    let start = (Default::default(), mode.to_marker(), Default::default());
    let tokens = iter::once(Ok(start)).chain(lexer::make_tokenizer(source));
    let parsed = python::TopParser::new().parse(tokens);
    parsed.map_err(ParseError::from)
}
// Unit tests for the parser entry points. Each test compares the parsed AST
// against a stored insta snapshot.
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_parse_empty() {
let parse_ast = parse_program("").unwrap();
insta::assert_debug_snapshot!(parse_ast);
}
#[test]
fn test_parse_string() {
let source = String::from("'Hello world'");
let parse_ast = parse_program(&source).unwrap();
insta::assert_debug_snapshot!(parse_ast);
}
#[test]
fn test_parse_f_string() {
let source = String::from("f'Hello world'");
let parse_ast = parse_program(&source).unwrap();
insta::assert_debug_snapshot!(parse_ast);
}
#[test]
fn test_parse_print_hello() {
let source = String::from("print('Hello world')");
let parse_ast = parse_program(&source).unwrap();
insta::assert_debug_snapshot!(parse_ast);
}
#[test]
fn test_parse_print_2() {
let source = String::from("print('Hello world', 2)");
let parse_ast = parse_program(&source).unwrap();
insta::assert_debug_snapshot!(parse_ast);
}
// Call mixing a positional and a keyword argument.
#[test]
fn test_parse_kwargs() {
let source = String::from("my_func('positional', keyword=2)");
let parse_ast = parse_program(&source).unwrap();
insta::assert_debug_snapshot!(parse_ast);
}
#[test]
fn test_parse_if_elif_else() {
let source = String::from("if 1: 10\nelif 2: 20\nelse: 30");
let parse_ast = parse_program(&source).unwrap();
insta::assert_debug_snapshot!(parse_ast);
}
#[test]
fn test_parse_lambda() {
let source = "lambda x, y: x * y"; // lambda(x, y): x * y";
let parse_ast = parse_program(source).unwrap();
insta::assert_debug_snapshot!(parse_ast);
}
#[test]
fn test_parse_tuples() {
let source = "a, b = 4, 5";
insta::assert_debug_snapshot!(parse_program(source).unwrap());
}
// Class with two base classes and two methods, one with a default argument.
#[test]
fn test_parse_class() {
let source = "\
class Foo(A, B):
def __init__(self):
pass
def method_with_default(self, arg='default'):
pass";
insta::assert_debug_snapshot!(parse_program(source).unwrap());
}
// The comprehension tests below go through `parse_expression` rather than `parse_program`.
#[test]
fn test_parse_dict_comprehension() {
let source = String::from("{x1: x2 for y in z}");
let parse_ast = parse_expression(&source).unwrap();
insta::assert_debug_snapshot!(parse_ast);
}
#[test]
fn test_parse_list_comprehension() {
let source = String::from("[x for y in z]");
let parse_ast = parse_expression(&source).unwrap();
insta::assert_debug_snapshot!(parse_ast);
}
// Two `for` clauses, the second with two `if` filters.
#[test]
fn test_parse_double_list_comprehension() {
let source = String::from("[x for y, y2 in z for a in b if a < 5 if a > 10]");
let parse_ast = parse_expression(&source).unwrap();
insta::assert_debug_snapshot!(parse_ast);
}
#[test]
fn test_parse_generator_comprehension() {
let source = String::from("(x for y in z)");
let parse_ast = parse_expression(&source).unwrap();
insta::assert_debug_snapshot!(parse_ast);
}
// Named expression (`:=`) as the element of a generator expression.
#[test]
fn test_parse_named_expression_generator_comprehension() {
let source = String::from("(x := y + 1 for y in z)");
let parse_ast = parse_expression(&source).unwrap();
insta::assert_debug_snapshot!(parse_ast);
}
// Conditional expression as the element of a generator expression.
#[test]
fn test_parse_if_else_generator_comprehension() {
let source = String::from("(x if y else y for y in z)");
let parse_ast = parse_expression(&source).unwrap();
insta::assert_debug_snapshot!(parse_ast);
}
}

3
parser/src/python.rs Normal file
View file

@ -0,0 +1,3 @@
#![allow(clippy::all)]
#![allow(unused)]
include!("../python.rs");

View file

@ -0,0 +1,63 @@
---
source: parser/src/fstring.rs
expression: parse_ast
---
Located {
location: Location {
row: 0,
column: 0,
},
custom: (),
node: JoinedStr {
values: [
Located {
location: Location {
row: 0,
column: 0,
},
custom: (),
node: Constant {
value: Str(
"user=",
),
kind: None,
},
},
Located {
location: Location {
row: 0,
column: 0,
},
custom: (),
node: Constant {
value: Str(
"",
),
kind: None,
},
},
Located {
location: Location {
row: 0,
column: 0,
},
custom: (),
node: FormattedValue {
value: Located {
location: Location {
row: 1,
column: 2,
},
custom: (),
node: Name {
id: "user",
ctx: Load,
},
},
conversion: 114,
format_spec: None,
},
},
],
},
}

View file

@ -0,0 +1,137 @@
---
source: parser/src/fstring.rs
expression: parse_ast
---
Located {
location: Location {
row: 0,
column: 0,
},
custom: (),
node: JoinedStr {
values: [
Located {
location: Location {
row: 0,
column: 0,
},
custom: (),
node: Constant {
value: Str(
"mix ",
),
kind: None,
},
},
Located {
location: Location {
row: 0,
column: 0,
},
custom: (),
node: Constant {
value: Str(
"user=",
),
kind: None,
},
},
Located {
location: Location {
row: 0,
column: 0,
},
custom: (),
node: Constant {
value: Str(
"",
),
kind: None,
},
},
Located {
location: Location {
row: 0,
column: 0,
},
custom: (),
node: FormattedValue {
value: Located {
location: Location {
row: 1,
column: 2,
},
custom: (),
node: Name {
id: "user",
ctx: Load,
},
},
conversion: 114,
format_spec: None,
},
},
Located {
location: Location {
row: 0,
column: 0,
},
custom: (),
node: Constant {
value: Str(
" with text and ",
),
kind: None,
},
},
Located {
location: Location {
row: 0,
column: 0,
},
custom: (),
node: Constant {
value: Str(
"second=",
),
kind: None,
},
},
Located {
location: Location {
row: 0,
column: 0,
},
custom: (),
node: Constant {
value: Str(
"",
),
kind: None,
},
},
Located {
location: Location {
row: 0,
column: 0,
},
custom: (),
node: FormattedValue {
value: Located {
location: Location {
row: 1,
column: 2,
},
custom: (),
node: Name {
id: "second",
ctx: Load,
},
},
conversion: 114,
format_spec: None,
},
},
],
},
}

View file

@ -0,0 +1,88 @@
---
source: parser/src/fstring.rs
expression: parse_ast
---
Located {
location: Location {
row: 0,
column: 0,
},
custom: (),
node: JoinedStr {
values: [
Located {
location: Location {
row: 0,
column: 0,
},
custom: (),
node: Constant {
value: Str(
"user=",
),
kind: None,
},
},
Located {
location: Location {
row: 0,
column: 0,
},
custom: (),
node: Constant {
value: Str(
"",
),
kind: None,
},
},
Located {
location: Location {
row: 0,
column: 0,
},
custom: (),
node: FormattedValue {
value: Located {
location: Location {
row: 1,
column: 2,
},
custom: (),
node: Name {
id: "user",
ctx: Load,
},
},
conversion: 0,
format_spec: Some(
Located {
location: Location {
row: 0,
column: 0,
},
custom: (),
node: JoinedStr {
values: [
Located {
location: Location {
row: 0,
column: 0,
},
custom: (),
node: Constant {
value: Str(
">10",
),
kind: None,
},
},
],
},
},
),
},
},
],
},
}

View file

@ -0,0 +1,15 @@
---
source: parser/src/fstring.rs
expression: "parse_fstring(\"\").unwrap()"
---
Located {
location: Location {
row: 0,
column: 0,
},
custom: (),
node: JoinedStr {
values: [],
},
}

View file

@ -0,0 +1,72 @@
---
source: parser/src/fstring.rs
expression: parse_ast
---
Located {
location: Location {
row: 0,
column: 0,
},
custom: (),
node: JoinedStr {
values: [
Located {
location: Location {
row: 0,
column: 0,
},
custom: (),
node: FormattedValue {
value: Located {
location: Location {
row: 1,
column: 2,
},
custom: (),
node: Name {
id: "a",
ctx: Load,
},
},
conversion: 0,
format_spec: None,
},
},
Located {
location: Location {
row: 0,
column: 0,
},
custom: (),
node: FormattedValue {
value: Located {
location: Location {
row: 1,
column: 3,
},
custom: (),
node: Name {
id: "b",
ctx: Load,
},
},
conversion: 0,
format_spec: None,
},
},
Located {
location: Location {
row: 0,
column: 0,
},
custom: (),
node: Constant {
value: Str(
"{foo}",
),
kind: None,
},
},
],
},
}

View file

@ -0,0 +1,66 @@
---
source: parser/src/fstring.rs
expression: parse_ast
---
Located {
location: Location {
row: 0,
column: 0,
},
custom: (),
node: JoinedStr {
values: [
Located {
location: Location {
row: 0,
column: 0,
},
custom: (),
node: FormattedValue {
value: Located {
location: Location {
row: 1,
column: 5,
},
custom: (),
node: Compare {
left: Located {
location: Location {
row: 1,
column: 2,
},
custom: (),
node: Constant {
value: Int(
42,
),
kind: None,
},
},
ops: [
Eq,
],
comparators: [
Located {
location: Location {
row: 1,
column: 8,
},
custom: (),
node: Constant {
value: Int(
42,
),
kind: None,
},
},
],
},
},
conversion: 0,
format_spec: None,
},
},
],
},
}

View file

@ -0,0 +1,119 @@
---
source: parser/src/fstring.rs
expression: parse_ast
---
Located {
location: Location {
row: 0,
column: 0,
},
custom: (),
node: JoinedStr {
values: [
Located {
location: Location {
row: 0,
column: 0,
},
custom: (),
node: FormattedValue {
value: Located {
location: Location {
row: 1,
column: 2,
},
custom: (),
node: Name {
id: "foo",
ctx: Load,
},
},
conversion: 0,
format_spec: Some(
Located {
location: Location {
row: 0,
column: 0,
},
custom: (),
node: JoinedStr {
values: [
Located {
location: Location {
row: 0,
column: 0,
},
custom: (),
node: Constant {
value: Str(
"",
),
kind: None,
},
},
Located {
location: Location {
row: 0,
column: 0,
},
custom: (),
node: FormattedValue {
value: Located {
location: Location {
row: 0,
column: 0,
},
custom: (),
node: JoinedStr {
values: [
Located {
location: Location {
row: 0,
column: 0,
},
custom: (),
node: FormattedValue {
value: Located {
location: Location {
row: 1,
column: 3,
},
custom: (),
node: Name {
id: "spec",
ctx: Load,
},
},
conversion: 0,
format_spec: None,
},
},
],
},
},
conversion: 0,
format_spec: None,
},
},
Located {
location: Location {
row: 0,
column: 0,
},
custom: (),
node: Constant {
value: Str(
"",
),
kind: None,
},
},
],
},
},
),
},
},
],
},
}

View file

@ -0,0 +1,66 @@
---
source: parser/src/fstring.rs
expression: parse_ast
---
Located {
location: Location {
row: 0,
column: 0,
},
custom: (),
node: JoinedStr {
values: [
Located {
location: Location {
row: 0,
column: 0,
},
custom: (),
node: FormattedValue {
value: Located {
location: Location {
row: 1,
column: 4,
},
custom: (),
node: Compare {
left: Located {
location: Location {
row: 1,
column: 2,
},
custom: (),
node: Constant {
value: Int(
1,
),
kind: None,
},
},
ops: [
NotEq,
],
comparators: [
Located {
location: Location {
row: 1,
column: 7,
},
custom: (),
node: Constant {
value: Int(
2,
),
kind: None,
},
},
],
},
},
conversion: 0,
format_spec: None,
},
},
],
},
}

View file

@ -0,0 +1,62 @@
---
source: parser/src/fstring.rs
expression: parse_ast
---
Located {
location: Location {
row: 0,
column: 0,
},
custom: (),
node: JoinedStr {
values: [
Located {
location: Location {
row: 0,
column: 0,
},
custom: (),
node: FormattedValue {
value: Located {
location: Location {
row: 1,
column: 2,
},
custom: (),
node: Name {
id: "foo",
ctx: Load,
},
},
conversion: 0,
format_spec: Some(
Located {
location: Location {
row: 0,
column: 0,
},
custom: (),
node: JoinedStr {
values: [
Located {
location: Location {
row: 0,
column: 0,
},
custom: (),
node: Constant {
value: Str(
"spec",
),
kind: None,
},
},
],
},
},
),
},
},
],
},
}

View file

@ -0,0 +1,63 @@
---
source: parser/src/fstring.rs
expression: parse_ast
---
Located {
location: Location {
row: 0,
column: 0,
},
custom: (),
node: JoinedStr {
values: [
Located {
location: Location {
row: 0,
column: 0,
},
custom: (),
node: Constant {
value: Str(
"x =",
),
kind: None,
},
},
Located {
location: Location {
row: 0,
column: 0,
},
custom: (),
node: Constant {
value: Str(
"",
),
kind: None,
},
},
Located {
location: Location {
row: 0,
column: 0,
},
custom: (),
node: FormattedValue {
value: Located {
location: Location {
row: 1,
column: 2,
},
custom: (),
node: Name {
id: "x",
ctx: Load,
},
},
conversion: 114,
format_spec: None,
},
},
],
},
}

View file

@ -0,0 +1,63 @@
---
source: parser/src/fstring.rs
expression: parse_ast
---
Located {
location: Location {
row: 0,
column: 0,
},
custom: (),
node: JoinedStr {
values: [
Located {
location: Location {
row: 0,
column: 0,
},
custom: (),
node: Constant {
value: Str(
"x=",
),
kind: None,
},
},
Located {
location: Location {
row: 0,
column: 0,
},
custom: (),
node: Constant {
value: Str(
" ",
),
kind: None,
},
},
Located {
location: Location {
row: 0,
column: 0,
},
custom: (),
node: FormattedValue {
value: Located {
location: Location {
row: 1,
column: 2,
},
custom: (),
node: Name {
id: "x",
ctx: Load,
},
},
conversion: 114,
format_spec: None,
},
},
],
},
}

View file

@ -0,0 +1,36 @@
---
source: parser/src/fstring.rs
expression: parse_ast
---
Located {
location: Location {
row: 0,
column: 0,
},
custom: (),
node: JoinedStr {
values: [
Located {
location: Location {
row: 0,
column: 0,
},
custom: (),
node: FormattedValue {
value: Located {
location: Location {
row: 1,
column: 2,
},
custom: (),
node: Yield {
value: None,
},
},
conversion: 0,
format_spec: None,
},
},
],
},
}

View file

@ -0,0 +1,160 @@
---
source: parser/src/parser.rs
expression: parse_program(&source).unwrap()
---
[
Located {
location: Location {
row: 1,
column: 1,
},
custom: (),
node: ClassDef {
name: "Foo",
bases: [
Located {
location: Location {
row: 1,
column: 11,
},
custom: (),
node: Name {
id: "A",
ctx: Load,
},
},
Located {
location: Location {
row: 1,
column: 14,
},
custom: (),
node: Name {
id: "B",
ctx: Load,
},
},
],
keywords: [],
body: [
Located {
location: Location {
row: 2,
column: 2,
},
custom: (),
node: FunctionDef {
name: "__init__",
args: Arguments {
posonlyargs: [],
args: [
Located {
location: Location {
row: 2,
column: 15,
},
custom: (),
node: ArgData {
arg: "self",
annotation: None,
type_comment: None,
},
},
],
vararg: None,
kwonlyargs: [],
kw_defaults: [],
kwarg: None,
defaults: [],
},
body: [
Located {
location: Location {
row: 3,
column: 3,
},
custom: (),
node: Pass,
},
],
decorator_list: [],
returns: None,
type_comment: None,
},
},
Located {
location: Location {
row: 4,
column: 2,
},
custom: (),
node: FunctionDef {
name: "method_with_default",
args: Arguments {
posonlyargs: [],
args: [
Located {
location: Location {
row: 4,
column: 26,
},
custom: (),
node: ArgData {
arg: "self",
annotation: None,
type_comment: None,
},
},
Located {
location: Location {
row: 4,
column: 32,
},
custom: (),
node: ArgData {
arg: "arg",
annotation: None,
type_comment: None,
},
},
],
vararg: None,
kwonlyargs: [],
kw_defaults: [],
kwarg: None,
defaults: [
Located {
location: Location {
row: 4,
column: 37,
},
custom: (),
node: Constant {
value: Str(
"default",
),
kind: None,
},
},
],
},
body: [
Located {
location: Location {
row: 5,
column: 3,
},
custom: (),
node: Pass,
},
],
decorator_list: [],
returns: None,
type_comment: None,
},
},
],
decorator_list: [],
},
},
]

View file

@ -0,0 +1,63 @@
---
source: parser/src/parser.rs
expression: parse_ast
---
Located {
location: Location {
row: 1,
column: 1,
},
custom: (),
node: DictComp {
key: Located {
location: Location {
row: 1,
column: 2,
},
custom: (),
node: Name {
id: "x1",
ctx: Load,
},
},
value: Located {
location: Location {
row: 1,
column: 6,
},
custom: (),
node: Name {
id: "x2",
ctx: Load,
},
},
generators: [
Comprehension {
target: Located {
location: Location {
row: 1,
column: 13,
},
custom: (),
node: Name {
id: "y",
ctx: Load,
},
},
iter: Located {
location: Location {
row: 1,
column: 18,
},
custom: (),
node: Name {
id: "z",
ctx: Load,
},
},
ifs: [],
is_async: 0,
},
],
},
}

View file

@ -0,0 +1,178 @@
---
source: parser/src/parser.rs
expression: parse_ast
---
Located {
location: Location {
row: 1,
column: 1,
},
custom: (),
node: ListComp {
elt: Located {
location: Location {
row: 1,
column: 2,
},
custom: (),
node: Name {
id: "x",
ctx: Load,
},
},
generators: [
Comprehension {
target: Located {
location: Location {
row: 1,
column: 8,
},
custom: (),
node: Tuple {
elts: [
Located {
location: Location {
row: 1,
column: 8,
},
custom: (),
node: Name {
id: "y",
ctx: Load,
},
},
Located {
location: Location {
row: 1,
column: 11,
},
custom: (),
node: Name {
id: "y2",
ctx: Load,
},
},
],
ctx: Load,
},
},
iter: Located {
location: Location {
row: 1,
column: 17,
},
custom: (),
node: Name {
id: "z",
ctx: Load,
},
},
ifs: [],
is_async: 0,
},
Comprehension {
target: Located {
location: Location {
row: 1,
column: 23,
},
custom: (),
node: Name {
id: "a",
ctx: Load,
},
},
iter: Located {
location: Location {
row: 1,
column: 28,
},
custom: (),
node: Name {
id: "b",
ctx: Load,
},
},
ifs: [
Located {
location: Location {
row: 1,
column: 35,
},
custom: (),
node: Compare {
left: Located {
location: Location {
row: 1,
column: 33,
},
custom: (),
node: Name {
id: "a",
ctx: Load,
},
},
ops: [
Lt,
],
comparators: [
Located {
location: Location {
row: 1,
column: 37,
},
custom: (),
node: Constant {
value: Int(
5,
),
kind: None,
},
},
],
},
},
Located {
location: Location {
row: 1,
column: 44,
},
custom: (),
node: Compare {
left: Located {
location: Location {
row: 1,
column: 42,
},
custom: (),
node: Name {
id: "a",
ctx: Load,
},
},
ops: [
Gt,
],
comparators: [
Located {
location: Location {
row: 1,
column: 46,
},
custom: (),
node: Constant {
value: Int(
10,
),
kind: None,
},
},
],
},
},
],
is_async: 0,
},
],
},
}

View file

@ -0,0 +1,5 @@
---
source: parser/src/parser.rs
expression: parse_ast
---
[]

View file

@ -0,0 +1,39 @@
---
source: parser/src/parser.rs
expression: parse_ast
---
[
Located {
location: Location {
row: 1,
column: 3,
},
custom: (),
node: Expr {
value: Located {
location: Location {
row: 1,
column: 3,
},
custom: (),
node: JoinedStr {
values: [
Located {
location: Location {
row: 1,
column: 3,
},
custom: (),
node: Constant {
value: Str(
"Hello world",
),
kind: None,
},
},
],
},
},
},
},
]

View file

@ -0,0 +1,52 @@
---
source: parser/src/parser.rs
expression: parse_ast
---
Located {
location: Location {
row: 1,
column: 1,
},
custom: (),
node: GeneratorExp {
elt: Located {
location: Location {
row: 1,
column: 2,
},
custom: (),
node: Name {
id: "x",
ctx: Load,
},
},
generators: [
Comprehension {
target: Located {
location: Location {
row: 1,
column: 8,
},
custom: (),
node: Name {
id: "y",
ctx: Load,
},
},
iter: Located {
location: Location {
row: 1,
column: 13,
},
custom: (),
node: Name {
id: "z",
ctx: Load,
},
},
ifs: [],
is_async: 0,
},
],
},
}

View file

@ -0,0 +1,125 @@
---
source: parser/src/parser.rs
expression: parse_ast
---
[
Located {
location: Location {
row: 1,
column: 1,
},
custom: (),
node: If {
test: Located {
location: Location {
row: 1,
column: 4,
},
custom: (),
node: Constant {
value: Int(
1,
),
kind: None,
},
},
body: [
Located {
location: Location {
row: 1,
column: 7,
},
custom: (),
node: Expr {
value: Located {
location: Location {
row: 1,
column: 7,
},
custom: (),
node: Constant {
value: Int(
10,
),
kind: None,
},
},
},
},
],
orelse: [
Located {
location: Location {
row: 2,
column: 1,
},
custom: (),
node: If {
test: Located {
location: Location {
row: 2,
column: 6,
},
custom: (),
node: Constant {
value: Int(
2,
),
kind: None,
},
},
body: [
Located {
location: Location {
row: 2,
column: 9,
},
custom: (),
node: Expr {
value: Located {
location: Location {
row: 2,
column: 9,
},
custom: (),
node: Constant {
value: Int(
20,
),
kind: None,
},
},
},
},
],
orelse: [
Located {
location: Location {
row: 3,
column: 7,
},
custom: (),
node: Expr {
value: Located {
location: Location {
row: 3,
column: 7,
},
custom: (),
node: Constant {
value: Int(
30,
),
kind: None,
},
},
},
},
],
},
},
],
},
},
]

View file

@ -0,0 +1,83 @@
---
source: parser/src/parser.rs
expression: parse_ast
---
Located {
location: Location {
row: 1,
column: 1,
},
custom: (),
node: GeneratorExp {
elt: Located {
location: Location {
row: 1,
column: 4,
},
custom: (),
node: IfExp {
test: Located {
location: Location {
row: 1,
column: 7,
},
custom: (),
node: Name {
id: "y",
ctx: Load,
},
},
body: Located {
location: Location {
row: 1,
column: 2,
},
custom: (),
node: Name {
id: "x",
ctx: Load,
},
},
orelse: Located {
location: Location {
row: 1,
column: 14,
},
custom: (),
node: Name {
id: "y",
ctx: Load,
},
},
},
},
generators: [
Comprehension {
target: Located {
location: Location {
row: 1,
column: 20,
},
custom: (),
node: Name {
id: "y",
ctx: Load,
},
},
iter: Located {
location: Location {
row: 1,
column: 25,
},
custom: (),
node: Name {
id: "z",
ctx: Load,
},
},
ifs: [],
is_async: 0,
},
],
},
}

View file

@ -0,0 +1,78 @@
---
source: parser/src/parser.rs
expression: parse_ast
---
[
Located {
location: Location {
row: 1,
column: 1,
},
custom: (),
node: Expr {
value: Located {
location: Location {
row: 1,
column: 8,
},
custom: (),
node: Call {
func: Located {
location: Location {
row: 1,
column: 1,
},
custom: (),
node: Name {
id: "my_func",
ctx: Load,
},
},
args: [
Located {
location: Location {
row: 1,
column: 10,
},
custom: (),
node: Constant {
value: Str(
"positional",
),
kind: None,
},
},
],
keywords: [
Located {
location: Location {
row: 1,
column: 23,
},
custom: (),
node: KeywordData {
arg: Some(
"keyword",
),
value: Located {
location: Location {
row: 1,
column: 31,
},
custom: (),
node: Constant {
value: Int(
2,
),
kind: None,
},
},
},
},
],
},
},
},
},
]

View file

@ -0,0 +1,90 @@
---
source: parser/src/parser.rs
expression: parse_ast
---
[
Located {
location: Location {
row: 1,
column: 1,
},
custom: (),
node: Expr {
value: Located {
location: Location {
row: 1,
column: 1,
},
custom: (),
node: Lambda {
args: Arguments {
posonlyargs: [],
args: [
Located {
location: Location {
row: 1,
column: 8,
},
custom: (),
node: ArgData {
arg: "x",
annotation: None,
type_comment: None,
},
},
Located {
location: Location {
row: 1,
column: 11,
},
custom: (),
node: ArgData {
arg: "y",
annotation: None,
type_comment: None,
},
},
],
vararg: None,
kwonlyargs: [],
kw_defaults: [],
kwarg: None,
defaults: [],
},
body: Located {
location: Location {
row: 1,
column: 16,
},
custom: (),
node: BinOp {
left: Located {
location: Location {
row: 1,
column: 14,
},
custom: (),
node: Name {
id: "x",
ctx: Load,
},
},
op: Mult,
right: Located {
location: Location {
row: 1,
column: 18,
},
custom: (),
node: Name {
id: "y",
ctx: Load,
},
},
},
},
},
},
},
},
]

View file

@ -0,0 +1,52 @@
---
source: parser/src/parser.rs
expression: parse_ast
---
Located {
location: Location {
row: 1,
column: 1,
},
custom: (),
node: ListComp {
elt: Located {
location: Location {
row: 1,
column: 2,
},
custom: (),
node: Name {
id: "x",
ctx: Load,
},
},
generators: [
Comprehension {
target: Located {
location: Location {
row: 1,
column: 8,
},
custom: (),
node: Name {
id: "y",
ctx: Load,
},
},
iter: Located {
location: Location {
row: 1,
column: 13,
},
custom: (),
node: Name {
id: "z",
ctx: Load,
},
},
ifs: [],
is_async: 0,
},
],
},
}

View file

@ -0,0 +1,95 @@
---
source: parser/src/parser.rs
expression: parse_ast
---
Located {
location: Location {
row: 1,
column: 1,
},
custom: (),
node: GeneratorExp {
elt: Located {
location: Location {
row: 1,
column: 2,
},
custom: (),
node: NamedExpr {
target: Located {
location: Location {
row: 1,
column: 2,
},
custom: (),
node: Name {
id: "x",
ctx: Store,
},
},
value: Located {
location: Location {
row: 1,
column: 9,
},
custom: (),
node: BinOp {
left: Located {
location: Location {
row: 1,
column: 7,
},
custom: (),
node: Name {
id: "y",
ctx: Load,
},
},
op: Add,
right: Located {
location: Location {
row: 1,
column: 11,
},
custom: (),
node: Constant {
value: Int(
1,
),
kind: None,
},
},
},
},
},
},
generators: [
Comprehension {
target: Located {
location: Location {
row: 1,
column: 17,
},
custom: (),
node: Name {
id: "y",
ctx: Load,
},
},
iter: Located {
location: Location {
row: 1,
column: 22,
},
custom: (),
node: Name {
id: "z",
ctx: Load,
},
},
ifs: [],
is_async: 0,
},
],
},
}

View file

@ -0,0 +1,65 @@
---
source: parser/src/parser.rs
expression: parse_ast
---
[
Located {
location: Location {
row: 1,
column: 1,
},
custom: (),
node: Expr {
value: Located {
location: Location {
row: 1,
column: 6,
},
custom: (),
node: Call {
func: Located {
location: Location {
row: 1,
column: 1,
},
custom: (),
node: Name {
id: "print",
ctx: Load,
},
},
args: [
Located {
location: Location {
row: 1,
column: 8,
},
custom: (),
node: Constant {
value: Str(
"Hello world",
),
kind: None,
},
},
Located {
location: Location {
row: 1,
column: 22,
},
custom: (),
node: Constant {
value: Int(
2,
),
kind: None,
},
},
],
keywords: [],
},
},
},
},
]

View file

@ -0,0 +1,51 @@
---
source: parser/src/parser.rs
expression: parse_ast
---
[
Located {
location: Location {
row: 1,
column: 1,
},
custom: (),
node: Expr {
value: Located {
location: Location {
row: 1,
column: 6,
},
custom: (),
node: Call {
func: Located {
location: Location {
row: 1,
column: 1,
},
custom: (),
node: Name {
id: "print",
ctx: Load,
},
},
args: [
Located {
location: Location {
row: 1,
column: 8,
},
custom: (),
node: Constant {
value: Str(
"Hello world",
),
kind: None,
},
},
],
keywords: [],
},
},
},
},
]

View file

@ -0,0 +1,28 @@
---
source: parser/src/parser.rs
expression: parse_ast
---
[
Located {
location: Location {
row: 1,
column: 2,
},
custom: (),
node: Expr {
value: Located {
location: Location {
row: 1,
column: 2,
},
custom: (),
node: Constant {
value: Str(
"Hello world",
),
kind: None,
},
},
},
},
]

View file

@ -0,0 +1,91 @@
---
source: parser/src/parser.rs
expression: parse_program(&source).unwrap()
---
[
Located {
location: Location {
row: 1,
column: 1,
},
custom: (),
node: Assign {
targets: [
Located {
location: Location {
row: 1,
column: 1,
},
custom: (),
node: Tuple {
elts: [
Located {
location: Location {
row: 1,
column: 1,
},
custom: (),
node: Name {
id: "a",
ctx: Load,
},
},
Located {
location: Location {
row: 1,
column: 4,
},
custom: (),
node: Name {
id: "b",
ctx: Load,
},
},
],
ctx: Load,
},
},
],
value: Located {
location: Location {
row: 1,
column: 8,
},
custom: (),
node: Tuple {
elts: [
Located {
location: Location {
row: 1,
column: 8,
},
custom: (),
node: Constant {
value: Int(
4,
),
kind: None,
},
},
Located {
location: Location {
row: 1,
column: 11,
},
custom: (),
node: Constant {
value: Int(
5,
),
kind: None,
},
},
],
ctx: Load,
},
},
type_comment: None,
},
},
]

View file

@ -0,0 +1,39 @@
---
source: parser/src/string.rs
expression: parse_ast
---
[
Located {
location: Location {
row: 1,
column: 2,
},
custom: (),
node: Expr {
value: Located {
location: Location {
row: 1,
column: 2,
},
custom: (),
node: JoinedStr {
values: [
Located {
location: Location {
row: 1,
column: 2,
},
custom: (),
node: Constant {
value: Str(
"Hello world",
),
kind: None,
},
},
],
},
},
},
},
]

View file

@ -0,0 +1,39 @@
---
source: parser/src/string.rs
expression: parse_ast
---
[
Located {
location: Location {
row: 1,
column: 2,
},
custom: (),
node: Expr {
value: Located {
location: Location {
row: 1,
column: 2,
},
custom: (),
node: JoinedStr {
values: [
Located {
location: Location {
row: 1,
column: 2,
},
custom: (),
node: Constant {
value: Str(
"Hello world",
),
kind: None,
},
},
],
},
},
},
},
]

View file

@ -0,0 +1,63 @@
---
source: parser/src/string.rs
expression: parse_ast
---
[
Located {
location: Location {
row: 1,
column: 2,
},
custom: (),
node: Expr {
value: Located {
location: Location {
row: 1,
column: 2,
},
custom: (),
node: JoinedStr {
values: [
Located {
location: Location {
row: 1,
column: 2,
},
custom: (),
node: Constant {
value: Str(
"Hello world",
),
kind: None,
},
},
Located {
location: Location {
row: 1,
column: 12,
},
custom: (),
node: FormattedValue {
value: Located {
location: Location {
row: 1,
column: 3,
},
custom: (),
node: Constant {
value: Str(
"!",
),
kind: None,
},
},
conversion: 0,
format_spec: None,
},
},
],
},
},
},
},
]

View file

@ -0,0 +1,28 @@
---
source: parser/src/string.rs
expression: parse_ast
---
[
Located {
location: Location {
row: 1,
column: 2,
},
custom: (),
node: Expr {
value: Located {
location: Location {
row: 1,
column: 2,
},
custom: (),
node: Constant {
value: Str(
"Hello world",
),
kind: None,
},
},
},
},
]

View file

@ -0,0 +1,41 @@
---
source: parser/src/string.rs
expression: parse_ast
---
[
Located {
location: Location {
row: 1,
column: 3,
},
custom: (),
node: Expr {
value: Located {
location: Location {
row: 1,
column: 3,
},
custom: (),
node: JoinedStr {
values: [
Located {
location: Location {
row: 1,
column: 3,
},
custom: (),
node: Constant {
value: Str(
"Hello world",
),
kind: Some(
"u",
),
},
},
],
},
},
},
},
]

View file

@ -0,0 +1,41 @@
---
source: parser/src/string.rs
expression: parse_ast
---
[
Located {
location: Location {
row: 1,
column: 3,
},
custom: (),
node: Expr {
value: Located {
location: Location {
row: 1,
column: 3,
},
custom: (),
node: JoinedStr {
values: [
Located {
location: Location {
row: 1,
column: 3,
},
custom: (),
node: Constant {
value: Str(
"Hello world!",
),
kind: Some(
"u",
),
},
},
],
},
},
},
},
]

View file

@ -0,0 +1,28 @@
---
source: parser/src/string.rs
expression: parse_ast
---
[
Located {
location: Location {
row: 1,
column: 2,
},
custom: (),
node: Expr {
value: Located {
location: Location {
row: 1,
column: 2,
},
custom: (),
node: Constant {
value: Str(
"Hello world",
),
kind: None,
},
},
},
},
]

View file

@ -0,0 +1,30 @@
---
source: parser/src/string.rs
expression: parse_ast
---
[
Located {
location: Location {
row: 1,
column: 3,
},
custom: (),
node: Expr {
value: Located {
location: Location {
row: 1,
column: 3,
},
custom: (),
node: Constant {
value: Str(
"Hello world",
),
kind: Some(
"u",
),
},
},
},
},
]

143
parser/src/string.rs Normal file
View file

@ -0,0 +1,143 @@
use crate::{
ast::{Constant, Expr, ExprKind, Location},
error::{LexicalError, LexicalErrorType},
fstring::parse_located_fstring,
token::StringKind,
};
use itertools::Itertools;
/// Combines a run of adjacent string-literal tokens into a single expression.
///
/// Plain and `u`-prefixed literals are concatenated as text. `f`-prefixed literals are parsed
/// with `parse_located_fstring`, and their constant pieces are merged with the neighbouring
/// text while their `FormattedValue` pieces are kept as separate elements.
///
/// Returns a `JoinedStr` if at least one f-string was present, otherwise the single merged
/// `Constant`. The result carries the location of the first literal, and every produced
/// constant has kind `Some("u")` exactly when the first literal is `StringKind::U`.
///
/// # Errors
///
/// An f-string parse failure is reported as `LexicalErrorType::FStringError`, located at the
/// start of the offending literal.
///
/// # Panics
///
/// Panics if `values` is empty (the first element is indexed unconditionally).
pub fn parse_strings(values: Vec<(Location, (String, StringKind))>) -> Result<Expr, LexicalError> {
    // Everything produced below is stamped with the first literal's location and prefix.
    let initial_location = values[0].0;
    let initial_kind = if values[0].1 .1 == StringKind::U {
        Some("u".to_owned())
    } else {
        None
    };

    // Decides between returning a `JoinedStr` and a lone `Constant` at the end.
    let mut has_fstring = false;
    // Finished output elements, in source order.
    let mut deduped: Vec<Expr> = Vec::new();
    // Text fragments seen since the last flush; they will become one `Constant`.
    let mut pending: Vec<String> = Vec::new();

    // Turns the accumulated fragments into one constant and leaves `pending` empty.
    let flush = |pending: &mut Vec<String>| -> Expr {
        let text = std::mem::take(pending).concat();
        Expr::new(
            initial_location,
            ExprKind::Constant {
                value: Constant::Str(text),
                kind: initial_kind.clone(),
            },
        )
    };

    for (location, (string, string_kind)) in values {
        match string_kind {
            StringKind::F => {
                has_fstring = true;
                let parsed = parse_located_fstring(&string, location).map_err(|e| LexicalError {
                    location,
                    error: LexicalErrorType::FStringError(e.error),
                })?;
                let parts = match parsed.node {
                    ExprKind::JoinedStr { values } => values,
                    _ => unreachable!("parse_located_fstring returned a non-JoinedStr."),
                };
                for part in parts {
                    match part.node {
                        ExprKind::FormattedValue { .. } => {
                            // A formatted value ends the current run of plain text.
                            if !pending.is_empty() {
                                deduped.push(flush(&mut pending));
                            }
                            deduped.push(part)
                        }
                        ExprKind::Constant {
                            value: Constant::Str(text),
                            ..
                        } => pending.push(text),
                        ExprKind::Constant { .. } => {
                            unreachable!("Unexpected non-string constant.");
                        }
                        _ => unreachable!("Unexpected non-string expression."),
                    }
                }
            }
            StringKind::Normal | StringKind::U => pending.push(string),
        }
    }

    // Emit whatever text trails the last formatted value (or the whole text if there was none).
    if !pending.is_empty() {
        deduped.push(flush(&mut pending));
    }

    if has_fstring {
        Ok(Expr::new(
            initial_location,
            ExprKind::JoinedStr { values: deduped },
        ))
    } else {
        Ok(deduped
            .into_iter()
            .exactly_one()
            .expect("String must be concatenated to a single element."))
    }
}
// Snapshot tests for implicit concatenation of adjacent string literals.
// Each test parses a one-line program and compares the debug-printed AST against the
// stored `insta` snapshot for that test.
#[cfg(test)]
mod tests {
use crate::parser::parse_program;
// Two plain literals.
#[test]
fn test_parse_string_concat() {
let source = String::from("'Hello ' 'world'");
let parse_ast = parse_program(&source).unwrap();
insta::assert_debug_snapshot!(parse_ast);
}
// Plain literal followed by a `u`-prefixed literal.
#[test]
fn test_parse_u_string_concat_1() {
let source = String::from("'Hello ' u'world'");
let parse_ast = parse_program(&source).unwrap();
insta::assert_debug_snapshot!(parse_ast);
}
// `u`-prefixed literal followed by a plain literal.
#[test]
fn test_parse_u_string_concat_2() {
let source = String::from("u'Hello ' 'world'");
let parse_ast = parse_program(&source).unwrap();
insta::assert_debug_snapshot!(parse_ast);
}
// Plain literal followed by an f-string without replacement fields.
#[test]
fn test_parse_f_string_concat_1() {
let source = String::from("'Hello ' f'world'");
let parse_ast = parse_program(&source).unwrap();
insta::assert_debug_snapshot!(parse_ast);
}
// NOTE(review): this input is identical to `test_parse_f_string_concat_1`; it may have been
// meant to cover the reversed order (f-string first) — confirm before changing, since the
// stored snapshot would need to be regenerated.
#[test]
fn test_parse_f_string_concat_2() {
let source = String::from("'Hello ' f'world'");
let parse_ast = parse_program(&source).unwrap();
insta::assert_debug_snapshot!(parse_ast);
}
// Plain literal followed by an f-string that ends in a replacement field.
#[test]
fn test_parse_f_string_concat_3() {
let source = String::from("'Hello ' f'world{\"!\"}'");
let parse_ast = parse_program(&source).unwrap();
insta::assert_debug_snapshot!(parse_ast);
}
// `u`-prefixed literal followed by an f-string.
#[test]
fn test_parse_u_f_string_concat_1() {
let source = String::from("u'Hello ' f'world'");
let parse_ast = parse_program(&source).unwrap();
insta::assert_debug_snapshot!(parse_ast);
}
// `u`-prefixed literal, then an f-string, then a plain literal.
#[test]
fn test_parse_u_f_string_concat_2() {
let source = String::from("u'Hello ' f'world' '!'");
let parse_ast = parse_program(&source).unwrap();
insta::assert_debug_snapshot!(parse_ast);
}
}

236
parser/src/token.rs Normal file
View file

@ -0,0 +1,236 @@
//! Different token definitions.
//! Loosely based on `token.h` from the CPython source.
use num_bigint::BigInt;
use std::fmt::{self, Write};
/// Python source code can be tokenized in a sequence of these tokens.
///
/// The `Display` implementation of this type shows the textual form of each token; the
/// per-variant notes below follow that implementation.
#[derive(Clone, Debug, PartialEq)]
pub enum Tok {
// Tokens that carry a payload.
/// An identifier.
Name { name: String },
/// An integer literal of arbitrary size.
Int { value: BigInt },
/// A floating-point literal.
Float { value: f64 },
/// A complex literal, split into its real and imaginary parts.
Complex { real: f64, imag: f64 },
/// A string literal together with its prefix kind (see `StringKind`).
String { value: String, kind: StringKind },
/// A bytes literal, stored as raw bytes.
Bytes { value: Vec<u8> },
// Layout tokens.
Newline,
Indent,
Dedent,
// Start markers, one per parse mode.
// NOTE(review): presumably emitted ahead of the real tokens to select the grammar entry
// point — confirm against the parser.
StartModule,
StartInteractive,
StartExpression,
EndOfFile,
// Punctuation and operators.
Lpar, // '('
Rpar, // ')'
Lsqb, // '['
Rsqb, // ']'
Colon,
Comma,
Semi, // ';'
Plus,
Minus,
Star,
Slash,
Vbar, // '|'
Amper, // '&'
Less,
Greater,
Equal, // '='
Dot,
Percent,
Lbrace, // '{'
Rbrace, // '}'
EqEqual, // '=='
NotEqual, // '!='
LessEqual,
GreaterEqual,
Tilde, // '~'
CircumFlex, // '^'
LeftShift, // '<<'
RightShift, // '>>'
DoubleStar, // '**'
DoubleStarEqual, // '**='
PlusEqual,
MinusEqual,
StarEqual,
SlashEqual,
PercentEqual,
AmperEqual, // '&='
VbarEqual, // '|='
CircumflexEqual, // '^='
LeftShiftEqual,
RightShiftEqual,
DoubleSlash, // '//'
DoubleSlashEqual, // '//='
ColonEqual, // ':='
At, // '@'
AtEqual, // '@='
Rarrow, // '->'
Ellipsis, // '...'
// Keywords (alphabetically):
False,
None,
True,
And,
As,
Assert,
Async,
Await,
Break,
Class,
Continue,
Def,
Del,
Elif,
Else,
Except,
Finally,
For,
From,
Global,
If,
Import,
In,
Is,
Lambda,
Nonlocal,
Not,
Or,
Pass,
Raise,
Return,
Try,
While,
With,
Yield,
}
/// The prefix kind attached to a `Tok::String` token.
#[derive(PartialEq, Eq, Debug, Clone)]
pub enum StringKind {
/// Displayed without any prefix.
Normal,
/// Displayed with an `f` prefix.
F,
/// Displayed with a `u` prefix.
U,
}
impl fmt::Display for Tok {
    /// Writes a human-readable rendering of the token.
    ///
    /// Tokens with a payload are formatted from that payload; every other token maps to a
    /// fixed string (operators and keywords are wrapped in single quotes).
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // NOTE: this glob import shadows `String` and `None` with the `Tok` variants
        // for the remainder of the function.
        use Tok::*;

        // Payload-carrying tokens are written and returned directly; all remaining tokens
        // resolve to a static string that is written once at the end.
        let fixed: &str = match self {
            Name { name } => return write!(f, "'{}'", name),
            Int { value } => return write!(f, "'{}'", value),
            Float { value } => return write!(f, "'{}'", value),
            Complex { real, imag } => return write!(f, "{}j{}", real, imag),
            String { value, kind } => {
                match kind {
                    StringKind::Normal => {}
                    StringKind::F => f.write_str("f")?,
                    StringKind::U => f.write_str("u")?,
                }
                // The debug form supplies the surrounding quotes and the escaping.
                return write!(f, "{:?}", value);
            }
            Bytes { value } => {
                f.write_str("b\"")?;
                for byte in value {
                    match byte {
                        9 => f.write_str("\\t")?,
                        10 => f.write_str("\\n")?,
                        13 => f.write_str("\\r")?,
                        // Printable ASCII is emitted verbatim.
                        32..=126 => f.write_char(*byte as char)?,
                        _ => write!(f, "\\x{:02x}", byte)?,
                    }
                }
                return f.write_str("\"");
            }

            // Layout and start/end markers.
            Newline => "Newline",
            Indent => "Indent",
            Dedent => "Dedent",
            StartModule => "StartProgram",
            StartInteractive => "StartInteractive",
            StartExpression => "StartExpression",
            EndOfFile => "EOF",

            // Punctuation and operators.
            Lpar => "'('",
            Rpar => "')'",
            Lsqb => "'['",
            Rsqb => "']'",
            Colon => "':'",
            Comma => "','",
            Semi => "';'",
            Plus => "'+'",
            Minus => "'-'",
            Star => "'*'",
            Slash => "'/'",
            Vbar => "'|'",
            Amper => "'&'",
            Less => "'<'",
            Greater => "'>'",
            Equal => "'='",
            Dot => "'.'",
            Percent => "'%'",
            Lbrace => "'{'",
            Rbrace => "'}'",
            EqEqual => "'=='",
            NotEqual => "'!='",
            LessEqual => "'<='",
            GreaterEqual => "'>='",
            Tilde => "'~'",
            CircumFlex => "'^'",
            LeftShift => "'<<'",
            RightShift => "'>>'",
            DoubleStar => "'**'",
            DoubleStarEqual => "'**='",
            PlusEqual => "'+='",
            MinusEqual => "'-='",
            StarEqual => "'*='",
            SlashEqual => "'/='",
            PercentEqual => "'%='",
            AmperEqual => "'&='",
            VbarEqual => "'|='",
            CircumflexEqual => "'^='",
            LeftShiftEqual => "'<<='",
            RightShiftEqual => "'>>='",
            DoubleSlash => "'//'",
            DoubleSlashEqual => "'//='",
            ColonEqual => "':='",
            At => "'@'",
            AtEqual => "'@='",
            Rarrow => "'->'",
            Ellipsis => "'...'",

            // Keywords.
            False => "'False'",
            None => "'None'",
            True => "'True'",
            And => "'and'",
            As => "'as'",
            Assert => "'assert'",
            Async => "'async'",
            Await => "'await'",
            Break => "'break'",
            Class => "'class'",
            Continue => "'continue'",
            Def => "'def'",
            Del => "'del'",
            Elif => "'elif'",
            Else => "'else'",
            Except => "'except'",
            Finally => "'finally'",
            For => "'for'",
            From => "'from'",
            Global => "'global'",
            If => "'if'",
            Import => "'import'",
            In => "'in'",
            Is => "'is'",
            Lambda => "'lambda'",
            Nonlocal => "'nonlocal'",
            Not => "'not'",
            Or => "'or'",
            Pass => "'pass'",
            Raise => "'raise'",
            Return => "'return'",
            Try => "'try'",
            While => "'while'",
            With => "'with'",
            Yield => "'yield'",
        };
        f.write_str(fixed)
    }
}

View file

@ -1,12 +0,0 @@
[package]
name = "rustpython-compiler"
version = "0.1.2"
description = "A usability wrapper around rustpython-parser and rustpython-compiler-core"
authors = ["RustPython Team"]
edition = "2021"
[dependencies]
thiserror = "1.0"
rustpython-compiler-core = { path = ".." }
rustpython-parser = { path = "../../parser" }
rustpython-bytecode = { path = "../../bytecode" }

View file

@ -1,124 +0,0 @@
use rustpython_bytecode::CodeObject;
use rustpython_compiler_core::{compile, symboltable};
use rustpython_parser::{
ast::{fold::Fold, ConstantOptimizer, Location},
parser,
};
use std::fmt;
pub use compile::{CompileOpts, Mode};
pub use symboltable::{Symbol, SymbolScope, SymbolTable, SymbolTableType};
/// The underlying cause of a `CompileError`: a failure from either the compiler core or
/// the parser. Both variants are `transparent`, so the wrapped error supplies the message.
#[derive(Debug, thiserror::Error)]
pub enum CompileErrorType {
/// An error reported by `rustpython_compiler_core`.
#[error(transparent)]
Compile(#[from] rustpython_compiler_core::error::CompileErrorType),
/// An error reported by `rustpython_parser`.
#[error(transparent)]
Parse(#[from] rustpython_parser::error::ParseErrorType),
}
/// A compile or parse failure together with where it happened.
///
/// `Display` is implemented by hand below rather than through an `#[error(...)]` attribute.
#[derive(Debug, thiserror::Error)]
pub struct CompileError {
/// What went wrong.
pub error: CompileErrorType,
/// The source line at `location` (with a trailing newline), when `get_statement`
/// could extract one.
pub statement: Option<String>,
/// Path of the source being compiled, as supplied by the caller.
pub source_path: String,
/// Position in the source at which the error was reported.
pub location: Location,
}
impl fmt::Display for CompileError {
    /// Formats the error as `<error> at <location>`.
    ///
    /// When the offending source line is available, the message is rendered through
    /// `Location::visualize` together with that line.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let loc = self.location;
        match &self.statement {
            // Location and statement are both known: let the location visualize the error.
            Some(stmt) => write!(
                f,
                "{}",
                loc.visualize(stmt, &format_args!("{} at {}", self.error, loc))
            ),
            None => write!(f, "{} at {}", self.error, loc),
        }
    }
}
impl CompileError {
    /// Wraps a compiler-core error, attaching the source line at its location when available.
    fn from_compile(error: rustpython_compiler_core::error::CompileError, source: &str) -> Self {
        let location = error.location;
        let statement = get_statement(source, location);
        Self {
            error: error.error.into(),
            statement,
            source_path: error.source_path,
            location,
        }
    }

    /// Wraps a parser error. The parser error carries no path, so `source_path` is supplied
    /// by the caller.
    fn from_parse(
        error: rustpython_parser::error::ParseError,
        source: &str,
        source_path: String,
    ) -> Self {
        let location = error.location;
        let statement = get_statement(source, location);
        Self {
            error: error.error.into(),
            statement,
            source_path,
            location,
        }
    }

    /// Wraps a symbol-table error by first converting it into a compiler-core error.
    fn from_symtable(
        error: symboltable::SymbolTableError,
        source: &str,
        source_path: String,
    ) -> Self {
        let compile_error = error.into_compile_error(source_path);
        Self::from_compile(compile_error, source)
    }
}
/// Compile a given sourcecode into a bytecode object.
///
/// The source is parsed in the parser mode matching `mode`, constant-folded when
/// `opts.optimize` is greater than zero, and then handed to `compile::compile_top`.
///
/// # Errors
///
/// Returns a [`CompileError`] if either parsing or compilation fails.
pub fn compile(
    source: &str,
    mode: compile::Mode,
    source_path: String,
    opts: CompileOpts,
) -> Result<CodeObject, CompileError> {
    let parser_mode = match mode {
        compile::Mode::Exec => parser::Mode::Module,
        compile::Mode::Eval => parser::Mode::Expression,
        compile::Mode::Single | compile::Mode::BlockExpr => parser::Mode::Interactive,
    };

    // `source_path` is still needed for compilation below, so it may only be given away
    // on the early-return path.
    let ast = match parser::parse(source, parser_mode) {
        Err(parse_error) => {
            return Err(CompileError::from_parse(parse_error, source, source_path));
        }
        Ok(tree) => tree,
    };

    let ast = if opts.optimize > 0 {
        // The empty match shows the folder's error type has no values.
        ConstantOptimizer::new()
            .fold_mod(ast)
            .unwrap_or_else(|never| match never {})
    } else {
        ast
    };

    match compile::compile_top(&ast, source_path, mode, opts) {
        Ok(code) => Ok(code),
        Err(compile_error) => Err(CompileError::from_compile(compile_error, source)),
    }
}
/// Parses `source` and builds its symbol table without generating bytecode.
///
/// `Eval` mode parses a single expression; every other mode parses a whole program.
///
/// # Errors
///
/// Returns a [`CompileError`] if parsing fails or the symbol table cannot be built.
pub fn compile_symtable(
    source: &str,
    mode: compile::Mode,
    source_path: &str,
) -> Result<symboltable::SymbolTable, CompileError> {
    // Each error constructor takes an owned path; it is only allocated on a failure path.
    let owned_path = || source_path.to_owned();

    let table = match mode {
        compile::Mode::Eval => {
            let expr = parser::parse_expression(source)
                .map_err(|e| CompileError::from_parse(e, source, owned_path()))?;
            symboltable::make_symbol_table_expr(&expr)
        }
        compile::Mode::Exec | compile::Mode::Single | compile::Mode::BlockExpr => {
            let ast = parser::parse_program(source)
                .map_err(|e| CompileError::from_parse(e, source, owned_path()))?;
            symboltable::make_symbol_table(&ast)
        }
    };

    table.map_err(|e| CompileError::from_symtable(e, source, owned_path()))
}
/// Returns the line of `source` that `loc` points into, with a trailing `'\n'` appended.
///
/// Yields `None` when either coordinate of `loc` is zero, or when `source` has fewer
/// `'\n'`-separated lines than `loc.row()`.
fn get_statement(source: &str, loc: Location) -> Option<String> {
    let (column, row) = (loc.column(), loc.row());
    if column == 0 || row == 0 {
        return None;
    }
    // Rows are 1-based, hence the `- 1` when indexing into the split lines.
    source
        .split('\n')
        .nth(row - 1)
        .map(|line| format!("{}\n", line))
}