feat: support pyi files

This commit is contained in:
Shunsuke Shibayama 2024-10-06 18:46:19 +09:00
parent fddc571eea
commit c4b7aa7faa
8 changed files with 293 additions and 52 deletions

20
Cargo.lock generated
View file

@ -145,9 +145,9 @@ checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0"
[[package]]
name = "els"
version = "0.1.58-nightly.2"
version = "0.1.58-nightly.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5b8f4bd082ef9c4b0acd2557d89fdcf886a04355357255a1f4e8a04009ebc9de"
checksum = "82ca64c7e007a801f3c026026d4f7c65193ca2ccfab19018cf47b0946ed1de86"
dependencies = [
"erg_common",
"erg_compiler",
@ -159,9 +159,9 @@ dependencies = [
[[package]]
name = "erg_common"
version = "0.6.46-nightly.2"
version = "0.6.46-nightly.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cf40ea506598a316dfb4abe6ae9af54d6d3d2ebe8ab0a59c9e17506a96d4eb15"
checksum = "c91d7308be743f27d0bcb6778d85d76bfad86fc54ae53ae5fab06b37bd54fd74"
dependencies = [
"backtrace-on-stack-overflow",
"erg_proc_macros",
@ -172,9 +172,9 @@ dependencies = [
[[package]]
name = "erg_compiler"
version = "0.6.46-nightly.2"
version = "0.6.46-nightly.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e58c92221e2dea780f3103d4ce14835d694aff8337ab0f8c184a25818a0f463f"
checksum = "a2ca9d5eb0b29b60d7ac8d7d639add33a4b331b35e4739775f0bd0f1e94be764"
dependencies = [
"erg_common",
"erg_parser",
@ -182,9 +182,9 @@ dependencies = [
[[package]]
name = "erg_parser"
version = "0.6.46-nightly.2"
version = "0.6.46-nightly.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f8df0a04d8e3ffd5c77d1d194ca37e1bd808a6a3b032b73bf861754544c30d57"
checksum = "f0d0f70495239bd721afb1be7ba33c9146cbd7d4d578bd65fcb86e52561224e0"
dependencies = [
"erg_common",
"erg_proc_macros",
@ -193,9 +193,9 @@ dependencies = [
[[package]]
name = "erg_proc_macros"
version = "0.6.46-nightly.2"
version = "0.6.46-nightly.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bfaf0544746cc53a805a17dc61c4966802462d8151659109eb2050b023df004d"
checksum = "61073a06b84b2e9c36b3645494102780936b560ba80f8c466cf2cdc374740f3e"
dependencies = [
"quote",
"syn 1.0.109",

View file

@ -24,9 +24,9 @@ edition = "2021"
repository = "https://github.com/mtshiba/pylyzer"
[workspace.dependencies]
erg_common = { version = "0.6.46-nightly.2", features = ["py_compat", "els"] }
erg_compiler = { version = "0.6.46-nightly.2", features = ["py_compat", "els"] }
els = { version = "0.1.58-nightly.2", features = ["py_compat"] }
erg_common = { version = "0.6.46-nightly.4", features = ["py_compat", "els"] }
erg_compiler = { version = "0.6.46-nightly.4", features = ["py_compat", "els"] }
els = { version = "0.1.58-nightly.4", features = ["py_compat"] }
# rustpython-parser = { version = "0.3.0", features = ["all-nodes-with-ranges", "location"] }
# rustpython-ast = { version = "0.3.0", features = ["all-nodes-with-ranges", "location"] }
rustpython-parser = { git = "https://github.com/RustPython/Parser", version = "0.4.0", features = ["all-nodes-with-ranges", "location"] }

View file

@ -96,6 +96,7 @@ pylyzer converts Python ASTs to Erg ASTs and passes them to Erg's type checker.
* [x] operator
* [x] function/method
* [x] class
* [ ] `async/await`
* [x] type inference
* [x] variable
* [x] operator
@ -145,7 +146,7 @@ pylyzer converts Python ASTs to Erg ASTs and passes them to Erg's type checker.
* [ ] others
* [x] type assertion (`typing.cast`)
* [x] type narrowing (`is`, `isinstance`)
* [ ] `pyi` (stub) files support
* [x] `pyi` (stub) files support
* [ ] glob pattern file check
* [x] type comment (`# type: ...`)

View file

@ -294,7 +294,8 @@ impl TypeVarInfo {
#[derive(Debug)]
pub struct LocalContext {
name: String,
pub name: String,
pub kind: BlockKind,
/// Erg does not allow variables to be defined multiple times, so rename them using this
names: HashMap<String, NameInfo>,
type_vars: HashMap<String, TypeVarInfo>,
@ -303,9 +304,10 @@ pub struct LocalContext {
}
impl LocalContext {
pub fn new(name: String) -> Self {
pub fn new(name: String, kind: BlockKind) -> Self {
Self {
name,
kind,
names: HashMap::new(),
type_vars: HashMap::new(),
appeared_type_names: HashSet::new(),
@ -368,6 +370,124 @@ impl CommentStorage {
}
}
/// Signature information of a function definition taken from a `.pyi` stub.
///
/// Filled in by `PyiTypeStorage::parse` from a `FunctionDef` statement.
#[derive(Debug, Clone)]
pub struct PyFuncTypeSpec {
/// Type parameters declared on the stub function (e.g. `def g[T: C](...)`).
type_params: Vec<py_ast::TypeParam>,
/// The stub function's argument list, including any per-argument annotations.
args: py_ast::Arguments,
/// The return annotation, or `None` when the stub function has none.
returns: Option<py_ast::Expr>,
}
/// A type specification recorded from a `.pyi` stub file.
#[derive(Debug, Clone)]
pub enum PyTypeSpec {
/// The annotation expression of an annotated assignment (`name: annotation`).
Var(py_ast::Expr),
/// The signature pieces of a function definition.
Func(PyFuncTypeSpec),
}
/// Type specifications collected from a `.pyi` stub file, keyed by name.
#[derive(Debug, Default)]
pub struct PyiTypeStorage {
/// Specs of names declared at the top level of the stub (variables and functions).
decls: HashMap<String, PyTypeSpec>,
/// Specs of class members: class name -> member name -> spec.
classes: HashMap<String, HashMap<String, PyTypeSpec>>,
}
/// Dumps every recorded declaration, one per line, using the `Debug` form of
/// each type spec. Top-level names come first, followed by one `class NAME:`
/// section per class. Entries appear in hash-map iteration order.
impl fmt::Display for PyiTypeStorage {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Top-level declarations.
        self.decls
            .iter()
            .try_for_each(|(name, t_spec)| writeln!(f, "{name}: {t_spec:?}"))?;
        // Per-class sections: a header line, then each member.
        self.classes.iter().try_for_each(|(class, methods)| {
            writeln!(f, "class {class}:")?;
            methods
                .iter()
                .try_for_each(|(name, t_spec)| writeln!(f, " {name}: {t_spec:?}"))
        })
    }
}
impl PyiTypeStorage {
    /// Creates an empty storage with no recorded declarations.
    pub fn new() -> Self {
        Self {
            decls: HashMap::new(),
            classes: HashMap::new(),
        }
    }

    /// Turns one stub statement into a `(name, spec)` entry.
    ///
    /// Only annotated assignments whose target is a plain name and function
    /// definitions produce an entry; every other statement yields `None`.
    /// Shared by the module-level and class-level passes of [`Self::parse`].
    fn spec_of(stmt: py_ast::Stmt) -> Option<(String, PyTypeSpec)> {
        match stmt {
            py_ast::Stmt::AnnAssign(assign) => {
                let py_ast::Expr::Name(name) = *assign.target else {
                    return None;
                };
                Some((name.id.to_string(), PyTypeSpec::Var(*assign.annotation)))
            }
            py_ast::Stmt::FunctionDef(def) => {
                let returns = def.returns.map(|anot| *anot);
                let spec = PyFuncTypeSpec {
                    type_params: def.type_params,
                    args: *def.args,
                    returns,
                };
                Some((def.name.to_string(), PyTypeSpec::Func(spec)))
            }
            _ => None,
        }
    }

    /// Reads the stub file at `filename` and records its declarations.
    ///
    /// This is best-effort: if the file cannot be read, parsed, or have its
    /// source locations resolved, the storage is left untouched and no error
    /// is reported.
    ///
    /// Top-level annotated assignments and function definitions are stored as
    /// module-level declarations. For each top-level class, the annotated
    /// assignments and function definitions directly in its body are stored
    /// under the class name. A later entry with the same name replaces an
    /// earlier one.
    pub fn parse(&mut self, filename: &str) {
        let Ok(code) = std::fs::read_to_string(filename) else {
            return;
        };
        let Ok(py_program) = rustpython_ast::ModModule::parse(&code, filename) else {
            return;
        };
        let mut locator = RandomLocator::new(&code);
        let Ok(py_program) = locator.fold(py_program) else {
            return;
        };
        for stmt in py_program.body {
            match stmt {
                py_ast::Stmt::ClassDef(class) => {
                    let members: HashMap<String, PyTypeSpec> =
                        class.body.into_iter().filter_map(Self::spec_of).collect();
                    self.classes.insert(class.name.to_string(), members);
                }
                other => {
                    if let Some((name, spec)) = Self::spec_of(other) {
                        self.decls.insert(name, spec);
                    }
                }
            }
        }
    }

    /// Looks up the spec recorded for the top-level name `name`.
    pub fn get_type(&self, name: &str) -> Option<&PyTypeSpec> {
        self.decls.get(name)
    }

    /// Looks up the spec recorded for member `name` of class `class`.
    pub fn get_class_member_type(&self, class: &str, name: &str) -> Option<&PyTypeSpec> {
        self.classes
            .get(class)
            .and_then(|methods| methods.get(name))
    }
}
/// AST must be converted in the following order:
///
/// Params -> Block -> Signature
@ -397,6 +517,7 @@ pub struct ASTConverter {
cfg: ErgConfig,
shadowing: ShadowingMode,
comments: CommentStorage,
pyi_types: PyiTypeStorage,
block_id_counter: usize,
block_ids: Vec<usize>,
contexts: Vec<LocalContext>,
@ -406,13 +527,16 @@ pub struct ASTConverter {
impl ASTConverter {
pub fn new(cfg: ErgConfig, shadowing: ShadowingMode, comments: CommentStorage) -> Self {
let mut pyi_types = PyiTypeStorage::new();
pyi_types.parse(&cfg.input.path().with_extension("pyi").to_string_lossy());
Self {
shadowing,
pyi_types,
cfg,
comments,
block_id_counter: 0,
block_ids: vec![0],
contexts: vec![LocalContext::new("<module>".into())],
contexts: vec![LocalContext::new("<module>".into(), BlockKind::Module)],
warns: CompileErrors::empty(),
errs: CompileErrors::empty(),
}
@ -461,8 +585,8 @@ impl ASTConverter {
.insert(name, info);
}
fn grow(&mut self, namespace: String) {
self.contexts.push(LocalContext::new(namespace));
fn grow(&mut self, namespace: String, kind: BlockKind) {
self.contexts.push(LocalContext::new(namespace, kind));
}
fn pop(&mut self) {
@ -487,6 +611,10 @@ impl ASTConverter {
&self.contexts.last().unwrap().name
}
/// Name of the context one level below the innermost one. When only a single
/// context is on the stack, that context's own name is returned.
fn parent_name(&self) -> &str {
    let parent_idx = self.contexts.len().saturating_sub(2);
    let parent = &self.contexts[parent_idx];
    &parent.name
}
fn cur_appeared_type_names(&self) -> &HashSet<String> {
&self.contexts.last().unwrap().appeared_type_names
}
@ -603,10 +731,32 @@ impl ASTConverter {
ParamPattern::VarName(ident.name)
}
/// Finds the stub spec for the scope currently being converted.
///
/// With exactly two contexts on the stack, the current scope's name is looked
/// up among the stub's top-level declarations. At any other depth it is looked
/// up as a member of the class named after the parent context.
fn get_cur_scope_t_spec(&self) -> Option<&PyTypeSpec> {
    if self.contexts.len() != 2 {
        let owner = self.parent_name();
        return self
            .pyi_types
            .get_class_member_type(owner, self.cur_name());
    }
    self.pyi_types.get_type(self.cur_name())
}
fn convert_nd_param(&mut self, param: Arg) -> NonDefaultParamSignature {
let pat = self.convert_param_pattern(param.arg.to_string(), param.location());
let t_spec = param
.annotation
.or_else(|| {
let PyTypeSpec::Func(func) = self.get_cur_scope_t_spec()? else {
return None;
};
func.args
.args
.iter()
.chain(&func.args.kwonlyargs)
.find(|arg| arg.def.arg == param.arg)
.and_then(|arg| arg.def.annotation.clone())
})
.map(|anot| {
(
self.convert_type_spec(*anot.clone()),
@ -1537,7 +1687,7 @@ impl ASTConverter {
Expr::BinOp(BinOp::new(op, lhs, rhs))
}
py_ast::Expr::Lambda(lambda) => {
self.grow("<lambda>".to_string());
self.grow("<lambda>".to_string(), BlockKind::Function);
let params = self.convert_params(*lambda.args);
let body = vec![self.convert_expr(*lambda.body)];
self.pop();
@ -2100,19 +2250,39 @@ impl ASTConverter {
column: loc.column.saturating_add(4),
};
let ident = self.convert_ident(name, func_name_loc);
self.grow(ident.inspect().to_string());
self.grow(ident.inspect().to_string(), BlockKind::Function);
let params = self.convert_params(params);
let return_t = returns.map(|ret| {
let t_spec = self.convert_type_spec(ret.clone());
let colon = Token::new(
TokenKind::Colon,
":",
t_spec.ln_begin().unwrap_or(0),
t_spec.col_begin().unwrap_or(0),
);
TypeSpecWithOp::new(colon, t_spec, self.convert_expr(ret))
});
let bounds = self.get_type_bounds(func_def.type_params);
let return_t = returns
.or_else(|| {
let PyTypeSpec::Func(func) = self.get_cur_scope_t_spec()? else {
return None;
};
func.returns.clone()
})
.map(|ret| {
let t_spec = self.convert_type_spec(ret.clone());
let colon = Token::new(
TokenKind::Colon,
":",
t_spec.ln_begin().unwrap_or(0),
t_spec.col_begin().unwrap_or(0),
);
TypeSpecWithOp::new(colon, t_spec, self.convert_expr(ret))
});
let type_params = if !func_def.type_params.is_empty() {
func_def.type_params
} else {
self.get_cur_scope_t_spec()
.and_then(|ty| {
if let PyTypeSpec::Func(func) = ty {
(!func.type_params.is_empty()).then(|| func.type_params.clone())
} else {
None
}
})
.unwrap_or(func_def.type_params)
};
let bounds = self.get_type_bounds(type_params);
let sig = Signature::Subr(SubrSignature::new(decos, ident, bounds, params, return_t));
let block = self.convert_block(func_def.body, BlockKind::Function);
let body = DefBody::new(EQUAL, block, DefId(0));
@ -2157,7 +2327,7 @@ impl ASTConverter {
};
let ident = self.convert_ident(name, class_name_loc);
let sig = Signature::Var(VarSignature::new(VarPattern::Ident(ident.clone()), None));
self.grow(ident.inspect().to_string());
self.grow(ident.inspect().to_string(), BlockKind::Class);
let (base_type, methods) = self.extract_method_list(ident, class_def.body, inherit);
let classdef = if inherit {
// TODO: multiple inheritance
@ -2196,6 +2366,47 @@ impl ASTConverter {
Expr::ClassDef(classdef)
}
/// Finds the stub spec for `name` as seen from the current scope.
///
/// With a single context on the stack, `name` is looked up among the stub's
/// top-level declarations. Otherwise it is looked up as a member of the class
/// named after the current context.
fn get_t_spec(&self, name: &str) -> Option<&PyTypeSpec> {
    match self.contexts.len() {
        1 => self.pyi_types.get_type(name),
        _ => self
            .pyi_types
            .get_class_member_type(self.cur_name(), name),
    }
}
/// Determines the type annotation to attach to an assignment to `name`.
///
/// The comment storage is queried first, using the assigned expression's end
/// line minus one. If nothing is found there, the stub spec for `name` is
/// used, but only when it is a variable spec. Returns `None` when neither
/// source yields an annotation.
fn get_assign_t_spec(
    &mut self,
    name: &py_ast::ExprName,
    expr: &Expr,
) -> Option<TypeSpecWithOp> {
    let from_comment = expr
        .ln_end()
        .and_then(|ln| ln.checked_sub(1))
        .and_then(|line| self.comments.get_type(line))
        .cloned();
    let mut annotation = match from_comment {
        Some(anot) => anot,
        None => match self.get_t_spec(&name.id)? {
            PyTypeSpec::Var(anot) => anot.clone(),
            PyTypeSpec::Func(_) => return None,
        },
    };
    // The range of the annotation is not correct, so point it at the assigned name
    match &mut annotation {
        py_ast::Expr::Subscript(sub) => {
            sub.range = name.range;
            *sub.slice.range_mut() = name.range;
            *sub.value.range_mut() = name.range;
        }
        other => {
            *other.range_mut() = name.range;
        }
    }
    let t_as_expr = self.convert_expr(annotation.clone());
    let t_spec = self.convert_type_spec(annotation);
    Some(TypeSpecWithOp::new(AS, t_spec, t_as_expr))
}
fn convert_statement(&mut self, stmt: Stmt, dont_call_return: bool) -> Expr {
match stmt {
py_ast::Stmt::Expr(stmt) => self.convert_expr(*stmt.value),
@ -2289,25 +2500,7 @@ impl ASTConverter {
}
let can_shadow = self.register_name_info(&name.id, NameKind::Variable);
let ident = self.convert_ident(name.id.to_string(), name.location());
let t_spec = expr
.ln_end()
.and_then(|i| {
i.checked_sub(1)
.and_then(|line| self.comments.get_type(line))
})
.cloned()
.map(|mut expr| {
// The range of `expr` is not correct, so we need to change it
if let py_ast::Expr::Subscript(sub) = &mut expr {
sub.range = name.range;
*sub.slice.range_mut() = name.range;
*sub.value.range_mut() = name.range;
} else {
*expr.range_mut() = name.range;
}
let t_as_expr = self.convert_expr(expr.clone());
TypeSpecWithOp::new(AS, self.convert_type_spec(expr), t_as_expr)
});
let t_spec = self.get_assign_t_spec(&name, &expr);
if can_shadow.is_yes() {
let block = Block::new(vec![expr]);
let body = DefBody::new(EQUAL, block, DefId(0));

View file

@ -9,3 +9,7 @@ for i in range(3):
# ERR
for i in "abcd":
print(l[i])
lis = "a,b,c".split(",") if True is not None else []
if "a" in lis:
lis.remove("a") # OK

25
tests/pyi.py Normal file
View file

@ -0,0 +1,25 @@
x = 1
x + "a" # OK, because x: Any
def f(x, y):
return x + y
class C:
y = 1
def __init__(self, x):
self.x = x
def f(self, x):
return self.x + x
print(f(1, 2)) # OK
print(f("a", "b")) # ERR*2
c = C(1)
print(c.f(2)) # OK
print(c.f("a")) # ERR
_ = C("a") # ERR
def g(x):
pass
print(g(c)) # OK
print(g(1)) # ERR

13
tests/pyi.pyi Normal file
View file

@ -0,0 +1,13 @@
import typing
x: typing.Any
def f(x: int, y: int) -> int: ...
class C:
x: int
y: int
def __init__(self, x: int): ...
def f(self, x: int) -> int: ...
def g[T: C](x: T) -> T: ...

View file

@ -112,6 +112,11 @@ fn exec_projection() -> Result<(), String> {
expect("tests/projection.py", 0, 5)
}
// Checks the `tests/pyi.py` fixture, which is accompanied by the stub file
// `tests/pyi.pyi`.
// NOTE(review): the two numbers are presumably the expected warning and error
// counts (0 and 5) — confirm against the `expect` helper.
#[test]
fn exec_pyi() -> Result<(), String> {
expect("tests/pyi.py", 0, 5)
}
#[test]
fn exec_list() -> Result<(), String> {
expect("tests/list.py", 0, 2)