gh-81283: compiler: remove indent from docstring (#106411)

Co-authored-by: Éric <merwok@netwok.org>
This commit is contained in:
Inada Naoki 2023-07-15 19:33:32 +09:00 committed by GitHub
parent bbf6297985
commit 2566b74b26
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
9 changed files with 246 additions and 30 deletions

View file

@ -1704,10 +1704,16 @@ compiler_body(struct compiler *c, location loc, asdl_stmt_seq *stmts)
if (c->c_optimize < 2) {
docstring = _PyAST_GetDocString(stmts);
if (docstring) {
PyObject *cleandoc = _PyCompile_CleanDoc(docstring);
if (cleandoc == NULL) {
return ERROR;
}
i = 1;
st = (stmt_ty)asdl_seq_GET(stmts, 0);
assert(st->kind == Expr_kind);
VISIT(c, expr, st->v.Expr.value);
location loc = LOC(st->v.Expr.value);
ADDOP_LOAD_CONST(c, loc, cleandoc);
Py_DECREF(cleandoc);
RETURN_IF_ERROR(compiler_nameop(c, NO_LOCATION, &_Py_ID(__doc__), Store));
}
}
@ -2252,11 +2258,19 @@ compiler_function_body(struct compiler *c, stmt_ty s, int is_async, Py_ssize_t f
/* if not -OO mode, add docstring */
if (c->c_optimize < 2) {
docstring = _PyAST_GetDocString(body);
if (docstring) {
docstring = _PyCompile_CleanDoc(docstring);
if (docstring == NULL) {
compiler_exit_scope(c);
return ERROR;
}
}
}
if (compiler_add_const(c->c_const_cache, c->u, docstring ? docstring : Py_None) < 0) {
compiler_exit_scope(c);
return ERROR;
}
Py_XDECREF(docstring);
c->u->u_metadata.u_argcount = asdl_seq_LEN(args->args);
c->u->u_metadata.u_posonlyargcount = asdl_seq_LEN(args->posonlyargs);
@ -7967,6 +7981,89 @@ error:
return NULL;
}
// C implementation of inspect.cleandoc().
//
// Difference from inspect.cleandoc():
// - Leading and trailing blank lines are NOT removed, so that line numbers
//   inside the docstring stay unchanged.
//
// The function calls doc.expandtabs(), strips the leading spaces of the
// first line, and removes the common leading-space margin (the minimum
// indentation of the non-blank lines after the first one) from every
// subsequent line.
//
// `doc` is only borrowed: the reference passed in is never released here.
// Returns a new reference to the cleaned string, or NULL with an exception
// set on failure.
PyObject *
_PyCompile_CleanDoc(PyObject *doc)
{
    // From here on `doc` is the expandtabs() result, which this function owns.
    doc = PyObject_CallMethod(doc, "expandtabs", NULL);
    if (doc == NULL) {
        return NULL;
    }

    Py_ssize_t doc_size;
    const char *doc_utf8 = PyUnicode_AsUTF8AndSize(doc, &doc_size);
    if (doc_utf8 == NULL) {
        Py_DECREF(doc);
        return NULL;
    }
    const char *p = doc_utf8;
    const char *pend = p + doc_size;

    // First pass: find minimum indentation of any non-blank lines
    // after first line.
    while (p < pend && *p++ != '\n') {
        // skip the first line
    }

    Py_ssize_t margin = PY_SSIZE_T_MAX;
    while (p < pend) {
        const char *s = p;
        while (p < pend && *p == ' ') {
            p++;
        }
        // Lines made only of spaces do not contribute to the margin.
        if (p < pend && *p != '\n') {
            margin = Py_MIN(margin, p - s);
        }
        while (p < pend && *p++ != '\n') {
            // skip the rest of the line
        }
    }
    if (margin == PY_SSIZE_T_MAX) {
        // No non-blank line after the first one.
        margin = 0;
    }

    // Second pass: write cleandoc into buff.

    // copy first line without leading spaces.
    p = doc_utf8;
    while (p < pend && *p == ' ') {
        p++;
    }
    if (p == doc_utf8 && margin == 0) {
        // doc is already clean: hand our reference to the caller.
        return doc;
    }

    // The output is never longer than the input, so doc_size bytes suffice.
    char *buff = PyMem_Malloc(doc_size);
    if (buff == NULL) {
        Py_DECREF(doc);
        PyErr_NoMemory();
        return NULL;
    }

    char *w = buff;
    while (p < pend) {
        int ch = *w++ = *p++;
        if (ch == '\n') {
            break;
        }
    }

    // copy subsequent lines without margin.
    while (p < pend) {
        for (Py_ssize_t i = 0; i < margin; i++, p++) {
            if (*p != ' ') {
                // Only a blank line may be shorter than the margin; the
                // '\0' case is the terminator that follows the UTF-8 data.
                assert(*p == '\n' || *p == '\0');
                break;
            }
        }
        while (p < pend) {
            int ch = *w++ = *p++;
            if (ch == '\n') {
                break;
            }
        }
    }

    // doc_utf8 points into `doc`; it is not used past this point.
    Py_DECREF(doc);
    PyObject *res = PyUnicode_FromStringAndSize(buff, w - buff);
    // The new string owns its own copy, so the scratch buffer must be
    // released on both the success and the failure path.
    PyMem_Free(buff);
    return res;
}
PyObject *
_PyCompile_CodeGen(PyObject *ast, PyObject *filename, PyCompilerFlags *pflags,
int optimize, int compile_mode)