Rewrite make_source_pathname using Unicode API.

This commit is contained in:
Martin v. Löwis 2011-10-23 18:05:06 +02:00
parent 30260a7fe3
commit 8a0ef78e8c

View file

@@ -906,11 +906,11 @@ rightmost_sep(Py_UCS4 *s)
/* Like rightmost_sep, but operate on unicode objects. */ /* Like rightmost_sep, but operate on unicode objects. */
static Py_ssize_t static Py_ssize_t
rightmost_sep_obj(PyObject* o) rightmost_sep_obj(PyObject* o, Py_ssize_t start, Py_ssize_t end)
{ {
Py_ssize_t found, i; Py_ssize_t found, i;
Py_UCS4 c; Py_UCS4 c;
for (found = -1, i = 0; i < PyUnicode_GET_LENGTH(o); i++) { for (found = -1, i = start; i < end; i++) {
c = PyUnicode_READ_CHAR(o, i); c = PyUnicode_READ_CHAR(o, i);
if (c == SEP if (c == SEP
#ifdef ALTSEP #ifdef ALTSEP
@@ -947,7 +947,7 @@ make_compiled_pathname(PyObject *pathstr, int debug)
len = PyUnicode_GET_LENGTH(pathstr); len = PyUnicode_GET_LENGTH(pathstr);
/* If there is no separator, this returns -1, so /* If there is no separator, this returns -1, so
lastsep will be 0. */ lastsep will be 0. */
fname = rightmost_sep_obj(pathstr) + 1; fname = rightmost_sep_obj(pathstr, 0, len) + 1;
ext = fname - 1; ext = fname - 1;
for(i = fname; i < len; i++) for(i = fname; i < len; i++)
if (PyUnicode_READ_CHAR(pathstr, i) == '.') if (PyUnicode_READ_CHAR(pathstr, i) == '.')
@@ -992,63 +992,66 @@ make_compiled_pathname(PyObject *pathstr, int debug)
(...)/__pycache__/foo.<tag>.pyc -> (...)/foo.py */ (...)/__pycache__/foo.<tag>.pyc -> (...)/foo.py */
static PyObject* static PyObject*
make_source_pathname(PyObject *pathobj) make_source_pathname(PyObject *path)
{ {
Py_UCS4 buf[MAXPATHLEN]; Py_ssize_t left, right, dot0, dot1, len;
Py_UCS4 *pathname; Py_ssize_t i, j;
Py_UCS4 *left, *right, *dot0, *dot1, sep; PyObject *result;
size_t i, j; int kind;
void *data;
if (PyUnicode_GET_LENGTH(pathobj) > MAXPATHLEN) len = PyUnicode_GET_LENGTH(path);
return NULL; if (len > MAXPATHLEN)
pathname = PyUnicode_AsUCS4Copy(pathobj);
if (!pathname)
return NULL; return NULL;
/* Look back two slashes from the end. In between these two slashes /* Look back two slashes from the end. In between these two slashes
must be the string __pycache__ or this is not a PEP 3147 style must be the string __pycache__ or this is not a PEP 3147 style
path. It's possible for there to be only one slash. path. It's possible for there to be only one slash.
*/ */
right = rightmost_sep(pathname); right = rightmost_sep_obj(path, 0, len);
if (right == NULL) if (right == -1)
return NULL; return NULL;
sep = *right; left = rightmost_sep_obj(path, 0, right);
*right = '\0'; if (left == -1)
left = rightmost_sep(pathname); left = 0;
*right = sep;
if (left == NULL)
left = pathname;
else else
left++; left++;
if (right-left != Py_UCS4_strlen(CACHEDIR_UNICODE) || if (right-left != sizeof(CACHEDIR)-1)
Py_UCS4_strncmp(left, CACHEDIR_UNICODE, right-left) != 0) return NULL;
goto error; for (i = 0; i < sizeof(CACHEDIR)-1; i++)
if (PyUnicode_READ_CHAR(path, left+i) != CACHEDIR[i])
return NULL;
/* Now verify that the path component to the right of the last slash /* Now verify that the path component to the right of the last slash
has two dots in it. has two dots in it.
*/ */
if ((dot0 = Py_UCS4_strchr(right + 1, '.')) == NULL) dot0 = PyUnicode_FindChar(path, '.', right+1, len, 1);
goto error; if (dot0 < 0)
if ((dot1 = Py_UCS4_strchr(dot0 + 1, '.')) == NULL) return NULL;
goto error; dot1 = PyUnicode_FindChar(path, '.', dot0+1, len, 1);
if (dot1 < 0)
return NULL;
/* Too many dots? */ /* Too many dots? */
if (Py_UCS4_strchr(dot1 + 1, '.') != NULL) if (PyUnicode_FindChar(path, '.', dot1+1, len, 1) != -1)
goto error; return NULL;
/* This is a PEP 3147 path. Start by copying everything from the /* This is a PEP 3147 path. Start by copying everything from the
start of pathname up to and including the leftmost slash. Then start of pathname up to and including the leftmost slash. Then
copy the file's basename, removing the magic tag and adding a .py copy the file's basename, removing the magic tag and adding a .py
suffix. suffix.
*/ */
Py_UCS4_strncpy(buf, pathname, (i=left-pathname)); result = PyUnicode_New(left + (dot0-right) + 2,
Py_UCS4_strncpy(buf+i, right+1, (j=dot0-right)); PyUnicode_MAX_CHAR_VALUE(path));
buf[i+j] = 'p'; if (!result)
buf[i+j+1] = 'y'; return NULL;
PyMem_Free(pathname); kind = PyUnicode_KIND(result);
return PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, buf, i+j+2); data = PyUnicode_DATA(result);
error: PyUnicode_CopyCharacters(result, 0, path, 0, (i = left));
PyMem_Free(pathname); PyUnicode_CopyCharacters(result, left, path, right+1,
return NULL; (j = dot0-right));
PyUnicode_WRITE(kind, data, i+j, 'p');
PyUnicode_WRITE(kind, data, i+j+1, 'y');
return result;
} }
/* Given a pathname for a Python source file, its time of last /* Given a pathname for a Python source file, its time of last