Implement PEP 393.

This commit is contained in:
Martin v. Löwis 2011-09-28 07:41:54 +02:00
parent 48d49497c5
commit d63a3b8beb
102 changed files with 8153 additions and 5431 deletions

View file

@ -498,17 +498,19 @@ setup_context(Py_ssize_t stack_level, PyObject **filename, int *lineno,
*filename = PyDict_GetItemString(globals, "__file__");
if (*filename != NULL && PyUnicode_Check(*filename)) {
Py_ssize_t len = PyUnicode_GetSize(*filename);
Py_UNICODE *unicode = PyUnicode_AS_UNICODE(*filename);
int kind = PyUnicode_KIND(*filename);
void *data = PyUnicode_DATA(*filename);
/* if filename.lower().endswith((".pyc", ".pyo")): */
if (len >= 4 &&
unicode[len-4] == '.' &&
Py_UNICODE_TOLOWER(unicode[len-3]) == 'p' &&
Py_UNICODE_TOLOWER(unicode[len-2]) == 'y' &&
(Py_UNICODE_TOLOWER(unicode[len-1]) == 'c' ||
Py_UNICODE_TOLOWER(unicode[len-1]) == 'o'))
PyUnicode_READ(kind, data, len-4) == '.' &&
Py_UNICODE_TOLOWER(PyUnicode_READ(kind, data, len-3)) == 'p' &&
Py_UNICODE_TOLOWER(PyUnicode_READ(kind, data, len-2)) == 'y' &&
(Py_UNICODE_TOLOWER(PyUnicode_READ(kind, data, len-1)) == 'c' ||
Py_UNICODE_TOLOWER(PyUnicode_READ(kind, data, len-1)) == 'o'))
{
*filename = PyUnicode_FromUnicode(unicode, len-1);
*filename = PyUnicode_Substring(*filename, 0,
PyUnicode_GET_LENGTH(*filename)-1);
if (*filename == NULL)
goto handle_error;
}

View file

@ -528,26 +528,21 @@ static identifier
new_identifier(const char* n, PyArena *arena)
{
PyObject* id = PyUnicode_DecodeUTF8(n, strlen(n), NULL);
Py_UNICODE *u;
if (!id)
if (!id || PyUnicode_READY(id) == -1)
return NULL;
u = PyUnicode_AS_UNICODE(id);
/* Check whether there are non-ASCII characters in the
identifier; if so, normalize to NFKC. */
for (; *u; u++) {
if (*u >= 128) {
PyObject *m = PyImport_ImportModuleNoBlock("unicodedata");
PyObject *id2;
if (!m)
return NULL;
id2 = PyObject_CallMethod(m, "normalize", "sO", "NFKC", id);
Py_DECREF(m);
if (!id2)
return NULL;
Py_DECREF(id);
id = id2;
break;
}
if (PyUnicode_MAX_CHAR_VALUE((PyUnicodeObject *)id) >= 128) {
PyObject *m = PyImport_ImportModuleNoBlock("unicodedata");
PyObject *id2;
if (!m)
return NULL;
id2 = PyObject_CallMethod(m, "normalize", "sO", "NFKC", id);
Py_DECREF(m);
if (!id2)
return NULL;
Py_DECREF(id);
id = id2;
}
PyUnicode_InternInPlace(&id);
PyArena_AddPyObject(arena, id);
@ -3660,20 +3655,14 @@ parsenumber(struct compiling *c, const char *s)
}
static PyObject *
decode_utf8(struct compiling *c, const char **sPtr, const char *end, char* encoding)
decode_utf8(struct compiling *c, const char **sPtr, const char *end)
{
PyObject *u, *v;
char *s, *t;
t = s = (char *)*sPtr;
/* while (s < end && *s != '\\') s++; */ /* inefficient for u".." */
while (s < end && (*s & 0x80)) s++;
*sPtr = s;
u = PyUnicode_DecodeUTF8(t, s - t, NULL);
if (u == NULL)
return NULL;
v = PyUnicode_AsEncodedString(u, encoding, NULL);
Py_DECREF(u);
return v;
return PyUnicode_DecodeUTF8(t, s - t, NULL);
}
static PyObject *
@ -3707,22 +3696,20 @@ decode_unicode(struct compiling *c, const char *s, size_t len, int rawmode, cons
}
if (*s & 0x80) { /* XXX inefficient */
PyObject *w;
char *r;
Py_ssize_t rn, i;
w = decode_utf8(c, &s, end, "utf-32-be");
int kind;
void *data;
Py_ssize_t len, i;
w = decode_utf8(c, &s, end);
if (w == NULL) {
Py_DECREF(u);
return NULL;
}
r = PyBytes_AS_STRING(w);
rn = Py_SIZE(w);
assert(rn % 4 == 0);
for (i = 0; i < rn; i += 4) {
sprintf(p, "\\U%02x%02x%02x%02x",
r[i + 0] & 0xFF,
r[i + 1] & 0xFF,
r[i + 2] & 0xFF,
r[i + 3] & 0xFF);
kind = PyUnicode_KIND(w);
data = PyUnicode_DATA(w);
len = PyUnicode_GET_LENGTH(w);
for (i = 0; i < len; i++) {
Py_UCS4 chr = PyUnicode_READ(kind, data, i);
sprintf(p, "\\U%08x", chr);
p += 10;
}
/* Should be impossible to overflow */

View file

@ -508,8 +508,8 @@ source_as_string(PyObject *cmd, char *funcname, char *what, PyCompilerFlags *cf)
if (PyUnicode_Check(cmd)) {
cf->cf_flags |= PyCF_IGNORE_COOKIE;
cmd = _PyUnicode_AsDefaultEncodedString(cmd);
if (cmd == NULL)
str = PyUnicode_AsUTF8AndSize(cmd, &size);
if (str == NULL)
return NULL;
}
else if (!PyObject_CheckReadBuffer(cmd)) {
@ -518,9 +518,10 @@ source_as_string(PyObject *cmd, char *funcname, char *what, PyCompilerFlags *cf)
funcname, what);
return NULL;
}
if (PyObject_AsReadBuffer(cmd, (const void **)&str, &size) < 0) {
else if (PyObject_AsReadBuffer(cmd, (const void **)&str, &size) < 0) {
return NULL;
}
if (strlen(str) != size) {
PyErr_SetString(PyExc_TypeError,
"source code string cannot contain null bytes");
@ -1395,24 +1396,13 @@ builtin_ord(PyObject *self, PyObject* obj)
}
}
else if (PyUnicode_Check(obj)) {
size = PyUnicode_GET_SIZE(obj);
if (PyUnicode_READY(obj) == -1)
return NULL;
size = PyUnicode_GET_LENGTH(obj);
if (size == 1) {
ord = (long)*PyUnicode_AS_UNICODE(obj);
ord = (long)PyUnicode_READ_CHAR(obj, 0);
return PyLong_FromLong(ord);
}
#ifndef Py_UNICODE_WIDE
if (size == 2) {
/* Decode a valid surrogate pair */
int c0 = PyUnicode_AS_UNICODE(obj)[0];
int c1 = PyUnicode_AS_UNICODE(obj)[1];
if (0xD800 <= c0 && c0 <= 0xDBFF &&
0xDC00 <= c1 && c1 <= 0xDFFF) {
ord = ((((c0 & 0x03FF) << 10) | (c1 & 0x03FF)) +
0x00010000);
return PyLong_FromLong(ord);
}
}
#endif
}
else if (PyByteArray_Check(obj)) {
/* XXX Hopefully this is temporary */

View file

@ -2054,7 +2054,7 @@ PyEval_EvalFrameEx(PyFrameObject *f, int throwflag)
/* Inline the PyDict_GetItem() calls.
WARNING: this is an extreme speed hack.
Do not try this at home. */
Py_hash_t hash = ((PyUnicodeObject *)w)->hash;
Py_hash_t hash = ((PyASCIIObject *)w)->hash;
if (hash != -1) {
PyDictObject *d;
PyDictEntry *e;
@ -4456,7 +4456,8 @@ import_all_from(PyObject *locals, PyObject *v)
}
if (skip_leading_underscores &&
PyUnicode_Check(name) &&
PyUnicode_AS_UNICODE(name)[0] == '_')
PyUnicode_READY(name) != -1 &&
PyUnicode_READ_CHAR(name, 0) == '_')
{
Py_DECREF(name);
continue;
@ -4520,6 +4521,14 @@ unicode_concatenate(PyObject *v, PyObject *w,
{
/* This function implements 'variable += expr' when both arguments
are (Unicode) strings. */
w = PyUnicode_Concat(v, w);
Py_DECREF(v);
return w;
/* XXX: This optimization is currently disabled as unicode objects in the
new flexible representation are not in-place resizable anymore. */
#if 0
Py_ssize_t v_len = PyUnicode_GET_SIZE(v);
Py_ssize_t w_len = PyUnicode_GET_SIZE(w);
Py_ssize_t new_len = v_len + w_len;
@ -4570,7 +4579,8 @@ unicode_concatenate(PyObject *v, PyObject *w,
}
}
if (Py_REFCNT(v) == 1 && !PyUnicode_CHECK_INTERNED(v)) {
if (Py_REFCNT(v) == 1 && !PyUnicode_CHECK_INTERNED(v) &&
!PyUnicode_IS_COMPACT((PyUnicodeObject *)v)) {
/* Now we own the last reference to 'v', so we can resize it
* in-place.
*/
@ -4594,6 +4604,7 @@ unicode_concatenate(PyObject *v, PyObject *w,
Py_DECREF(v);
return w;
}
#endif
}
#ifdef DYNAMIC_EXECUTION_PROFILE

View file

@ -513,27 +513,25 @@ PyObject *PyCodec_IgnoreErrors(PyObject *exc)
PyObject *PyCodec_ReplaceErrors(PyObject *exc)
{
PyObject *restuple;
Py_ssize_t start;
Py_ssize_t end;
Py_ssize_t i;
Py_ssize_t start, end, i, len;
if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) {
PyObject *res;
Py_UNICODE *p;
int kind;
void *data;
if (PyUnicodeEncodeError_GetStart(exc, &start))
return NULL;
if (PyUnicodeEncodeError_GetEnd(exc, &end))
return NULL;
res = PyUnicode_FromUnicode(NULL, end-start);
len = end - start;
res = PyUnicode_New(len, '?');
if (res == NULL)
return NULL;
for (p = PyUnicode_AS_UNICODE(res), i = start;
i<end; ++p, ++i)
*p = '?';
restuple = Py_BuildValue("(On)", res, end);
Py_DECREF(res);
return restuple;
kind = PyUnicode_KIND(res);
data = PyUnicode_DATA(res);
for (i = 0; i < len; ++i)
PyUnicode_WRITE(kind, data, i, '?');
return Py_BuildValue("(Nn)", res, end);
}
else if (PyObject_IsInstance(exc, PyExc_UnicodeDecodeError)) {
Py_UNICODE res = Py_UNICODE_REPLACEMENT_CHARACTER;
@ -543,20 +541,21 @@ PyObject *PyCodec_ReplaceErrors(PyObject *exc)
}
else if (PyObject_IsInstance(exc, PyExc_UnicodeTranslateError)) {
PyObject *res;
Py_UNICODE *p;
int kind;
void *data;
if (PyUnicodeTranslateError_GetStart(exc, &start))
return NULL;
if (PyUnicodeTranslateError_GetEnd(exc, &end))
return NULL;
res = PyUnicode_FromUnicode(NULL, end-start);
len = end - start;
res = PyUnicode_New(len, Py_UNICODE_REPLACEMENT_CHARACTER);
if (res == NULL)
return NULL;
for (p = PyUnicode_AS_UNICODE(res), i = start;
i<end; ++p, ++i)
*p = Py_UNICODE_REPLACEMENT_CHARACTER;
restuple = Py_BuildValue("(On)", res, end);
Py_DECREF(res);
return restuple;
kind = PyUnicode_KIND(res);
data = PyUnicode_DATA(res);
for (i=0; i < len; i++)
PyUnicode_WRITE(kind, data, i, Py_UNICODE_REPLACEMENT_CHARACTER);
return Py_BuildValue("(Nn)", res, end);
}
else {
wrong_exception_type(exc);
@ -671,10 +670,7 @@ PyObject *PyCodec_XMLCharRefReplaceErrors(PyObject *exc)
}
}
static Py_UNICODE hexdigits[] = {
'0', '1', '2', '3', '4', '5', '6', '7',
'8', '9', 'a', 'b', 'c', 'd', 'e', 'f'
};
static const char *hexdigits = "0123456789abcdef";
PyObject *PyCodec_BackslashReplaceErrors(PyObject *exc)
{

View file

@ -197,16 +197,17 @@ _Py_Mangle(PyObject *privateobj, PyObject *ident)
{
/* Name mangling: __private becomes _classname__private.
This is independent from how the name is used. */
const Py_UNICODE *p, *name = PyUnicode_AS_UNICODE(ident);
Py_UNICODE *buffer;
size_t nlen, plen;
PyObject *result;
size_t nlen, plen, ipriv;
Py_UCS4 maxchar;
if (privateobj == NULL || !PyUnicode_Check(privateobj) ||
name == NULL || name[0] != '_' || name[1] != '_') {
PyUnicode_READ_CHAR(ident, 0) != '_' ||
PyUnicode_READ_CHAR(ident, 1) != '_') {
Py_INCREF(ident);
return ident;
}
p = PyUnicode_AS_UNICODE(privateobj);
nlen = Py_UNICODE_strlen(name);
nlen = PyUnicode_GET_LENGTH(ident);
plen = PyUnicode_GET_LENGTH(privateobj);
/* Don't mangle __id__ or names with dots.
The only time a name with a dot can occur is when
@ -216,32 +217,37 @@ _Py_Mangle(PyObject *privateobj, PyObject *ident)
TODO(jhylton): Decide whether we want to support
mangling of the module name, e.g. __M.X.
*/
if ((name[nlen-1] == '_' && name[nlen-2] == '_')
|| Py_UNICODE_strchr(name, '.')) {
if ((PyUnicode_READ_CHAR(ident, nlen-1) == '_' &&
PyUnicode_READ_CHAR(ident, nlen-2) == '_') ||
PyUnicode_FindChar(ident, '.', 0, nlen, 1) != -1) {
Py_INCREF(ident);
return ident; /* Don't mangle __whatever__ */
}
/* Strip leading underscores from class name */
while (*p == '_')
p++;
if (*p == 0) {
ipriv = 0;
while (PyUnicode_READ_CHAR(privateobj, ipriv) == '_')
ipriv++;
if (ipriv == plen) {
Py_INCREF(ident);
return ident; /* Don't mangle if class is just underscores */
}
plen = Py_UNICODE_strlen(p);
plen -= ipriv;
assert(1 <= PY_SSIZE_T_MAX - nlen);
assert(1 + nlen <= PY_SSIZE_T_MAX - plen);
ident = PyUnicode_FromStringAndSize(NULL, 1 + nlen + plen);
if (!ident)
maxchar = PyUnicode_MAX_CHAR_VALUE(ident);
if (PyUnicode_MAX_CHAR_VALUE(privateobj) > maxchar)
maxchar = PyUnicode_MAX_CHAR_VALUE(privateobj);
result = PyUnicode_New(1 + nlen + plen, maxchar);
if (!result)
return 0;
/* ident = "_" + p[:plen] + name # i.e. 1+plen+nlen bytes */
buffer = PyUnicode_AS_UNICODE(ident);
buffer[0] = '_';
Py_UNICODE_strncpy(buffer+1, p, plen);
Py_UNICODE_strcpy(buffer+1+plen, name);
return ident;
/* ident = "_" + priv[ipriv:] + ident # i.e. 1+plen+nlen characters */
PyUnicode_WRITE(PyUnicode_KIND(result), PyUnicode_DATA(result), 0, '_');
PyUnicode_CopyCharacters(result, 1, privateobj, ipriv, plen);
PyUnicode_CopyCharacters(result, plen+1, ident, 0, nlen);
return result;
}
static int
@ -2085,22 +2091,27 @@ compiler_import_as(struct compiler *c, identifier name, identifier asname)
If there is a dot in name, we need to split it and emit a
LOAD_ATTR for each name.
*/
const Py_UNICODE *src = PyUnicode_AS_UNICODE(name);
const Py_UNICODE *dot = Py_UNICODE_strchr(src, '.');
if (dot) {
Py_ssize_t dot = PyUnicode_FindChar(name, '.', 0,
PyUnicode_GET_LENGTH(name), 1);
if (dot == -2)
return -1;
if (dot != -1) {
/* Consume the base module name to get the first attribute */
src = dot + 1;
while (dot) {
/* NB src is only defined when dot != NULL */
Py_ssize_t pos = dot + 1;
while (dot != -1) {
PyObject *attr;
dot = Py_UNICODE_strchr(src, '.');
attr = PyUnicode_FromUnicode(src,
dot ? dot - src : Py_UNICODE_strlen(src));
dot = PyUnicode_FindChar(name, '.', pos,
PyUnicode_GET_LENGTH(name), 1);
if (dot == -2)
return -1;
attr = PyUnicode_Substring(name, pos,
(dot != -1) ? dot :
PyUnicode_GET_LENGTH(name));
if (!attr)
return -1;
ADDOP_O(c, LOAD_ATTR, attr, names);
Py_DECREF(attr);
src = dot + 1;
pos = dot + 1;
}
}
return compiler_nameop(c, asname, Store);
@ -2139,13 +2150,12 @@ compiler_import(struct compiler *c, stmt_ty s)
}
else {
identifier tmp = alias->name;
const Py_UNICODE *base = PyUnicode_AS_UNICODE(alias->name);
Py_UNICODE *dot = Py_UNICODE_strchr(base, '.');
if (dot)
tmp = PyUnicode_FromUnicode(base,
dot - base);
Py_ssize_t dot = PyUnicode_FindChar(
alias->name, '.', 0, PyUnicode_GET_LENGTH(alias->name), 1);
if (dot != -1)
tmp = PyUnicode_Substring(alias->name, 0, dot);
r = compiler_nameop(c, tmp, Store);
if (dot) {
if (dot != -1) {
Py_DECREF(tmp);
}
if (!r)
@ -2208,7 +2218,7 @@ compiler_from_import(struct compiler *c, stmt_ty s)
alias_ty alias = (alias_ty)asdl_seq_GET(s->v.ImportFrom.names, i);
identifier store_name;
if (i == 0 && *PyUnicode_AS_UNICODE(alias->name) == '*') {
if (i == 0 && PyUnicode_READ_CHAR(alias->name, 0) == '*') {
assert(n == 1);
ADDOP(c, IMPORT_STAR);
return 1;
@ -2522,7 +2532,7 @@ compiler_nameop(struct compiler *c, identifier name, expr_context_ty ctx)
}
/* XXX Leave assert here, but handle __doc__ and the like better */
assert(scope || PyUnicode_AS_UNICODE(name)[0] == '_');
assert(scope || PyUnicode_READ_CHAR(name, 0) == '_');
switch (optype) {
case OP_DEREF:
@ -3045,8 +3055,7 @@ expr_constant(struct compiler *c, expr_ty e)
return PyObject_IsTrue(e->v.Str.s);
case Name_kind:
/* optimize away names that can't be reassigned */
id = PyBytes_AS_STRING(
_PyUnicode_AsDefaultEncodedString(e->v.Name.id));
id = PyUnicode_AsUTF8(e->v.Name.id);
if (strcmp(id, "True") == 0) return 1;
if (strcmp(id, "False") == 0) return 0;
if (strcmp(id, "None") == 0) return 0;

View file

@ -395,7 +395,7 @@ PyErr_SetFromErrnoWithFilenameObject(PyObject *exc, PyObject *filenameObject)
/* remove trailing cr/lf and dots */
while (len > 0 && (s_buf[len-1] <= L' ' || s_buf[len-1] == L'.'))
s_buf[--len] = L'\0';
message = PyUnicode_FromUnicode(s_buf, len);
message = PyUnicode_FromWideChar(s_buf, len);
}
}
}
@ -487,7 +487,7 @@ PyObject *PyErr_SetExcFromWindowsErrWithFilenameObject(
/* remove trailing cr/lf and dots */
while (len > 0 && (s_buf[len-1] <= L' ' || s_buf[len-1] == L'.'))
s_buf[--len] = L'\0';
message = PyUnicode_FromUnicode(s_buf, len);
message = PyUnicode_FromWideChar(s_buf, len);
}
if (message == NULL)

File diff suppressed because it is too large Load diff

View file

@ -546,9 +546,6 @@ convertitem(PyObject *arg, const char **p_format, va_list *p_va, int flags,
#define UNICODE_DEFAULT_ENCODING(arg) \
_PyUnicode_AsDefaultEncodedString(arg)
/* Format an error message generated by convertsimple(). */
static char *
@ -611,7 +608,7 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags,
const char *format = *p_format;
char c = *format++;
PyObject *uarg;
char *sarg;
switch (c) {
@ -838,8 +835,11 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags,
case 'C': {/* unicode char */
int *p = va_arg(*p_va, int *);
if (PyUnicode_Check(arg) &&
PyUnicode_GET_SIZE(arg) == 1)
*p = PyUnicode_AS_UNICODE(arg)[0];
PyUnicode_GET_LENGTH(arg) == 1) {
int kind = PyUnicode_KIND(arg);
void *data = PyUnicode_DATA(arg);
*p = PyUnicode_READ(kind, data, 0);
}
else
return converterr("a unicode character", arg, msgbuf, bufsize);
break;
@ -889,13 +889,12 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags,
if (c == 'z' && arg == Py_None)
PyBuffer_FillInfo(p, NULL, NULL, 0, 1, 0);
else if (PyUnicode_Check(arg)) {
uarg = UNICODE_DEFAULT_ENCODING(arg);
if (uarg == NULL)
Py_ssize_t len;
sarg = PyUnicode_AsUTF8AndSize(arg, &len);
if (sarg == NULL)
return converterr(CONV_UNICODE,
arg, msgbuf, bufsize);
PyBuffer_FillInfo(p, arg,
PyBytes_AS_STRING(uarg), PyBytes_GET_SIZE(uarg),
1, 0);
PyBuffer_FillInfo(p, arg, sarg, len, 1, 0);
}
else { /* any buffer-like object */
char *buf;
@ -918,12 +917,13 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags,
STORE_SIZE(0);
}
else if (PyUnicode_Check(arg)) {
uarg = UNICODE_DEFAULT_ENCODING(arg);
if (uarg == NULL)
Py_ssize_t len;
sarg = PyUnicode_AsUTF8AndSize(arg, &len);
if (sarg == NULL)
return converterr(CONV_UNICODE,
arg, msgbuf, bufsize);
*p = PyBytes_AS_STRING(uarg);
STORE_SIZE(PyBytes_GET_SIZE(uarg));
*p = sarg;
STORE_SIZE(len);
}
else { /* any buffer-like object */
/* XXX Really? */
@ -937,22 +937,22 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags,
} else {
/* "s" or "z" */
char **p = va_arg(*p_va, char **);
uarg = NULL;
Py_ssize_t len;
sarg = NULL;
if (c == 'z' && arg == Py_None)
*p = NULL;
else if (PyUnicode_Check(arg)) {
uarg = UNICODE_DEFAULT_ENCODING(arg);
if (uarg == NULL)
sarg = PyUnicode_AsUTF8AndSize(arg, &len);
if (sarg == NULL)
return converterr(CONV_UNICODE,
arg, msgbuf, bufsize);
*p = PyBytes_AS_STRING(uarg);
*p = sarg;
}
else
return converterr(c == 'z' ? "str or None" : "str",
arg, msgbuf, bufsize);
if (*p != NULL && uarg != NULL &&
(Py_ssize_t) strlen(*p) != PyBytes_GET_SIZE(uarg))
if (*p != NULL && sarg != NULL && (Py_ssize_t) strlen(*p) != len)
return converterr(
c == 'z' ? "str without null bytes or None"
: "str without null bytes",
@ -976,6 +976,8 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags,
}
else if (PyUnicode_Check(arg)) {
*p = PyUnicode_AS_UNICODE(arg);
if (*p == NULL)
RETURN_ERR_OCCURRED;
STORE_SIZE(PyUnicode_GET_SIZE(arg));
}
else
@ -987,6 +989,8 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags,
*p = NULL;
else if (PyUnicode_Check(arg)) {
*p = PyUnicode_AS_UNICODE(arg);
if (*p == NULL)
RETURN_ERR_OCCURRED;
if (Py_UNICODE_strlen(*p) != PyUnicode_GET_SIZE(arg))
return converterr(
"str without null character or None",

View file

@ -118,12 +118,12 @@ typedef unsigned short mode_t;
#define MAGIC (3190 | ((long)'\r'<<16) | ((long)'\n'<<24))
#define TAG "cpython-" MAJOR MINOR;
#define CACHEDIR "__pycache__"
static const Py_UNICODE CACHEDIR_UNICODE[] = {
static const Py_UCS4 CACHEDIR_UNICODE[] = {
'_', '_', 'p', 'y', 'c', 'a', 'c', 'h', 'e', '_', '_', '\0'};
/* Current magic word and string tag as globals. */
static long pyc_magic = MAGIC;
static const char *pyc_tag = TAG;
static const Py_UNICODE PYC_TAG_UNICODE[] = {
static const Py_UCS4 PYC_TAG_UNICODE[] = {
'c', 'p', 'y', 't', 'h', 'o', 'n', '-', PY_MAJOR_VERSION + 48, PY_MINOR_VERSION + 48, '\0'};
#undef QUOTE
#undef STRIFY
@ -762,7 +762,7 @@ remove_module(PyObject *name)
static PyObject * get_sourcefile(PyObject *filename);
static PyObject *make_source_pathname(PyObject *pathname);
static PyObject* make_compiled_pathname(Py_UNICODE *pathname, int debug);
static PyObject* make_compiled_pathname(PyObject *pathname, int debug);
/* Execute a code object in a module and return the module object
* WITH INCREMENTED REFERENCE COUNT. If an error occurs, name is
@ -886,10 +886,10 @@ PyImport_ExecCodeModuleObject(PyObject *name, PyObject *co, PyObject *pathname,
/* Like strrchr(string, '/') but searches for the rightmost of either SEP
or ALTSEP, if the latter is defined.
*/
static Py_UNICODE*
rightmost_sep(Py_UNICODE *s)
static Py_UCS4*
rightmost_sep(Py_UCS4 *s)
{
Py_UNICODE *found, c;
Py_UCS4 *found, c;
for (found = NULL; (c = *s); s++) {
if (c == SEP
#ifdef ALTSEP
@ -912,15 +912,21 @@ rightmost_sep(Py_UNICODE *s)
foo.py -> __pycache__/foo.<tag>.pyc */
static PyObject*
make_compiled_pathname(Py_UNICODE *pathname, int debug)
make_compiled_pathname(PyObject *pathstr, int debug)
{
Py_UNICODE buf[MAXPATHLEN];
Py_UCS4 *pathname;
Py_UCS4 buf[MAXPATHLEN];
size_t buflen = (size_t)MAXPATHLEN;
size_t len = Py_UNICODE_strlen(pathname);
size_t len;
size_t i, save;
Py_UNICODE *pos;
Py_UCS4 *pos;
int sep = SEP;
pathname = PyUnicode_AsUCS4Copy(pathstr);
if (!pathname)
return NULL;
len = Py_UCS4_strlen(pathname);
/* Sanity check that the buffer has roughly enough space to hold what
will eventually be the full path to the compiled file. The 5 extra
bytes include the slash after __pycache__, the two extra dots, the
@ -930,8 +936,10 @@ make_compiled_pathname(Py_UNICODE *pathname, int debug)
sanity check before writing the extension to ensure we do not
overflow the buffer.
*/
if (len + Py_UNICODE_strlen(CACHEDIR_UNICODE) + Py_UNICODE_strlen(PYC_TAG_UNICODE) + 5 > buflen)
if (len + Py_UCS4_strlen(CACHEDIR_UNICODE) + Py_UCS4_strlen(PYC_TAG_UNICODE) + 5 > buflen) {
PyMem_Free(pathname);
return NULL;
}
/* Find the last path separator and copy everything from the start of
the source string up to and including the separator.
@ -943,24 +951,28 @@ make_compiled_pathname(Py_UNICODE *pathname, int debug)
else {
sep = *pos;
i = pos - pathname + 1;
Py_UNICODE_strncpy(buf, pathname, i);
Py_UCS4_strncpy(buf, pathname, i);
}
save = i;
buf[i++] = '\0';
/* Add __pycache__/ */
Py_UNICODE_strcat(buf, CACHEDIR_UNICODE);
i += Py_UNICODE_strlen(CACHEDIR_UNICODE) - 1;
Py_UCS4_strcat(buf, CACHEDIR_UNICODE);
i += Py_UCS4_strlen(CACHEDIR_UNICODE) - 1;
buf[i++] = sep;
buf[i] = '\0';
/* Add the base filename, but remove the .py or .pyw extension, since
the tag name must go before the extension.
*/
Py_UNICODE_strcat(buf, pathname + save);
pos = Py_UNICODE_strrchr(buf + i, '.');
Py_UCS4_strcat(buf, pathname + save);
pos = Py_UCS4_strrchr(buf + i, '.');
if (pos != NULL)
*++pos = '\0';
Py_UNICODE_strcat(buf, PYC_TAG_UNICODE);
/* pathname is not used from here on. */
PyMem_Free(pathname);
Py_UCS4_strcat(buf, PYC_TAG_UNICODE);
/* The length test above assumes that we're only adding one character
to the end of what would normally be the extension. What if there
is no extension, or the string ends in '.' or '.p', and otherwise
@ -1010,7 +1022,7 @@ make_compiled_pathname(Py_UNICODE *pathname, int debug)
#if 0
printf("strlen(buf): %d; buflen: %d\n", (int)strlen(buf), (int)buflen);
#endif
len = Py_UNICODE_strlen(buf);
len = Py_UCS4_strlen(buf);
if (len + 5 > buflen)
return NULL;
buf[len] = '.'; len++;
@ -1018,7 +1030,7 @@ make_compiled_pathname(Py_UNICODE *pathname, int debug)
buf[len] = 'y'; len++;
buf[len] = debug ? 'c' : 'o'; len++;
assert(len <= buflen);
return PyUnicode_FromUnicode(buf, len);
return PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, buf, len);
}
@ -1033,14 +1045,16 @@ make_compiled_pathname(Py_UNICODE *pathname, int debug)
static PyObject*
make_source_pathname(PyObject *pathobj)
{
Py_UNICODE buf[MAXPATHLEN];
Py_UNICODE *pathname;
Py_UNICODE *left, *right, *dot0, *dot1, sep;
Py_UCS4 buf[MAXPATHLEN];
Py_UCS4 *pathname;
Py_UCS4 *left, *right, *dot0, *dot1, sep;
size_t i, j;
if (PyUnicode_GET_SIZE(pathobj) > MAXPATHLEN)
if (PyUnicode_GET_LENGTH(pathobj) > MAXPATHLEN)
return NULL;
pathname = PyUnicode_AsUCS4Copy(pathobj);
if (!pathname)
return NULL;
pathname = PyUnicode_AS_UNICODE(pathobj);
/* Look back two slashes from the end. In between these two slashes
must be the string __pycache__ or this is not a PEP 3147 style
@ -1057,31 +1071,35 @@ make_source_pathname(PyObject *pathobj)
left = pathname;
else
left++;
if (right-left != Py_UNICODE_strlen(CACHEDIR_UNICODE) ||
Py_UNICODE_strncmp(left, CACHEDIR_UNICODE, right-left) != 0)
return NULL;
if (right-left != Py_UCS4_strlen(CACHEDIR_UNICODE) ||
Py_UCS4_strncmp(left, CACHEDIR_UNICODE, right-left) != 0)
goto error;
/* Now verify that the path component to the right of the last slash
has two dots in it.
*/
if ((dot0 = Py_UNICODE_strchr(right + 1, '.')) == NULL)
return NULL;
if ((dot1 = Py_UNICODE_strchr(dot0 + 1, '.')) == NULL)
return NULL;
if ((dot0 = Py_UCS4_strchr(right + 1, '.')) == NULL)
goto error;
if ((dot1 = Py_UCS4_strchr(dot0 + 1, '.')) == NULL)
goto error;
/* Too many dots? */
if (Py_UNICODE_strchr(dot1 + 1, '.') != NULL)
return NULL;
if (Py_UCS4_strchr(dot1 + 1, '.') != NULL)
goto error;
/* This is a PEP 3147 path. Start by copying everything from the
start of pathname up to and including the leftmost slash. Then
copy the file's basename, removing the magic tag and adding a .py
suffix.
*/
Py_UNICODE_strncpy(buf, pathname, (i=left-pathname));
Py_UNICODE_strncpy(buf+i, right+1, (j=dot0-right));
Py_UCS4_strncpy(buf, pathname, (i=left-pathname));
Py_UCS4_strncpy(buf+i, right+1, (j=dot0-right));
buf[i+j] = 'p';
buf[i+j+1] = 'y';
return PyUnicode_FromUnicode(buf, i+j+2);
PyMem_Free(pathname);
return PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, buf, i+j+2);
error:
PyMem_Free(pathname);
return NULL;
}
/* Given a pathname for a Python source file, its time of last
@ -1250,6 +1268,7 @@ static void
write_compiled_module(PyCodeObject *co, PyObject *cpathname,
struct stat *srcstat)
{
Py_UCS4 *cpathname_ucs4;
FILE *fp;
time_t mtime = srcstat->st_mtime;
#ifdef MS_WINDOWS /* since Windows uses different permissions */
@ -1267,18 +1286,23 @@ write_compiled_module(PyCodeObject *co, PyObject *cpathname,
PyObject *cpathbytes;
#endif
PyObject *dirname;
Py_UNICODE *dirsep;
Py_UCS4 *dirsep;
int res, ok;
/* Ensure that the __pycache__ directory exists. */
dirsep = rightmost_sep(PyUnicode_AS_UNICODE(cpathname));
cpathname_ucs4 = PyUnicode_AsUCS4Copy(cpathname);
if (!cpathname_ucs4)
return;
dirsep = rightmost_sep(cpathname_ucs4);
if (dirsep == NULL) {
if (Py_VerboseFlag)
PySys_FormatStderr("# no %s path found %R\n", CACHEDIR, cpathname);
return;
}
dirname = PyUnicode_FromUnicode(PyUnicode_AS_UNICODE(cpathname),
dirsep - PyUnicode_AS_UNICODE(cpathname));
dirname = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
cpathname_ucs4,
dirsep - cpathname_ucs4);
PyMem_Free(cpathname_ucs4);
if (dirname == NULL) {
PyErr_Clear();
return;
@ -1461,9 +1485,7 @@ load_source_module(PyObject *name, PyObject *pathname, FILE *fp)
goto error;
}
#endif
cpathname = make_compiled_pathname(
PyUnicode_AS_UNICODE(pathname),
!Py_OptimizeFlag);
cpathname = make_compiled_pathname(pathname, !Py_OptimizeFlag);
if (cpathname != NULL)
fpc = check_compiled_module(pathname, st.st_mtime, cpathname);
@ -1512,16 +1534,18 @@ static PyObject *
get_sourcefile(PyObject *filename)
{
Py_ssize_t len;
Py_UNICODE *fileuni;
Py_UCS4 *fileuni;
PyObject *py;
struct stat statbuf;
len = PyUnicode_GET_SIZE(filename);
len = PyUnicode_GET_LENGTH(filename);
if (len == 0)
Py_RETURN_NONE;
/* don't match *.pyc or *.pyo? */
fileuni = PyUnicode_AS_UNICODE(filename);
fileuni = PyUnicode_AsUCS4Copy(filename);
if (!fileuni)
return NULL;
if (len < 5
|| fileuni[len-4] != '.'
|| (fileuni[len-3] != 'p' && fileuni[len-3] != 'P')
@ -1535,7 +1559,7 @@ get_sourcefile(PyObject *filename)
py = make_source_pathname(filename);
if (py == NULL) {
PyErr_Clear();
py = PyUnicode_FromUnicode(fileuni, len - 1);
py = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, fileuni, len - 1);
}
if (py == NULL)
goto error;
@ -1548,6 +1572,7 @@ get_sourcefile(PyObject *filename)
error:
PyErr_Clear();
unchanged:
PyMem_Free(fileuni);
Py_INCREF(filename);
return filename;
}
@ -1739,8 +1764,7 @@ find_module_path(PyObject *fullname, PyObject *name, PyObject *path,
PyObject *path_hooks, PyObject *path_importer_cache,
PyObject **p_path, PyObject **p_loader, struct filedescr **p_fd)
{
Py_UNICODE buf[MAXPATHLEN+1];
Py_ssize_t buflen = MAXPATHLEN+1;
Py_UCS4 buf[MAXPATHLEN+1];
PyObject *path_unicode, *filename;
Py_ssize_t len;
struct stat statbuf;
@ -1759,15 +1783,15 @@ find_module_path(PyObject *fullname, PyObject *name, PyObject *path,
else
return 0;
len = PyUnicode_GET_SIZE(path_unicode);
if (len + 2 + PyUnicode_GET_SIZE(name) + MAXSUFFIXSIZE >= buflen) {
len = PyUnicode_GET_LENGTH(path_unicode);
if (!PyUnicode_AsUCS4(path_unicode, buf, PY_ARRAY_LENGTH(buf), 1)) {
Py_DECREF(path_unicode);
return 0; /* Too long */
PyErr_Clear();
return 0;
}
Py_UNICODE_strcpy(buf, PyUnicode_AS_UNICODE(path_unicode));
Py_DECREF(path_unicode);
if (Py_UNICODE_strlen(buf) != len)
if (Py_UCS4_strlen(buf) != len)
return 0; /* path contains '\0' */
/* sys.path_hooks import hook */
@ -1804,10 +1828,14 @@ find_module_path(PyObject *fullname, PyObject *name, PyObject *path,
#endif
)
buf[len++] = SEP;
Py_UNICODE_strcpy(buf+len, PyUnicode_AS_UNICODE(name));
len += PyUnicode_GET_SIZE(name);
if (!PyUnicode_AsUCS4(name, buf+len, PY_ARRAY_LENGTH(buf)-len, 1)) {
PyErr_Clear();
return 0;
}
len += PyUnicode_GET_LENGTH(name);
filename = PyUnicode_FromUnicode(buf, len);
filename = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
buf, len);
if (filename == NULL)
return -1;
@ -1989,6 +2017,12 @@ find_module(PyObject *fullname, PyObject *name, PyObject *search_path_list,
if (p_loader != NULL)
*p_loader = NULL;
if (PyUnicode_GET_LENGTH(name) > MAXPATHLEN) {
PyErr_SetString(PyExc_OverflowError,
"module name is too long");
return NULL;
}
/* sys.meta_path import hook */
if (p_loader != NULL) {
PyObject *meta_path;
@ -2704,7 +2738,7 @@ static PyObject *get_parent(PyObject *globals,
int level);
static PyObject *load_next(PyObject *mod, PyObject *altmod,
PyObject *inputname, PyObject **p_outputname,
Py_UNICODE *buf, Py_ssize_t *p_buflen,
Py_UCS4 *buf, Py_ssize_t *p_buflen,
Py_ssize_t bufsize);
static int mark_miss(PyObject *name);
static int ensure_fromlist(PyObject *mod, PyObject *fromlist,
@ -2718,37 +2752,47 @@ static PyObject *
import_module_level(PyObject *name, PyObject *globals, PyObject *locals,
PyObject *fromlist, int level)
{
Py_UNICODE buf[MAXPATHLEN+1];
Py_UCS4 buf[MAXPATHLEN+1];
Py_ssize_t buflen;
Py_ssize_t bufsize = MAXPATHLEN+1;
PyObject *parent, *head, *next, *tail, *inputname, *outputname;
PyObject *parent_name, *ensure_name;
const Py_UNICODE *nameunicode;
Py_ssize_t sep, altsep;
nameunicode = PyUnicode_AS_UNICODE(name);
if (PyUnicode_READY(name))
return NULL;
if (Py_UNICODE_strchr(nameunicode, SEP) != NULL
sep = PyUnicode_FindChar(name, SEP, 0, PyUnicode_GET_LENGTH(name), 1);
if (sep == -2)
return NULL;
#ifdef ALTSEP
|| Py_UNICODE_strchr(nameunicode, ALTSEP) != NULL
altsep = PyUnicode_FindChar(name, ALTSEP, 0, PyUnicode_GET_LENGTH(name), 1);
if (altsep == -2)
return NULL;
#else
altsep = -1;
#endif
) {
if (sep != -1 || altsep != -1)
{
PyErr_SetString(PyExc_ImportError,
"Import by filename is not supported.");
return NULL;
}
parent = get_parent(globals, &parent_name, level);
if (parent == NULL)
if (parent == NULL) {
return NULL;
}
buflen = PyUnicode_GET_SIZE(parent_name);
if (buflen+1 > bufsize) {
if (PyUnicode_READY(parent_name))
return NULL;
buflen = PyUnicode_GET_LENGTH(parent_name);
if (!PyUnicode_AsUCS4(parent_name, buf, PY_ARRAY_LENGTH(buf), 1)) {
Py_DECREF(parent_name);
PyErr_SetString(PyExc_ValueError,
"Module name too long");
return NULL;
}
Py_UNICODE_strcpy(buf, PyUnicode_AS_UNICODE(parent_name));
Py_DECREF(parent_name);
head = load_next(parent, level < 0 ? Py_None : parent, name, &outputname,
@ -2799,7 +2843,8 @@ import_module_level(PyObject *name, PyObject *globals, PyObject *locals,
Py_DECREF(head);
ensure_name = PyUnicode_FromUnicode(buf, Py_UNICODE_strlen(buf));
ensure_name = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
buf, Py_UCS4_strlen(buf));
if (ensure_name == NULL) {
Py_DECREF(tail);
return NULL;
@ -2859,8 +2904,6 @@ PyImport_ImportModuleLevel(const char *name, PyObject *globals, PyObject *locals
static PyObject *
get_parent(PyObject *globals, PyObject **p_name, int level)
{
Py_UNICODE name[MAXPATHLEN+1];
const Py_ssize_t bufsize = MAXPATHLEN+1;
PyObject *nameobj;
static PyObject *namestr = NULL;
@ -2897,7 +2940,7 @@ get_parent(PyObject *globals, PyObject **p_name, int level)
"__package__ set to non-string");
return NULL;
}
if (PyUnicode_GET_SIZE(pkgname) == 0) {
if (PyUnicode_GET_LENGTH(pkgname) == 0) {
if (level > 0) {
PyErr_SetString(PyExc_ValueError,
"Attempted relative import in non-package");
@ -2905,12 +2948,8 @@ get_parent(PyObject *globals, PyObject **p_name, int level)
}
goto return_none;
}
if (PyUnicode_GET_SIZE(pkgname)+1 > bufsize) {
PyErr_SetString(PyExc_ValueError,
"Package name too long");
return NULL;
}
Py_UNICODE_strcpy(name, PyUnicode_AS_UNICODE(pkgname));
Py_INCREF(pkgname);
nameobj = pkgname;
} else {
/* __package__ not set, so figure it out and set it */
modname = PyDict_GetItem(globals, namestr);
@ -2922,74 +2961,71 @@ get_parent(PyObject *globals, PyObject **p_name, int level)
/* __path__ is set, so modname is already the package name */
int error;
if (PyUnicode_GET_SIZE(modname)+1 > bufsize) {
PyErr_SetString(PyExc_ValueError,
"Module name too long");
return NULL;
}
Py_UNICODE_strcpy(name, PyUnicode_AS_UNICODE(modname));
error = PyDict_SetItem(globals, pkgstr, modname);
if (error) {
PyErr_SetString(PyExc_ValueError,
"Could not set __package__");
return NULL;
}
Py_INCREF(modname);
nameobj = modname;
} else {
/* Normal module, so work out the package name if any */
Py_UNICODE *start = PyUnicode_AS_UNICODE(modname);
Py_UNICODE *lastdot = Py_UNICODE_strrchr(start, '.');
Py_ssize_t len;
int error;
if (lastdot == NULL && level > 0) {
PyErr_SetString(PyExc_ValueError,
"Attempted relative import in non-package");
len = PyUnicode_FindChar(modname, '.',
0, PyUnicode_GET_LENGTH(modname), -1);
if (len == -2)
return NULL;
}
if (lastdot == NULL) {
error = PyDict_SetItem(globals, pkgstr, Py_None);
if (error) {
if (len < 0) {
if (level > 0) {
PyErr_SetString(PyExc_ValueError,
"Attempted relative import in non-package");
return NULL;
}
if (PyDict_SetItem(globals, pkgstr, Py_None)) {
PyErr_SetString(PyExc_ValueError,
"Could not set __package__");
return NULL;
}
goto return_none;
}
len = lastdot - start;
if (len+1 > bufsize) {
PyErr_SetString(PyExc_ValueError,
"Module name too long");
pkgname = PyUnicode_Substring(modname, 0, len);
if (pkgname == NULL)
return NULL;
}
Py_UNICODE_strncpy(name, start, len);
name[len] = '\0';
pkgname = PyUnicode_FromUnicode(name, len);
if (pkgname == NULL) {
return NULL;
}
error = PyDict_SetItem(globals, pkgstr, pkgname);
Py_DECREF(pkgname);
if (error) {
if (PyDict_SetItem(globals, pkgstr, pkgname)) {
Py_DECREF(pkgname);
PyErr_SetString(PyExc_ValueError,
"Could not set __package__");
return NULL;
}
nameobj = pkgname;
}
}
while (--level > 0) {
Py_UNICODE *dot = Py_UNICODE_strrchr(name, '.');
if (dot == NULL) {
PyErr_SetString(PyExc_ValueError,
"Attempted relative import beyond "
"toplevel package");
if (level > 1) {
Py_ssize_t dot, end = PyUnicode_GET_LENGTH(nameobj);
PyObject *newname;
while (--level > 0) {
dot = PyUnicode_FindChar(nameobj, '.', 0, end, -1);
if (dot == -2) {
Py_DECREF(nameobj);
return NULL;
}
if (dot < 0) {
Py_DECREF(nameobj);
PyErr_SetString(PyExc_ValueError,
"Attempted relative import beyond "
"toplevel package");
return NULL;
}
end = dot;
}
newname = PyUnicode_Substring(nameobj, 0, end);
Py_DECREF(nameobj);
if (newname == NULL)
return NULL;
}
*dot = '\0';
nameobj = newname;
}
nameobj = PyUnicode_FromUnicode(name, Py_UNICODE_strlen(name));
if (nameobj == NULL)
return NULL;
modules = PyImport_GetModuleDict();
parent = PyDict_GetItem(modules, nameobj);
if (parent == NULL) {
@ -3021,7 +3057,7 @@ get_parent(PyObject *globals, PyObject **p_name, int level)
If this is violated... Who cares? */
return_none:
nameobj = PyUnicode_FromUnicode(NULL, 0);
nameobj = PyUnicode_New(0, 0);
if (nameobj == NULL)
return NULL;
*p_name = nameobj;
@ -3032,28 +3068,28 @@ return_none:
static PyObject *
load_next(PyObject *mod, PyObject *altmod,
PyObject *inputname, PyObject **p_outputname,
Py_UNICODE *buf, Py_ssize_t *p_buflen, Py_ssize_t bufsize)
Py_UCS4 *buf, Py_ssize_t *p_buflen, Py_ssize_t bufsize)
{
const Py_UNICODE *dot;
Py_UCS4 *dot;
Py_ssize_t len;
Py_UNICODE *p;
Py_UCS4 *p;
PyObject *fullname, *name, *result, *mark_name;
const Py_UNICODE *nameuni;
const Py_UCS4 *nameuni;
*p_outputname = NULL;
if (PyUnicode_GET_SIZE(inputname) == 0) {
if (PyUnicode_GET_LENGTH(inputname) == 0) {
/* completely empty module name should only happen in
'from . import' (or '__import__("")')*/
Py_INCREF(mod);
return mod;
}
nameuni = PyUnicode_AS_UNICODE(inputname);
nameuni = PyUnicode_AsUCS4Copy(inputname);
if (nameuni == NULL)
return NULL;
dot = Py_UNICODE_strchr(nameuni, '.');
dot = Py_UCS4_strchr(nameuni, '.');
if (dot != NULL) {
len = dot - nameuni;
if (len == 0) {
@ -3063,7 +3099,7 @@ load_next(PyObject *mod, PyObject *altmod,
}
}
else
len = PyUnicode_GET_SIZE(inputname);
len = PyUnicode_GET_LENGTH(inputname);
if (*p_buflen+len+1 >= bufsize) {
PyErr_SetString(PyExc_ValueError,
@ -3076,14 +3112,16 @@ load_next(PyObject *mod, PyObject *altmod,
*p++ = '.';
*p_buflen += 1;
}
Py_UNICODE_strncpy(p, nameuni, len);
Py_UCS4_strncpy(p, nameuni, len);
p[len] = '\0';
*p_buflen += len;
fullname = PyUnicode_FromUnicode(buf, *p_buflen);
fullname = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
buf, *p_buflen);
if (fullname == NULL)
return NULL;
name = PyUnicode_FromUnicode(p, len);
name = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
p, len);
if (name == NULL) {
Py_DECREF(fullname);
return NULL;
@ -3096,7 +3134,8 @@ load_next(PyObject *mod, PyObject *altmod,
result = import_submodule(altmod, name, name);
Py_DECREF(name);
if (result != NULL && result != Py_None) {
mark_name = PyUnicode_FromUnicode(buf, *p_buflen);
mark_name = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
buf, *p_buflen);
if (mark_name == NULL) {
Py_DECREF(result);
return NULL;
@ -3107,7 +3146,7 @@ load_next(PyObject *mod, PyObject *altmod,
return NULL;
}
Py_DECREF(mark_name);
Py_UNICODE_strncpy(buf, nameuni, len);
Py_UCS4_strncpy(buf, nameuni, len);
buf[len] = '\0';
*p_buflen = len;
}
@ -3125,7 +3164,8 @@ load_next(PyObject *mod, PyObject *altmod,
}
if (dot != NULL) {
*p_outputname = PyUnicode_FromUnicode(dot+1, Py_UNICODE_strlen(dot+1));
*p_outputname = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
dot+1, Py_UCS4_strlen(dot+1));
if (*p_outputname == NULL) {
Py_DECREF(result);
return NULL;
@ -3166,7 +3206,7 @@ ensure_fromlist(PyObject *mod, PyObject *fromlist, PyObject *name,
Py_DECREF(item);
return 0;
}
if (PyUnicode_AS_UNICODE(item)[0] == '*') {
if (PyUnicode_READ_CHAR(item, 0) == '*') {
PyObject *all;
Py_DECREF(item);
/* See if the package defines __all__ */
@ -3304,7 +3344,7 @@ PyImport_ReloadModule(PyObject *m)
PyObject *modules = PyImport_GetModuleDict();
PyObject *path_list = NULL, *loader = NULL, *existing_m = NULL;
PyObject *nameobj, *bufobj, *subnameobj;
Py_UNICODE *name, *subname;
Py_UCS4 *name = NULL, *subname;
struct filedescr *fdp;
FILE *fp = NULL;
PyObject *newm = NULL;
@ -3321,7 +3361,7 @@ PyImport_ReloadModule(PyObject *m)
return NULL;
}
nameobj = PyModule_GetNameObject(m);
if (nameobj == NULL)
if (nameobj == NULL || PyUnicode_READY(nameobj) == -1)
return NULL;
if (m != PyDict_GetItem(modules, nameobj)) {
PyErr_Format(PyExc_ImportError,
@ -3343,8 +3383,12 @@ PyImport_ReloadModule(PyObject *m)
return NULL;
}
name = PyUnicode_AS_UNICODE(nameobj);
subname = Py_UNICODE_strrchr(name, '.');
name = PyUnicode_AsUCS4Copy(nameobj);
if (!name) {
Py_DECREF(nameobj);
return NULL;
}
subname = Py_UCS4_strrchr(name, '.');
if (subname == NULL) {
Py_INCREF(nameobj);
subnameobj = nameobj;
@ -3353,7 +3397,8 @@ PyImport_ReloadModule(PyObject *m)
PyObject *parentname, *parent;
Py_ssize_t len;
len = subname - name;
parentname = PyUnicode_FromUnicode(name, len);
parentname = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
name, len);
if (parentname == NULL) {
goto error;
}
@ -3370,8 +3415,9 @@ PyImport_ReloadModule(PyObject *m)
if (path_list == NULL)
PyErr_Clear();
subname++;
len = PyUnicode_GET_SIZE(nameobj) - (len + 1);
subnameobj = PyUnicode_FromUnicode(subname, len);
len = PyUnicode_GET_LENGTH(nameobj) - (len + 1);
subnameobj = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
subname, len);
}
if (subnameobj == NULL)
goto error;
@ -3403,6 +3449,7 @@ PyImport_ReloadModule(PyObject *m)
error:
imp_modules_reloading_clear();
Py_DECREF(nameobj);
PyMem_Free(name);
return newm;
}
@ -3910,9 +3957,7 @@ imp_cache_from_source(PyObject *self, PyObject *args, PyObject *kws)
return NULL;
}
cpathname = make_compiled_pathname(
PyUnicode_AS_UNICODE(pathname),
debug);
cpathname = make_compiled_pathname(pathname, debug);
Py_DECREF(pathname);
if (cpathname == NULL) {
@ -4105,7 +4150,7 @@ NullImporter_init(NullImporter *self, PyObject *args, PyObject *kwds)
&pathobj))
return -1;
if (PyUnicode_GET_SIZE(pathobj) == 0) {
if (PyUnicode_GET_LENGTH(pathobj) == 0) {
PyErr_SetString(PyExc_ImportError, "empty pathname");
return -1;
}

View file

@ -311,9 +311,7 @@ w_object(PyObject *v, WFILE *p)
}
else if (PyUnicode_CheckExact(v)) {
PyObject *utf8;
utf8 = PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(v),
PyUnicode_GET_SIZE(v),
"surrogatepass");
utf8 = PyUnicode_AsEncodedString(v, "utf8", "surrogatepass");
if (utf8 == NULL) {
p->depth--;
p->error = WFERR_UNMARSHALLABLE;

View file

@ -183,24 +183,6 @@ fold_binops_on_constants(unsigned char *codestr, PyObject *consts, PyObject **ob
break;
case BINARY_SUBSCR:
newconst = PyObject_GetItem(v, w);
/* #5057: if v is unicode, there might be differences between
wide and narrow builds in cases like '\U00012345'[0].
Wide builds will return a non-BMP char, whereas narrow builds
will return a surrogate. In both the cases skip the
optimization in order to produce compatible pycs.
*/
if (newconst != NULL &&
PyUnicode_Check(v) && PyUnicode_Check(newconst)) {
Py_UNICODE ch = PyUnicode_AS_UNICODE(newconst)[0];
#ifdef Py_UNICODE_WIDE
if (ch > 0xFFFF) {
#else
if (ch >= 0xD800 && ch <= 0xDFFF) {
#endif
Py_DECREF(newconst);
return 0;
}
}
break;
case BINARY_LSHIFT:
newconst = PyNumber_Lshift(v, w);

View file

@ -1525,10 +1525,10 @@ symtable_visit_alias(struct symtable *st, alias_ty a)
*/
PyObject *store_name;
PyObject *name = (a->asname == NULL) ? a->name : a->asname;
const Py_UNICODE *base = PyUnicode_AS_UNICODE(name);
Py_UNICODE *dot = Py_UNICODE_strchr(base, '.');
if (dot) {
store_name = PyUnicode_FromUnicode(base, dot - base);
Py_ssize_t dot = PyUnicode_FindChar(name, '.', 0,
PyUnicode_GET_LENGTH(name), 1);
if (dot != -1) {
store_name = PyUnicode_Substring(name, 0, dot);
if (!store_name)
return 0;
}

View file

@ -229,8 +229,8 @@ _Py_DisplaySourceLine(PyObject *f, PyObject *filename, int lineno, int indent)
PyObject *lineobj = NULL;
PyObject *res;
char buf[MAXPATHLEN+1];
Py_UNICODE *u, *p;
Py_ssize_t len;
int kind;
void *data;
/* open the file */
if (filename == NULL)
@ -285,13 +285,16 @@ _Py_DisplaySourceLine(PyObject *f, PyObject *filename, int lineno, int indent)
}
/* remove the indentation of the line */
u = PyUnicode_AS_UNICODE(lineobj);
len = PyUnicode_GET_SIZE(lineobj);
for (p=u; *p == ' ' || *p == '\t' || *p == '\014'; p++)
len--;
if (u != p) {
kind = PyUnicode_KIND(lineobj);
data = PyUnicode_DATA(lineobj);
for (i=0; i < PyUnicode_GET_LENGTH(lineobj); i++) {
Py_UCS4 ch = PyUnicode_READ(kind, data, i);
if (ch != ' ' && ch != '\t' && ch != '\014')
break;
}
if (i) {
PyObject *truncated;
truncated = PyUnicode_FromUnicode(p, len);
truncated = PyUnicode_Substring(lineobj, i, PyUnicode_GET_LENGTH(lineobj));
if (truncated) {
Py_DECREF(lineobj);
lineobj = truncated;
@ -476,13 +479,26 @@ dump_hexadecimal(int width, unsigned long value, int fd)
static void
dump_ascii(int fd, PyObject *text)
{
PyASCIIObject *ascii = (PyASCIIObject *)text;
Py_ssize_t i, size;
int truncated;
Py_UNICODE *u;
char c;
int kind;
void *data;
Py_UCS4 ch;
size = PyUnicode_GET_SIZE(text);
u = PyUnicode_AS_UNICODE(text);
size = ascii->length;
kind = ascii->state.kind;
if (ascii->state.compact) {
if (ascii->state.ascii)
data = ((PyASCIIObject*)text) + 1;
else
data = ((PyCompactUnicodeObject*)text) + 1;
}
else {
data = ((PyUnicodeObject *)text)->data.any;
if (data == NULL)
return;
}
if (MAX_STRING_LENGTH < size) {
size = MAX_STRING_LENGTH;
@ -491,27 +507,28 @@ dump_ascii(int fd, PyObject *text)
else
truncated = 0;
for (i=0; i < size; i++, u++) {
if (*u < 128) {
c = (char)*u;
for (i=0; i < size; i++) {
ch = PyUnicode_READ(kind, data, i);
if (ch < 128) {
char c = (char)ch;
write(fd, &c, 1);
}
else if (*u < 256) {
else if (ch < 256) {
PUTS(fd, "\\x");
dump_hexadecimal(2, *u, fd);
dump_hexadecimal(2, ch, fd);
}
else
#ifdef Py_UNICODE_WIDE
if (*u < 65536)
if (ch < 65536)
#endif
{
PUTS(fd, "\\u");
dump_hexadecimal(4, *u, fd);
dump_hexadecimal(4, ch, fd);
#ifdef Py_UNICODE_WIDE
}
else {
PUTS(fd, "\\U");
dump_hexadecimal(8, *u, fd);
dump_hexadecimal(8, ch, fd);
#endif
}
}
@ -542,7 +559,7 @@ dump_frame(int fd, PyFrameObject *frame)
}
/* PyFrame_GetLineNumber() was introduced in Python 2.7.0 and 3.2.0 */
lineno = PyCode_Addr2Line(frame->f_code, frame->f_lasti);
lineno = PyCode_Addr2Line(code, frame->f_lasti);
PUTS(fd, ", line ");
dump_decimal(fd, lineno);
PUTS(fd, " in ");