Issue20284: Implement PEP461

This commit is contained in:
Ethan Furman 2015-01-23 20:05:18 -08:00
parent 8861502e07
commit b95b56150f
10 changed files with 1185 additions and 158 deletions

View file

@ -686,8 +686,9 @@ PyObject_Format(PyObject *obj, PyObject *format_spec)
Py_DECREF(meth);
if (result && !PyUnicode_Check(result)) {
PyErr_SetString(PyExc_TypeError,
"__format__ method did not return string");
PyErr_Format(PyExc_TypeError,
"__format__ must return a str, not %.200s",
Py_TYPE(result)->tp_name);
Py_DECREF(result);
result = NULL;
goto done;

View file

@ -4,6 +4,7 @@
#include "Python.h"
#include "structmember.h"
#include "bytes_methods.h"
#include "bytesobject.h"
/*[clinic input]
class bytearray "PyByteArrayObject *" "&PyByteArray_Type"
@ -294,6 +295,31 @@ PyByteArray_Concat(PyObject *a, PyObject *b)
return (PyObject *)result;
}
static PyObject *
bytearray_format(PyByteArrayObject *self, PyObject *args)
{
PyObject *bytes_in, *bytes_out, *res;
char *bytestring;
if (self == NULL || !PyByteArray_Check(self) || args == NULL) {
PyErr_BadInternalCall();
return NULL;
}
bytestring = PyByteArray_AS_STRING(self);
bytes_in = PyBytes_FromString(bytestring);
if (bytes_in == NULL)
return NULL;
bytes_out = _PyBytes_Format(bytes_in, args);
Py_DECREF(bytes_in);
if (bytes_out == NULL)
return NULL;
res = PyByteArray_FromObject(bytes_out);
Py_DECREF(bytes_out);
if (res == NULL)
return NULL;
return res;
}
/* Functions stuffed into the type object */
static Py_ssize_t
@ -3723,6 +3749,21 @@ bytearray_methods[] = {
{NULL}
};
static PyObject *
bytearray_mod(PyObject *v, PyObject *w)
{
if (!PyByteArray_Check(v))
Py_RETURN_NOTIMPLEMENTED;
return bytearray_format((PyByteArrayObject *)v, w);
}
static PyNumberMethods bytearray_as_number = {
0, /*nb_add*/
0, /*nb_subtract*/
0, /*nb_multiply*/
bytearray_mod, /*nb_remainder*/
};
PyDoc_STRVAR(bytearray_doc,
"bytearray(iterable_of_ints) -> bytearray\n\
bytearray(string, encoding[, errors]) -> bytearray\n\
@ -3751,7 +3792,7 @@ PyTypeObject PyByteArray_Type = {
0, /* tp_setattr */
0, /* tp_reserved */
(reprfunc)bytearray_repr, /* tp_repr */
0, /* tp_as_number */
&bytearray_as_number, /* tp_as_number */
&bytearray_as_sequence, /* tp_as_sequence */
&bytearray_as_mapping, /* tp_as_mapping */
0, /* tp_hash */

View file

@ -400,6 +400,634 @@ PyBytes_FromFormat(const char *format, ...)
return ret;
}
/* Helpers for formatstring */
Py_LOCAL_INLINE(PyObject *)
getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
{
Py_ssize_t argidx = *p_argidx;
if (argidx < arglen) {
(*p_argidx)++;
if (arglen < 0)
return args;
else
return PyTuple_GetItem(args, argidx);
}
PyErr_SetString(PyExc_TypeError,
"not enough arguments for format string");
return NULL;
}
/* Format codes
* F_LJUST '-'
* F_SIGN '+'
* F_BLANK ' '
* F_ALT '#'
* F_ZERO '0'
*/
#define F_LJUST (1<<0)
#define F_SIGN (1<<1)
#define F_BLANK (1<<2)
#define F_ALT (1<<3)
#define F_ZERO (1<<4)
/* Returns a new reference to a PyBytes object, or NULL on failure. */
static PyObject *
formatfloat(PyObject *v, int flags, int prec, int type)
{
char *p;
PyObject *result;
double x;
x = PyFloat_AsDouble(v);
if (x == -1.0 && PyErr_Occurred()) {
PyErr_Format(PyExc_TypeError, "float argument required, "
"not %.200s", Py_TYPE(v)->tp_name);
return NULL;
}
if (prec < 0)
prec = 6;
p = PyOS_double_to_string(x, type, prec,
(flags & F_ALT) ? Py_DTSF_ALT : 0, NULL);
if (p == NULL)
return NULL;
result = PyBytes_FromStringAndSize(p, strlen(p));
PyMem_Free(p);
return result;
}
/* format_long emulates the format codes d, u, o, x and X, and
* the F_ALT flag, for Python's long (unbounded) ints. It's not used for
* Python's regular ints.
* Return value: a new PyBytes*, or NULL if error.
* . *pbuf is set to point into it,
* *plen set to the # of chars following that.
* Caller must decref it when done using pbuf.
* The string starting at *pbuf is of the form
* "-"? ("0x" | "0X")? digit+
* "0x"/"0X" are present only for x and X conversions, with F_ALT
* set in flags. The case of hex digits will be correct,
* There will be at least prec digits, zero-filled on the left if
* necessary to get that many.
* val object to be converted
* flags bitmask of format flags; only F_ALT is looked at
* prec minimum number of digits; 0-fill on left if needed
* type a character in [duoxX]; u acts the same as d
*
* CAUTION: o, x and X conversions on regular ints can never
* produce a '-' sign, but can for Python's unbounded ints.
*/
static PyObject *
format_long(PyObject *val, int flags, int prec, int type,
char **pbuf, int *plen)
{
PyObject *s;
PyObject *result = NULL;
s = _PyUnicode_FormatLong(val, flags & F_ALT, prec, type);
if (!s)
return NULL;
result = _PyUnicode_AsASCIIString(s, "strict");
Py_DECREF(s);
if (!result)
return NULL;
*pbuf = PyBytes_AS_STRING(result);
*plen = PyBytes_GET_SIZE(result);
return result;
}
Py_LOCAL_INLINE(int)
formatchar(char *buf, size_t buflen, PyObject *v)
{
PyObject *w = NULL;
/* convert bytearray to bytes */
if (PyByteArray_Check(v)) {
w = PyBytes_FromObject(v);
if (w == NULL)
goto error;
v = w;
}
/* presume that the buffer is at least 2 characters long */
if (PyBytes_Check(v)) {
if (!PyArg_Parse(v, "c;%c requires an integer in range(256) or a single byte", &buf[0]))
goto error;
}
else {
long ival = PyLong_AsLong(v);
if (ival == -1 && PyErr_Occurred()) {
PyErr_SetString(PyExc_TypeError,
"%c requires an integer in range(256) or a single byte");
goto error;
}
if (ival < 0 || ival > 255) {
PyErr_SetString(PyExc_TypeError,
"%c requires an integer in range(256) or a single byte");
goto error;
}
buf[0] = ival;
}
Py_XDECREF(w);
buf[1] = '\0';
return 1;
error:
Py_XDECREF(w);
return -1;
}
static PyObject *
format_obj(PyObject *v)
{
PyObject *result = NULL, *w = NULL;
PyObject *func;
_Py_IDENTIFIER(__bytes__);
/* convert bytearray to bytes */
if (PyByteArray_Check(v)) {
w = PyBytes_FromObject(v);
if (w == NULL)
return NULL;
v = w;
}
/* is it a bytes object? */
if (PyBytes_Check(v)) {
result = v;
Py_INCREF(v);
Py_XDECREF(w);
return result;
}
/* does it support __bytes__? */
func = _PyObject_LookupSpecial(v, &PyId___bytes__);
if (func != NULL) {
result = PyObject_CallFunctionObjArgs(func, NULL);
Py_DECREF(func);
if (result == NULL)
return NULL;
if (!PyBytes_Check(result)) {
PyErr_Format(PyExc_TypeError,
"__bytes__ returned non-bytes (type %.200s)",
Py_TYPE(result)->tp_name);
Py_DECREF(result);
return NULL;
}
return result;
}
PyErr_Format(PyExc_TypeError,
"%%b requires bytes, or an object that implements __bytes__, not '%.100s'",
Py_TYPE(v)->tp_name);
return NULL;
}
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)
FORMATBUFLEN is the length of the buffer in which the ints &
chars are formatted. XXX This is a magic number. Each formatting
routine does bounds checking to ensure no overflow, but a better
solution may be to malloc a buffer of appropriate size for each
format. For now, the current solution is sufficient.
*/
#define FORMATBUFLEN (size_t)120
PyObject *
_PyBytes_Format(PyObject *format, PyObject *args)
{
char *fmt, *res;
Py_ssize_t arglen, argidx;
Py_ssize_t reslen, rescnt, fmtcnt;
int args_owned = 0;
PyObject *result;
PyObject *repr;
PyObject *dict = NULL;
if (format == NULL || !PyBytes_Check(format) || args == NULL) {
PyErr_BadInternalCall();
return NULL;
}
fmt = PyBytes_AS_STRING(format);
fmtcnt = PyBytes_GET_SIZE(format);
reslen = rescnt = fmtcnt + 100;
result = PyBytes_FromStringAndSize((char *)NULL, reslen);
if (result == NULL)
return NULL;
res = PyBytes_AsString(result);
if (PyTuple_Check(args)) {
arglen = PyTuple_GET_SIZE(args);
argidx = 0;
}
else {
arglen = -1;
argidx = -2;
}
if (Py_TYPE(args)->tp_as_mapping && Py_TYPE(args)->tp_as_mapping->mp_subscript &&
!PyTuple_Check(args) && !PyBytes_Check(args) && !PyUnicode_Check(args) &&
!PyByteArray_Check(args)) {
dict = args;
}
while (--fmtcnt >= 0) {
if (*fmt != '%') {
if (--rescnt < 0) {
rescnt = fmtcnt + 100;
reslen += rescnt;
if (_PyBytes_Resize(&result, reslen))
return NULL;
res = PyBytes_AS_STRING(result)
+ reslen - rescnt;
--rescnt;
}
*res++ = *fmt++;
}
else {
/* Got a format specifier */
int flags = 0;
Py_ssize_t width = -1;
int prec = -1;
int c = '\0';
int fill;
int isnumok;
PyObject *v = NULL;
PyObject *temp = NULL;
Py_buffer buf;
char *pbuf;
int sign;
Py_ssize_t len;
char formatbuf[FORMATBUFLEN];
/* For format{int,char}() */
buf.obj = NULL;
fmt++;
if (*fmt == '(') {
char *keystart;
Py_ssize_t keylen;
PyObject *key;
int pcount = 1;
if (dict == NULL) {
PyErr_SetString(PyExc_TypeError,
"format requires a mapping");
goto error;
}
++fmt;
--fmtcnt;
keystart = fmt;
/* Skip over balanced parentheses */
while (pcount > 0 && --fmtcnt >= 0) {
if (*fmt == ')')
--pcount;
else if (*fmt == '(')
++pcount;
fmt++;
}
keylen = fmt - keystart - 1;
if (fmtcnt < 0 || pcount > 0) {
PyErr_SetString(PyExc_ValueError,
"incomplete format key");
goto error;
}
key = PyBytes_FromStringAndSize(keystart,
keylen);
if (key == NULL)
goto error;
if (args_owned) {
Py_DECREF(args);
args_owned = 0;
}
args = PyObject_GetItem(dict, key);
Py_DECREF(key);
if (args == NULL) {
goto error;
}
args_owned = 1;
arglen = -1;
argidx = -2;
}
while (--fmtcnt >= 0) {
switch (c = *fmt++) {
case '-': flags |= F_LJUST; continue;
case '+': flags |= F_SIGN; continue;
case ' ': flags |= F_BLANK; continue;
case '#': flags |= F_ALT; continue;
case '0': flags |= F_ZERO; continue;
}
break;
}
if (c == '*') {
v = getnextarg(args, arglen, &argidx);
if (v == NULL)
goto error;
if (!PyLong_Check(v)) {
PyErr_SetString(PyExc_TypeError,
"* wants int");
goto error;
}
width = PyLong_AsSsize_t(v);
if (width == -1 && PyErr_Occurred())
goto error;
if (width < 0) {
flags |= F_LJUST;
width = -width;
}
if (--fmtcnt >= 0)
c = *fmt++;
}
else if (c >= 0 && isdigit(c)) {
width = c - '0';
while (--fmtcnt >= 0) {
c = Py_CHARMASK(*fmt++);
if (!isdigit(c))
break;
if (width > (PY_SSIZE_T_MAX - ((int)c - '0')) / 10) {
PyErr_SetString(
PyExc_ValueError,
"width too big");
goto error;
}
width = width*10 + (c - '0');
}
}
if (c == '.') {
prec = 0;
if (--fmtcnt >= 0)
c = *fmt++;
if (c == '*') {
v = getnextarg(args, arglen, &argidx);
if (v == NULL)
goto error;
if (!PyLong_Check(v)) {
PyErr_SetString(
PyExc_TypeError,
"* wants int");
goto error;
}
prec = PyLong_AsSsize_t(v);
if (prec == -1 && PyErr_Occurred())
goto error;
if (prec < 0)
prec = 0;
if (--fmtcnt >= 0)
c = *fmt++;
}
else if (c >= 0 && isdigit(c)) {
prec = c - '0';
while (--fmtcnt >= 0) {
c = Py_CHARMASK(*fmt++);
if (!isdigit(c))
break;
if (prec > (INT_MAX - ((int)c - '0')) / 10) {
PyErr_SetString(
PyExc_ValueError,
"prec too big");
goto error;
}
prec = prec*10 + (c - '0');
}
}
} /* prec */
if (fmtcnt >= 0) {
if (c == 'h' || c == 'l' || c == 'L') {
if (--fmtcnt >= 0)
c = *fmt++;
}
}
if (fmtcnt < 0) {
PyErr_SetString(PyExc_ValueError,
"incomplete format");
goto error;
}
if (c != '%') {
v = getnextarg(args, arglen, &argidx);
if (v == NULL)
goto error;
}
sign = 0;
fill = ' ';
switch (c) {
case '%':
pbuf = "%";
len = 1;
break;
case 'a':
temp = PyObject_Repr(v);
if (temp == NULL)
goto error;
repr = PyUnicode_AsEncodedObject(temp, "ascii", "backslashreplace");
if (repr == NULL) {
Py_DECREF(temp);
goto error;
}
if (_getbuffer(repr, &buf) < 0) {
temp = format_obj(repr);
if (temp == NULL) {
Py_DECREF(repr);
goto error;
}
Py_DECREF(repr);
repr = temp;
}
pbuf = PyBytes_AS_STRING(repr);
len = PyBytes_GET_SIZE(repr);
Py_DECREF(repr);
if (prec >= 0 && len > prec)
len = prec;
break;
case 's':
// %s is only for 2/3 code; 3 only code should use %b
case 'b':
temp = format_obj(v);
if (temp == NULL)
goto error;
pbuf = PyBytes_AS_STRING(temp);
len = PyBytes_GET_SIZE(temp);
if (prec >= 0 && len > prec)
len = prec;
break;
case 'i':
case 'd':
case 'u':
case 'o':
case 'x':
case 'X':
if (c == 'i')
c = 'd';
isnumok = 0;
if (PyNumber_Check(v)) {
PyObject *iobj=NULL;
if ((PyLong_Check(v))) {
iobj = v;
Py_INCREF(iobj);
}
else {
iobj = PyNumber_Long(v);
}
if (iobj!=NULL) {
if (PyLong_Check(iobj)) {
int ilen;
isnumok = 1;
temp = format_long(iobj, flags, prec, c,
&pbuf, &ilen);
Py_DECREF(iobj);
len = ilen;
if (!temp)
goto error;
sign = 1;
}
else {
Py_DECREF(iobj);
}
}
}
if (!isnumok) {
PyErr_Format(PyExc_TypeError,
"%%%c format: a number is required, "
"not %.200s", c, Py_TYPE(v)->tp_name);
goto error;
}
if (flags & F_ZERO)
fill = '0';
break;
case 'e':
case 'E':
case 'f':
case 'F':
case 'g':
case 'G':
temp = formatfloat(v, flags, prec, c);
if (temp == NULL)
goto error;
pbuf = PyBytes_AS_STRING(temp);
len = PyBytes_GET_SIZE(temp);
sign = 1;
if (flags & F_ZERO)
fill = '0';
break;
case 'c':
pbuf = formatbuf;
len = formatchar(pbuf, sizeof(formatbuf), v);
if (len < 0)
goto error;
break;
default:
PyErr_Format(PyExc_ValueError,
"unsupported format character '%c' (0x%x) "
"at index %zd",
c, c,
(Py_ssize_t)(fmt - 1 -
PyBytes_AsString(format)));
goto error;
}
if (sign) {
if (*pbuf == '-' || *pbuf == '+') {
sign = *pbuf++;
len--;
}
else if (flags & F_SIGN)
sign = '+';
else if (flags & F_BLANK)
sign = ' ';
else
sign = 0;
}
if (width < len)
width = len;
if (rescnt - (sign != 0) < width) {
reslen -= rescnt;
rescnt = width + fmtcnt + 100;
reslen += rescnt;
if (reslen < 0) {
Py_DECREF(result);
PyBuffer_Release(&buf);
Py_XDECREF(temp);
return PyErr_NoMemory();
}
if (_PyBytes_Resize(&result, reslen)) {
PyBuffer_Release(&buf);
Py_XDECREF(temp);
return NULL;
}
res = PyBytes_AS_STRING(result)
+ reslen - rescnt;
}
if (sign) {
if (fill != ' ')
*res++ = sign;
rescnt--;
if (width > len)
width--;
}
if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
assert(pbuf[0] == '0');
assert(pbuf[1] == c);
if (fill != ' ') {
*res++ = *pbuf++;
*res++ = *pbuf++;
}
rescnt -= 2;
width -= 2;
if (width < 0)
width = 0;
len -= 2;
}
if (width > len && !(flags & F_LJUST)) {
do {
--rescnt;
*res++ = fill;
} while (--width > len);
}
if (fill == ' ') {
if (sign)
*res++ = sign;
if ((flags & F_ALT) &&
(c == 'x' || c == 'X')) {
assert(pbuf[0] == '0');
assert(pbuf[1] == c);
*res++ = *pbuf++;
*res++ = *pbuf++;
}
}
Py_MEMCPY(res, pbuf, len);
res += len;
rescnt -= len;
while (--width >= len) {
--rescnt;
*res++ = ' ';
}
if (dict && (argidx < arglen) && c != '%') {
PyErr_SetString(PyExc_TypeError,
"not all arguments converted during bytes formatting");
PyBuffer_Release(&buf);
Py_XDECREF(temp);
goto error;
}
PyBuffer_Release(&buf);
Py_XDECREF(temp);
} /* '%' */
} /* until end */
if (argidx < arglen && !dict) {
PyErr_SetString(PyExc_TypeError,
"not all arguments converted during bytes formatting");
goto error;
}
if (args_owned) {
Py_DECREF(args);
}
if (_PyBytes_Resize(&result, reslen - rescnt))
return NULL;
return result;
error:
Py_DECREF(result);
if (args_owned) {
Py_DECREF(args);
}
return NULL;
}
/* =-= */
static void
bytes_dealloc(PyObject *op)
{
@ -2995,6 +3623,21 @@ bytes_methods[] = {
{NULL, NULL} /* sentinel */
};
static PyObject *
bytes_mod(PyObject *v, PyObject *w)
{
if (!PyBytes_Check(v))
Py_RETURN_NOTIMPLEMENTED;
return _PyBytes_Format(v, w);
}
static PyNumberMethods bytes_as_number = {
0, /*nb_add*/
0, /*nb_subtract*/
0, /*nb_multiply*/
bytes_mod, /*nb_remainder*/
};
static PyObject *
str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
@ -3286,7 +3929,7 @@ PyTypeObject PyBytes_Type = {
0, /* tp_setattr */
0, /* tp_reserved */
(reprfunc)bytes_repr, /* tp_repr */
0, /* tp_as_number */
&bytes_as_number, /* tp_as_number */
&bytes_as_sequence, /* tp_as_sequence */
&bytes_as_mapping, /* tp_as_mapping */
(hashfunc)bytes_hash, /* tp_hash */
@ -3377,14 +4020,14 @@ PyBytes_ConcatAndDel(PyObject **pv, PyObject *w)
}
/* The following function breaks the notion that strings are immutable:
it changes the size of a string. We get away with this only if there
/* The following function breaks the notion that bytes are immutable:
it changes the size of a bytes object. We get away with this only if there
is only one module referencing the object. You can also think of it
as creating a new string object and destroying the old one, only
more efficiently. In any case, don't use this if the string may
as creating a new bytes object and destroying the old one, only
more efficiently. In any case, don't use this if the bytes object may
already be known to some other part of the code...
Note that if there's not enough memory to resize the string, the original
string object at *pv is deallocated, *pv is set to NULL, an "out of
Note that if there's not enough memory to resize the bytes object, the
original bytes object at *pv is deallocated, *pv is set to NULL, an "out of
memory" exception is set, and -1 is returned. Else (on success) 0 is
returned, and the value in *pv may or may not be the same as on input.
As always, an extra byte is allocated for a trailing \0 byte (newsize

View file

@ -13893,8 +13893,8 @@ formatfloat(PyObject *v, struct unicode_format_arg_t *arg,
* CAUTION: o, x and X conversions on regular ints can never
* produce a '-' sign, but can for Python's unbounded ints.
*/
static PyObject*
formatlong(PyObject *val, struct unicode_format_arg_t *arg)
PyObject *
_PyUnicode_FormatLong(PyObject *val, int alt, int prec, int type)
{
PyObject *result = NULL;
char *buf;
@ -13904,8 +13904,6 @@ formatlong(PyObject *val, struct unicode_format_arg_t *arg)
Py_ssize_t llen;
int numdigits; /* len == numnondigits + numdigits */
int numnondigits = 0;
int prec = arg->prec;
int type = arg->ch;
/* Avoid exceeding SSIZE_T_MAX */
if (prec > INT_MAX-3) {
@ -13954,7 +13952,7 @@ formatlong(PyObject *val, struct unicode_format_arg_t *arg)
if (llen > INT_MAX) {
Py_DECREF(result);
PyErr_SetString(PyExc_ValueError,
"string too large in _PyBytes_FormatLong");
"string too large in _PyUnicode_FormatLong");
return NULL;
}
len = (int)llen;
@ -13964,7 +13962,7 @@ formatlong(PyObject *val, struct unicode_format_arg_t *arg)
assert(numdigits > 0);
/* Get rid of base marker unless F_ALT */
if (((arg->flags & F_ALT) == 0 &&
if (((alt) == 0 &&
(type == 'o' || type == 'x' || type == 'X'))) {
assert(buf[sign] == '0');
assert(buf[sign+1] == 'x' || buf[sign+1] == 'X' ||
@ -14099,7 +14097,7 @@ mainformatlong(PyObject *v,
return 1;
}
res = formatlong(iobj, arg);
res = _PyUnicode_FormatLong(iobj, arg->flags & F_ALT, arg->prec, type);
Py_DECREF(iobj);
if (res == NULL)
return -1;