mirror of
https://github.com/python/cpython.git
synced 2025-11-25 04:34:37 +00:00
Decode and encode underscores for header style encoding. Fixes bug #463996.
This commit is contained in:
parent
5f12d755a8
commit
16dc7f44b1
6 changed files with 391 additions and 18 deletions
|
|
@ -42,6 +42,15 @@
|
|||
** does make the performance sub-optimal. Oh well, too bad...
|
||||
**
|
||||
** Jack Jansen, CWI, July 1995.
|
||||
**
|
||||
** Added support for quoted-printable encoding, based on rfc 1521 et al
|
||||
** Quoted-printable encoding specifies that non-printable characters (anything
|
||||
** below 32 and above 126) be encoded as =XX where XX is the hexadecimal value
|
||||
** of the character. It also specifies some other behavior to enable 8bit data
|
||||
** in a mail message with little difficulty (maximum line sizes, protecting
|
||||
** some cases of whitespace, etc).
|
||||
**
|
||||
** Brandon Long, September 2001.
|
||||
*/
|
||||
|
||||
|
||||
|
|
@ -971,6 +980,289 @@ static char doc_unhexlify[] =
|
|||
hexstr must contain an even number of hex digits (upper or lower case).\n\
|
||||
This function is also available as \"unhexlify()\"";
|
||||
|
||||
/* Maps a 7-bit ASCII code to the value of the hex digit it denotes
 * ('0'-'9', 'A'-'F', 'a'-'f' -> 0..15) and to -1 for every other code.
 * The table has exactly 128 entries, so it must never be indexed with a
 * byte value of 128 or above. */
static const int table_hex[128] = {
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
     0, 1, 2, 3,  4, 5, 6, 7,  8, 9,-1,-1, -1,-1,-1,-1,
    -1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1
};

/* Value of hex digit c, or -1 if c is not a hex digit.  c must be below
 * 128 (see table_hex).  The expansion is parenthesized so it composes
 * safely inside larger expressions. */
#define hexval(c) (table_hex[(unsigned int)(c)])
|
||||
|
||||
/* Line length limit used by b2a_qp: a soft line break is emitted once
 * adding the next item would bring the current output line to this
 * many characters. */
#define MAXLINESIZE 76
|
||||
|
||||
/* Docstring for a2b_qp(); lists the keyword arguments the function parses. */
static char doc_a2b_qp[] =
"a2b_qp(data, header=0) -> s; \n\
Decode a string of qp-encoded data. \n\
\n\
When header is set, underscores are decoded as spaces.";
|
||||
|
||||
/* a2b_qp(data, header=0): decode quoted-printable data.
 *
 * Decoding rules, in the order they are tried:
 *   - "=" followed by whitespace or a line end is a soft line break and
 *     produces no output (everything up to and including the next '\n'
 *     is skipped);
 *   - "==" produces a single '=';
 *   - "=XX" with two hex digits produces the byte 0xXX;
 *   - any other '=' is copied through literally;
 *   - when header is true, '_' is decoded as a space;
 *   - every other byte is copied unchanged.
 *
 * Returns a new string object, or NULL with an exception set when the
 * arguments cannot be parsed or memory cannot be allocated. */
static PyObject*
binascii_a2b_qp(PyObject *self, PyObject *args, PyObject *kwargs)
{
    unsigned int in, out;
    unsigned char ch;
    unsigned char *data, *odata;
    unsigned int datalen = 0;
    PyObject *rv;
    static char *kwlist[] = {"data", "header", NULL};
    int header = 0;

    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|i", kwlist, &data,
                                     &datalen, &header))
        return NULL;

    /* Every output byte consumes at least one input byte, so a buffer the
     * size of the input is always enough.  Ask for at least one byte:
     * calloc(1, 0) may legally return NULL, which would otherwise be
     * reported as an out-of-memory error for empty input. */
    odata = calloc(1, datalen ? datalen : 1);
    if (odata == NULL) {
        PyErr_NoMemory();
        return NULL;
    }

    in = out = 0;
    while (in < datalen) {
        if (data[in] == '=') {
            in++;
            if (in >= datalen)
                break;
            /* Soft line breaks */
            if ((data[in] == '\n') || (data[in] == '\r') ||
                (data[in] == ' ') || (data[in] == '\t')) {
                if (data[in] != '\n') {
                    while (in < datalen && data[in] != '\n')
                        in++;
                }
                if (in < datalen)
                    in++;
            }
            else if (data[in] == '=') {
                /* broken case from broken python qp */
                odata[out++] = '=';
                in++;
            }
            /* Only look at the second digit when it is really part of
             * the input, instead of relying on a trailing NUL. */
            else if ((in + 1 < datalen) &&
                     ((data[in] >= 'A' && data[in] <= 'F') ||
                      (data[in] >= 'a' && data[in] <= 'f') ||
                      (data[in] >= '0' && data[in] <= '9')) &&
                     ((data[in+1] >= 'A' && data[in+1] <= 'F') ||
                      (data[in+1] >= 'a' && data[in+1] <= 'f') ||
                      (data[in+1] >= '0' && data[in+1] <= '9'))) {
                /* hexval */
                ch = (unsigned char)(hexval(data[in]) << 4);
                in++;
                ch |= (unsigned char)hexval(data[in]);
                in++;
                odata[out++] = ch;
            }
            else {
                /* Not a valid escape: keep the '=' and let the next
                 * iteration handle the byte that followed it. */
                odata[out++] = '=';
            }
        }
        else if (header && data[in] == '_') {
            odata[out++] = ' ';
            in++;
        }
        else {
            odata[out++] = data[in++];
        }
    }

    /* rv is NULL on failure; the scratch buffer is released either way. */
    rv = PyString_FromStringAndSize((char *)odata, out);
    free(odata);
    return rv;
}
|
||||
|
||||
/* Write the two uppercase hexadecimal digits of ch to s[0] (high nibble)
 * and s[1] (low nibble).  No terminator is written.  Always returns 0. */
static int
to_hex (unsigned char ch, unsigned char *s)
{
    static const char digits[] = "0123456789ABCDEF";

    s[0] = digits[(ch >> 4) & 0x0F];
    s[1] = digits[ch & 0x0F];
    return 0;
}
|
||||
|
||||
/* Docstring for b2a_qp(); describes every keyword in the signature. */
static char doc_b2a_qp[] =
"b2a_qp(data, quotetabs=0, istext=1, header=0) -> s; \n\
Encode a string using quoted-printable encoding. \n\
\n\
On encoding, when istext is set, newlines are not encoded, and white \n\
space at end of lines is. When istext is not set, \\r and \\n (CR/LF) are \n\
both encoded. When quotetabs is set, space and tabs are encoded. When \n\
header is set, underscores are encoded and spaces that are not \n\
otherwise encoded are written as underscores.";
|
||||
|
||||
/* XXX: This is ridiculously complicated to be backward compatible
|
||||
* (mostly) with the quopri module. It doesn't re-create the quopri
|
||||
* module bug where text ending in CRLF has the CR encoded */
|
||||
static PyObject*
|
||||
binascii_b2a_qp (PyObject *self, PyObject *args, PyObject *kwargs)
|
||||
{
|
||||
unsigned int in, out;
|
||||
unsigned char *data, *odata;
|
||||
unsigned int datalen = 0, odatalen = 0;
|
||||
PyObject *rv;
|
||||
unsigned int linelen = 0;
|
||||
static char *kwlist[] = {"data", "quotetabs", "istext", "header", NULL};
|
||||
int istext = 1;
|
||||
int quotetabs = 0;
|
||||
int header = 0;
|
||||
unsigned char ch;
|
||||
int crlf = 0;
|
||||
unsigned char *p;
|
||||
|
||||
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|iii", kwlist, &data,
|
||||
&datalen, "etabs, &istext, &header))
|
||||
return NULL;
|
||||
|
||||
/* See if this string is using CRLF line ends */
|
||||
/* XXX: this function has the side effect of converting all of
|
||||
* the end of lines to be the same depending on this detection
|
||||
* here */
|
||||
p = strchr(data, '\n');
|
||||
if ((p != NULL) && (p > data) && (*(p-1) == '\r'))
|
||||
crlf = 1;
|
||||
|
||||
/* First, scan to see how many characters need to be encoded */
|
||||
in = 0;
|
||||
while (in < datalen) {
|
||||
if ((data[in] > 126) ||
|
||||
(data[in] == '=') ||
|
||||
(header && data[in] == '_') ||
|
||||
((data[in] == '.') && (linelen == 1)) ||
|
||||
(!istext && ((data[in] == '\r') || (data[in] == '\n'))) ||
|
||||
((data[in] == '\t' || data[in] == ' ') && (in + 1 == datalen)) ||
|
||||
((data[in] < 33) &&
|
||||
(data[in] != '\r') && (data[in] != '\n') &&
|
||||
(quotetabs && ((data[in] != '\t') || (data[in] != ' ')))))
|
||||
{
|
||||
if ((linelen + 3) >= MAXLINESIZE) {
|
||||
linelen = 0;
|
||||
if (crlf)
|
||||
odatalen += 3;
|
||||
else
|
||||
odatalen += 2;
|
||||
}
|
||||
linelen += 3;
|
||||
odatalen += 3;
|
||||
in++;
|
||||
}
|
||||
else {
|
||||
if (istext &&
|
||||
((data[in] == '\n') ||
|
||||
((in+1 < datalen) && (data[in] == '\r') &&
|
||||
(data[in+1] == '\n'))))
|
||||
{
|
||||
linelen = 0;
|
||||
/* Protect against whitespace on end of line */
|
||||
if (in && ((data[in-1] == ' ') || (data[in-1] == '\t')))
|
||||
odatalen += 2;
|
||||
if (crlf)
|
||||
odatalen += 2;
|
||||
else
|
||||
odatalen += 1;
|
||||
if (data[in] == '\r')
|
||||
in += 2;
|
||||
else
|
||||
in++;
|
||||
}
|
||||
else {
|
||||
if ((in + 1 != datalen) &&
|
||||
(data[in+1] != '\n') &&
|
||||
(linelen + 1) >= MAXLINESIZE) {
|
||||
linelen = 0;
|
||||
if (crlf)
|
||||
odatalen += 3;
|
||||
else
|
||||
odatalen += 2;
|
||||
}
|
||||
linelen++;
|
||||
odatalen++;
|
||||
in++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
odata = (char *) calloc(1, odatalen);
|
||||
|
||||
if (odata == NULL) {
|
||||
PyErr_NoMemory();
|
||||
return NULL;
|
||||
}
|
||||
|
||||
in = out = linelen = 0;
|
||||
while (in < datalen) {
|
||||
if ((data[in] > 126) ||
|
||||
(data[in] == '=') ||
|
||||
(header && data[in] == '_') ||
|
||||
((data[in] == '.') && (linelen == 1)) ||
|
||||
(!istext && ((data[in] == '\r') || (data[in] == '\n'))) ||
|
||||
((data[in] == '\t' || data[in] == ' ') && (in + 1 == datalen)) ||
|
||||
((data[in] < 33) &&
|
||||
(data[in] != '\r') && (data[in] != '\n') &&
|
||||
(quotetabs && ((data[in] != '\t') || (data[in] != ' ')))))
|
||||
{
|
||||
if ((linelen + 3 )>= MAXLINESIZE) {
|
||||
odata[out++] = '=';
|
||||
if (crlf) odata[out++] = '\r';
|
||||
odata[out++] = '\n';
|
||||
linelen = 0;
|
||||
}
|
||||
odata[out++] = '=';
|
||||
to_hex(data[in], &odata[out]);
|
||||
out += 2;
|
||||
in++;
|
||||
linelen += 3;
|
||||
}
|
||||
else {
|
||||
if (istext &&
|
||||
((data[in] == '\n') ||
|
||||
((in+1 < datalen) && (data[in] == '\r') &&
|
||||
(data[in+1] == '\n'))))
|
||||
{
|
||||
linelen = 0;
|
||||
/* Protect against whitespace on end of line */
|
||||
if (out && ((odata[out-1] == ' ') || (odata[out-1] == '\t'))) {
|
||||
ch = odata[out-1];
|
||||
odata[out-1] = '=';
|
||||
to_hex(ch, &odata[out]);
|
||||
out += 2;
|
||||
}
|
||||
|
||||
if (crlf) odata[out++] = '\r';
|
||||
odata[out++] = '\n';
|
||||
if (data[in] == '\r')
|
||||
in += 2;
|
||||
else
|
||||
in++;
|
||||
}
|
||||
else {
|
||||
if ((in + 1 != datalen) &&
|
||||
(data[in+1] != '\n') &&
|
||||
(linelen + 1) >= MAXLINESIZE) {
|
||||
odata[out++] = '=';
|
||||
if (crlf) odata[out++] = '\r';
|
||||
odata[out++] = '\n';
|
||||
linelen = 0;
|
||||
}
|
||||
linelen++;
|
||||
if (header && data[in] == ' ') {
|
||||
odata[out++] = '_';
|
||||
in++;
|
||||
}
|
||||
else {
|
||||
odata[out++] = data[in++];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if ((rv = PyString_FromStringAndSize(odata, out)) == NULL) {
|
||||
free (odata);
|
||||
return NULL;
|
||||
}
|
||||
free (odata);
|
||||
return rv;
|
||||
}
|
||||
|
||||
/* List of functions defined in the module */
|
||||
|
||||
|
|
@ -990,6 +1282,10 @@ static struct PyMethodDef binascii_module_methods[] = {
|
|||
doc_rledecode_hqx},
|
||||
{"crc_hqx", binascii_crc_hqx, METH_VARARGS, doc_crc_hqx},
|
||||
{"crc32", binascii_crc32, METH_VARARGS, doc_crc32},
|
||||
{"a2b_qp", (PyCFunction)binascii_a2b_qp, METH_VARARGS | METH_KEYWORDS,
|
||||
doc_a2b_qp},
|
||||
{"b2a_qp", (PyCFunction)binascii_b2a_qp, METH_VARARGS | METH_KEYWORDS,
|
||||
doc_b2a_qp},
|
||||
{NULL, NULL} /* sentinel */
|
||||
};
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue