Patch #462190, patch #464070: Support quoted printable in the binascii module.

Decode and encode underscores for header style encoding. Fixes bug #463996.
This commit is contained in:
Martin v. Löwis 2001-09-30 20:32:11 +00:00
parent 5f12d755a8
commit 16dc7f44b1
6 changed files with 391 additions and 18 deletions

View file

@ -42,6 +42,15 @@
** does make the performance sub-optimal. Oh well, too bad...
**
** Jack Jansen, CWI, July 1995.
**
** Added support for quoted-printable encoding, based on RFC 1521 et al.
** Quoted-printable encoding specifies that non-printable characters (anything
** below 32 and above 126) be encoded as =XX, where XX is the hexadecimal value
** of the character. It also specifies some other behavior that lets 8-bit data
** travel in a mail message with little difficulty (maximum line sizes,
** protecting some cases of whitespace, etc.).
**
** Brandon Long, September 2001.
*/
@ -971,6 +980,289 @@ static char doc_unhexlify[] =
hexstr must contain an even number of hex digits (upper or lower case).\n\
This function is also available as \"unhexlify()\"";
/* Lookup table indexed by a 7-bit ASCII code.  Entries for '0'-'9' hold
 * 0-9, entries for 'A'-'F' and 'a'-'f' hold 10-15, and every other entry
 * is -1.  Only indices 0..127 exist. */
static int table_hex[128] = {
-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1, -1,-1,-1,-1,
-1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,
-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
-1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,
-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1
};
/* Look up the hex digit value of c in table_hex.  The macro performs no
 * range check and the table has 128 entries, so the caller must make sure
 * c is below 128 before using it. */
#define hexval(c) table_hex[(unsigned int)(c)]
/* Line length threshold the quoted-printable encoder compares against
 * when deciding whether to insert a soft line break. */
#define MAXLINESIZE 76
/* Docstring registered for a2b_qp in the module method table. */
static char doc_a2b_qp[] = "Decode a string of qp-encoded data";
/* Return nonzero when c is an ASCII hexadecimal digit (0-9, A-F, a-f). */
static int
qp_is_hexdigit(unsigned char c)
{
	return (c >= '0' && c <= '9') ||
	       (c >= 'A' && c <= 'F') ||
	       (c >= 'a' && c <= 'f');
}

/* a2b_qp(data, header=0): decode quoted-printable data.
 *
 * Arguments (parsed with "s#|i"):
 *   data   - the encoded bytes.
 *   header - when nonzero, '_' in the input is decoded as a space.
 *
 * Returns a new string object holding the decoded bytes, or NULL with an
 * exception set on argument or allocation failure.
 *
 * Decoding rules, in the order they are tried after an '=':
 *   - '=' as the very last byte is dropped;
 *   - '=' followed by CR, LF, space or tab is a soft line break and
 *     everything up to and including the next LF is skipped;
 *   - "==" yields a single '=';
 *   - '=' followed by two hex digits yields the corresponding byte;
 *   - any other '=' is copied through literally.
 */
static PyObject*
binascii_a2b_qp(PyObject *self, PyObject *args, PyObject *kwargs)
{
	unsigned int in, out;
	unsigned char ch;	/* unsigned: holds values up to 0xFF */
	unsigned char *data, *odata;
	unsigned int datalen = 0;
	PyObject *rv;
	static char *kwlist[] = {"data", "header", NULL};
	int header = 0;

	if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|i", kwlist, &data,
	      &datalen, &header))
		return NULL;

	/* Every branch below consumes at least one input byte per output
	 * byte, so datalen bytes is always enough.  Ask for at least one
	 * byte: calloc() may return NULL for a zero-size request, which
	 * would otherwise be reported as an out-of-memory error for empty
	 * input. */
	odata = (unsigned char *) calloc(1, datalen ? datalen : 1);
	if (odata == NULL) {
		PyErr_NoMemory();
		return NULL;
	}

	in = out = 0;
	while (in < datalen) {
		if (data[in] == '=') {
			in++;
			if (in >= datalen) break;
			/* Soft line breaks */
			/* NOTE(review): after "= " or "=\t" the rest of the
			 * line is discarded even if it is not blank. */
			if ((data[in] == '\n') || (data[in] == '\r') ||
			    (data[in] == ' ') || (data[in] == '\t')) {
				if (data[in] != '\n') {
					while (in < datalen && data[in] != '\n') in++;
				}
				if (in < datalen) in++;
			}
			else if (data[in] == '=') {
				/* broken case from broken python qp */
				odata[out++] = '=';
				in++;
			}
			else if ((in + 1 < datalen) &&
				 qp_is_hexdigit(data[in]) &&
				 qp_is_hexdigit(data[in+1])) {
				/* Both digits are verified to lie inside the
				 * buffer and to be hex digits, so hexval() is
				 * indexed safely and never yields -1. */
				ch = (unsigned char)(hexval(data[in]) << 4);
				in++;
				ch |= (unsigned char)hexval(data[in]);
				in++;
				odata[out++] = ch;
			}
			else {
				/* Not a valid escape: keep the '=' and let the
				 * following byte be handled on the next pass. */
				odata[out++] = '=';
			}
		}
		else if (header && data[in] == '_') {
			odata[out++] = ' ';
			in++;
		}
		else {
			odata[out++] = data[in++];
		}
	}

	/* rv is NULL on failure; the scratch buffer is released either way. */
	rv = PyString_FromStringAndSize((char *)odata, (int)out);
	free(odata);
	return rv;
}
/* Store the two uppercase hexadecimal digits of ch in s[0] (high nibble)
 * and s[1] (low nibble).  No terminator is written.  Always returns 0. */
static int
to_hex (unsigned char ch, unsigned char *s)
{
	static const char digits[] = "0123456789ABCDEF";

	s[0] = digits[(ch >> 4) & 0x0F];
	s[1] = digits[ch & 0x0F];
	return 0;
}
/* Docstring registered for b2a_qp in the module method table.  The
 * keyword names listed here match the names the function parses. */
static char doc_b2a_qp[] =
"b2a_qp(data, quotetabs=0, istext=1, header=0) -> s; \n\
Encode a string using quoted-printable encoding. \n\
\n\
On encoding, when istext is set, newlines are not encoded, and white \n\
space at end of lines is. When istext is not set, \\r and \\n (CR/LF) are \n\
both encoded. When quotetabs is set, space and tabs are encoded.";
/* Decide whether the byte at data[in] must be written as an =XX escape.
 *
 * linelen is the length of the current output line so far.  The sizing
 * pass and the emitting pass of binascii_b2a_qp() both call this, so they
 * always agree on how much output a byte produces.
 *
 * NOTE(review): the '.' rule fires when linelen == 1, i.e. for a dot in
 * the second column; confirm whether the first column was intended.
 */
static int
qp_must_quote(const unsigned char *data, unsigned int datalen,
	      unsigned int in, unsigned int linelen,
	      int quotetabs, int istext, int header)
{
	unsigned char c = data[in];

	if (c > 126 || c == '=')
		return 1;
	if (header && c == '_')
		return 1;
	if (c == '.' && linelen == 1)
		return 1;
	/* Line ends are kept literal in text mode, escaped otherwise. */
	if (c == '\r' || c == '\n')
		return !istext;
	/* Blank at the very end of the data. */
	if ((c == '\t' || c == ' ') && (in + 1 == datalen))
		return 1;
	/* Remaining control characters are always escaped; space and tab
	 * only when quotetabs is set. */
	if (c < 33)
		return quotetabs || (c != '\t' && c != ' ');
	return 0;
}

/* XXX: This is ridiculously complicated to be backward compatible
 * (mostly) with the quopri module.  It doesn't re-create the quopri
 * module bug where text ending in CRLF has the CR encoded */

/* b2a_qp(data, quotetabs=0, istext=1, header=0): quoted-printable encode.
 *
 * Arguments (parsed with "s#|iii"):
 *   data      - bytes to encode.
 *   quotetabs - when nonzero, space and tab are escaped as well.
 *   istext    - when nonzero, line ends are emitted literally (and
 *               normalised, see below); when zero CR and LF are escaped.
 *   header    - when nonzero, '_' is escaped and an unescaped space is
 *               written as '_'.
 *
 * Returns a new string object with the encoded data, or NULL with an
 * exception set on argument or allocation failure.
 *
 * The work is done in two passes over the input: the first computes the
 * output size, the second writes into a buffer of that size.
 */
static PyObject*
binascii_b2a_qp (PyObject *self, PyObject *args, PyObject *kwargs)
{
	unsigned int in, out;
	unsigned char *data, *odata;
	unsigned int datalen = 0, odatalen = 0;
	PyObject *rv;
	unsigned int linelen = 0;
	static char *kwlist[] = {"data", "quotetabs", "istext", "header", NULL};
	int istext = 1;
	int quotetabs = 0;
	int header = 0;
	unsigned char ch;
	int crlf = 0;
	unsigned char *p;

	if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|iii", kwlist, &data,
	      &datalen, &quotetabs, &istext, &header))
		return NULL;

	/* See if this string is using CRLF line ends */
	/* XXX: this function has the side effect of converting all of
	 * the end of lines to be the same depending on this detection
	 * here */
	/* memchr(), not strchr(): the data is length-counted and may hold
	 * NUL bytes ahead of the first newline. */
	p = (unsigned char *) memchr(data, '\n', datalen);
	if ((p != NULL) && (p > data) && (*(p-1) == '\r'))
		crlf = 1;

	/* First, scan to see how many characters need to be encoded */
	in = 0;
	while (in < datalen) {
		if (qp_must_quote(data, datalen, in, linelen,
				  quotetabs, istext, header)) {
			if ((linelen + 3) >= MAXLINESIZE) {
				/* room for the soft break: "=\r\n" or "=\n" */
				linelen = 0;
				odatalen += crlf ? 3 : 2;
			}
			linelen += 3;
			odatalen += 3;
			in++;
		}
		else if (istext &&
			 ((data[in] == '\n') ||
			  ((in+1 < datalen) && (data[in] == '\r') &&
			   (data[in+1] == '\n')))) {
			linelen = 0;
			/* Protect against whitespace on end of line:
			 * the blank grows from one byte to three. */
			if (in && ((data[in-1] == ' ') || (data[in-1] == '\t')))
				odatalen += 2;
			odatalen += crlf ? 2 : 1;
			if (data[in] == '\r')
				in += 2;
			else
				in++;
		}
		else {
			if ((in + 1 != datalen) &&
			    (data[in+1] != '\n') &&
			    (linelen + 1) >= MAXLINESIZE) {
				linelen = 0;
				odatalen += crlf ? 3 : 2;
			}
			linelen++;
			odatalen++;
			in++;
		}
	}

	/* Ask for at least one byte: calloc() may return NULL for a
	 * zero-size request, which would otherwise be reported as an
	 * out-of-memory error for empty input. */
	odata = (unsigned char *) calloc(1, odatalen ? odatalen : 1);
	if (odata == NULL) {
		PyErr_NoMemory();
		return NULL;
	}

	/* Second pass: same decisions as above, now writing the output. */
	in = out = linelen = 0;
	while (in < datalen) {
		if (qp_must_quote(data, datalen, in, linelen,
				  quotetabs, istext, header)) {
			if ((linelen + 3) >= MAXLINESIZE) {
				odata[out++] = '=';
				if (crlf) odata[out++] = '\r';
				odata[out++] = '\n';
				linelen = 0;
			}
			odata[out++] = '=';
			to_hex(data[in], &odata[out]);
			out += 2;
			in++;
			linelen += 3;
		}
		else if (istext &&
			 ((data[in] == '\n') ||
			  ((in+1 < datalen) && (data[in] == '\r') &&
			   (data[in+1] == '\n')))) {
			linelen = 0;
			/* Protect against whitespace on end of line:
			 * rewrite the blank just emitted as =XX. */
			if (out && ((odata[out-1] == ' ') || (odata[out-1] == '\t'))) {
				ch = odata[out-1];
				odata[out-1] = '=';
				to_hex(ch, &odata[out]);
				out += 2;
			}
			if (crlf) odata[out++] = '\r';
			odata[out++] = '\n';
			if (data[in] == '\r')
				in += 2;
			else
				in++;
		}
		else {
			if ((in + 1 != datalen) &&
			    (data[in+1] != '\n') &&
			    (linelen + 1) >= MAXLINESIZE) {
				odata[out++] = '=';
				if (crlf) odata[out++] = '\r';
				odata[out++] = '\n';
				linelen = 0;
			}
			linelen++;
			if (header && data[in] == ' ') {
				odata[out++] = '_';
				in++;
			}
			else {
				odata[out++] = data[in++];
			}
		}
	}

	/* rv is NULL on failure; the scratch buffer is released either way. */
	rv = PyString_FromStringAndSize((char *)odata, (int)out);
	free(odata);
	return rv;
}
/* List of functions defined in the module */
@ -990,6 +1282,10 @@ static struct PyMethodDef binascii_module_methods[] = {
doc_rledecode_hqx},
{"crc_hqx", binascii_crc_hqx, METH_VARARGS, doc_crc_hqx},
{"crc32", binascii_crc32, METH_VARARGS, doc_crc32},
{"a2b_qp", (PyCFunction)binascii_a2b_qp, METH_VARARGS | METH_KEYWORDS,
doc_a2b_qp},
{"b2a_qp", (PyCFunction)binascii_b2a_qp, METH_VARARGS | METH_KEYWORDS,
doc_b2a_qp},
{NULL, NULL} /* sentinel */
};