mirror of
https://github.com/python/cpython.git
synced 2025-11-01 18:51:43 +00:00
parse(), _parseheaders(), _parsebody(): A fix for SF bug #633527,
where in lax parsing, the first non-header line after a header block (i.e. the first line that contains no colon and is not a continuation) can be treated as the first body line, even without the RFC-mandated blank line separator. rfc822 had this behavior, and I vaguely remember problems with this, but can't remember details. In any event, all the tests still pass, so I guess we'll find out. ;/ This patch works by returning the non-header, non-continuation line from _parseheaders() and using that as the first body line prepended to fp.read() if given. It's usually None. We use this approach instead of trying to seek/tell the file-like object.
This commit is contained in:
parent
a0a00761a5
commit
da2525ed2a
1 changed file with 22 additions and 9 deletions
|
|
@ -59,9 +59,9 @@ class Parser:
|
||||||
meaning it parses the entire contents of the file.
|
meaning it parses the entire contents of the file.
|
||||||
"""
|
"""
|
||||||
root = self._class()
|
root = self._class()
|
||||||
self._parseheaders(root, fp)
|
firstbodyline = self._parseheaders(root, fp)
|
||||||
if not headersonly:
|
if not headersonly:
|
||||||
self._parsebody(root, fp)
|
self._parsebody(root, fp, firstbodyline)
|
||||||
return root
|
return root
|
||||||
|
|
||||||
def parsestr(self, text, headersonly=False):
|
def parsestr(self, text, headersonly=False):
|
||||||
|
|
@ -80,6 +80,7 @@ class Parser:
|
||||||
lastheader = ''
|
lastheader = ''
|
||||||
lastvalue = []
|
lastvalue = []
|
||||||
lineno = 0
|
lineno = 0
|
||||||
|
firstbodyline = None
|
||||||
while True:
|
while True:
|
||||||
# Don't strip the line before we test for the end condition,
|
# Don't strip the line before we test for the end condition,
|
||||||
# because whitespace-only header lines are RFC compliant
|
# because whitespace-only header lines are RFC compliant
|
||||||
|
|
@ -120,13 +121,16 @@ class Parser:
|
||||||
if i < 0:
|
if i < 0:
|
||||||
if self._strict:
|
if self._strict:
|
||||||
raise Errors.HeaderParseError(
|
raise Errors.HeaderParseError(
|
||||||
"Not a header, not a continuation: ``%s''"%line)
|
"Not a header, not a continuation: ``%s''" % line)
|
||||||
elif lineno == 1 and line.startswith('--'):
|
elif lineno == 1 and line.startswith('--'):
|
||||||
# allow through duplicate boundary tags.
|
# allow through duplicate boundary tags.
|
||||||
continue
|
continue
|
||||||
else:
|
else:
|
||||||
raise Errors.HeaderParseError(
|
# There was no separating blank line as mandated by RFC
|
||||||
"Not a header, not a continuation: ``%s''"%line)
|
# 2822, but we're in non-strict mode. So just offer up
|
||||||
|
# this current line as the first body line.
|
||||||
|
firstbodyline = line
|
||||||
|
break
|
||||||
if lastheader:
|
if lastheader:
|
||||||
container[lastheader] = NL.join(lastvalue)
|
container[lastheader] = NL.join(lastvalue)
|
||||||
lastheader = line[:i]
|
lastheader = line[:i]
|
||||||
|
|
@ -134,8 +138,9 @@ class Parser:
|
||||||
# Make sure we retain the last header
|
# Make sure we retain the last header
|
||||||
if lastheader:
|
if lastheader:
|
||||||
container[lastheader] = NL.join(lastvalue)
|
container[lastheader] = NL.join(lastvalue)
|
||||||
|
return firstbodyline
|
||||||
|
|
||||||
def _parsebody(self, container, fp):
|
def _parsebody(self, container, fp, firstbodyline=None):
|
||||||
# Parse the body, but first split the payload on the content-type
|
# Parse the body, but first split the payload on the content-type
|
||||||
# boundary if present.
|
# boundary if present.
|
||||||
boundary = container.get_boundary()
|
boundary = container.get_boundary()
|
||||||
|
|
@ -152,6 +157,8 @@ class Parser:
|
||||||
# boundary.
|
# boundary.
|
||||||
separator = '--' + boundary
|
separator = '--' + boundary
|
||||||
payload = fp.read()
|
payload = fp.read()
|
||||||
|
if firstbodyline is not None:
|
||||||
|
payload = firstbodyline + '\n' + payload
|
||||||
# We use an RE here because boundaries can have trailing
|
# We use an RE here because boundaries can have trailing
|
||||||
# whitespace.
|
# whitespace.
|
||||||
mo = re.search(
|
mo = re.search(
|
||||||
|
|
@ -260,7 +267,10 @@ class Parser:
|
||||||
self._parsebody(msg, fp)
|
self._parsebody(msg, fp)
|
||||||
container.attach(msg)
|
container.attach(msg)
|
||||||
else:
|
else:
|
||||||
container.set_payload(fp.read())
|
text = fp.read()
|
||||||
|
if firstbodyline is not None:
|
||||||
|
text = firstbodyline + '\n' + text
|
||||||
|
container.set_payload(text)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -274,6 +284,9 @@ class HeaderParser(Parser):
|
||||||
Parsing with this subclass can be considerably faster if all you're
|
Parsing with this subclass can be considerably faster if all you're
|
||||||
interested in is the message headers.
|
interested in is the message headers.
|
||||||
"""
|
"""
|
||||||
def _parsebody(self, container, fp):
|
def _parsebody(self, container, fp, firstbodyline=None):
|
||||||
# Consume but do not parse, the body
|
# Consume but do not parse, the body
|
||||||
container.set_payload(fp.read())
|
text = fp.read()
|
||||||
|
if firstbodyline is not None:
|
||||||
|
text = firstbodyline + '\n' + text
|
||||||
|
container.set_payload(text)
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue