mirror of
https://github.com/python/cpython.git
synced 2025-09-27 02:39:58 +00:00
Fix a bug where robotparser exhausts memory when the server responds
in HTTP/0.9, due to a mismatch between httplib.LineAndFileWrapper and urllib.addbase.
This commit is contained in:
parent
5962f457b4
commit
39aef79821
3 changed files with 45 additions and 1 deletions
|
@ -1181,7 +1181,9 @@ class LineAndFileWrapper:
|
||||||
self.readlines = self._file.readlines
|
self.readlines = self._file.readlines
|
||||||
|
|
||||||
def read(self, amt=None):
|
def read(self, amt=None):
|
||||||
assert not self._line_consumed and self._line_left
|
if self._line_consumed:
|
||||||
|
return self._file.read(amt)
|
||||||
|
assert self._line_left
|
||||||
if amt is None or amt > self._line_left:
|
if amt is None or amt > self._line_left:
|
||||||
s = self._line[self._line_offset:]
|
s = self._line[self._line_offset:]
|
||||||
self._done()
|
self._done()
|
||||||
|
@ -1201,11 +1203,17 @@ class LineAndFileWrapper:
|
||||||
return s
|
return s
|
||||||
|
|
||||||
def readline(self):
|
def readline(self):
|
||||||
|
if self._line_consumed:
|
||||||
|
return self._file.readline()
|
||||||
|
assert self._line_left
|
||||||
s = self._line[self._line_offset:]
|
s = self._line[self._line_offset:]
|
||||||
self._done()
|
self._done()
|
||||||
return s
|
return s
|
||||||
|
|
||||||
def readlines(self, size=None):
|
def readlines(self, size=None):
|
||||||
|
if self._line_consumed:
|
||||||
|
return self._file.readlines(size)
|
||||||
|
assert self._line_left
|
||||||
L = [self._line[self._line_offset:]]
|
L = [self._line[self._line_offset:]]
|
||||||
self._done()
|
self._done()
|
||||||
if size is None:
|
if size is None:
|
||||||
|
|
|
@ -1,10 +1,12 @@
|
||||||
"""Regresssion tests for urllib"""
|
"""Regresssion tests for urllib"""
|
||||||
|
|
||||||
import urllib
|
import urllib
|
||||||
|
import httplib
|
||||||
import unittest
|
import unittest
|
||||||
from test import test_support
|
from test import test_support
|
||||||
import os
|
import os
|
||||||
import mimetools
|
import mimetools
|
||||||
|
import StringIO
|
||||||
|
|
||||||
def hexescape(char):
|
def hexescape(char):
|
||||||
"""Escape char as RFC 2396 specifies"""
|
"""Escape char as RFC 2396 specifies"""
|
||||||
|
@ -88,6 +90,37 @@ class urlopen_FileTests(unittest.TestCase):
|
||||||
for line in self.returned_obj.__iter__():
|
for line in self.returned_obj.__iter__():
|
||||||
self.assertEqual(line, self.text)
|
self.assertEqual(line, self.text)
|
||||||
|
|
||||||
|
class urlopen_HttpTests(unittest.TestCase):
|
||||||
|
"""Test urlopen() opening a fake http connection."""
|
||||||
|
|
||||||
|
def fakehttp(self, fakedata):
|
||||||
|
class FakeSocket(StringIO.StringIO):
|
||||||
|
def sendall(self, str): pass
|
||||||
|
def makefile(self, mode, name): return self
|
||||||
|
def read(self, amt=None):
|
||||||
|
if self.closed: return ''
|
||||||
|
return StringIO.StringIO.read(self, amt)
|
||||||
|
def readline(self, length=None):
|
||||||
|
if self.closed: return ''
|
||||||
|
return StringIO.StringIO.readline(self, length)
|
||||||
|
class FakeHTTPConnection(httplib.HTTPConnection):
|
||||||
|
def connect(self):
|
||||||
|
self.sock = FakeSocket(fakedata)
|
||||||
|
assert httplib.HTTP._connection_class == httplib.HTTPConnection
|
||||||
|
httplib.HTTP._connection_class = FakeHTTPConnection
|
||||||
|
|
||||||
|
def unfakehttp(self):
|
||||||
|
httplib.HTTP._connection_class = httplib.HTTPConnection
|
||||||
|
|
||||||
|
def test_read(self):
|
||||||
|
self.fakehttp('Hello!')
|
||||||
|
try:
|
||||||
|
fp = urllib.urlopen("http://python.org/")
|
||||||
|
self.assertEqual(fp.readline(), 'Hello!')
|
||||||
|
self.assertEqual(fp.readline(), '')
|
||||||
|
finally:
|
||||||
|
self.unfakehttp()
|
||||||
|
|
||||||
class urlretrieve_FileTests(unittest.TestCase):
|
class urlretrieve_FileTests(unittest.TestCase):
|
||||||
"""Test urllib.urlretrieve() on local files"""
|
"""Test urllib.urlretrieve() on local files"""
|
||||||
|
|
||||||
|
@ -410,6 +443,7 @@ class Pathname_Tests(unittest.TestCase):
|
||||||
def test_main():
|
def test_main():
|
||||||
test_support.run_unittest(
|
test_support.run_unittest(
|
||||||
urlopen_FileTests,
|
urlopen_FileTests,
|
||||||
|
urlopen_HttpTests,
|
||||||
urlretrieve_FileTests,
|
urlretrieve_FileTests,
|
||||||
QuotingTests,
|
QuotingTests,
|
||||||
UnquotingTests,
|
UnquotingTests,
|
||||||
|
|
|
@ -322,6 +322,8 @@ Extension modules
|
||||||
Library
|
Library
|
||||||
-------
|
-------
|
||||||
|
|
||||||
|
- urllib.urlopen().readline() now handles HTTP/0.9 correctly.
|
||||||
|
|
||||||
- refactored site.py into functions. Also wrote regression tests for the
|
- refactored site.py into functions. Also wrote regression tests for the
|
||||||
module.
|
module.
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue