mirror of
				https://github.com/python/cpython.git
				synced 2025-11-03 19:34:08 +00:00 
			
		
		
		
	Fix gzip.py: Use bytes where 8-bit strings were formerly used.
(The filename gets written in UTF-8 encoded form, which probably isn't correct.) Fix the test.
This commit is contained in:
		
							parent
							
								
									3a77c7ab16
								
							
						
					
					
						commit
						5b1284d0b7
					
				
					 2 changed files with 31 additions and 29 deletions
				
			
		
							
								
								
									
										42
									
								
								Lib/gzip.py
									
										
									
									
									
								
							
							
						
						
									
										42
									
								
								Lib/gzip.py
									
										
									
									
									
								
							| 
						 | 
					@ -104,7 +104,7 @@ class GzipFile:
 | 
				
			||||||
            self.mode = READ
 | 
					            self.mode = READ
 | 
				
			||||||
            # Set flag indicating start of a new member
 | 
					            # Set flag indicating start of a new member
 | 
				
			||||||
            self._new_member = True
 | 
					            self._new_member = True
 | 
				
			||||||
            self.extrabuf = ""
 | 
					            self.extrabuf = b""
 | 
				
			||||||
            self.extrasize = 0
 | 
					            self.extrasize = 0
 | 
				
			||||||
            self.name = filename
 | 
					            self.name = filename
 | 
				
			||||||
            # Starts small, scales exponentially
 | 
					            # Starts small, scales exponentially
 | 
				
			||||||
| 
						 | 
					@ -147,20 +147,21 @@ class GzipFile:
 | 
				
			||||||
        self.bufsize = 0
 | 
					        self.bufsize = 0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def _write_gzip_header(self):
 | 
					    def _write_gzip_header(self):
 | 
				
			||||||
        self.fileobj.write('\037\213')             # magic header
 | 
					        self.fileobj.write(b'\037\213')             # magic header
 | 
				
			||||||
        self.fileobj.write('\010')                 # compression method
 | 
					        self.fileobj.write(b'\010')                 # compression method
 | 
				
			||||||
        fname = self.name
 | 
					        fname = self.name
 | 
				
			||||||
        if fname.endswith(".gz"):
 | 
					        if fname.endswith(".gz"):
 | 
				
			||||||
            fname = fname[:-3]
 | 
					            fname = fname[:-3]
 | 
				
			||||||
        flags = 0
 | 
					        flags = 0
 | 
				
			||||||
        if fname:
 | 
					        if fname:
 | 
				
			||||||
            flags = FNAME
 | 
					            flags = FNAME
 | 
				
			||||||
        self.fileobj.write(chr(flags))
 | 
					        self.fileobj.write(chr(flags).encode('latin-1'))
 | 
				
			||||||
        write32u(self.fileobj, int(time.time()))
 | 
					        write32u(self.fileobj, int(time.time()))
 | 
				
			||||||
        self.fileobj.write('\002')
 | 
					        self.fileobj.write(b'\002')
 | 
				
			||||||
        self.fileobj.write('\377')
 | 
					        self.fileobj.write(b'\377')
 | 
				
			||||||
        if fname:
 | 
					        if fname:
 | 
				
			||||||
            self.fileobj.write(fname + '\000')
 | 
					            # XXX: Is utf-8 the correct encoding?
 | 
				
			||||||
 | 
					            self.fileobj.write(fname.encode('utf-8') + b'\000')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def _init_read(self):
 | 
					    def _init_read(self):
 | 
				
			||||||
        self.crc = zlib.crc32("")
 | 
					        self.crc = zlib.crc32("")
 | 
				
			||||||
| 
						 | 
					@ -168,7 +169,7 @@ class GzipFile:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def _read_gzip_header(self):
 | 
					    def _read_gzip_header(self):
 | 
				
			||||||
        magic = self.fileobj.read(2)
 | 
					        magic = self.fileobj.read(2)
 | 
				
			||||||
        if magic != '\037\213':
 | 
					        if magic != b'\037\213':
 | 
				
			||||||
            raise IOError, 'Not a gzipped file'
 | 
					            raise IOError, 'Not a gzipped file'
 | 
				
			||||||
        method = ord( self.fileobj.read(1) )
 | 
					        method = ord( self.fileobj.read(1) )
 | 
				
			||||||
        if method != 8:
 | 
					        if method != 8:
 | 
				
			||||||
| 
						 | 
					@ -188,13 +189,13 @@ class GzipFile:
 | 
				
			||||||
            # Read and discard a null-terminated string containing the filename
 | 
					            # Read and discard a null-terminated string containing the filename
 | 
				
			||||||
            while True:
 | 
					            while True:
 | 
				
			||||||
                s = self.fileobj.read(1)
 | 
					                s = self.fileobj.read(1)
 | 
				
			||||||
                if not s or s=='\000':
 | 
					                if not s or s==b'\000':
 | 
				
			||||||
                    break
 | 
					                    break
 | 
				
			||||||
        if flag & FCOMMENT:
 | 
					        if flag & FCOMMENT:
 | 
				
			||||||
            # Read and discard a null-terminated string containing a comment
 | 
					            # Read and discard a null-terminated string containing a comment
 | 
				
			||||||
            while True:
 | 
					            while True:
 | 
				
			||||||
                s = self.fileobj.read(1)
 | 
					                s = self.fileobj.read(1)
 | 
				
			||||||
                if not s or s=='\000':
 | 
					                if not s or s==b'\000':
 | 
				
			||||||
                    break
 | 
					                    break
 | 
				
			||||||
        if flag & FHCRC:
 | 
					        if flag & FHCRC:
 | 
				
			||||||
            self.fileobj.read(2)     # Read & discard the 16-bit header CRC
 | 
					            self.fileobj.read(2)     # Read & discard the 16-bit header CRC
 | 
				
			||||||
| 
						 | 
					@ -219,7 +220,7 @@ class GzipFile:
 | 
				
			||||||
            raise IOError(errno.EBADF, "read() on write-only GzipFile object")
 | 
					            raise IOError(errno.EBADF, "read() on write-only GzipFile object")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        if self.extrasize <= 0 and self.fileobj is None:
 | 
					        if self.extrasize <= 0 and self.fileobj is None:
 | 
				
			||||||
            return ''
 | 
					            return b''
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        readsize = 1024
 | 
					        readsize = 1024
 | 
				
			||||||
        if size < 0:        # get the whole thing
 | 
					        if size < 0:        # get the whole thing
 | 
				
			||||||
| 
						 | 
					@ -278,7 +279,7 @@ class GzipFile:
 | 
				
			||||||
        # If the EOF has been reached, flush the decompression object
 | 
					        # If the EOF has been reached, flush the decompression object
 | 
				
			||||||
        # and mark this object as finished.
 | 
					        # and mark this object as finished.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        if buf == "":
 | 
					        if buf == b"":
 | 
				
			||||||
            uncompress = self.decompress.flush()
 | 
					            uncompress = self.decompress.flush()
 | 
				
			||||||
            self._read_eof()
 | 
					            self._read_eof()
 | 
				
			||||||
            self._add_read_data( uncompress )
 | 
					            self._add_read_data( uncompress )
 | 
				
			||||||
| 
						 | 
					@ -287,7 +288,7 @@ class GzipFile:
 | 
				
			||||||
        uncompress = self.decompress.decompress(buf)
 | 
					        uncompress = self.decompress.decompress(buf)
 | 
				
			||||||
        self._add_read_data( uncompress )
 | 
					        self._add_read_data( uncompress )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        if self.decompress.unused_data != "":
 | 
					        if self.decompress.unused_data != b"":
 | 
				
			||||||
            # Ending case: we've come to the end of a member in the file,
 | 
					            # Ending case: we've come to the end of a member in the file,
 | 
				
			||||||
            # so seek back to the start of the unused data, finish up
 | 
					            # so seek back to the start of the unused data, finish up
 | 
				
			||||||
            # this member, and read a new gzip header.
 | 
					            # this member, and read a new gzip header.
 | 
				
			||||||
| 
						 | 
					@ -375,7 +376,7 @@ class GzipFile:
 | 
				
			||||||
            raise IOError("Can't rewind in write mode")
 | 
					            raise IOError("Can't rewind in write mode")
 | 
				
			||||||
        self.fileobj.seek(0)
 | 
					        self.fileobj.seek(0)
 | 
				
			||||||
        self._new_member = True
 | 
					        self._new_member = True
 | 
				
			||||||
        self.extrabuf = ""
 | 
					        self.extrabuf = b""
 | 
				
			||||||
        self.extrasize = 0
 | 
					        self.extrasize = 0
 | 
				
			||||||
        self.offset = 0
 | 
					        self.offset = 0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -389,9 +390,10 @@ class GzipFile:
 | 
				
			||||||
            if offset < self.offset:
 | 
					            if offset < self.offset:
 | 
				
			||||||
                raise IOError('Negative seek in write mode')
 | 
					                raise IOError('Negative seek in write mode')
 | 
				
			||||||
            count = offset - self.offset
 | 
					            count = offset - self.offset
 | 
				
			||||||
 | 
					            chunk = bytes(1024)
 | 
				
			||||||
            for i in range(count // 1024):
 | 
					            for i in range(count // 1024):
 | 
				
			||||||
                self.write(1024 * '\0')
 | 
					                self.write(chunk)
 | 
				
			||||||
            self.write((count % 1024) * '\0')
 | 
					            self.write(bytes(count % 1024))
 | 
				
			||||||
        elif self.mode == READ:
 | 
					        elif self.mode == READ:
 | 
				
			||||||
            if offset < self.offset:
 | 
					            if offset < self.offset:
 | 
				
			||||||
                # for negative seek, rewind and do positive seek
 | 
					                # for negative seek, rewind and do positive seek
 | 
				
			||||||
| 
						 | 
					@ -410,7 +412,7 @@ class GzipFile:
 | 
				
			||||||
        bufs = []
 | 
					        bufs = []
 | 
				
			||||||
        while size != 0:
 | 
					        while size != 0:
 | 
				
			||||||
            c = self.read(readsize)
 | 
					            c = self.read(readsize)
 | 
				
			||||||
            i = c.find('\n')
 | 
					            i = c.find(b'\n')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            # We set i=size to break out of the loop under two
 | 
					            # We set i=size to break out of the loop under two
 | 
				
			||||||
            # conditions: 1) there's no newline, and the chunk is
 | 
					            # conditions: 1) there's no newline, and the chunk is
 | 
				
			||||||
| 
						 | 
					@ -419,7 +421,7 @@ class GzipFile:
 | 
				
			||||||
            if (size <= i) or (i == -1 and len(c) > size):
 | 
					            if (size <= i) or (i == -1 and len(c) > size):
 | 
				
			||||||
                i = size - 1
 | 
					                i = size - 1
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            if i >= 0 or c == '':
 | 
					            if i >= 0 or c == b'':
 | 
				
			||||||
                bufs.append(c[:i + 1])    # Add portion of last chunk
 | 
					                bufs.append(c[:i + 1])    # Add portion of last chunk
 | 
				
			||||||
                self._unread(c[i + 1:])   # Push back rest of chunk
 | 
					                self._unread(c[i + 1:])   # Push back rest of chunk
 | 
				
			||||||
                break
 | 
					                break
 | 
				
			||||||
| 
						 | 
					@ -430,7 +432,7 @@ class GzipFile:
 | 
				
			||||||
            readsize = min(size, readsize * 2)
 | 
					            readsize = min(size, readsize * 2)
 | 
				
			||||||
        if readsize > self.min_readsize:
 | 
					        if readsize > self.min_readsize:
 | 
				
			||||||
            self.min_readsize = min(readsize, self.min_readsize * 2, 512)
 | 
					            self.min_readsize = min(readsize, self.min_readsize * 2, 512)
 | 
				
			||||||
        return ''.join(bufs) # Return resulting line
 | 
					        return b''.join(bufs) # Return resulting line
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def readlines(self, sizehint=0):
 | 
					    def readlines(self, sizehint=0):
 | 
				
			||||||
        # Negative numbers result in reading all the lines
 | 
					        # Negative numbers result in reading all the lines
 | 
				
			||||||
| 
						 | 
					@ -439,7 +441,7 @@ class GzipFile:
 | 
				
			||||||
        L = []
 | 
					        L = []
 | 
				
			||||||
        while sizehint > 0:
 | 
					        while sizehint > 0:
 | 
				
			||||||
            line = self.readline()
 | 
					            line = self.readline()
 | 
				
			||||||
            if line == "":
 | 
					            if line == b"":
 | 
				
			||||||
                break
 | 
					                break
 | 
				
			||||||
            L.append(line)
 | 
					            L.append(line)
 | 
				
			||||||
            sizehint = sizehint - len(line)
 | 
					            sizehint = sizehint - len(line)
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -8,14 +8,14 @@ import sys, os
 | 
				
			||||||
import gzip
 | 
					import gzip
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
data1 = """  int length=DEFAULTALLOC, err = Z_OK;
 | 
					data1 = b"""  int length=DEFAULTALLOC, err = Z_OK;
 | 
				
			||||||
  PyObject *RetVal;
 | 
					  PyObject *RetVal;
 | 
				
			||||||
  int flushmode = Z_FINISH;
 | 
					  int flushmode = Z_FINISH;
 | 
				
			||||||
  unsigned long start_total_out;
 | 
					  unsigned long start_total_out;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
"""
 | 
					"""
 | 
				
			||||||
 | 
					
 | 
				
			||||||
data2 = """/* zlibmodule.c -- gzip-compatible data compression */
 | 
					data2 = b"""/* zlibmodule.c -- gzip-compatible data compression */
 | 
				
			||||||
/* See http://www.gzip.org/zlib/
 | 
					/* See http://www.gzip.org/zlib/
 | 
				
			||||||
/* See http://www.winimage.com/zLibDll for Windows */
 | 
					/* See http://www.winimage.com/zLibDll for Windows */
 | 
				
			||||||
"""
 | 
					"""
 | 
				
			||||||
| 
						 | 
					@ -63,22 +63,22 @@ class TestGzip(unittest.TestCase):
 | 
				
			||||||
        # many, many members.  Create such a file and verify that reading it
 | 
					        # many, many members.  Create such a file and verify that reading it
 | 
				
			||||||
        # works.
 | 
					        # works.
 | 
				
			||||||
        f = gzip.open(self.filename, 'wb', 9)
 | 
					        f = gzip.open(self.filename, 'wb', 9)
 | 
				
			||||||
        f.write('a')
 | 
					        f.write(b'a')
 | 
				
			||||||
        f.close()
 | 
					        f.close()
 | 
				
			||||||
        for i in range(0,200):
 | 
					        for i in range(0, 200):
 | 
				
			||||||
            f = gzip.open(self.filename, "ab", 9) # append
 | 
					            f = gzip.open(self.filename, "ab", 9) # append
 | 
				
			||||||
            f.write('a')
 | 
					            f.write(b'a')
 | 
				
			||||||
            f.close()
 | 
					            f.close()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        # Try reading the file
 | 
					        # Try reading the file
 | 
				
			||||||
        zgfile = gzip.open(self.filename, "rb")
 | 
					        zgfile = gzip.open(self.filename, "rb")
 | 
				
			||||||
        contents = ""
 | 
					        contents = b""
 | 
				
			||||||
        while 1:
 | 
					        while 1:
 | 
				
			||||||
            ztxt = zgfile.read(8192)
 | 
					            ztxt = zgfile.read(8192)
 | 
				
			||||||
            contents += ztxt
 | 
					            contents += ztxt
 | 
				
			||||||
            if not ztxt: break
 | 
					            if not ztxt: break
 | 
				
			||||||
        zgfile.close()
 | 
					        zgfile.close()
 | 
				
			||||||
        self.assertEquals(contents, 'a'*201)
 | 
					        self.assertEquals(contents, b'a'*201)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def test_readline(self):
 | 
					    def test_readline(self):
 | 
				
			||||||
| 
						 | 
					@ -89,7 +89,7 @@ class TestGzip(unittest.TestCase):
 | 
				
			||||||
        line_length = 0
 | 
					        line_length = 0
 | 
				
			||||||
        while 1:
 | 
					        while 1:
 | 
				
			||||||
            L = f.readline(line_length)
 | 
					            L = f.readline(line_length)
 | 
				
			||||||
            if L == "" and line_length != 0: break
 | 
					            if not L and line_length != 0: break
 | 
				
			||||||
            self.assert_(len(L) <= line_length)
 | 
					            self.assert_(len(L) <= line_length)
 | 
				
			||||||
            line_length = (line_length + 1) % 50
 | 
					            line_length = (line_length + 1) % 50
 | 
				
			||||||
        f.close()
 | 
					        f.close()
 | 
				
			||||||
| 
						 | 
					@ -144,7 +144,7 @@ class TestGzip(unittest.TestCase):
 | 
				
			||||||
        f = gzip.GzipFile(self.filename, 'w')
 | 
					        f = gzip.GzipFile(self.filename, 'w')
 | 
				
			||||||
        for pos in range(0, 256, 16):
 | 
					        for pos in range(0, 256, 16):
 | 
				
			||||||
            f.seek(pos)
 | 
					            f.seek(pos)
 | 
				
			||||||
            f.write('GZ\n')
 | 
					            f.write(b'GZ\n')
 | 
				
			||||||
        f.close()
 | 
					        f.close()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def test_mode(self):
 | 
					    def test_mode(self):
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue