mirror of
				https://github.com/python/cpython.git
				synced 2025-11-03 19:34:08 +00:00 
			
		
		
		
	Issue #14629: Raise SyntaxError in tokenize.detect_encoding
if the first two lines have non-UTF-8 characters without an encoding declaration.
This commit is contained in:
		
							parent
							
								
									8e6e0fdb7f
								
							
						
					
					
						commit
						63674f4b52
					
				
					 3 changed files with 18 additions and 2 deletions
				
			
		| 
						 | 
				
			
			@ -825,6 +825,16 @@ class TestDetectEncoding(TestCase):
 | 
			
		|||
                found, consumed_lines = detect_encoding(rl)
 | 
			
		||||
                self.assertEqual(found, "iso-8859-1")
 | 
			
		||||
 | 
			
		||||
    def test_syntaxerror_latin1(self):
 | 
			
		||||
        # Issue 14629: need to raise SyntaxError if the first
 | 
			
		||||
        # line(s) have non-UTF-8 characters
 | 
			
		||||
        lines = (
 | 
			
		||||
            b'print("\xdf")', # Latin-1: LATIN SMALL LETTER SHARP S
 | 
			
		||||
            )
 | 
			
		||||
        readline = self.get_readline(lines)
 | 
			
		||||
        self.assertRaises(SyntaxError, detect_encoding, readline)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    def test_utf8_normalization(self):
 | 
			
		||||
        # See get_normal_name() in tokenizer.c.
 | 
			
		||||
        encodings = ("utf-8", "utf-8-mac", "utf-8-unix")
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -292,9 +292,12 @@ def detect_encoding(readline):
 | 
			
		|||
 | 
			
		||||
    def find_cookie(line):
 | 
			
		||||
        try:
 | 
			
		||||
            line_string = line.decode('ascii')
 | 
			
		||||
            # Decode as UTF-8. Either the line is an encoding declaration,
 | 
			
		||||
            # in which case it should be pure ASCII, or it must be UTF-8
 | 
			
		||||
            # per default encoding.
 | 
			
		||||
            line_string = line.decode('utf-8')
 | 
			
		||||
        except UnicodeDecodeError:
 | 
			
		||||
            return None
 | 
			
		||||
            raise SyntaxError("invalid or missing encoding declaration")
 | 
			
		||||
 | 
			
		||||
        matches = cookie_re.findall(line_string)
 | 
			
		||||
        if not matches:
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -47,6 +47,9 @@ Core and Builtins
 | 
			
		|||
Library
 | 
			
		||||
-------
 | 
			
		||||
 | 
			
		||||
- Issue #14629: Raise SyntaxError in tokenize.detect_encoding if the
 | 
			
		||||
  first two lines have non-UTF-8 characters without an encoding declaration.
 | 
			
		||||
 | 
			
		||||
- Issue #14308: Fix an exception when a "dummy" thread is in the threading
 | 
			
		||||
  module's active list after a fork().
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue