mirror of
				https://github.com/python/cpython.git
				synced 2025-11-03 19:34:08 +00:00 
			
		
		
		
	Improve handling of declarations in HTMLParser.
This commit is contained in:
		
							parent
							
								
									86f67123be
								
							
						
					
					
						commit
						f4ab491901
					
				
					 2 changed files with 51 additions and 29 deletions
				
			
		| 
						 | 
				
			
			@ -122,7 +122,7 @@ comment1b-->
 | 
			
		|||
<Img sRc='Bar' isMAP>sample
 | 
			
		||||
text
 | 
			
		||||
&#x201C;
 | 
			
		||||
<!--comment2a-- --comment2b--><!>
 | 
			
		||||
<!--comment2a-- --comment2b-->
 | 
			
		||||
</Html>
 | 
			
		||||
""", [
 | 
			
		||||
    ("data", "\n"),
 | 
			
		||||
| 
						 | 
				
			
			@ -157,24 +157,6 @@ text
 | 
			
		|||
            ("data", " foo"),
 | 
			
		||||
            ])
 | 
			
		||||
 | 
			
		||||
    def test_doctype_decl(self):
 | 
			
		||||
        inside = """\
 | 
			
		||||
DOCTYPE html [
 | 
			
		||||
  <!ELEMENT html - O EMPTY>
 | 
			
		||||
  <!ATTLIST html
 | 
			
		||||
      version CDATA #IMPLIED
 | 
			
		||||
      profile CDATA 'DublinCore'>
 | 
			
		||||
  <!NOTATION datatype SYSTEM 'http://xml.python.org/notations/python-module'>
 | 
			
		||||
  <!ENTITY myEntity 'internal parsed entity'>
 | 
			
		||||
  <!ENTITY anEntity SYSTEM 'http://xml.python.org/entities/something.xml'>
 | 
			
		||||
  <!ENTITY % paramEntity 'name|name|name'>
 | 
			
		||||
  %paramEntity;
 | 
			
		||||
  <!-- comment -->
 | 
			
		||||
]"""
 | 
			
		||||
        self._run_check("<!%s>" % inside, [
 | 
			
		||||
            ("decl", inside),
 | 
			
		||||
            ])
 | 
			
		||||
 | 
			
		||||
    def test_bad_nesting(self):
 | 
			
		||||
        # Strangely, this *is* supposed to test that overlapping
 | 
			
		||||
        # elements are allowed.  HTMLParser is more geared toward
 | 
			
		||||
| 
						 | 
				
			
			@ -247,6 +229,30 @@ DOCTYPE html [
 | 
			
		|||
        self._parse_error("<a foo='>'")
 | 
			
		||||
        self._parse_error("<a foo='>")
 | 
			
		||||
 | 
			
		||||
    def test_valid_doctypes(self):
 | 
			
		||||
        # from http://www.w3.org/QA/2002/04/valid-dtd-list.html
 | 
			
		||||
        dtds = ['HTML',  # HTML5 doctype
 | 
			
		||||
                ('HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" '
 | 
			
		||||
                 '"http://www.w3.org/TR/html4/strict.dtd"'),
 | 
			
		||||
                ('HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" '
 | 
			
		||||
                 '"http://www.w3.org/TR/html4/loose.dtd"'),
 | 
			
		||||
                ('html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" '
 | 
			
		||||
                 '"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"'),
 | 
			
		||||
                ('html PUBLIC "-//W3C//DTD XHTML 1.0 Frameset//EN" '
 | 
			
		||||
                 '"http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd"'),
 | 
			
		||||
                ('math PUBLIC "-//W3C//DTD MathML 2.0//EN" '
 | 
			
		||||
                 '"http://www.w3.org/Math/DTD/mathml2/mathml2.dtd"'),
 | 
			
		||||
                ('html PUBLIC "-//W3C//DTD '
 | 
			
		||||
                 'XHTML 1.1 plus MathML 2.0 plus SVG 1.1//EN" '
 | 
			
		||||
                 '"http://www.w3.org/2002/04/xhtml-math-svg/xhtml-math-svg.dtd"'),
 | 
			
		||||
                ('svg PUBLIC "-//W3C//DTD SVG 1.1//EN" '
 | 
			
		||||
                 '"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd"'),
 | 
			
		||||
                'html PUBLIC "-//IETF//DTD HTML 2.0//EN"',
 | 
			
		||||
                'html PUBLIC "-//W3C//DTD HTML 3.2 Final//EN"']
 | 
			
		||||
        for dtd in dtds:
 | 
			
		||||
            self._run_check("<!DOCTYPE %s>" % dtd,
 | 
			
		||||
                            [('decl', 'DOCTYPE ' + dtd)])
 | 
			
		||||
 | 
			
		||||
    def test_declaration_junk_chars(self):
 | 
			
		||||
        self._parse_error("<!DOCTYPE foo $ >")
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -384,8 +390,7 @@ class HTMLParserTolerantTestCase(HTMLParserStrictTestCase):
 | 
			
		|||
        self._run_check("<a foo='>", [('data', "<a foo='>")])
 | 
			
		||||
 | 
			
		||||
    def test_declaration_junk_chars(self):
 | 
			
		||||
        # XXX this is wrong
 | 
			
		||||
        self._run_check("<!DOCTYPE foo $ >", [('comment', 'DOCTYPE foo $ ')])
 | 
			
		||||
        self._run_check("<!DOCTYPE foo $ >", [('decl', 'DOCTYPE foo $ ')])
 | 
			
		||||
 | 
			
		||||
    def test_illegal_declarations(self):
 | 
			
		||||
        # XXX this might be wrong
 | 
			
		||||
| 
						 | 
				
			
			@ -510,11 +515,14 @@ class HTMLParserTolerantTestCase(HTMLParserStrictTestCase):
 | 
			
		|||
        html = ('<! not really a comment >'
 | 
			
		||||
                '<! not a comment either -->'
 | 
			
		||||
                '<! -- close enough -->'
 | 
			
		||||
                '<!><!<-- this was an empty comment>'
 | 
			
		||||
                '<!!! another bogus comment !!!>')
 | 
			
		||||
        expected = [
 | 
			
		||||
            ('comment', ' not really a comment '),
 | 
			
		||||
            ('comment', ' not a comment either --'),
 | 
			
		||||
            ('comment', ' -- close enough --'),
 | 
			
		||||
            ('comment', ''),
 | 
			
		||||
            ('comment', '<-- this was an empty comment'),
 | 
			
		||||
            ('comment', '!! another bogus comment !!!'),
 | 
			
		||||
        ]
 | 
			
		||||
        self._run_check(html, expected)
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue