mirror of
https://github.com/python/cpython.git
synced 2025-09-26 18:29:57 +00:00
rename HTMLParser to html.parser and htmlentitydefs to html.entities;
includes merge of trunk revision 63432
This commit is contained in:
parent
9b020c784c
commit
3c50ea4303
9 changed files with 25 additions and 23 deletions
|
@ -75,12 +75,12 @@ The module defines a parser class and an exception:
|
||||||
Interface definition for transforming an abstract flow of formatting events into
|
Interface definition for transforming an abstract flow of formatting events into
|
||||||
specific output events on writer objects.
|
specific output events on writer objects.
|
||||||
|
|
||||||
Module :mod:`HTMLParser`
|
Module :mod:`html.parser`
|
||||||
Alternate HTML parser that offers a slightly lower-level view of the input, but
|
Alternate HTML parser that offers a slightly lower-level view of the input, but
|
||||||
is designed to work with XHTML, and does not implement some of the SGML syntax
|
is designed to work with XHTML, and does not implement some of the SGML syntax
|
||||||
not used in "HTML as deployed" and which isn't legal for XHTML.
|
not used in "HTML as deployed" and which isn't legal for XHTML.
|
||||||
|
|
||||||
Module :mod:`htmlentitydefs`
|
Module :mod:`html.entities`
|
||||||
Definition of replacement text for XHTML 1.0 entities.
|
Definition of replacement text for XHTML 1.0 entities.
|
||||||
|
|
||||||
Module :mod:`sgmllib`
|
Module :mod:`sgmllib`
|
||||||
|
@ -147,10 +147,10 @@ additional methods and instance variables for use within tag methods.
|
||||||
:meth:`save_bgn` will raise a :exc:`TypeError` exception.
|
:meth:`save_bgn` will raise a :exc:`TypeError` exception.
|
||||||
|
|
||||||
|
|
||||||
:mod:`htmlentitydefs` --- Definitions of HTML general entities
|
:mod:`html.entities` --- Definitions of HTML general entities
|
||||||
==============================================================
|
=============================================================
|
||||||
|
|
||||||
.. module:: htmlentitydefs
|
.. module:: html.entities
|
||||||
:synopsis: Definitions of HTML general entities.
|
:synopsis: Definitions of HTML general entities.
|
||||||
.. sectionauthor:: Fred L. Drake, Jr. <fdrake@acm.org>
|
.. sectionauthor:: Fred L. Drake, Jr. <fdrake@acm.org>
|
||||||
|
|
||||||
|
|
|
@ -1,8 +1,8 @@
|
||||||
|
|
||||||
:mod:`HTMLParser` --- Simple HTML and XHTML parser
|
:mod:`html.parser` --- Simple HTML and XHTML parser
|
||||||
==================================================
|
===================================================
|
||||||
|
|
||||||
.. module:: HTMLParser
|
.. module:: html.parser
|
||||||
:synopsis: A simple parser that can handle HTML and XHTML.
|
:synopsis: A simple parser that can handle HTML and XHTML.
|
||||||
|
|
||||||
|
|
||||||
|
@ -18,7 +18,7 @@ in :mod:`sgmllib`.
|
||||||
|
|
||||||
The :class:`HTMLParser` class is instantiated without arguments.
|
The :class:`HTMLParser` class is instantiated without arguments.
|
||||||
|
|
||||||
An HTMLParser instance is fed HTML data and calls handler functions when tags
|
An :class:`HTMLParser` instance is fed HTML data and calls handler functions when tags
|
||||||
begin and end. The :class:`HTMLParser` class is meant to be overridden by the
|
begin and end. The :class:`HTMLParser` class is meant to be overridden by the
|
||||||
user to provide a desired behavior.
|
user to provide a desired behavior.
|
||||||
|
|
||||||
|
@ -87,8 +87,8 @@ An exception is defined as well:
|
||||||
HREF="http://www.cwi.nl/">``, this method would be called as
|
HREF="http://www.cwi.nl/">``, this method would be called as
|
||||||
``handle_starttag('a', [('href', 'http://www.cwi.nl/')])``.
|
``handle_starttag('a', [('href', 'http://www.cwi.nl/')])``.
|
||||||
|
|
||||||
All entity references from htmlentitydefs are replaced in the attribute
|
All entity references from :mod:`html.entities` are replaced in the
|
||||||
values.
|
attribute values.
|
||||||
|
|
||||||
|
|
||||||
.. method:: HTMLParser.handle_startendtag(tag, attrs)
|
.. method:: HTMLParser.handle_startendtag(tag, attrs)
|
||||||
|
@ -166,7 +166,7 @@ Example HTML Parser Application
|
||||||
As a basic example, below is a very basic HTML parser that uses the
|
As a basic example, below is a very basic HTML parser that uses the
|
||||||
:class:`HTMLParser` class to print out tags as they are encountered::
|
:class:`HTMLParser` class to print out tags as they are encountered::
|
||||||
|
|
||||||
from HTMLParser import HTMLParser
|
from html.parser import HTMLParser
|
||||||
|
|
||||||
class MyHTMLParser(HTMLParser):
|
class MyHTMLParser(HTMLParser):
|
||||||
|
|
||||||
|
|
1
Lib/html/__init__.py
Normal file
1
Lib/html/__init__.py
Normal file
|
@ -0,0 +1 @@
|
||||||
|
# This directory is a Python package.
|
|
@ -372,16 +372,17 @@ class HTMLParser(_markupbase.ParserBase):
|
||||||
c = int(s)
|
c = int(s)
|
||||||
return chr(c)
|
return chr(c)
|
||||||
else:
|
else:
|
||||||
# Cannot use name2codepoint directly, because HTMLParser supports apos,
|
# Cannot use name2codepoint directly, because HTMLParser
|
||||||
# which is not part of HTML 4
|
# supports apos, which is not part of HTML 4
|
||||||
import htmlentitydefs
|
import html.entities
|
||||||
if HTMLParser.entitydefs is None:
|
if HTMLParser.entitydefs is None:
|
||||||
entitydefs = HTMLParser.entitydefs = {'apos':"'"}
|
entitydefs = HTMLParser.entitydefs = {'apos':"'"}
|
||||||
for k, v in htmlentitydefs.name2codepoint.items():
|
for k, v in html.entities.name2codepoint.items():
|
||||||
entitydefs[k] = chr(v)
|
entitydefs[k] = chr(v)
|
||||||
try:
|
try:
|
||||||
return self.entitydefs[s]
|
return self.entitydefs[s]
|
||||||
except KeyError:
|
except KeyError:
|
||||||
return '&'+s+';'
|
return '&'+s+';'
|
||||||
|
|
||||||
return re.sub(r"&(#?[xX]?(?:[0-9a-fA-F]+|\w{1,8}));", replaceEntities, s)
|
return re.sub(r"&(#?[xX]?(?:[0-9a-fA-F]+|\w{1,8}));",
|
||||||
|
replaceEntities, s)
|
|
@ -24,7 +24,7 @@ class HTMLParser(sgmllib.SGMLParser):
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from htmlentitydefs import entitydefs
|
from html.entities import entitydefs
|
||||||
|
|
||||||
def __init__(self, formatter, verbose=0):
|
def __init__(self, formatter, verbose=0):
|
||||||
"""Creates an instance of the HTMLParser class.
|
"""Creates an instance of the HTMLParser class.
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
import test.test_support, unittest
|
import test.test_support, unittest
|
||||||
import sys, codecs, htmlentitydefs, unicodedata
|
import sys, codecs, html.entities, unicodedata
|
||||||
|
|
||||||
class PosReturn:
|
class PosReturn:
|
||||||
# this can be used for configurable callbacks
|
# this can be used for configurable callbacks
|
||||||
|
@ -86,7 +86,7 @@ class CodecCallbackTest(unittest.TestCase):
|
||||||
l = []
|
l = []
|
||||||
for c in exc.object[exc.start:exc.end]:
|
for c in exc.object[exc.start:exc.end]:
|
||||||
try:
|
try:
|
||||||
l.append("&%s;" % htmlentitydefs.codepoint2name[ord(c)])
|
l.append("&%s;" % html.entities.codepoint2name[ord(c)])
|
||||||
except KeyError:
|
except KeyError:
|
||||||
l.append("&#%d;" % ord(c))
|
l.append("&#%d;" % ord(c))
|
||||||
return ("".join(l), exc.end)
|
return ("".join(l), exc.end)
|
||||||
|
|
|
@ -74,7 +74,7 @@ class TestBase:
|
||||||
if self.has_iso10646:
|
if self.has_iso10646:
|
||||||
return
|
return
|
||||||
|
|
||||||
from htmlentitydefs import codepoint2name
|
from html.entities import codepoint2name
|
||||||
|
|
||||||
def xmlcharnamereplace(exc):
|
def xmlcharnamereplace(exc):
|
||||||
if not isinstance(exc, UnicodeEncodeError):
|
if not isinstance(exc, UnicodeEncodeError):
|
||||||
|
|
|
@ -48,7 +48,7 @@ class TestUntestedModules(unittest.TestCase):
|
||||||
import encodings
|
import encodings
|
||||||
import formatter
|
import formatter
|
||||||
import getpass
|
import getpass
|
||||||
import htmlentitydefs
|
import html.entities
|
||||||
import imghdr
|
import imghdr
|
||||||
import keyword
|
import keyword
|
||||||
import linecache
|
import linecache
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue