Issue #6472: The xml.etree package is updated to ElementTree 1.3. The cElementTree module is updated too.

This commit is contained in:
Florent Xicluna 2010-03-11 14:36:19 +00:00
parent 4478662f83
commit 3e8c189faa
11 changed files with 3323 additions and 1207 deletions

View file

@ -1,6 +1,6 @@
#
# ElementTree
# $Id: ElementInclude.py 1862 2004-06-18 07:31:02Z Fredrik $
# $Id: ElementInclude.py 3375 2008-02-13 08:05:08Z fredrik $
#
# limited xinclude support for element trees
#
@ -16,7 +16,7 @@
# --------------------------------------------------------------------
# The ElementTree toolkit is
#
# Copyright (c) 1999-2004 by Fredrik Lundh
# Copyright (c) 1999-2008 by Fredrik Lundh
#
# By obtaining, using, and/or copying this software and/or its
# associated documentation, you agree that you have read, understood,
@ -42,14 +42,14 @@
# --------------------------------------------------------------------
# Licensed to PSF under a Contributor Agreement.
# See http://www.python.org/2.4/license for licensing details.
# See http://www.python.org/psf/license for licensing details.
##
# Limited XInclude support for the ElementTree package.
##
import copy
import ElementTree
from . import ElementTree
# XInclude namespace URI wrapped in braces ("{uri}" form).
XINCLUDE = "{http://www.w3.org/2001/XInclude}"

View file

@ -1,6 +1,6 @@
#
# ElementTree
# $Id: ElementPath.py 1858 2004-06-17 21:31:41Z Fredrik $
# $Id: ElementPath.py 3375 2008-02-13 08:05:08Z fredrik $
#
# limited xpath support for element trees
#
@ -8,8 +8,13 @@
# 2003-05-23 fl created
# 2003-05-28 fl added support for // etc
# 2003-08-27 fl fixed parsing of periods in element names
# 2007-09-10 fl new selection engine
# 2007-09-12 fl fixed parent selector
# 2007-09-13 fl added iterfind; changed findall to return a list
# 2007-11-30 fl added namespaces support
# 2009-10-30 fl added child element value filter
#
# Copyright (c) 2003-2004 by Fredrik Lundh. All rights reserved.
# Copyright (c) 2003-2009 by Fredrik Lundh. All rights reserved.
#
# fredrik@pythonware.com
# http://www.pythonware.com
@ -17,7 +22,7 @@
# --------------------------------------------------------------------
# The ElementTree toolkit is
#
# Copyright (c) 1999-2004 by Fredrik Lundh
# Copyright (c) 1999-2009 by Fredrik Lundh
#
# By obtaining, using, and/or copying this software and/or its
# associated documentation, you agree that you have read, understood,
@ -43,7 +48,7 @@
# --------------------------------------------------------------------
# Licensed to PSF under a Contributor Agreement.
# See http://www.python.org/2.4/license for licensing details.
# See http://www.python.org/psf/license for licensing details.
##
# Implementation module for XPath support. There's usually no reason
@ -53,146 +58,246 @@
import re
# Split a path expression into (operator, tag) pairs; a run of whitespace
# produces a pair of two empty strings.  The pattern is a raw string so
# that regex escapes (backslash-dot, backslash-s, ...) are handed to the
# re module untouched instead of being parsed as string escape sequences.
xpath_tokenizer = re.compile(
    r"(::|\.\.|\(\)|[/.*:\[\]\(\)@=])|((?:\{[^}]+\})?[^/:\[\]\(\)@=\s]+)|\s+"
    ).findall
# Tokenizer pattern for the supported XPath subset.  Group 1 captures
# operators and quoted string literals, group 2 captures names (optionally
# qualified as "{uri}name"); whitespace matches with both groups empty.
# Every fragment is a raw string so that regex escapes are passed to the
# re module untouched instead of being parsed as string escape sequences.
xpath_tokenizer_re = re.compile(
    r"("
    r"'[^']*'|\"[^\"]*\"|"
    r"::|"
    r"//?|"
    r"\.\.|"
    r"\(\)|"
    r"[/.*:\[\]\(\)@=])|"
    r"((?:\{[^}]+\})?[^/\[\]\(\)@=\s]+)|"
    r"\s+"
    )
class xpath_descendant_or_self:
    # Marker type: Path.__init__ appends an instance to Path.path when a
    # step starts with "/" (i.e. the expression contained "//").
    pass
##
# Wrapper for a compiled XPath.
class Path:
##
# Create a Path instance from an XPath expression.
def __init__(self, path):
    # Compile the expression into self.path (a list of tag names, "*" or
    # xpath_descendant_or_self markers).  self.tag is set only when the
    # whole path is one plain step, which find()/findtext() use as a
    # shortcut.  Raises SyntaxError for anything outside the supported
    # 'path/path' subset.
    pending = xpath_tokenizer(path)
    # the current version supports 'path/path'-style expressions only
    self.path = steps = []
    self.tag = None
    if pending and pending[0][0] == "/":
        raise SyntaxError("cannot use absolute path on element")
    pos = 0
    count = len(pending)
    while pos < count:
        op, tag = pending[pos]
        pos += 1
        if tag or op == "*":
            steps.append(tag or op)
        elif op == "/":
            # a separator where a step was expected: this is "//"
            steps.append(xpath_descendant_or_self())
            continue
        elif op != ".":
            raise SyntaxError("unsupported path syntax (%s)" % op)
        # every step must be followed by a separator or the end of input
        if pos < count:
            op, tag = pending[pos]
            pos += 1
            if op != "/":
                raise SyntaxError(
                    "expected path separator (%s)" % (op or tag)
                    )
    if steps and isinstance(steps[-1], xpath_descendant_or_self):
        raise SyntaxError("path cannot end with //")
    if len(steps) == 1 and isinstance(steps[0], type("")):
        self.tag = steps[0]
##
# Find first matching object.
def find(self, element):
tag = self.tag
if tag is None:
nodeset = self.findall(element)
if not nodeset:
return None
return nodeset[0]
for elem in element:
if elem.tag == tag:
return elem
return None
##
# Find text for first matching object.
def findtext(self, element, default=None):
tag = self.tag
if tag is None:
nodeset = self.findall(element)
if not nodeset:
return default
return nodeset[0].text or ""
for elem in element:
if elem.tag == tag:
return elem.text or ""
return default
##
# Find all matching objects.
def findall(self, element):
nodeset = [element]
index = 0
while 1:
# Tokenize pattern with xpath_tokenizer_re and yield (operator, tag) pairs.
# A tag written as "prefix:name" is rewritten to "{uri}name" by looking the
# prefix up in namespaces; a missing map or unknown prefix raises
# SyntaxError.  All other tokens are yielded unchanged.
def xpath_tokenizer(pattern, namespaces=None):
for token in xpath_tokenizer_re.findall(pattern):
tag = token[1]
# only tags that are not already in "{uri}name" form and contain ":" need resolving
if tag and tag[0] != "{" and ":" in tag:
try:
# NOTE(review): the next six lines use self, index and nodeset, which this
# function never defines; they look like lines of the removed Path.findall
# interleaved here by the diff rendering -- confirm before relying on them.
path = self.path[index]
index = index + 1
except IndexError:
return nodeset
set = []
if isinstance(path, xpath_descendant_or_self):
prefix, uri = tag.split(":", 1)
# an absent/empty map is reported the same way as an unknown prefix
if not namespaces:
raise KeyError
yield token[0], "{%s}%s" % (namespaces[prefix], uri)
except KeyError:
raise SyntaxError("prefix %r not found in prefix map" % prefix)
else:
yield token
##
# (Internal) Return the child-to-parent mapping for context.root.  The
# mapping is built on first use and stored on context.parent_map, so
# later calls with the same context reuse it.

def get_parent_map(context):
    mapping = context.parent_map
    if mapping is not None:
        return mapping
    mapping = context.parent_map = {}
    for parent in context.root.iter():
        for child in parent:
            mapping[child] = parent
    return mapping
##
# (Internal) Build the selector for a plain tag step: it yields every
# direct child whose tag equals the tag carried by the token.

def prepare_child(next, token):
    wanted = token[1]
    def select(context, result):
        for parent in result:
            for child in parent:
                if child.tag != wanted:
                    continue
                yield child
    return select
##
# (Internal) Build the selector for the "*" step: it yields every direct
# child of each element in the incoming result.

def prepare_star(next, token):
    def select(context, result):
        for parent in result:
            for child in parent:
                yield child
    return select
##
# (Internal) Build the selector for the "." step: it passes every element
# of the incoming result through unchanged.

def prepare_self(next, token):
    def select(context, result):
        for node in result:
            yield node
    return select
##
# (Internal) Build the selector for the "//" step.  The following token is
# consumed to learn which tag to look for ("*" or a name); any other token
# raises SyntaxError.  The selector yields all descendants with that tag,
# never the starting element itself.

def prepare_descendant(next, token):
    following = next()
    op = following[0]
    if op == "*":
        tag = "*"
    elif op:
        raise SyntaxError("invalid descendant")
    else:
        tag = following[1]
    def select(context, result):
        for start in result:
            for node in start.iter(tag):
                # iter() may report the starting element too; skip it
                if node is start:
                    continue
                yield node
    return select
##
# (Internal) Build the selector for the ".." step: it yields the parent of
# each incoming element, reporting every parent only once.  Elements with
# no entry in the parent map are skipped.

def prepare_parent(next, token):
    def select(context, result):
        # FIXME: raise error if .. is applied at toplevel?
        parents = get_parent_map(context)
        seen = set()
        for child in result:
            try:
                parent = parents[child]
            except KeyError:
                continue
            if parent in seen:
                continue
            seen.add(parent)
            yield parent
    return select
# Build the selector for a "[...]" predicate.  Tokens are consumed up to the
# closing "]"; their operator parts are concatenated into a signature string
# (a name token contributes "-", a quoted literal contributes "'") and the
# signature decides which kind of filter is returned.  An unrecognised
# signature raises SyntaxError("invalid predicate").
def prepare_predicate(next, token):
# FIXME: replace with real parser!!! refs:
# http://effbot.org/zone/simple-iterator-parser.htm
# http://javascript.crockford.com/tdop/tdop.html
signature = []
predicate = []
while 1:
token = next()
if token[0] == "]":
break
# quoted literal: record it as operator "'" with the quotes stripped off
if token[0] and token[0][:1] in "'\"":
token = "'", token[0][1:-1]
signature.append(token[0] or "-")
predicate.append(token[1])
signature = "".join(signature)
# use signature to determine predicate type
if signature == "@-":
# [@attribute] predicate
key = predicate[1]
def select(context, result):
for elem in result:
if elem.get(key) is not None:
yield elem
return select
if signature == "@-='":
# [@attribute='value']
key = predicate[1]
value = predicate[-1]
def select(context, result):
for elem in result:
if elem.get(key) == value:
yield elem
return select
# an all-digit name is an index, handled further down, not a tag
if signature == "-" and not re.match("\d+$", predicate[0]):
# [tag]
tag = predicate[0]
def select(context, result):
for elem in result:
if elem.find(tag) is not None:
yield elem
return select
if signature == "-='" and not re.match("\d+$", predicate[0]):
# [tag='value']
tag = predicate[0]
value = predicate[-1]
def select(context, result):
for elem in result:
for e in elem.findall(tag):
# compare against the concatenated text of the child and its descendants
if "".join(e.itertext()) == value:
yield elem
# one matching child is enough; report elem only once
break
return select
if signature == "-" or signature == "-()" or signature == "-()-":
# [index] or [last()] or [last()-index]
if signature == "-":
# convert the 1-based position to a 0-based list index
index = int(predicate[0]) - 1
else:
if predicate[0] != "last":
raise SyntaxError("unsupported function")
if signature == "-()-":
try:
# NOTE(review): the lines from here down to "set.extend(new)" use self,
# nodeset and set, which this function never defines; they look like lines
# of the removed Path.findall interleaved by the diff rendering -- confirm.
tag = self.path[index]
if not isinstance(tag, type("")):
tag = None
else:
index = index + 1
except IndexError:
tag = None # invalid path
for node in nodeset:
new = list(node.getiterator(tag))
if new and new[0] is node:
set.extend(new[1:])
else:
set.extend(new)
index = int(predicate[2]) - 1
except ValueError:
raise SyntaxError("unsupported expression")
else:
# NOTE(review): the next seven lines (through "nodeset = set") again use
# nodeset, path and set from the removed Path.findall -- confirm.
for node in nodeset:
for node in node:
if path == "*" or node.tag == path:
set.append(node)
if not set:
return []
nodeset = set
index = -1
def select(context, result):
parent_map = get_parent_map(context)
for elem in result:
try:
parent = parent_map[elem]
# FIXME: what if the selector is "*" ?
# position is counted among the siblings that share elem's tag
elems = list(parent.findall(elem.tag))
if elems[index] is elem:
yield elem
# no parent recorded, or index out of range: elem does not match
except (IndexError, KeyError):
pass
return select
raise SyntaxError("invalid predicate")
# Dispatch table used by iterfind: maps the operator part of a token to the
# function that builds the selector for that step ("" is a plain tag name).
ops = {
"": prepare_child,
"*": prepare_star,
".": prepare_self,
"..": prepare_parent,
"//": prepare_descendant,
"[": prepare_predicate,
}
# Cache of compiled paths, keyed by the path string.
_cache = {}
##
# (Internal) Compile path.
class _SelectorContext:
parent_map = None
def __init__(self, root):
self.root = root
def _compile(path):
    # Returns the Path object for path, compiling and caching it on a miss.
    compiled = _cache.get(path)
    if compiled is None:
        compiled = Path(path)
        # crude size limit: empty the whole cache once it holds 100 entries
        if len(_cache) >= 100:
            _cache.clear()
        _cache[path] = compiled
    return compiled
# --------------------------------------------------------------------
##
# Generate all matching objects.
#
# @param elem Element the search starts from.
# @param path Relative path expression; a trailing "/" implies "/*".
# @param namespaces Optional mapping from prefix to namespace URI.
# @return An iterator over the matching elements (empty for an empty path).
# @exception SyntaxError If the path is absolute or cannot be compiled.

def iterfind(elem, path, namespaces=None):
    # compile selector pattern
    if path[-1:] == "/":
        path = path + "*" # implicit all (FIXME: keep this?)
    # The prefix map takes part in the cache key: the same path text
    # compiles to different qualified tags under a different map.
    if namespaces:
        cache_key = (path, tuple(sorted(namespaces.items())))
    else:
        cache_key = (path, None)
    try:
        selector = _cache[cache_key]
    except KeyError:
        if len(_cache) > 100:
            _cache.clear()
        if path[:1] == "/":
            raise SyntaxError("cannot use absolute path on element")
        tokens = iter(xpath_tokenizer(path, namespaces))
        def next_token():
            # builtin next() works on Python 2.6+ and on Python 3
            return next(tokens)
        try:
            token = next_token()
        except StopIteration:
            # empty path: there is nothing to select
            return iter(())
        selector = []
        while 1:
            try:
                selector.append(ops[token[0]](next_token, token))
            except StopIteration:
                # a step ran out of tokens (e.g. "//" with nothing after it)
                raise SyntaxError("invalid path")
            try:
                token = next_token()
                if token[0] == "/":
                    token = next_token()
            except StopIteration:
                break
        _cache[cache_key] = selector
    # execute selector pattern
    result = [elem]
    context = _SelectorContext(elem)
    for select in selector:
        result = select(context, result)
    return result
##
# Find first matching object.
def find(element, path):
    # delegate to the cached, compiled Path object for this expression
    compiled = _compile(path)
    return compiled.find(element)
##
# Find text for first matching object.
def findtext(element, path, default=None):
    # delegate to the cached, compiled Path object for this expression
    compiled = _compile(path)
    return compiled.findtext(element, default)
def find(elem, path, namespaces=None):
    # Returns the first element matching path, or None when nothing
    # matches.  The builtin next() is used because the iterator's own
    # .next() method only exists on Python 2.
    try:
        return next(iterfind(elem, path, namespaces))
    except StopIteration:
        return None
##
# Find all matching objects.
def findall(element, path):
    # delegate to the cached, compiled Path object for this expression
    compiled = _compile(path)
    return compiled.findall(element)
def findall(elem, path, namespaces=None):
    # materialise the lazy iterfind() result so callers get a real list
    matches = iterfind(elem, path, namespaces)
    return list(matches)
##
# Find text for first matching object.
def findtext(elem, path, default=None, namespaces=None):
    # Returns the text of the first element matching path ("" when that
    # element has no text), or default when nothing matches.  The builtin
    # next() is used because the iterator's own .next() method only
    # exists on Python 2.
    try:
        match = next(iterfind(elem, path, namespaces))
    except StopIteration:
        return default
    return match.text or ""

File diff suppressed because it is too large Load diff

View file

@ -1,10 +1,10 @@
# $Id: __init__.py 1821 2004-06-03 16:57:49Z fredrik $
# $Id: __init__.py 3375 2008-02-13 08:05:08Z fredrik $
# elementtree package
# --------------------------------------------------------------------
# The ElementTree toolkit is
#
# Copyright (c) 1999-2004 by Fredrik Lundh
# Copyright (c) 1999-2008 by Fredrik Lundh
#
# By obtaining, using, and/or copying this software and/or its
# associated documentation, you agree that you have read, understood,
@ -30,4 +30,4 @@
# --------------------------------------------------------------------
# Licensed to PSF under a Contributor Agreement.
# See http://www.python.org/2.4/license for licensing details.
# See http://www.python.org/psf/license for licensing details.