mirror of
				https://github.com/python/cpython.git
				synced 2025-10-31 02:15:10 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			198 lines
		
	
	
	
		
			5.9 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			198 lines
		
	
	
	
		
			5.9 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| #
 | |
| # ElementTree
 | |
| # $Id: ElementPath.py 1858 2004-06-17 21:31:41Z Fredrik $
 | |
| #
 | |
| # limited xpath support for element trees
 | |
| #
 | |
| # history:
 | |
| # 2003-05-23 fl   created
 | |
| # 2003-05-28 fl   added support for // etc
 | |
| # 2003-08-27 fl   fixed parsing of periods in element names
 | |
| #
 | |
| # Copyright (c) 2003-2004 by Fredrik Lundh.  All rights reserved.
 | |
| #
 | |
| # fredrik@pythonware.com
 | |
| # http://www.pythonware.com
 | |
| #
 | |
| # --------------------------------------------------------------------
 | |
| # The ElementTree toolkit is
 | |
| #
 | |
| # Copyright (c) 1999-2004 by Fredrik Lundh
 | |
| #
 | |
| # By obtaining, using, and/or copying this software and/or its
 | |
| # associated documentation, you agree that you have read, understood,
 | |
| # and will comply with the following terms and conditions:
 | |
| #
 | |
| # Permission to use, copy, modify, and distribute this software and
 | |
| # its associated documentation for any purpose and without fee is
 | |
| # hereby granted, provided that the above copyright notice appears in
 | |
| # all copies, and that both that copyright notice and this permission
 | |
| # notice appear in supporting documentation, and that the name of
 | |
| # Secret Labs AB or the author not be used in advertising or publicity
 | |
| # pertaining to distribution of the software without specific, written
 | |
| # prior permission.
 | |
| #
 | |
| # SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD
 | |
| # TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-
 | |
| # ABILITY AND FITNESS.  IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR
 | |
| # BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
 | |
| # DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
 | |
| # WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
 | |
| # ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
 | |
| # OF THIS SOFTWARE.
 | |
| # --------------------------------------------------------------------
 | |
| 
 | |
| # Licensed to PSF under a Contributor Agreement.
 | |
| # See http://www.python.org/2.4/license for licensing details.
 | |
| 
 | |
| ##
 | |
| # Implementation module for XPath support.  There's usually no reason
 | |
| # to import this module directly; the <b>ElementTree</b> does this for
 | |
| # you, if needed.
 | |
| ##
 | |
| 
 | |
| import re
 | |
| 
 | |
##
# Tokenizer for the supported XPath subset.  Calling it on a path
# string returns a list of (operator, tag) 2-tuples; exactly one of
# the two items is non-empty for a real token, and both are empty for
# a run of whitespace.
#
# The pattern is a raw string so that regex escapes such as "\." and
# "\s" are not parsed as (invalid) string escapes by the compiler.

xpath_tokenizer = re.compile(
    r"(::|\.\.|\(\)|[/.*:\[\]\(\)@=])"      # group 1: operators
    r"|((?:\{[^}]+\})?[^/:\[\]\(\)@=\s]+)"  # group 2: optional {uri} + name
    r"|\s+"                                 # whitespace (no group)
    ).findall

##
# Marker placed in a compiled path where the expression contained
# "//" (descendant-or-self).

class xpath_descendant_or_self:
    pass

##
# (Internal) Iterate over a node and its subtree, optionally filtered
# by tag.  Uses the node's iter() method when it has one and falls back
# to the older getiterator() spelling otherwise, so both old and new
# element implementations work.

def _iter_subtree(node, tag):
    walker = getattr(node, "iter", None)
    if walker is None:
        walker = node.getiterator
    return walker(tag)

##
# Wrapper for a compiled XPath.

class Path:

    ##
    # Create a Path instance from an XPath expression.
    #
    # @param path The path expression, as a string.
    # @exception SyntaxError If the path is absolute, ends with "//",
    #     or uses syntax that is not supported.

    def __init__(self, path):
        tokens = xpath_tokenizer(path)
        # the current version supports 'path/path'-style expressions only
        self.path = []
        self.tag = None
        if tokens and tokens[0][0] == "/":
            raise SyntaxError("cannot use absolute path on element")
        # walk the token list by index instead of popping from the front
        pos = 0
        count = len(tokens)
        while pos < count:
            op, tag = tokens[pos]
            pos += 1
            if tag or op == "*":
                self.path.append(tag or op)
            elif op == ".":
                pass
            elif op == "/":
                # a "/" in step position is the second half of "//"
                self.path.append(xpath_descendant_or_self())
                continue
            else:
                raise SyntaxError("unsupported path syntax (%s)" % op)
            # every step must be followed by a separator (or end of path)
            if pos < count:
                op, tag = tokens[pos]
                pos += 1
                if op != "/":
                    raise SyntaxError(
                        "expected path separator (%s)" % (op or tag)
                        )
        if self.path and isinstance(self.path[-1], xpath_descendant_or_self):
            raise SyntaxError("path cannot end with //")
        if len(self.path) == 1:
            step = self.path[0]
            # Fast path for a single plain tag name.  The "*" wildcard is
            # excluded: find/findtext compare the fast-path tag literally
            # against elem.tag, so a wildcard would never match there.
            if isinstance(step, str) and step != "*":
                self.tag = step

    ##
    # Find first matching object.
    #
    # @param element The element to search from.
    # @return The first matching element, or None if nothing matched.

    def find(self, element):
        tag = self.tag
        if tag is None:
            # general case: evaluate the whole path, take the first hit
            nodeset = self.findall(element)
            if not nodeset:
                return None
            return nodeset[0]
        for elem in element:
            if elem.tag == tag:
                return elem
        return None

    ##
    # Find text for first matching object.
    #
    # @param element The element to search from.
    # @param default Value to return if nothing matched.
    # @return The text of the first matching element (an empty string
    #     if that element has no text), or the default value if nothing
    #     matched.

    def findtext(self, element, default=None):
        tag = self.tag
        if tag is None:
            nodeset = self.findall(element)
            if not nodeset:
                return default
            return nodeset[0].text or ""
        for elem in element:
            if elem.tag == tag:
                return elem.text or ""
        return default

    ##
    # Find all matching objects.
    #
    # @param element The element to search from.
    # @return A list of matching elements; empty if nothing matched.

    def findall(self, element):
        nodeset = [element]
        steps = self.path
        count = len(steps)
        index = 0
        while index < count:
            step = steps[index]
            index += 1
            matches = []
            if isinstance(step, xpath_descendant_or_self):
                # "//": if the next step is a name (or "*"), consume it
                # and use it as the subtree filter; otherwise take every
                # descendant.
                tag = None
                if index < count and isinstance(steps[index], str):
                    tag = steps[index]
                    index += 1
                for node in nodeset:
                    found = list(_iter_subtree(node, tag))
                    # the subtree walk may yield the node itself first;
                    # only its descendants are wanted here
                    if found and found[0] is node:
                        matches.extend(found[1:])
                    else:
                        matches.extend(found)
            else:
                # plain step: filter the direct children of each node
                for node in nodeset:
                    for child in node:
                        if step == "*" or child.tag == step:
                            matches.append(child)
            if not matches:
                return []
            nodeset = matches
        return nodeset
 | |
| 
 | |
# Compiled Path objects, keyed by the path expression they came from.
_cache = {}

##
# (Internal) Compile path.  Returns the cached Path object for the
# expression if there is one; otherwise builds a new Path and caches
# it, emptying the cache first once it already holds 100 entries.

def _compile(path):
    compiled = _cache.get(path)
    if compiled is None:
        compiled = Path(path)
        if len(_cache) >= 100:
            _cache.clear()
        _cache[path] = compiled
    return compiled
 | |
| 
 | |
##
# Find first matching object.
#
# @param element The element to search from.
# @param path The path expression, as a string.
# @return The first matching element, or None if nothing matched.

def find(element, path):
    compiled = _compile(path)
    return compiled.find(element)
 | |
| 
 | |
##
# Find text for first matching object.
#
# @param element The element to search from.
# @param path The path expression, as a string.
# @param default Value to return if nothing matched.
# @return The text of the first matching element (an empty string if
#     it has no text), or the default value if nothing matched.

def findtext(element, path, default=None):
    compiled = _compile(path)
    return compiled.findtext(element, default)
 | |
| 
 | |
##
# Find all matching objects.
#
# @param element The element to search from.
# @param path The path expression, as a string.
# @return A list of matching elements; empty if nothing matched.

def findall(element, path):
    compiled = _compile(path)
    return compiled.findall(element)
 | 
