mirror of
https://github.com/python/cpython.git
synced 2025-08-30 13:38:43 +00:00
Checking in AMK's latest installment.
This commit is contained in:
parent
c386107838
commit
5bc5b14f6d
1 changed files with 0 additions and 193 deletions
193
Lib/re.py
193
Lib/re.py
|
@ -7,9 +7,6 @@ import sys
|
|||
import string
|
||||
from pcre import *
|
||||
|
||||
# Parsing contexts passed as the `context` argument of _expand_escape().
NORMAL = 0
CHARCLASS = 1
REPLACEMENT = 2

# Escape categories; _expand_escape() returns one of these as the first
# element of its result tuple.  SYNTAX, NOT_SYNTAX and SET are not
# referenced by the code visible in this part of the file.
CHAR = 0
MEMORY_REFERENCE = 1
SYNTAX = 2
NOT_SYNTAX = 3
SET = 4
WORD_BOUNDARY = 5
NOT_WORD_BOUNDARY = 6
BEGINNING_OF_BUFFER = 7
END_OF_BUFFER = 8
|
||||
|
||||
#
|
||||
# First, the public part of the interface:
|
||||
#
|
||||
|
@ -231,199 +228,9 @@ def escape(pattern):
|
|||
result.append(char)
|
||||
return string.join(result, '')
|
||||
|
||||
# Lazily-built compiled pattern used by valid_identifier(); stays None
# until the first call.
_idprog = None

def valid_identifier(id):
    """Return 1 if `id` matches the identifier pattern, otherwise 0.

    The pattern is a letter or underscore followed by any number of
    \\w characters up to the end of the string.  It is compiled on the
    first call and cached in the module-level `_idprog`.
    """
    global _idprog
    if not _idprog:
        # `compile` here is this module's own compile(), not the builtin.
        _idprog = compile(r"[a-zA-Z_]\w*$")
    matched = _idprog.match(id)
    if matched:
        return 1
    return 0
|
||||
|
||||
def compile(pattern, flags=0):
    """Compile `pattern` with `flags` and wrap the result in a RegexObject.

    A fresh dictionary is handed to pcre_compile() together with the
    pattern and then passed on to the RegexObject as its group index
    (presumably pcre_compile fills it with named-group information --
    confirm against the pcre module).
    """
    named_groups = {}
    compiled = pcre_compile(pattern, flags, named_groups)
    return RegexObject(pattern, flags, compiled, named_groups)
|
||||
|
||||
def _expand(m, repl):
|
||||
results = []
|
||||
index = 0
|
||||
size = len(repl)
|
||||
while index < size:
|
||||
found = string.find(repl, '\\', index)
|
||||
if found < 0:
|
||||
results.append(repl[index:])
|
||||
break
|
||||
if found > index:
|
||||
results.append(repl[index:found])
|
||||
escape_type, value, index = _expand_escape(repl, found+1, REPLACEMENT)
|
||||
if escape_type == CHAR:
|
||||
results.append(value)
|
||||
elif escape_type == MEMORY_REFERENCE:
|
||||
r = m.group(value)
|
||||
if r is None:
|
||||
raise error, ('group "' + str(value) + '" did not contribute '
|
||||
'to the match')
|
||||
results.append(m.group(value))
|
||||
else:
|
||||
raise error, "bad escape in replacement"
|
||||
return string.join(results, '')
|
||||
|
||||
def _expand_escape(pattern, index, context=NORMAL):
|
||||
if index >= len(pattern):
|
||||
raise error, 'escape ends too soon'
|
||||
|
||||
elif pattern[index] == 't':
|
||||
return CHAR, chr(9), index + 1
|
||||
|
||||
elif pattern[index] == 'n':
|
||||
return CHAR, chr(10), index + 1
|
||||
|
||||
elif pattern[index] == 'v':
|
||||
return CHAR, chr(11), index + 1
|
||||
|
||||
elif pattern[index] == 'r':
|
||||
return CHAR, chr(13), index + 1
|
||||
|
||||
elif pattern[index] == 'f':
|
||||
return CHAR, chr(12), index + 1
|
||||
|
||||
elif pattern[index] == 'a':
|
||||
return CHAR, chr(7), index + 1
|
||||
|
||||
elif pattern[index] == 'x':
|
||||
# CAUTION: this is the Python rule, not the Perl rule!
|
||||
end = index + 1 # Skip over the 'x' character
|
||||
while (end < len(pattern)) and (pattern[end] in string.hexdigits):
|
||||
end = end + 1
|
||||
if end == index:
|
||||
raise error, "\\x must be followed by hex digit(s)"
|
||||
# let Python evaluate it, so we don't incorrectly 2nd-guess
|
||||
# what it's doing (and Python in turn passes it on to sscanf,
|
||||
# so that *it* doesn't incorrectly 2nd-guess what C does!)
|
||||
char = eval ('"' + pattern[index-1:end] + '"')
|
||||
# assert len(char) == 1
|
||||
return CHAR, char, end
|
||||
|
||||
elif pattern[index] == 'b':
|
||||
if context != NORMAL:
|
||||
return CHAR, chr(8), index + 1
|
||||
else:
|
||||
return WORD_BOUNDARY, '', index + 1
|
||||
|
||||
elif pattern[index] == 'B':
|
||||
if context != NORMAL:
|
||||
return CHAR, 'B', index + 1
|
||||
else:
|
||||
return NOT_WORD_BOUNDARY, '', index + 1
|
||||
|
||||
elif pattern[index] == 'A':
|
||||
if context != NORMAL:
|
||||
return CHAR, 'A', index + 1
|
||||
else:
|
||||
return BEGINNING_OF_BUFFER, '', index + 1
|
||||
|
||||
elif pattern[index] == 'Z':
|
||||
if context != NORMAL:
|
||||
return CHAR, 'Z', index + 1
|
||||
else:
|
||||
return END_OF_BUFFER, '', index + 1
|
||||
|
||||
elif pattern[index] in 'GluLUQE':
|
||||
raise error, ('\\' + pattern[index] + ' is not allowed')
|
||||
|
||||
elif pattern[index] == 'w':
|
||||
return CHAR, 'w', index + 1
|
||||
|
||||
elif pattern[index] == 'W':
|
||||
return CHAR, 'W', index + 1
|
||||
|
||||
elif pattern[index] == 's':
|
||||
return CHAR, 's', index + 1
|
||||
|
||||
elif pattern[index] == 'S':
|
||||
return CHAR, 'S', index + 1
|
||||
|
||||
elif pattern[index] == 'd':
|
||||
return CHAR, 'd', index + 1
|
||||
|
||||
elif pattern[index] == 'D':
|
||||
return CHAR, 'D', index + 1
|
||||
|
||||
elif pattern[index] in '0123456789':
|
||||
|
||||
if pattern[index] == '0':
|
||||
if (index + 1 < len(pattern)) and \
|
||||
(pattern[index + 1] in string.octdigits):
|
||||
if (index + 2 < len(pattern)) and \
|
||||
(pattern[index + 2] in string.octdigits):
|
||||
value = string.atoi(pattern[index:index + 3], 8)
|
||||
index = index + 3
|
||||
|
||||
else:
|
||||
value = string.atoi(pattern[index:index + 2], 8)
|
||||
index = index + 2
|
||||
|
||||
else:
|
||||
value = 0
|
||||
index = index + 1
|
||||
|
||||
if value > 255:
|
||||
raise error, 'octal value out of range'
|
||||
|
||||
return CHAR, chr(value), index
|
||||
|
||||
else:
|
||||
if (index + 1 < len(pattern)) and \
|
||||
(pattern[index + 1] in string.digits):
|
||||
if (index + 2 < len(pattern)) and \
|
||||
(pattern[index + 2] in string.octdigits) and \
|
||||
(pattern[index + 1] in string.octdigits) and \
|
||||
(pattern[index] in string.octdigits):
|
||||
value = string.atoi(pattern[index:index + 3], 8)
|
||||
if value > 255:
|
||||
raise error, 'octal value out of range'
|
||||
|
||||
return CHAR, chr(value), index + 3
|
||||
|
||||
else:
|
||||
value = string.atoi(pattern[index:index + 2])
|
||||
if (value < 1) or (value > 99):
|
||||
raise error, 'memory reference out of range'
|
||||
|
||||
if context == CHARCLASS:
|
||||
raise error, ('cannot reference a register from '
|
||||
'inside a character class')
|
||||
return MEMORY_REFERENCE, value, index + 2
|
||||
|
||||
else:
|
||||
if context == CHARCLASS:
|
||||
raise error, ('cannot reference a register from '
|
||||
'inside a character class')
|
||||
|
||||
value = string.atoi(pattern[index])
|
||||
return MEMORY_REFERENCE, value, index + 1
|
||||
|
||||
elif pattern[index] == 'g':
|
||||
if context != REPLACEMENT:
|
||||
return CHAR, 'g', index + 1
|
||||
|
||||
index = index + 1
|
||||
if index >= len(pattern):
|
||||
raise error, 'unfinished symbolic reference'
|
||||
if pattern[index] != '<':
|
||||
raise error, 'missing < in symbolic reference'
|
||||
|
||||
index = index + 1
|
||||
end = string.find(pattern, '>', index)
|
||||
if end == -1:
|
||||
raise error, 'unfinished symbolic reference'
|
||||
value = pattern[index:end]
|
||||
if not valid_identifier(value):
|
||||
raise error, 'illegal symbolic reference'
|
||||
return MEMORY_REFERENCE, value, end + 1
|
||||
|
||||
else:
|
||||
return CHAR, pattern[index], index + 1
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue