mirror of
https://github.com/python/cpython.git
synced 2025-08-04 00:48:58 +00:00
Whitespace normalization.
This commit is contained in:
parent
2344fae6d0
commit
0c9886d589
7 changed files with 488 additions and 488 deletions
200
Lib/regsub.py
200
Lib/regsub.py
|
@ -12,7 +12,7 @@ splitx(str, pat, maxsplit): split string using pattern as delimiter plus
|
|||
|
||||
import warnings
|
||||
# regsub is obsolete: emit a DeprecationWarning when the module is imported
# and point users at re.sub().
warnings.warn("the regsub module is deprecated; please use re.sub()",
              DeprecationWarning)

# Ignore further deprecation warnings about this module
warnings.filterwarnings("ignore", "", DeprecationWarning, __name__)
|
||||
|
@ -27,12 +27,12 @@ import regex
|
|||
# compiled pattern.
|
||||
|
||||
def sub(pat, repl, str):
    """Replace the first match of pat in str by repl and return the result.

    pat is handed to compile(), so it may be a pattern string or an object
    that compile() passes through unchanged.  repl is run through expand(),
    which substitutes backslash-digit group references.  When the search
    reports a negative result (no match), str is returned unchanged.
    """
    prog = compile(pat)
    if prog.search(str) < 0:
        # No match: nothing to replace.
        return str
    regs = prog.regs
    a, b = regs[0]  # span of the whole match
    return str[:a] + expand(repl, regs, str) + str[b:]
|
||||
|
||||
|
||||
# Replace all (non-overlapping) occurrences of pattern pat in string
|
||||
|
@ -41,23 +41,23 @@ def sub(pat, repl, str):
|
|||
# a previous match, so e.g. gsub('', '-', 'abc') returns '-a-b-c-'.
|
||||
|
||||
def gsub(pat, repl, str):
    """Replace all non-overlapping matches of pat in str by repl.

    pat is handed to compile(); repl is run through expand() for every
    match.  An empty match located exactly where the previous match ended
    is not used: the search is retried one character further on, and the
    loop stops if that is past the end of str or finds nothing.

    Returns the new string; text between matches is copied verbatim.
    """
    prog = compile(pat)
    pieces = []  # collected fragments, joined once at the end
    start = 0
    first = 1
    while prog.search(str, start) >= 0:
        regs = prog.regs
        a, b = regs[0]
        if a == b == start and not first:
            # Empty match adjacent to the previous one: look again one
            # position later, or give up if there is nowhere left to look.
            if start >= len(str) or prog.search(str, start + 1) < 0:
                break
            regs = prog.regs
            a, b = regs[0]
        pieces.append(str[start:a])
        pieces.append(expand(repl, regs, str))
        start = b
        first = 0
    pieces.append(str[start:])
    return ''.join(pieces)
|
||||
|
||||
|
||||
# Split string str in fields separated by delimiters matching pattern
|
||||
|
@ -66,7 +66,7 @@ def gsub(pat, repl, str):
|
|||
# The optional 3rd argument sets the number of splits that are performed.
|
||||
|
||||
def split(str, pat, maxsplit=0):
    """Split str into fields separated by matches of pat.

    Delegates to intsplit() with retain=0, so the delimiter text itself is
    not included in the result.  maxsplit limits the number of splits
    performed; the default 0 means no limit.
    """
    return intsplit(str, pat, maxsplit, 0)
|
||||
|
||||
# Split string str in fields separated by delimiters matching pattern
|
||||
# pat. Only non-empty matches for the pattern are considered, so e.g.
|
||||
|
@ -76,42 +76,42 @@ def split(str, pat, maxsplit = 0):
|
|||
|
||||
|
||||
def splitx(str, pat, maxsplit=0):
    """Split str on matches of pat, keeping the delimiters in the result.

    Delegates to intsplit() with retain=1, so each field that precedes a
    delimiter is followed in the list by the text that delimiter matched.
    maxsplit limits the number of splits performed; the default 0 means
    no limit.
    """
    return intsplit(str, pat, maxsplit, 1)
|
||||
|
||||
# Internal function used to implement split() and splitx().
|
||||
|
||||
def intsplit(str, pat, maxsplit, retain):
    """Shared implementation of split() and splitx().

    str      -- the string to split
    pat      -- pattern (handed to compile()) whose matches are delimiters
    maxsplit -- stop after this many splits; a false value means no limit
    retain   -- when true, also append the text of each delimiter

    Empty matches are never treated as delimiters: the search position is
    simply advanced by one character.  Returns a list of strings whose
    last element is whatever follows the final delimiter used.
    """
    prog = compile(pat)
    res = []
    start = pos = 0  # start: beginning of current field; pos: search origin
    splitcount = 0
    while prog.search(str, pos) >= 0:
        a, b = prog.regs[0]
        if a == b:
            # Empty match: skip ahead one character and try again.
            pos = pos + 1
            if pos >= len(str):
                break
        else:
            res.append(str[start:a])
            if retain:
                res.append(str[a:b])
            start = pos = b
            splitcount = splitcount + 1
            if maxsplit and splitcount >= maxsplit:
                break
    res.append(str[start:])
    return res
|
||||
|
||||
|
||||
# Capitalize words split using a pattern
|
||||
|
||||
def capwords(str, pat='[^a-zA-Z0-9_]+'):
    """Capitalize the words of str, where pat matches the word separators.

    The string is split with splitx(), which keeps the separators, so the
    even-indexed items are the words and the odd-indexed items are the
    separator text.  Each word is capitalized and everything is joined
    back together with the separators unchanged.
    """
    words = splitx(str, pat)
    for i in range(0, len(words), 2):
        words[i] = words[i].capitalize()
    return "".join(words)
|
||||
|
||||
|
||||
# Internal subroutines:
|
||||
|
@ -131,19 +131,19 @@ def capwords(str, pat='[^a-zA-Z0-9_]+'):
|
|||
# Compiled patterns, keyed by (pattern string, regex syntax in effect);
# filled by compile() and reset by clear_cache().
cache = {}
|
||||
|
||||
def compile(pat):
    """Return a compiled regex for pat, using the module-level cache.

    If pat is not a string it is assumed to be a compiled regex already
    and is returned as is.  Otherwise the cache is consulted under the key
    (pat, regex.get_syntax()); on a miss the pattern is compiled with
    regex.compile() and stored before being returned.
    """
    if not isinstance(pat, type('')):
        return pat              # Assume it is a compiled regex
    key = (pat, regex.get_syntax())
    try:
        return cache[key]       # Get it from the cache
    except KeyError:
        prog = cache[key] = regex.compile(pat)
        return prog
|
||||
|
||||
|
||||
def clear_cache():
    """Forget all cached compiled patterns.

    The module-level name is rebound to a fresh dictionary; the previous
    dictionary object itself is left untouched.
    """
    global cache
    cache = {}
|
||||
|
||||
|
||||
# Expand \digit in the replacement.
|
||||
|
@ -153,46 +153,46 @@ def clear_cache():
|
|||
# the \ and the following character are both copied).
|
||||
|
||||
def expand(repl, regs, str):
    """Expand backslash escapes in the replacement string repl.

    repl -- replacement text, possibly containing backslash escapes
    regs -- sequence of (start, end) index pairs; regs[n] is used for \\n
    str  -- the string those index pairs refer to

    A backslash followed by a digit n is replaced by str[a:b] where
    (a, b) = regs[n].  A doubled backslash yields a single backslash.  A
    backslash followed by any other character is copied together with
    that character, and a backslash at the very end of repl is copied as
    is.  If repl contains no backslash it is returned unchanged.
    """
    if '\\' not in repl:
        return repl
    pieces = []  # collected fragments, joined once at the end
    i = 0
    n = len(repl)
    while i < n:
        c = repl[i]
        i = i + 1
        if c != '\\' or i >= n:
            # Ordinary character, or a trailing backslash with nothing
            # after it: copy literally.
            pieces.append(c)
            continue
        c = repl[i]
        i = i + 1
        if '0' <= c <= '9':
            a, b = regs[int(c)]
            pieces.append(str[a:b])
        elif c == '\\':
            pieces.append(c)
        else:
            pieces.append('\\' + c)
    return ''.join(pieces)
|
||||
|
||||
|
||||
# Test program, reads sequences "pat repl str" from stdin.
|
||||
# Optional argument specifies pattern used to split lines.
|
||||
|
||||
def test():
    """Interactive test driver: read "pat repl str" lines from stdin.

    Each input line is split into fields with split(), using the pattern
    given as the first command-line argument or, by default, runs of
    blanks, tabs and newlines.  Lines that do not yield exactly three
    fields are reported and skipped; otherwise the results of sub() and
    gsub() are printed.  A prompt is written to stderr when stdin is a
    terminal.  The loop ends at end of input.
    """
    import sys
    if sys.argv[1:]:
        delpat = sys.argv[1]
    else:
        delpat = '[ \t\n]+'
    while 1:
        if sys.stdin.isatty():
            sys.stderr.write('--> ')
        line = sys.stdin.readline()
        if not line:
            break
        if line[-1] == '\n':
            line = line[:-1]
        fields = split(line, delpat)
        if len(fields) != 3:
            print('Sorry, not three fields')
            print('split: ' + repr(fields))
            continue
        # Reuse the fields already computed instead of splitting again.
        pat, repl, text = fields
        print('sub : ' + repr(sub(pat, repl, text)))
        print('gsub: ' + repr(gsub(pat, repl, text)))
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue