mirror of
https://github.com/python/cpython.git
synced 2025-08-03 08:34:29 +00:00
New module regsub contains sub(), gsub() and split() as in nawk.
string.splitfields(s, '') now returns [s] as split() in nawk. Added _exit to exported functions of os.
This commit is contained in:
parent
7066dd75c5
commit
7a461e5aaf
4 changed files with 152 additions and 6 deletions
|
@ -18,6 +18,7 @@
|
|||
|
||||
try:
|
||||
from posix import *
|
||||
from posix import _exit
|
||||
name = 'posix'
|
||||
curdir = '.'
|
||||
pardir = '..'
|
||||
|
|
147
Lib/regsub.py
Normal file
147
Lib/regsub.py
Normal file
|
@ -0,0 +1,147 @@
|
|||
# Regular expression subroutines:
|
||||
# sub(pat, repl, str): replace first occurrence of pattern in string
|
||||
# gsub(pat, repl, str): replace all occurrences of pattern in string
|
||||
# split(str, pat): split string using pattern as delimiter
|
||||
|
||||
|
||||
import regex
|
||||
|
||||
|
||||
# Replace first occurrence of pattern pat in string str by replacement
|
||||
# repl. If the pattern isn't found, the string is returned unchanged.
|
||||
# The replacement may contain references \digit to subpatterns and
|
||||
# escaped backslashes. The pattern may be a string or an already
|
||||
# compiled pattern.
|
||||
|
||||
def sub(pat, repl, str):
|
||||
prog = compile(pat)
|
||||
if prog.search(str) >= 0:
|
||||
regs = prog.regs
|
||||
a, b = regs[0]
|
||||
str = str[:a] + expand(repl, regs, str) + str[b:]
|
||||
return str
|
||||
|
||||
|
||||
# Replace all (non-overlapping) occurrences of pattern pat in string
|
||||
# str by replacement repl. The same rules as for sub() apply.
|
||||
# Empty matches for the pattern are replaced only when not adjacent to
|
||||
# a previous match, so e.g. gsub('', '-', 'abc') returns '-a-b-c-'.
|
||||
|
||||
def gsub(pat, repl, str):
|
||||
prog = compile(pat)
|
||||
new = ''
|
||||
start = 0
|
||||
first = 1
|
||||
while prog.search(str, start) >= 0:
|
||||
regs = prog.regs
|
||||
a, b = regs[0]
|
||||
if a == b == start and not first:
|
||||
if start >= len(str) or prog.search(str, start+1) < 0:
|
||||
break
|
||||
regs = prog.regs
|
||||
a, b = regs[0]
|
||||
new = new + str[start:a] + expand(repl, regs, str)
|
||||
start = b
|
||||
first = 0
|
||||
new = new + str[start:]
|
||||
return new
|
||||
|
||||
|
||||
# Split string str in fields separated by delimiters matching pattern
|
||||
# pat. Only non-empty matches for the pattern are considered, so e.g.
|
||||
# split('abc', '') returns ['abc'].
|
||||
|
||||
def split(str, pat):
|
||||
prog = compile(pat)
|
||||
res = []
|
||||
start = next = 0
|
||||
while prog.search(str, next) >= 0:
|
||||
regs = prog.regs
|
||||
a, b = regs[0]
|
||||
if a == b:
|
||||
next = next + 1
|
||||
if next >= len(str):
|
||||
break
|
||||
else:
|
||||
res.append(str[start:a])
|
||||
start = next = b
|
||||
res.append(str[start:])
|
||||
return res
|
||||
|
||||
|
||||
# Internal subroutines:
|
||||
# compile(pat): compile a pattern, caching already compiled patterns
|
||||
# expand(repl, regs, str): expand \digit escapes in replacement string
|
||||
|
||||
|
||||
# Manage a cache of compiled regular expressions.
|
||||
# If the pattern is a string a compiled version of it is returned.
|
||||
# If the pattern has been used before we return an already compiled
|
||||
# version from the cache; otherwise we compile it now and save the
|
||||
# compiled version in the cache.
|
||||
# Instead of a string, a compiled regular expression can also be
|
||||
# passed.
|
||||
# WARNING: if the pattern syntax is changed, the cache should be
|
||||
# flushed!
|
||||
|
||||
cache = {}
|
||||
|
||||
def compile(pat):
|
||||
if type(pat) <> type(''):
|
||||
return pat # Assume it is a compiled regex
|
||||
if cache.has_key(pat):
|
||||
prog = cache[pat] # Get it from the cache
|
||||
else:
|
||||
prog = cache[pat] = regex.compile(pat)
|
||||
return prog
|
||||
|
||||
|
||||
# Expand \digit in the replacement.
|
||||
# Each occurrence of \digit is replaced by the substring of str
|
||||
# indicated by regs[digit]. To include a literal \ in the
|
||||
# replacement, double it; other \ escapes are left unchanged (i.e.
|
||||
# the \ and the following character are both copied).
|
||||
|
||||
def expand(repl, regs, str):
|
||||
if '\\' not in repl:
|
||||
return repl
|
||||
new = ''
|
||||
i = 0
|
||||
while i < len(repl):
|
||||
c = repl[i]; i = i+1
|
||||
if c <> '\\' or i >= len(repl):
|
||||
new = new + c
|
||||
else:
|
||||
c = repl[i]; i = i+1
|
||||
if '0' <= c <= '9':
|
||||
a, b = regs[eval(c)]
|
||||
new = new + str[a:b]
|
||||
elif c == '\\':
|
||||
new = new + c
|
||||
else:
|
||||
new = new + '\\' + c
|
||||
return new
|
||||
|
||||
|
||||
# Test program, reads sequences "pat repl str" from stdin.
|
||||
# Optional argument specifies pattern used to split lines.
|
||||
|
||||
def test():
|
||||
import sys
|
||||
if sys.argv[1:]:
|
||||
delpat = sys.argv[1]
|
||||
else:
|
||||
delpat = '[ \t\n]+'
|
||||
while 1:
|
||||
if sys.stdin.isatty(): sys.stderr.write('--> ')
|
||||
line = sys.stdin.readline()
|
||||
if not line: break
|
||||
if line[-1] == '\n': line = line[:-1]
|
||||
fields = split(line, delpat)
|
||||
if len(fields) <> 3:
|
||||
print 'Sorry, not three fields'
|
||||
print 'split:', `fields`
|
||||
continue
|
||||
[pat, repl, str] = split(line, delpat)
|
||||
print 'sub :', `sub(pat, repl, str)`
|
||||
print 'gsub:', `gsub(pat, repl, str)`
|
|
@ -63,13 +63,12 @@ def split(s):
|
|||
|
||||
# Split a list into fields separated by a given string
|
||||
# NB: splitfields(s, ' ') is NOT the same as split(s)!
|
||||
# splitfields(s, '') is illegal
|
||||
splitfields_error = 'string.splitfields called with empty separator'
|
||||
# splitfields(s, '') returns [s] (in analogy with split() in nawk)
|
||||
def splitfields(s, sep):
|
||||
res = []
|
||||
nsep = len(sep)
|
||||
if nsep == 0:
|
||||
raise splitfields_error
|
||||
return [s]
|
||||
ns = len(s)
|
||||
i = j = 0
|
||||
while j+nsep <= ns:
|
||||
|
|
|
@ -63,13 +63,12 @@ def split(s):
|
|||
|
||||
# Split a list into fields separated by a given string
|
||||
# NB: splitfields(s, ' ') is NOT the same as split(s)!
|
||||
# splitfields(s, '') is illegal
|
||||
splitfields_error = 'string.splitfields called with empty separator'
|
||||
# splitfields(s, '') returns [s] (in analogy with split() in nawk)
|
||||
def splitfields(s, sep):
|
||||
res = []
|
||||
nsep = len(sep)
|
||||
if nsep == 0:
|
||||
raise splitfields_error
|
||||
return [s]
|
||||
ns = len(s)
|
||||
i = j = 0
|
||||
while j+nsep <= ns:
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue