- actually enabled charset anchors in the engine (still not
  used by the code generator)

- changed max repeat value in engine (to match earlier array fix)

- added experimental "which part matched?" mechanism to sre; see
  http://hem.passagen.se/eff/2000_07_01_bot-archive.htm#416954
  or python-dev for details.
This commit is contained in:
Fredrik Lundh 2000-07-02 17:33:27 +00:00
parent b19948b7fb
commit 7cafe4d7e4
7 changed files with 95 additions and 20 deletions

View file

@ -155,3 +155,34 @@ def _pickle(p):
return _compile, (p.pattern, p.flags)
# Register pickle support for compiled pattern objects: the pattern type is
# obtained by compiling an empty pattern, _pickle is its reduction function,
# and _compile is registered as the constructor.
copy_reg.pickle(type(_compile("")), _pickle, _compile)
# --------------------------------------------------------------------
# experimental stuff (see python-dev discussions for details)
class Scanner:
    """Experimental lexicon-driven scanner.

    A lexicon is an indexable sequence of ``(phrase, action)`` pairs, where
    ``phrase`` is a regular expression fragment and ``action`` is either a
    callable, ``None``, or any other value.
    """

    def __init__(self, lexicon):
        """Combine all lexicon phrases into one alternation pattern.

        Each phrase is wrapped in a non-capturing group and followed by a
        ``(?P#N)`` marker, where N is the phrase's position in the lexicon.
        NOTE(review): the marker is presumably what the engine reports back
        as ``m.index`` in scan() -- confirm against the sre parser.
        """
        self.lexicon = lexicon
        p = []
        for phrase, action in lexicon:
            p.append("(?:%s)(?P#%d)" % (phrase, len(p)))
        self.scanner = sre.compile("|".join(p))

    def scan(self, string):
        """Scan ``string`` from the start and collect action results.

        For every match, the lexicon entry selected by ``m.index`` supplies
        the action:

        * a callable is invoked as ``action(scanner, matched_text)`` and its
          return value is used in place of the action;
        * a value of ``None`` (either the action itself or the result of a
          callable) is discarded;
        * anything else is appended to the result list.

        While a callable action runs, ``self.match`` refers to the current
        match object.

        Returns a ``(results, remainder)`` tuple, where ``remainder`` is the
        part of ``string`` left unscanned.
        """
        result = []
        append = result.append
        match = self.scanner.match
        i = 0
        while 1:
            m = match(string, i)
            if not m:
                break
            j = m.end()
            if i == j:
                # an empty match would never advance; stop scanning here
                break
            action = self.lexicon[m.index][1]
            if callable(action):
                # expose the current match object (not the pattern's bound
                # match method) so the action can inspect it
                self.match = m
                action = action(self, m.group())
            if action is not None:
                append(action)
            i = j
        return result, string[i:]