Issue 10534, difflib: tweak doc; test new SequenceMatcher instance attributes; avoid unneeded lists of SM.b2j keys and items in .__chain_b. Do not backport.

This commit is contained in:
Terry Reedy 2010-12-15 20:18:10 +00:00
parent 50ba19ee45
commit 17a59252e8
3 changed files with 28 additions and 9 deletions

View file

@@ -320,20 +320,22 @@ class SequenceMatcher:
self.bjunk = junk = set()
isjunk = self.isjunk
if isjunk:
for elt in list(b2j.keys()): # using list() since b2j is modified
for elt in b2j.keys():
if isjunk(elt):
junk.add(elt)
del b2j[elt]
for elt in junk: # separate loop avoids separate list of keys
del b2j[elt]
# Purge popular elements that are not junk
self.bpopular = popular = set()
n = len(b)
if self.autojunk and n >= 200:
ntest = n // 100 + 1
for elt, idxs in list(b2j.items()):
for elt, idxs in b2j.items():
if len(idxs) > ntest:
popular.add(elt)
del b2j[elt]
for elt in popular: # ditto; as fast for 1% deletion
del b2j[elt]
def isbjunk(self, item):
"Deprecated; use 'item in SequenceMatcher().bjunk'."