mirror of
https://github.com/python/cpython.git
synced 2025-10-07 07:31:46 +00:00
Make difflib.ndiff() and difflib.Differ.compare() generators. This
restores the 2.1 ability of Tools/scripts/ndiff.py to start producing output before the entire comparison is complete.
This commit is contained in:
parent
380bad1b4e
commit
8a9c284437
4 changed files with 84 additions and 70 deletions
|
@ -85,7 +85,7 @@
|
||||||
\begin{funcdesc}{ndiff}{a, b\optional{, linejunk\optional{,
|
\begin{funcdesc}{ndiff}{a, b\optional{, linejunk\optional{,
|
||||||
charjunk}}}
|
charjunk}}}
|
||||||
Compare \var{a} and \var{b} (lists of strings); return a
|
Compare \var{a} and \var{b} (lists of strings); return a
|
||||||
\class{Differ}-style delta.
|
\class{Differ}-style delta (a generator generating the delta lines).
|
||||||
|
|
||||||
Optional keyword parameters \var{linejunk} and \var{charjunk} are
|
Optional keyword parameters \var{linejunk} and \var{charjunk} are
|
||||||
for filter functions (or \code{None}):
|
for filter functions (or \code{None}):
|
||||||
|
@ -132,6 +132,7 @@
|
||||||
\begin{verbatim}
|
\begin{verbatim}
|
||||||
>>> diff = ndiff('one\ntwo\nthree\n'.splitlines(1),
|
>>> diff = ndiff('one\ntwo\nthree\n'.splitlines(1),
|
||||||
... 'ore\ntree\nemu\n'.splitlines(1))
|
... 'ore\ntree\nemu\n'.splitlines(1))
|
||||||
|
>>> diff = list(diff) # materialize the generated delta into a list
|
||||||
>>> print ''.join(restore(diff, 1)),
|
>>> print ''.join(restore(diff, 1)),
|
||||||
one
|
one
|
||||||
two
|
two
|
||||||
|
@ -459,13 +460,14 @@ The \class{Differ} class has this constructor:
|
||||||
method:
|
method:
|
||||||
|
|
||||||
\begin{methoddesc}{compare}{a, b}
|
\begin{methoddesc}{compare}{a, b}
|
||||||
Compare two sequences of lines; return the resulting delta (list).
|
Compare two sequences of lines, and generate the delta (a sequence
|
||||||
|
of lines).
|
||||||
|
|
||||||
Each sequence must contain individual single-line strings ending
|
Each sequence must contain individual single-line strings ending
|
||||||
with newlines. Such sequences can be obtained from the
|
with newlines. Such sequences can be obtained from the
|
||||||
\method{readlines()} method of file-like objects. The list returned
|
\method{readlines()} method of file-like objects. The delta generated
|
||||||
is also made up of newline-terminated strings, and ready to be used
|
also consists of newline-terminated strings, ready to be printed as-is
|
||||||
with the \method{writelines()} method of a file-like object.
|
via the \method{writelines()} method of a file-like object.
|
||||||
\end{methoddesc}
|
\end{methoddesc}
|
||||||
|
|
||||||
|
|
||||||
|
@ -506,7 +508,7 @@ functions to filter out line and character ``junk.'' See the
|
||||||
Finally, we compare the two:
|
Finally, we compare the two:
|
||||||
|
|
||||||
\begin{verbatim}
|
\begin{verbatim}
|
||||||
>>> result = d.compare(text1, text2)
|
>>> result = list(d.compare(text1, text2))
|
||||||
\end{verbatim}
|
\end{verbatim}
|
||||||
|
|
||||||
\code{result} is a list of strings, so let's pretty-print it:
|
\code{result} is a list of strings, so let's pretty-print it:
|
||||||
|
|
100
Lib/difflib.py
100
Lib/difflib.py
|
@ -1,5 +1,7 @@
|
||||||
#! /usr/bin/env python
|
#! /usr/bin/env python
|
||||||
|
|
||||||
|
from __future__ import generators
|
||||||
|
|
||||||
"""
|
"""
|
||||||
Module difflib -- helpers for computing deltas between objects.
|
Module difflib -- helpers for computing deltas between objects.
|
||||||
|
|
||||||
|
@ -22,8 +24,6 @@ Class Differ:
|
||||||
__all__ = ['get_close_matches', 'ndiff', 'restore', 'SequenceMatcher',
|
__all__ = ['get_close_matches', 'ndiff', 'restore', 'SequenceMatcher',
|
||||||
'Differ']
|
'Differ']
|
||||||
|
|
||||||
TRACE = 0
|
|
||||||
|
|
||||||
class SequenceMatcher:
|
class SequenceMatcher:
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
@ -406,9 +406,6 @@ class SequenceMatcher:
|
||||||
a[besti+bestsize] == b[bestj+bestsize]:
|
a[besti+bestsize] == b[bestj+bestsize]:
|
||||||
bestsize = bestsize + 1
|
bestsize = bestsize + 1
|
||||||
|
|
||||||
if TRACE:
|
|
||||||
print "get_matching_blocks", alo, ahi, blo, bhi
|
|
||||||
print " returns", besti, bestj, bestsize
|
|
||||||
return besti, bestj, bestsize
|
return besti, bestj, bestsize
|
||||||
|
|
||||||
def get_matching_blocks(self):
|
def get_matching_blocks(self):
|
||||||
|
@ -432,8 +429,6 @@ class SequenceMatcher:
|
||||||
la, lb = len(self.a), len(self.b)
|
la, lb = len(self.a), len(self.b)
|
||||||
self.__helper(0, la, 0, lb, self.matching_blocks)
|
self.__helper(0, la, 0, lb, self.matching_blocks)
|
||||||
self.matching_blocks.append( (la, lb, 0) )
|
self.matching_blocks.append( (la, lb, 0) )
|
||||||
if TRACE:
|
|
||||||
print '*** matching blocks', self.matching_blocks
|
|
||||||
return self.matching_blocks
|
return self.matching_blocks
|
||||||
|
|
||||||
# builds list of matching blocks covering a[alo:ahi] and
|
# builds list of matching blocks covering a[alo:ahi] and
|
||||||
|
@ -694,7 +689,7 @@ class Differ:
|
||||||
|
|
||||||
Finally, we compare the two:
|
Finally, we compare the two:
|
||||||
|
|
||||||
>>> result = d.compare(text1, text2)
|
>>> result = list(d.compare(text1, text2))
|
||||||
|
|
||||||
'result' is a list of strings, so let's pretty-print it:
|
'result' is a list of strings, so let's pretty-print it:
|
||||||
|
|
||||||
|
@ -731,7 +726,7 @@ class Differ:
|
||||||
Construct a text differencer, with optional filters.
|
Construct a text differencer, with optional filters.
|
||||||
|
|
||||||
compare(a, b)
|
compare(a, b)
|
||||||
Compare two sequences of lines; return the resulting delta (list).
|
Compare two sequences of lines; generate the resulting delta.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, linejunk=None, charjunk=None):
|
def __init__(self, linejunk=None, charjunk=None):
|
||||||
|
@ -753,16 +748,15 @@ class Differ:
|
||||||
|
|
||||||
self.linejunk = linejunk
|
self.linejunk = linejunk
|
||||||
self.charjunk = charjunk
|
self.charjunk = charjunk
|
||||||
self.results = []
|
|
||||||
|
|
||||||
def compare(self, a, b):
|
def compare(self, a, b):
|
||||||
r"""
|
r"""
|
||||||
Compare two sequences of lines; return the resulting delta (list).
|
Compare two sequences of lines; generate the resulting delta.
|
||||||
|
|
||||||
Each sequence must contain individual single-line strings ending with
|
Each sequence must contain individual single-line strings ending with
|
||||||
newlines. Such sequences can be obtained from the `readlines()` method
|
newlines. Such sequences can be obtained from the `readlines()` method
|
||||||
of file-like objects. The list returned is also made up of
|
of file-like objects. The delta generated also consists of newline-
|
||||||
newline-terminated strings, ready to be used with the `writelines()`
|
terminated strings, ready to be printed as-is via the writelines()
|
||||||
method of a file-like object.
|
method of a file-like object.
|
||||||
|
|
||||||
Example:
|
Example:
|
||||||
|
@ -783,34 +777,38 @@ class Differ:
|
||||||
cruncher = SequenceMatcher(self.linejunk, a, b)
|
cruncher = SequenceMatcher(self.linejunk, a, b)
|
||||||
for tag, alo, ahi, blo, bhi in cruncher.get_opcodes():
|
for tag, alo, ahi, blo, bhi in cruncher.get_opcodes():
|
||||||
if tag == 'replace':
|
if tag == 'replace':
|
||||||
self._fancy_replace(a, alo, ahi, b, blo, bhi)
|
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
|
||||||
elif tag == 'delete':
|
elif tag == 'delete':
|
||||||
self._dump('-', a, alo, ahi)
|
g = self._dump('-', a, alo, ahi)
|
||||||
elif tag == 'insert':
|
elif tag == 'insert':
|
||||||
self._dump('+', b, blo, bhi)
|
g = self._dump('+', b, blo, bhi)
|
||||||
elif tag == 'equal':
|
elif tag == 'equal':
|
||||||
self._dump(' ', a, alo, ahi)
|
g = self._dump(' ', a, alo, ahi)
|
||||||
else:
|
else:
|
||||||
raise ValueError, 'unknown tag ' + `tag`
|
raise ValueError, 'unknown tag ' + `tag`
|
||||||
results = self.results
|
|
||||||
self.results = []
|
for line in g:
|
||||||
return results
|
yield line
|
||||||
|
|
||||||
def _dump(self, tag, x, lo, hi):
|
def _dump(self, tag, x, lo, hi):
|
||||||
"""Store comparison results for a same-tagged range."""
|
"""Generate comparison results for a same-tagged range."""
|
||||||
for i in xrange(lo, hi):
|
for i in xrange(lo, hi):
|
||||||
self.results.append('%s %s' % (tag, x[i]))
|
yield '%s %s' % (tag, x[i])
|
||||||
|
|
||||||
def _plain_replace(self, a, alo, ahi, b, blo, bhi):
|
def _plain_replace(self, a, alo, ahi, b, blo, bhi):
|
||||||
assert alo < ahi and blo < bhi
|
assert alo < ahi and blo < bhi
|
||||||
# dump the shorter block first -- reduces the burden on short-term
|
# dump the shorter block first -- reduces the burden on short-term
|
||||||
# memory if the blocks are of very different sizes
|
# memory if the blocks are of very different sizes
|
||||||
if bhi - blo < ahi - alo:
|
if bhi - blo < ahi - alo:
|
||||||
self._dump('+', b, blo, bhi)
|
first = self._dump('+', b, blo, bhi)
|
||||||
self._dump('-', a, alo, ahi)
|
second = self._dump('-', a, alo, ahi)
|
||||||
else:
|
else:
|
||||||
self._dump('-', a, alo, ahi)
|
first = self._dump('-', a, alo, ahi)
|
||||||
self._dump('+', b, blo, bhi)
|
second = self._dump('+', b, blo, bhi)
|
||||||
|
|
||||||
|
for g in first, second:
|
||||||
|
for line in g:
|
||||||
|
yield line
|
||||||
|
|
||||||
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
|
def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
|
||||||
r"""
|
r"""
|
||||||
|
@ -830,12 +828,6 @@ class Differ:
|
||||||
? ^ ^ ^
|
? ^ ^ ^
|
||||||
"""
|
"""
|
||||||
|
|
||||||
if TRACE:
|
|
||||||
self.results.append('*** _fancy_replace %s %s %s %s\n'
|
|
||||||
% (alo, ahi, blo, bhi))
|
|
||||||
self._dump('>', a, alo, ahi)
|
|
||||||
self._dump('<', b, blo, bhi)
|
|
||||||
|
|
||||||
# don't synch up unless the lines have a similarity score of at
|
# don't synch up unless the lines have a similarity score of at
|
||||||
# least cutoff; best_ratio tracks the best score seen so far
|
# least cutoff; best_ratio tracks the best score seen so far
|
||||||
best_ratio, cutoff = 0.74, 0.75
|
best_ratio, cutoff = 0.74, 0.75
|
||||||
|
@ -869,7 +861,8 @@ class Differ:
|
||||||
# no non-identical "pretty close" pair
|
# no non-identical "pretty close" pair
|
||||||
if eqi is None:
|
if eqi is None:
|
||||||
# no identical pair either -- treat it as a straight replace
|
# no identical pair either -- treat it as a straight replace
|
||||||
self._plain_replace(a, alo, ahi, b, blo, bhi)
|
for line in self._plain_replace(a, alo, ahi, b, blo, bhi):
|
||||||
|
yield line
|
||||||
return
|
return
|
||||||
# no close pair, but an identical pair -- synch up on that
|
# no close pair, but an identical pair -- synch up on that
|
||||||
best_i, best_j, best_ratio = eqi, eqj, 1.0
|
best_i, best_j, best_ratio = eqi, eqj, 1.0
|
||||||
|
@ -879,14 +872,10 @@ class Differ:
|
||||||
|
|
||||||
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
|
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
|
||||||
# identical
|
# identical
|
||||||
if TRACE:
|
|
||||||
self.results.append('*** best_ratio %s %s %s %s\n'
|
|
||||||
% (best_ratio, best_i, best_j))
|
|
||||||
self._dump('>', a, best_i, best_i+1)
|
|
||||||
self._dump('<', b, best_j, best_j+1)
|
|
||||||
|
|
||||||
# pump out diffs from before the synch point
|
# pump out diffs from before the synch point
|
||||||
self._fancy_helper(a, alo, best_i, b, blo, best_j)
|
for line in self._fancy_helper(a, alo, best_i, b, blo, best_j):
|
||||||
|
yield line
|
||||||
|
|
||||||
# do intraline marking on the synch pair
|
# do intraline marking on the synch pair
|
||||||
aelt, belt = a[best_i], b[best_j]
|
aelt, belt = a[best_i], b[best_j]
|
||||||
|
@ -908,22 +897,28 @@ class Differ:
|
||||||
btags += ' ' * lb
|
btags += ' ' * lb
|
||||||
else:
|
else:
|
||||||
raise ValueError, 'unknown tag ' + `tag`
|
raise ValueError, 'unknown tag ' + `tag`
|
||||||
self._qformat(aelt, belt, atags, btags)
|
for line in self._qformat(aelt, belt, atags, btags):
|
||||||
|
yield line
|
||||||
else:
|
else:
|
||||||
# the synch pair is identical
|
# the synch pair is identical
|
||||||
self.results.append(' ' + aelt)
|
yield ' ' + aelt
|
||||||
|
|
||||||
# pump out diffs from after the synch point
|
# pump out diffs from after the synch point
|
||||||
self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
|
for line in self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi):
|
||||||
|
yield line
|
||||||
|
|
||||||
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
|
def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
|
||||||
|
g = []
|
||||||
if alo < ahi:
|
if alo < ahi:
|
||||||
if blo < bhi:
|
if blo < bhi:
|
||||||
self._fancy_replace(a, alo, ahi, b, blo, bhi)
|
g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
|
||||||
else:
|
else:
|
||||||
self._dump('-', a, alo, ahi)
|
g = self._dump('-', a, alo, ahi)
|
||||||
elif blo < bhi:
|
elif blo < bhi:
|
||||||
self._dump('+', b, blo, bhi)
|
g = self._dump('+', b, blo, bhi)
|
||||||
|
|
||||||
|
for line in g:
|
||||||
|
yield line
|
||||||
|
|
||||||
def _qformat(self, aline, bline, atags, btags):
|
def _qformat(self, aline, bline, atags, btags):
|
||||||
r"""
|
r"""
|
||||||
|
@ -949,13 +944,13 @@ class Differ:
|
||||||
atags = atags[common:].rstrip()
|
atags = atags[common:].rstrip()
|
||||||
btags = btags[common:].rstrip()
|
btags = btags[common:].rstrip()
|
||||||
|
|
||||||
self.results.append("- " + aline)
|
yield "- " + aline
|
||||||
if atags:
|
if atags:
|
||||||
self.results.append("? %s%s\n" % ("\t" * common, atags))
|
yield "? %s%s\n" % ("\t" * common, atags)
|
||||||
|
|
||||||
self.results.append("+ " + bline)
|
yield "+ " + bline
|
||||||
if btags:
|
if btags:
|
||||||
self.results.append("? %s%s\n" % ("\t" * common, btags))
|
yield "? %s%s\n" % ("\t" * common, btags)
|
||||||
|
|
||||||
# With respect to junk, an earlier version of ndiff simply refused to
|
# With respect to junk, an earlier version of ndiff simply refused to
|
||||||
# *start* a match with a junk element. The result was cases like this:
|
# *start* a match with a junk element. The result was cases like this:
|
||||||
|
@ -1050,7 +1045,7 @@ def ndiff(a, b, linejunk=IS_LINE_JUNK, charjunk=IS_CHARACTER_JUNK):
|
||||||
|
|
||||||
def restore(delta, which):
|
def restore(delta, which):
|
||||||
r"""
|
r"""
|
||||||
Return one of the two sequences that generated a delta.
|
Generate one of the two sequences that generated a delta.
|
||||||
|
|
||||||
Given a `delta` produced by `Differ.compare()` or `ndiff()`, extract
|
Given a `delta` produced by `Differ.compare()` or `ndiff()`, extract
|
||||||
lines originating from file 1 or 2 (parameter `which`), stripping off line
|
lines originating from file 1 or 2 (parameter `which`), stripping off line
|
||||||
|
@ -1060,6 +1055,7 @@ def restore(delta, which):
|
||||||
|
|
||||||
>>> diff = ndiff('one\ntwo\nthree\n'.splitlines(1),
|
>>> diff = ndiff('one\ntwo\nthree\n'.splitlines(1),
|
||||||
... 'ore\ntree\nemu\n'.splitlines(1))
|
... 'ore\ntree\nemu\n'.splitlines(1))
|
||||||
|
>>> diff = list(diff)
|
||||||
>>> print ''.join(restore(diff, 1)),
|
>>> print ''.join(restore(diff, 1)),
|
||||||
one
|
one
|
||||||
two
|
two
|
||||||
|
@ -1075,11 +1071,9 @@ def restore(delta, which):
|
||||||
raise ValueError, ('unknown delta choice (must be 1 or 2): %r'
|
raise ValueError, ('unknown delta choice (must be 1 or 2): %r'
|
||||||
% which)
|
% which)
|
||||||
prefixes = (" ", tag)
|
prefixes = (" ", tag)
|
||||||
results = []
|
|
||||||
for line in delta:
|
for line in delta:
|
||||||
if line[:2] in prefixes:
|
if line[:2] in prefixes:
|
||||||
results.append(line[2:])
|
yield line[2:]
|
||||||
return results
|
|
||||||
|
|
||||||
def _test():
|
def _test():
|
||||||
import doctest, difflib
|
import doctest, difflib
|
||||||
|
|
23
Misc/NEWS
23
Misc/NEWS
|
@ -30,7 +30,7 @@ Core
|
||||||
|
|
||||||
- In 2.2a3, __new__ would only see sequential arguments passed to the
|
- In 2.2a3, __new__ would only see sequential arguments passed to the
|
||||||
type in a constructor call; __init__ would see both sequential and
|
type in a constructor call; __init__ would see both sequential and
|
||||||
positional arguments. This made no sense whatsoever any more, so
|
keyword arguments. This made no sense whatsoever any more, so
|
||||||
now both __new__ and __init__ see all arguments.
|
now both __new__ and __init__ see all arguments.
|
||||||
|
|
||||||
- In 2.2a3, hash() applied to an instance of a subclass of str or unicode
|
- In 2.2a3, hash() applied to an instance of a subclass of str or unicode
|
||||||
|
@ -54,6 +54,10 @@ Core
|
||||||
|
|
||||||
Library
|
Library
|
||||||
|
|
||||||
|
- difflib.ndiff() and difflib.Differ.compare() are generators now. This
|
||||||
|
restores the ability of Tools/scripts/ndiff.py to start producing output
|
||||||
|
before the entire comparison is complete.
|
||||||
|
|
||||||
- StringIO.StringIO instances and cStringIO.StringIO instances support
|
- StringIO.StringIO instances and cStringIO.StringIO instances support
|
||||||
iteration just like file objects (i.e. their .readline() method is
|
iteration just like file objects (i.e. their .readline() method is
|
||||||
called for each iteration until it returns an empty string).
|
called for each iteration until it returns an empty string).
|
||||||
|
@ -124,10 +128,25 @@ New platforms
|
||||||
|
|
||||||
Tests
|
Tests
|
||||||
|
|
||||||
|
- The "classic" standard tests, which work by comparing stdout to
|
||||||
|
an expected-output file under Lib/test/output/, no longer stop at
|
||||||
|
the first mismatch. Instead the test is run to completion, and a
|
||||||
|
variant of ndiff-style comparison is used to report all differences.
|
||||||
|
This is much easier to understand than the previous style of reporting.
|
||||||
|
|
||||||
|
- The unittest-based standard tests now use regrtest's test_main()
|
||||||
|
convention, instead of running as a side-effect of merely being
|
||||||
|
imported. This allows these tests to be run in more natural and
|
||||||
|
flexible ways as unittests, outside the regrtest framework.
|
||||||
|
|
||||||
|
- regrtest.py is much better integrated with unittest and doctest now,
|
||||||
|
especially in regard to reporting errors.
|
||||||
|
|
||||||
Windows
|
Windows
|
||||||
|
|
||||||
- Large file support now also works for files > 4GB, on filesystems
|
- Large file support now also works for files > 4GB, on filesystems
|
||||||
that support it (NTFS under Windows 2000).
|
that support it (NTFS under Windows 2000). See "What's New in
|
||||||
|
Python 2.2a3" for more detail.
|
||||||
|
|
||||||
|
|
||||||
What's New in Python 2.2a3?
|
What's New in Python 2.2a3?
|
||||||
|
|
|
@ -73,9 +73,8 @@ def fcompare(f1name, f2name):
|
||||||
|
|
||||||
a = f1.readlines(); f1.close()
|
a = f1.readlines(); f1.close()
|
||||||
b = f2.readlines(); f2.close()
|
b = f2.readlines(); f2.close()
|
||||||
|
for line in difflib.ndiff(a, b):
|
||||||
diff = difflib.ndiff(a, b)
|
print line,
|
||||||
sys.stdout.writelines(diff)
|
|
||||||
|
|
||||||
return 1
|
return 1
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue