Skip to content

Commit 3209cbd

Browse files
authored
bpo-40394 - difflib.SequenceMatcher.find_longest_match default args (GH-19742)
* bpo-40394 - difflib.SequenceMatcher.find_longest_match default args. Added default args to find_longest_match, as well as related tests.
1 parent 6900f16 commit 3209cbd

File tree

5 files changed

+61
-4
lines changed

5 files changed

+61
-4
lines changed

Doc/library/difflib.rst

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -421,7 +421,7 @@ The :class:`SequenceMatcher` class has this constructor:
421421
is not changed.
422422

423423

424-
.. method:: find_longest_match(alo, ahi, blo, bhi)
424+
.. method:: find_longest_match(alo=0, ahi=None, blo=0, bhi=None)
425425

426426
Find longest matching block in ``a[alo:ahi]`` and ``b[blo:bhi]``.
427427

@@ -458,6 +458,9 @@ The :class:`SequenceMatcher` class has this constructor:
458458

459459
This method returns a :term:`named tuple` ``Match(a, b, size)``.
460460

461+
.. versionchanged:: 3.9
462+
Added default arguments.
463+
461464

462465
.. method:: get_matching_blocks()
463466

Lib/difflib.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -130,7 +130,7 @@ class SequenceMatcher:
130130
set_seq2(b)
131131
Set the second sequence to be compared.
132132
133-
find_longest_match(alo, ahi, blo, bhi)
133+
find_longest_match(alo=0, ahi=None, blo=0, bhi=None)
134134
Find longest matching block in a[alo:ahi] and b[blo:bhi].
135135
136136
get_matching_blocks()
@@ -334,9 +334,11 @@ def __chain_b(self):
334334
for elt in popular: # ditto; as fast for 1% deletion
335335
del b2j[elt]
336336

337-
def find_longest_match(self, alo, ahi, blo, bhi):
337+
def find_longest_match(self, alo=0, ahi=None, blo=0, bhi=None):
338338
"""Find longest matching block in a[alo:ahi] and b[blo:bhi].
339339
340+
By default it will find the longest match in the entirety of a and b.
341+
340342
If isjunk is not defined:
341343
342344
Return (i,j,k) such that a[i:i+k] is equal to b[j:j+k], where
@@ -391,6 +393,10 @@ def find_longest_match(self, alo, ahi, blo, bhi):
391393
# the unique 'b's and then matching the first two 'a's.
392394

393395
a, b, b2j, isbjunk = self.a, self.b, self.b2j, self.bjunk.__contains__
396+
if ahi is None:
397+
ahi = len(a)
398+
if bhi is None:
399+
bhi = len(b)
394400
besti, bestj, bestsize = alo, blo, 0
395401
# find longest junk-free match
396402
# during an iteration of the loop, j2len[j] = length of longest

Lib/test/test_difflib.py

Lines changed: 47 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -501,12 +501,58 @@ def test_is_character_junk_false(self):
501501
for char in ['a', '#', '\n', '\f', '\r', '\v']:
502502
self.assertFalse(difflib.IS_CHARACTER_JUNK(char), repr(char))
503503

504+
class TestFindLongest(unittest.TestCase):
505+
def longer_match_exists(self, a, b, n):
506+
return any(b_part in a for b_part in
507+
[b[i:i + n + 1] for i in range(0, len(b) - n - 1)])
508+
509+
def test_default_args(self):
510+
a = 'foo bar'
511+
b = 'foo baz bar'
512+
sm = difflib.SequenceMatcher(a=a, b=b)
513+
match = sm.find_longest_match()
514+
self.assertEqual(match.a, 0)
515+
self.assertEqual(match.b, 0)
516+
self.assertEqual(match.size, 6)
517+
self.assertEqual(a[match.a: match.a + match.size],
518+
b[match.b: match.b + match.size])
519+
self.assertFalse(self.longer_match_exists(a, b, match.size))
520+
521+
match = sm.find_longest_match(alo=2, blo=4)
522+
self.assertEqual(match.a, 3)
523+
self.assertEqual(match.b, 7)
524+
self.assertEqual(match.size, 4)
525+
self.assertEqual(a[match.a: match.a + match.size],
526+
b[match.b: match.b + match.size])
527+
self.assertFalse(self.longer_match_exists(a[2:], b[4:], match.size))
528+
529+
match = sm.find_longest_match(bhi=5, blo=1)
530+
self.assertEqual(match.a, 1)
531+
self.assertEqual(match.b, 1)
532+
self.assertEqual(match.size, 4)
533+
self.assertEqual(a[match.a: match.a + match.size],
534+
b[match.b: match.b + match.size])
535+
self.assertFalse(self.longer_match_exists(a, b[1:5], match.size))
536+
537+
def test_longest_match_with_popular_chars(self):
538+
a = 'dabcd'
539+
b = 'd'*100 + 'abc' + 'd'*100 # length over 200 so popular used
540+
sm = difflib.SequenceMatcher(a=a, b=b)
541+
match = sm.find_longest_match(0, len(a), 0, len(b))
542+
self.assertEqual(match.a, 0)
543+
self.assertEqual(match.b, 99)
544+
self.assertEqual(match.size, 5)
545+
self.assertEqual(a[match.a: match.a + match.size],
546+
b[match.b: match.b + match.size])
547+
self.assertFalse(self.longer_match_exists(a, b, match.size))
548+
549+
504550
def test_main():
505551
difflib.HtmlDiff._default_prefix = 0
506552
Doctests = doctest.DocTestSuite(difflib)
507553
run_unittest(
508554
TestWithAscii, TestAutojunk, TestSFpatches, TestSFbugs,
509-
TestOutputFormat, TestBytes, TestJunkAPIs, Doctests)
555+
TestOutputFormat, TestBytes, TestJunkAPIs, TestFindLongest, Doctests)
510556

511557
if __name__ == '__main__':
512558
test_main()

Misc/ACKS

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,7 @@ Dwayne Bailey
8888
Stig Bakken
8989
Aleksandr Balezin
9090
Greg Ball
91+
Lewis Ball
9192
Luigi Ballabio
9293
Thomas Ballinger
9394
Jeff Balogh
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Added default arguments to :meth:`difflib.SequenceMatcher.find_longest_match()`.

0 commit comments

Comments
 (0)