Return list of triples describing matching subsequences. Each triple is of the form (i, j, n), and means that a[i:i+n] == b[j:j+n]. The triples are monotonically increasing in i and in j. New in Python 2.5, it's also guaranteed that if (i, j, n) and (i', j', n') ar
(self)
| 420 | return Match(besti, bestj, bestsize) |
| 421 | |
| 422 | def get_matching_blocks(self): |
| 423 | """Return list of triples describing matching subsequences. |
| 424 | |
| 425 | Each triple is of the form (i, j, n), and means that |
| 426 | a[i:i+n] == b[j:j+n]. The triples are monotonically increasing in |
| 427 | i and in j. New in Python 2.5, it's also guaranteed that if |
| 428 | (i, j, n) and (i', j', n') are adjacent triples in the list, and |
| 429 | the second is not the last triple in the list, then i+n != i' or |
| 430 | j+n != j'. IOW, adjacent triples never describe adjacent equal |
| 431 | blocks. |
| 432 | |
| 433 | The last triple is a dummy, (len(a), len(b), 0), and is the only |
| 434 | triple with n==0. |
| 435 | |
| 436 | >>> s = SequenceMatcher(None, "abxcd", "abcd") |
| 437 | >>> list(s.get_matching_blocks()) |
| 438 | [Match(a=0, b=0, size=2), Match(a=3, b=2, size=2), Match(a=5, b=4, size=0)] |
| 439 | """ |
| 440 | |
| 441 | if self.matching_blocks is not None: |
| 442 | return self.matching_blocks |
| 443 | la, lb = len(self.a), len(self.b) |
| 444 | |
| 445 | # This is most naturally expressed as a recursive algorithm, but |
| 446 | # at least one user bumped into extreme use cases that exceeded |
| 447 | # the recursion limit on their box. So, now we maintain a list |
| 448 | # ('queue`) of blocks we still need to look at, and append partial |
| 449 | # results to `matching_blocks` in a loop; the matches are sorted |
| 450 | # at the end. |
| 451 | queue = [(0, la, 0, lb)] |
| 452 | matching_blocks = [] |
| 453 | while queue: |
| 454 | alo, ahi, blo, bhi = queue.pop() |
| 455 | i, j, k = x = self.find_longest_match(alo, ahi, blo, bhi) |
| 456 | # a[alo:i] vs b[blo:j] unknown |
| 457 | # a[i:i+k] same as b[j:j+k] |
| 458 | # a[i+k:ahi] vs b[j+k:bhi] unknown |
| 459 | if k: # if k is 0, there was no matching block |
| 460 | matching_blocks.append(x) |
| 461 | if alo < i and blo < j: |
| 462 | queue.append((alo, i, blo, j)) |
| 463 | if i+k < ahi and j+k < bhi: |
| 464 | queue.append((i+k, ahi, j+k, bhi)) |
| 465 | matching_blocks.sort() |
| 466 | |
| 467 | # It's possible that we have adjacent equal blocks in the |
| 468 | # matching_blocks list now. Starting with 2.5, this code was added |
| 469 | # to collapse them. |
| 470 | i1 = j1 = k1 = 0 |
| 471 | non_adjacent = [] |
| 472 | for i2, j2, k2 in matching_blocks: |
| 473 | # Is this block adjacent to i1, j1, k1? |
| 474 | if i1 + k1 == i2 and j1 + k1 == j2: |
| 475 | # Yes, so collapse them -- this just increases the length of |
| 476 | # the first block by the length of the second, and the first |
| 477 | # block so lengthened remains the block to compare against. |
| 478 | k1 += k2 |
| 479 | else: |