r""" When replacing one block of lines with another, search the blocks for *similar* lines; the best-matching pair (if any) is used as a synch point, and intraline difference marking is done on the similar pair. Lots of work, but often worth it. Example:
(self, a, alo, ahi, b, blo, bhi)
| 894 | yield from g |
| 895 | |
| 896 | def _fancy_replace(self, a, alo, ahi, b, blo, bhi): |
| 897 | r""" |
| 898 | When replacing one block of lines with another, search the blocks |
| 899 | for *similar* lines; the best-matching pair (if any) is used as a |
| 900 | synch point, and intraline difference marking is done on the |
| 901 | similar pair. Lots of work, but often worth it. |
| 902 | |
| 903 | Example: |
| 904 | |
| 905 | >>> d = Differ() |
| 906 | >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1, |
| 907 | ... ['abcdefGhijkl\n'], 0, 1) |
| 908 | >>> print(''.join(results), end="") |
| 909 | - abcDefghiJkl |
| 910 | ? ^ ^ ^ |
| 911 | + abcdefGhijkl |
| 912 | ? ^ ^ ^ |
| 913 | """ |
| 914 | # Don't synch up unless the lines have a similarity score above |
| 915 | # cutoff. Previously only the smallest pair was handled here, |
| 916 | # and if there are many pairs with the best ratio, recursion |
| 917 | # could grow very deep, and runtime cubic. See: |
| 918 | # https://github.com/python/cpython/issues/119105 |
| 919 | # |
| 920 | # Later, more pathological cases prompted removing recursion |
| 921 | # entirely. |
| 922 | cutoff = 0.74999 |
| 923 | cruncher = SequenceMatcher(self.charjunk) |
| 924 | crqr = cruncher.real_quick_ratio |
| 925 | cqr = cruncher.quick_ratio |
| 926 | cr = cruncher.ratio |
| 927 | |
| 928 | WINDOW = 10 |
| 929 | best_i = best_j = None |
| 930 | dump_i, dump_j = alo, blo # smallest indices not yet resolved |
| 931 | for j in range(blo, bhi): |
| 932 | cruncher.set_seq2(b[j]) |
| 933 | # Search the corresponding i's within WINDOW for rhe highest |
| 934 | # ratio greater than `cutoff`. |
| 935 | aequiv = alo + (j - blo) |
| 936 | arange = range(max(aequiv - WINDOW, dump_i), |
| 937 | min(aequiv + WINDOW + 1, ahi)) |
| 938 | if not arange: # likely exit if `a` is shorter than `b` |
| 939 | break |
| 940 | best_ratio = cutoff |
| 941 | for i in arange: |
| 942 | cruncher.set_seq1(a[i]) |
| 943 | # Ordering by cheapest to most expensive ratio is very |
| 944 | # valuable, most often getting out early. |
| 945 | if crqr() <= best_ratio or cqr() <= best_ratio: |
| 946 | continue |
| 947 | |
| 948 | ratio = cr() |
| 949 | if ratio > best_ratio: |
| 950 | best_i, best_j, best_ratio = i, j, ratio |
| 951 | |
| 952 | if best_i is None: |
| 953 | # found nothing to synch on yet - move to next j |
no test coverage detected